randpup.git

act.c

espurr
/* randpup - ngram text generator kernel module
 * Copyright (C) 2025 ArcNyxx
 * see LICENCE.MIT file for licensing information */

#ifdef __KERNEL__
#include <asm-generic/errno.h>
#include <linux/types.h>
#else /* __KERNEL__ */
#include <errno.h>
#include <stdlib.h>
#endif /* __KERNEL__ */

#include "act.h"
#include "ngram.h"
#include "util.h"

/* pup_ngram - emit up to len characters of an ngram action into buf
 *
 * act: ngram action state: gen points at the tables to walk, left counts the
 *      characters still to emit, start records whether the opening entry has
 *      been chosen, ngram is the index of the current ngram table entry
 * buf: destination, only ever written through CC()
 * len: number of characters buf has room for
 *
 * returns the number of characters written. this function never returns an
 * error explicitly, yet pup_write() checks the result for -EFAULT, so CC()
 * presumably returns out of the enclosing function when a copy fails
 * (TODO confirm against util.h) */
static ssize_t
pup_ngram(pup_ngram_act_t *act, char *buf, size_t len)
{
	size_t ind = 0;

	const pup_ngram_t *tngram = act->gen->ngram;
	const pup_choice_t *tchoice = act->gen->choice;
	const pup_start_t *tstart = act->gen->start;

	/* nothing may be written: without this check the start branch below
	 * would store a character into a zero length buffer and push left
	 * past zero, which pup_write() never recovers from */
	if (len == 0 || act->left <= 0)
		return 0;

	if (!act->start) {
		if (act->gen->prob == 0) {
			/* table has no start entries, choose ngram at random */
			act->ngram = pup_rand(0, act->gen->len);
		} else {
			const int roll = pup_rand(0, act->gen->prob);

			/* take the first start entry whose prob exceeds the
			 * roll; presumably prob values are cumulative and the
			 * last one equals gen->prob, which is what bounds
			 * this scan (TODO confirm against table generator) */
			int start = 0;
			while (roll >= tstart[start].prob)
				++start;

			/* a start entry contributes one character itself */
			CC(buf + ind, tstart[start].ch);
			++ind;
			act->ngram = tstart[start].ngram;
			--act->left;
		}
		act->start = true;
	}

	while (ind < len && act->left > 0) {
		const pup_ngram_t *ngram = &tngram[act->ngram];

		CC(buf + ind, ngram->ch);
		++ind;
		--act->left;

		/* pick the successor: scan this entry's choices, beginning at
		 * ngram->choice, for the first whose prob exceeds the roll */
		const int roll = pup_rand(0, ngram->prob);
		int choice = ngram->choice;
		while (roll >= tchoice[choice].prob)
			++choice;
		act->ngram = tchoice[choice].next;
	}

	return ind;
}

/* selection tables used by pup_reset(); only declared here, the definitions
 * are not part of this file. each *_len is treated by pup_reset() as the
 * number of selectable entries in the array declared after it */

/* plain strings: pup_reset() reads str and len from an entry */
extern const int pup_strings_len;
extern const pup_string_t pup_strings[];
/* string plus repeated character: pup_reset() reads str, len, ch, and the
 * min/max it passes to pup_rand() to pick the repeat count */
extern const int pup_repeats_len;
extern const pup_string_repeat_t pup_repeats[];
/* ngram generators: pup_reset() reads gen, and the min/max it passes to
 * pup_rand() to pick how many characters to generate */
extern const int pup_gens_len;
extern const pup_gendef_t pup_gens[];

/* pup_reset - pick the next action sequence for st at random
 *
 * the strings, repeats, and gens tables are treated as one concatenated list
 * of sum entries. one entry is drawn, avoiding the entry drawn by the previous
 * call (st->last, -1 meaning no previous draw), and st->act / st->type /
 * st->len are filled in for it:
 *   string: one PUP_ACT_STRING action
 *   repeat: a PUP_ACT_STRING action followed by a PUP_ACT_REPEAT action
 *   gen:    one PUP_ACT_NGRAM action
 * st->ind is rewound to 0 and st->last records the draw.
 *
 * assumes pup_rand(lo, hi) excludes hi, as its use as an array index in
 * pup_ngram() implies (TODO confirm against util.h) */
void
pup_reset(pup_state_t *st)
{
	const int sum = pup_strings_len + pup_repeats_len + pup_gens_len;

	/* the previous entry can only be excluded when there is another entry
	 * to fall back on; this also keeps the range passed to pup_rand()
	 * from becoming empty when the tables hold a single entry */
	const int skip = st->last != -1 && sum > 1;

	/* draw from one fewer value, then shift every draw at or above the
	 * excluded entry up by one so the remaining entries stay equally
	 * likely. bumping only an exact match would give the following entry
	 * double weight and make the final entry all but unreachable */
	int rand = pup_rand(0, sum - skip);
	if (skip && rand >= st->last)
		++rand;

	st->ind = 0;
	st->last = rand;

	if (rand < pup_strings_len) {
		st->act[0].str = pup_strings[rand].str;
		st->act[0].len = pup_strings[rand].len;
		st->act[0].ind = 0;

		st->type[0] = PUP_ACT_STRING;

		st->len = 1;
		return;
	}
	/* rebase the draw onto the repeats table */
	rand -= pup_strings_len;

	if (rand < pup_repeats_len) {
		st->act[0].str = pup_repeats[rand].str;
		st->act[0].len = pup_repeats[rand].len;
		st->act[0].ind = 0;

		st->act[1].ch = pup_repeats[rand].ch;
		st->act[1].left = pup_rand(pup_repeats[rand].min,
				pup_repeats[rand].max);

		st->type[0] = PUP_ACT_STRING;
		st->type[1] = PUP_ACT_REPEAT;

		st->len = 2;
		return;
	}
	/* rebase the draw onto the gens table */
	rand -= pup_repeats_len;

	if (rand < pup_gens_len) {
		st->act[0].ngram.gen = pup_gens[rand].gen;
		st->act[0].ngram.left = pup_rand(pup_gens[rand].min,
				pup_gens[rand].max);
		st->act[0].ngram.start = false;

		st->type[0] = PUP_ACT_NGRAM;

		st->len = 1;
		return;
	}
	/* a draw past every table leaves act, type, and len as they were */
}

/* pup_write - fill buf with len characters of generated text
 *
 * st:  generator state; st->act[st->ind] is the action currently being
 *      emitted and st->len the number of actions in the current sequence
 * buf: destination, written only through CC() / CS() and pup_ngram()
 * len: number of characters to produce
 *
 * whenever the current sequence is exhausted a single ' ' is written and
 * pup_reset() chooses the next sequence. progress is kept in st, so a
 * sequence cut short by len continues on the next call.
 *
 * returns len once the buffer is full, or the negative value returned by
 * pup_ngram(). CC() and CS() are defined outside this file and presumably
 * return -EFAULT from this function themselves when a copy fails
 * (TODO confirm against util.h) */
ssize_t
pup_write(pup_state_t *st, char *buf, size_t len)
{
	const size_t save = len;
	while (len > 0) {
		if (st->ind == st->len) {
			/* sequence finished: separator, then a new sequence */
			CC(buf, ' ');
			++buf;
			--len;
			pup_reset(st);
		}

		while (st->ind < st->len && len > 0) {
			pup_act_t *act = &st->act[st->ind];
			switch (st->type[st->ind]) {
			case PUP_ACT_STRING: {
				/* copy as much of the rest of the string as
				 * fits */
				const size_t amt = MIN((size_t)act->len -
						act->ind, len);
				CS(buf, act->str + act->ind, amt);
				act->ind += amt;
				buf += amt;
				len -= amt;

				if (act->ind == act->len)
					++st->ind;
				break;
			}
			case PUP_ACT_REPEAT: {
				const size_t amt = MIN((size_t)act->left, len);
				for (size_t i = 0; i < amt; ++i, ++buf) {
					CC(buf, act->ch);
				}
				act->left -= amt;
				len -= amt;

				if (act->left == 0)
					++st->ind;
				break;
			}
			case PUP_ACT_NGRAM: {
				const size_t amt = MIN((size_t)act->ngram.left,
						len);
				const ssize_t got = pup_ngram(&act->ngram, buf,
						amt);
				/* hand any failure back, not only -EFAULT */
				if (got < 0)
					return got;
				/* advance by what was really written so buf
				 * and len can never run ahead of the data */
				buf += got;
				len -= (size_t)got;

				/* an action that is used up, or that can no
				 * longer make progress, must be retired or
				 * this loop would never terminate */
				if (got == 0 || act->ngram.left <= 0)
					++st->ind;
				break;
			}
			default:
				/* unknown action type: skip it. a bare break
				 * only leaves the switch, and with neither
				 * st->ind nor len changing the enclosing loop
				 * would spin forever */
				++st->ind;
				break;
			}
		}
	}
	return save;
}