/*
 * shaney.cc
 * Major rewrite 3. 
 * 
 * SHANEY - Statistics-driven text generation tool. 
 * 
 * Copyright (c) 2003 by Wolfgang Wieser (wwieser@gmx.de) 
 * 
 * This file may be distributed and/or modified under the terms of the 
 * GNU General Public License version 2 as published by the Free Software 
 * Foundation. 
 * 
 * This file is provided AS IS with NO WARRANTY OF ANY KIND, INCLUDING THE
 * WARRANTY OF DESIGN, MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
 * 
 */

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <errno.h>
#include <unistd.h>
#include <fcntl.h>
#include <ctype.h>
#include <signal.h>
#include <assert.h>


// Name of the program as derived from arg[0]. Assigned at the start of 
// main() and used as prefix in the diagnostic messages. 
char *prg_name=NULL;


//------------------------------------------------------------------------------
// Standard (de)allocation with check. 
// Allocate size bytes using malloc(). 
// A request for zero bytes yields NULL. If malloc() fails, a message is 
// written to stderr and the program is aborted, hence callers never see 
// an allocation failure. 
static void *Malloc(size_t size)
{
	if(size==0)
	{  return(NULL);  }
	
	void *mem=malloc(size);
	if(mem==NULL)
	{
		fprintf(stderr,"%s: Allocation failure.\n",prg_name);
		abort();
	}
	return(mem);
}

// Release ptr (which may be NULL) and return NULL, so that the caller 
// can reset its pointer within the same statement. 
static inline void *Free(void *ptr)
{
	free(ptr);  // free(NULL) is a no-op
	return(NULL);
}

// Resize the block at ptr to size bytes. 
// A NULL ptr behaves like Malloc(size), a zero size like Free(ptr). 
// If realloc() fails, a message is written to stderr and the program 
// is aborted. 
static void *Realloc(void *ptr,size_t size)
{
	if(ptr==NULL)
	{  return(Malloc(size));  }
	if(size==0)
	{  return(Free(ptr));  }
	
	void *resized=realloc(ptr,size);
	if(resized==NULL)
	{
		fprintf(stderr,"%s: Allocation failure.\n",prg_name);
		abort();
	}
	return(resized);
}

//------------------------------------------------------------------------------

// Stop request flag: set by the signal handler, polled while writing 
// words. volatile sig_atomic_t is the integer type which may portably 
// be written from within a signal handler. 
volatile sig_atomic_t stop_signal=0;

// Signal handler (installed for SIGINT in main()): request a stop. 
void sig_handler(int)
{
	// Plain store instead of an increment: a read-modify-write is not 
	// guaranteed to be atomic and all readers only test for non-zero. 
	stop_signal=1;
}

//------------------------------------------------------------------------------
struct FollowWord;
struct HWord;

// Attached to HWord at hook pointer. 
struct FollowList
{
	// List of following words: 
	FollowWord *nextword;  // array[dimnext]
	int nnext;
	int dimnext;
	// Sum of all counters in the following words: 
	int sumcnt;
	
	// Tell the list that passed word follows this word. 
	// Returns list where to go on with next following word 
	// in this path. 
	inline FollowList *AddWord(HWord *word);
	// Randomly choose next FollowWord and return the index. 
	int ChooseWord(bool *had_choice);
	
	void *operator new(size_t size)  {  return(Malloc(size));  }
	void operator delete(void *ptr)  {  Free(ptr);  }
	FollowList()
		{  nextword=NULL; nnext=0; dimnext=0; sumcnt=0;  }
	~FollowList();
};


struct FollowWord
{
	HWord *word;   // word which follows
	int cnt;       // how often the word follows
	// List of following words: 
	FollowList follow;
	
	void *operator new[](size_t size)  {  return(Malloc(size));  }
	void operator delete[](void *ptr)  {  Free(ptr);  }
	// To call constructor manually: 
	void *operator new(size_t,void *ptr)  {  return(ptr);  }
	FollowWord() : follow()
		{  word=NULL; cnt=0;  }
	~FollowWord()
		{  word=NULL;  }
};

// A distinct word as stored in the WordHash, together with the root 
// of its follow tree. 
struct HWord
{
	HWord *next;       // next entry in the same hash chain
	char *word;        // copy of the word, '\0'-terminated
	int len;           // strlen(word)
	int occurances;    // how often the word occurs in the text
	// The words which come next after this one: 
	FollowList follow;
	
	// Stores a private copy of word_to_copy. 
	HWord(const char *word_to_copy);
	~HWord();
	
	// Allocation goes through the checked wrappers: 
	void *operator new(size_t size)  {  return(Malloc(size));  }
	void operator delete(void *ptr)  {  Free(ptr);  }
};


// Record that word follows at this position. 
// If the word is already known here, its counter is incremented, 
// otherwise a new entry with counter 1 is appended (growing the array 
// if necessary). In both cases sumcnt is incremented and the follow 
// list of the entry is returned. 
FollowList *FollowList::AddWord(HWord *word)
{
	// Linear search among the entries in use. 
	int idx=0;
	while(idx<nnext && nextword[idx].word!=word)
	{  ++idx;  }
	
	if(idx<nnext)
	{
		// Known follower: just count it. 
		nextword[idx].cnt+=1;
		sumcnt+=1;
		return(&nextword[idx].follow);
	}
	
	// Unknown follower; make room for a new entry if needed. 
	if(nnext>=dimnext)
	{
		const int first_new=dimnext;
		// Growth policy: 2, then doubling up to 32, then +32 steps. 
		if(dimnext==0)
		{  dimnext=2;  }
		else if(dimnext>=32)
		{  dimnext+=32;  }
		else
		{  dimnext*=2;  }
		nextword=(FollowWord*)Realloc(nextword,sizeof(FollowWord)*dimnext);
		// Realloc() hands out raw memory, so the constructor has to 
		// be run by hand on the newly gained elements. 
		for(int k=first_new; k<dimnext; k++)
		{  new(&nextword[k]) FollowWord();  }
	}
	
	FollowWord *slot=&nextword[nnext];
	slot->word=word;
	slot->cnt=1;
	++nnext;
	++sumcnt;
	return(&slot->follow);
}


// Randomly choose one of the following words; the probability of an 
// entry is proportional to its counter. Returns the index of the chosen 
// entry in nextword[]. *had_choice is set to false if there is exactly 
// one entry (no random number is drawn then) and to true otherwise. 
// The list must not be empty. 
int FollowList::ChooseWord(bool *had_choice)
{
	assert(nnext);
	
	const bool several=(nnext!=1);
	*had_choice=several;
	if(!several)
	{  return(0);  }
	
	// Draw a number in 0..sumcnt-1 and find the entry whose counter 
	// range contains it. 
	int remaining=rand()%sumcnt;
	int pick=0;
	while(pick<nnext)
	{
		remaining-=nextword[pick].cnt;
		if(remaining<0)
		{  return(pick);  }
		++pick;
	}
	
	// Only reached if sumcnt does not match the counters. 
	assert(0);
	return(-1);
}


// Destroy all allocated entries and release the array. 
FollowList::~FollowList()
{
	// The elements were constructed manually (placement new), so the 
	// destructor has to be called manually as well, for all dimnext 
	// allocated elements: 
	int k=0;
	while(k<dimnext)
	{
		nextword[k].~FollowWord();
		++k;
	}
	Free(nextword);
}


// Create a word entry holding a private copy of wtc. 
HWord::HWord(const char *wtc) : 
	follow()
{
	next=NULL;
	// Starting at zero is intended: WordHash::AddWord() increments 
	// the counter right after creating the entry. 
	occurances=0;
	
	len=strlen(wtc);
	word=(char*)Malloc(len+1);
	// Copy including the terminating '\0'. 
	memcpy(word,wtc,len+1);
}

// Release the copy of the word and invalidate the other fields. 
HWord::~HWord()
{
	Free(word);
	word=NULL;
	next=NULL;
	len=-1;
}


// Hash table (with chaining) holding one HWord per distinct word. 
class WordHash
{
	private:
		// Number of hash chains; use a prime value. 
		// Some samples: 101, 307, 503, 1009, 5003, 10007, 20011, 
		//               30011, 40009, 65521
		int hash_size;
		// Chain heads; array of hash_size-many pointers: 
		HWord **hash;
		
		// Total number of entries in the hash: 
		size_t nentries;
		// Sum of the occurance counters of all entries: 
		size_t sum_occurances;
		
		// Compute hash value (chain index) of the passed string: 
		int _dohash(const char *x) const;
		// Find word in the hash chain starting at head: 
		HWord *_dofind(HWord *head,const char *word) const;
	public:
		WordHash(int hash_size);
		~WordHash();
		
		// Query figures...
		size_t NEntries() const  {  return(nentries);  }
		size_t SumOccurances() const  {  return(sum_occurances);  }
		
		// Remove (and delete) all entries: 
		void Clear();
		
		// Find word in hash; returns NULL if it is not stored: 
		inline HWord *Find(const char *word) const
		{
			HWord *chain=hash[_dohash(word)];
			return(_dofind(chain,word));
		}
		
		// Count one occurance of word, creating its entry if needed; 
		// returns the entry: 
		HWord *AddWord(const char *word);
};


// Compute the chain index (0..hash_size-1) for the '\0'-terminated 
// string x. Hash function borrowed from Qt. 
int WordHash::_dohash(const char *x) const
{
	unsigned int value=0;
	const unsigned char *p=(const unsigned char*)x;
	while(*p)
	{
		value=(value<<4)+(unsigned int)(*p);
		// Fold the top 4 bits back into the low bits and clear them. 
		// (If they are all zero, both operations change nothing.) 
		const unsigned int top=value&0xf0000000U;
		value^=top>>24;
		value&=~top;
		++p;
	}
	return((int)(value%((unsigned int)hash_size)));
}


// Search the hash chain starting at head for an entry equal to word. 
// Returns the entry or NULL if there is none. 
HWord *WordHash::_dofind(HWord *head,const char *word) const
{
	const int wlen=strlen(word);
	HWord *cur=head;
	while(cur)
	{
		// The length comparison is a cheap pre-check before strcmp(). 
		if(cur->len==wlen && strcmp(cur->word,word)==0)
		{  return(cur);  }
		cur=cur->next;
	}
	return(NULL);
}


// Count one occurance of word. An entry is created (and put at the 
// head of its hash chain) if the word is not yet stored. Returns the 
// entry for the word. 
HWord *WordHash::AddWord(const char *word)
{
	const int slot=_dohash(word);
	HWord *entry=_dofind(hash[slot],word);
	if(entry==NULL)
	{
		// First time we see this word. 
		entry=new HWord(word);
		entry->next=hash[slot];
		hash[slot]=entry;
		nentries+=1;
	}
	entry->occurances+=1;
	sum_occurances+=1;
	return(entry);
}


void WordHash::Clear()
{
	for(int i=0; i<hash_size; i++)
	{
		for(;;)
		{
			HWord *w=hash[i];
			if(!w) break;
			hash[i]=hash[i]->next;
			sum_occurances-=w->occurances;
			delete w;
			--nentries;
		}
	}
	assert(nentries==0);
	assert(sum_occurances==0);
}


// Set up an empty hash with _size chains; a value <=0 selects the 
// default of 10007 chains. 
WordHash::WordHash(int _size)
{
	nentries=0;
	sum_occurances=0;
	
	hash_size=10007;
	if(_size>0)
	{  hash_size=_size;  }
	
	hash=(HWord**)Malloc(hash_size*sizeof(HWord*));
	for(int slot=hash_size-1; slot>=0; slot--)
	{  hash[slot]=NULL;  }
}

// Delete all entries, then the chain array itself. 
WordHash::~WordHash()
{
	Clear();
	Free(hash);
	hash=NULL;
}


//------------------------------------------------------------------------------

// Describes the whole text as a sequence of words. 
class HWordText
{
	private:
		// The text is the sequence text[0..nwords-1]; dimwords 
		// pointers are allocated. 
		// Note: A NULL word means "end". 
		// After ReadingDone() was called, the last word is 
		// ALWAYS NULL. 
		size_t nwords;  // including "NULL words"
		size_t dimwords;
		HWord **text;
		
		// Index of the most recently added NULL word: 
		size_t last_null_idx;
		// Length of the shortest text chunk without any NULL 
		// inside it. 
		size_t min_chunk_len;
	public:
		HWordText();
		~HWordText();
		
		// Get number of words (NULL words are counted, too): 
		size_t NWords() const  {  return(nwords);  }
		// Get word with passed index; NULL if out of range. 
		HWord *operator[](size_t i) const
		{
			if(i<nwords)
			{  return(text[i]);  }
			return(NULL);
		}
		
		// Get length of shortest chunk...
		size_t MinChunkLen() const  {  return(min_chunk_len);  }
		
		// Append a word. Use word=NULL to add a separating END. 
		void AddWord(HWord *word);
		
		// Tell the class that reading is now done and 
		// no more words are to come. 
		void ReadingDone();
		
		// Dump the text to the passed FILE: 
		void DumpText(FILE *out);
};


// Append word to the text; NULL appends an END marker and updates the 
// chunk length bookkeeping. 
void HWordText::AddWord(HWord *word)
{
	// Grow the pointer array if it is full: start with 1024 entries, 
	// double up to 32768, then grow in steps of 32768. 
	if(nwords>=dimwords)
	{
		if(dimwords==0)
		{  dimwords=1024;  }
		else if(dimwords>=32768)
		{  dimwords+=32768;  }
		else
		{  dimwords*=2;  }
		text=(HWord**)Realloc(text,sizeof(HWord*)*dimwords);
	}
	assert(nwords<dimwords);
	
	if(word==NULL)
	{
		// 0xffffffff is the "no END seen yet" value set up by the 
		// constructor. 
		if(min_chunk_len!=0xffffffff)
		{
			// Number of words since the previous END: 
			ssize_t chunk_len=nwords-last_null_idx-1;
			if(min_chunk_len>chunk_len)
			{  min_chunk_len=chunk_len;  }
		}
		else
		{
			// First END: the chunk is everything stored so far. 
			min_chunk_len=nwords;
		}
		last_null_idx=nwords;
	}
	
	text[nwords]=word;
	++nwords;
}


// Finish reading: make sure the text ends with an END (NULL) word and 
// trim the pointer array to the number of words stored. 
void HWordText::ReadingDone()
{
	const bool need_end=(nwords==0 || text[nwords-1]!=NULL);
	if(need_end)
	{  AddWord(NULL);  }
	
	if(dimwords==nwords)
	{  return;  }
	
	// Shrink the array to fit: 
	dimwords=nwords;
	text=(HWord**)Realloc(text,sizeof(HWord*)*dimwords);
}


void HWordText::DumpText(FILE *out)
{
	for(size_t i=0; i<nwords; i++)
	{
		if(text[i])
		{  fprintf(out,"%s ",text[i]->word);  }
		else
		{  fprintf(out,"<END>\n\n");  }
	}
}


// Set up an empty text. min_chunk_len starts with the special value 
// 0xffffffff which AddWord() takes as "no END word seen yet". 
HWordText::HWordText() : 
	nwords(0),
	dimwords(0),
	text(NULL),
	last_null_idx(0),
	min_chunk_len(0xffffffff)
{
}

// Release the pointer array. The words themselves are not deleted 
// here. 
HWordText::~HWordText()
{
	Free(text);
	text=NULL;
	dimwords=0;
	nwords=0;
}

//------------------------------------------------------------------------------

// Total number of bytes read from the input so far; increased in 
// ReadFile() and reported by Shaney::FeedDone(). 
size_t tot_bytes_read=0;

// The text generator: collects the input words, builds the follow 
// statistics and produces random text from them. 
class Shaney
{
	private:
		// Hash of all words: 
		WordHash hash;
		// The complete text: 
		HWordText text;
		
		// Number of already written words: 
		size_t nwords_written;
		// Current position in line: 
		int lpos;
		
		// Write one word to stdout; returns non-zero if text 
		// production shall stop (word limit or signal). 
		int _WriteWord(HWord *word,bool had_choice);
	public:
		Shaney(int hash_size);
		~Shaney();
		
		// User-tunable parameters: 
		int order;   // Must be >0. 
		// Word limit; 0 for unlimited. 
		size_t word_limit;
		// Number of columns on terminal; -1 -> assume \infty
		int columns;
		// The two flags are unsigned: a signed bit-field of width 1 
		// cannot represent the value 1 which gets assigned to them. 
		// Restart text production after END?
		unsigned int do_restart : 1;
		// Insert "> " into the text where there was a choice for 
		// the next word. 
		unsigned int mark_choice : 1;
		
		// Feed Shaney with the passed word. 
		// Use NULL for "end" / separation, see HWordText. 
		void Feed(const char *word);
		
		// Call this when reading is done: 
		void FeedDone();
		
		void DumpText(FILE *out)
			{  text.DumpText(out);  }
		
		// Must be called after feeding and before text production: 
		void ComputeStatistics();
		
		// Do it hehe...
		void ProduceText();
};


void Shaney::Feed(const char *word)
{
	if(word)
	{
		// Add to the hash: 
		HWord *hw=hash.AddWord(word);
		
		// Add to the text flow: 
		text.AddWord(hw);
		
		//fprintf(stderr,"Add >%s<\n",hw->word);
	}
	else
	{  text.AddWord(NULL);  }
}


// To be called when all input was fed: finishes the text and reports 
// the input size on stderr. 
void Shaney::FeedDone()
{
	text.ReadingDone();
	
	// All three values are of type size_t, hence %zu (using %u is 
	// undefined wherever size_t is wider than unsigned int). 
	fprintf(stderr,"Input: %zu words (%zu different) in %zu kb\n",
		hash.SumOccurances(),hash.NEntries(),(tot_bytes_read+512)/1024);
}


// Build the follow trees: for every sequence of order+1 consecutive 
// entries of the text (where only the last one may be the END marker) 
// the path is recorded starting at the follow list of the first word. 
// Chunks with less than order words do not contribute. 
void Shaney::ComputeStatistics()
{
	fprintf(stderr,"Creating word path/context tree for order %d.",order);
	
	// Number of valid entries in history[]. The array is filled from 
	// history[order] downwards, hence history[0] only holds a valid 
	// value once hist_filled>order. 
	int hist_filled=0;
	HWord *history[order+1];
	// history[0]: current word; [1] previous, ...
	for(size_t widx=0; widx<text.NWords(); widx++)
	{
		// If the previous entry was an END, a new chunk starts here. 
		// history[0] must only be looked at while the history is 
		// complete: before that it is either uninitialized or still 
		// holds the END of an earlier chunk, in which case the history 
		// would be reset for every word and nothing after the first 
		// END would ever be recorded. 
		if(hist_filled>order && !history[0])
		{  hist_filled=0;  }
		
		HWord *word=text[widx];
		if(!word)
		{
			// End word. 
			if(hist_filled<order)
			{
				// Not enough words in this block for only one 
				// single follow path. 
				hist_filled=0;
				continue;
			}
			if(hist_filled>order)
			{
				// Shift history: 
				for(int i=order; i>0; i--)
				{  history[i]=history[i-1];  }
			}
			else ++hist_filled;
			history[0]=word;
		}
		else
		{
			if(hist_filled<=order)
			{
				history[order-hist_filled]=word;
				++hist_filled;
			}
			else
			{
				// Shift history: 
				for(int i=order; i>0; i--)
				{  history[i]=history[i-1];  }
				history[0]=word;
			}
		}
		
		if(hist_filled<=order) continue;
		// hist_filled>order and no NULL entry other than history[0]. 
		
		// The current path is: history[order]...history[0]. 
		FollowList *flist=&history[order]->follow;
		for(int wi=order-1; wi>=0; wi--)
		{  flist=flist->AddWord(history[wi]);  }
		
		// Progress indicator: 
		if(!(widx%1000))
		{  fprintf(stderr,".");  }
	}
	fprintf(stderr,"OK\n");
}


// Write one word to stdout, preceded by a space or, if the line would 
// become longer than columns, by a newline. If had_choice is set and 
// marking is enabled, the word is prefixed by "> ". 
// Returns 1 if text production shall stop (word limit reached or stop 
// signal received), else 0. 
int Shaney::_WriteWord(HWord *word,bool had_choice)
{
	const bool mark=(had_choice && mark_choice);
	
	int len=strlen(word->word);
	if(mark)  len+=2;  // "> "
	
	// String literals are constant, hence const char *. 
	const char *begstr;
	// A negative columns value means unlimited line length; without 
	// the explicit check the comparison would hold for every word and 
	// put each word on a line of its own. 
	if(columns>=0 && lpos+len+(lpos ? 1 : 0)>columns)
	{
		begstr="\n";
		lpos=len;
	}
	else if(lpos)
	{
		begstr=" ";
		lpos+=len+1;
	}
	else
	{  begstr="";  lpos+=len;  }
	
	printf("%s%s%s",begstr,
		mark ? "> " : "",
		word->word);
	++nwords_written;
	
	if(word_limit && nwords_written>=word_limit)
	{  return(1);  }
	if(stop_signal)
	{  return(1);  }
	return(0);
}


// Produce random text on stdout, based on the follow trees built by 
// ComputeStatistics(). Production ends when _WriteWord() asks to stop 
// (word limit, signal) or when an END word is chosen and restarting is 
// not enabled. A summary is written to stderr at the end. 
// Nothing is produced if the shortest text chunk is shorter than order. 
void Shaney::ProduceText()
{
	if(size_t(order)>text.MinChunkLen())
	{
		// MinChunkLen() returns size_t, hence %zu. 
		fprintf(stderr,"Cannot produce text. Min chunk len %zu < order %d\n",
			text.MinChunkLen(),order);
		return;
	}
	
	// The last order+1 words; [0] is the oldest one. 
	HWord *history[order+1];
	nwords_written=0;
	for(;;)
	{
		restart:;
		// Randomly choose first word, but not NULL. 
		while(!(history[0]=text[rand()%text.NWords()]));
		if(_WriteWord(history[0],0))  goto breakall;
		// Choose any following word, descending in the follow tree 
		// of the first word: 
		FollowList *flist=&history[0]->follow;
		for(int wi=1; wi<=order; wi++)
		{
			//assert(flist->sumcnt);
			if(!flist->sumcnt)
			{
				// We chose a start word which has only a path 
				// to "end". And this path is too short. 
				// Try to restart. 
				goto restart;
			}
			bool had_choice;
			int ni=flist->ChooseWord(&had_choice);
			history[wi]=flist->nextword[ni].word;
			if(!history[wi])
			{
				// END word chosen. 
				if(do_restart)
				{  printf("\n");  lpos=0;  goto restart;  }
				goto breakall;
			}
			if(_WriteWord(history[wi],had_choice))  goto breakall;
			flist=&flist->nextword[ni].follow;
		}
		
		// Okay, full history. 
		for(;;)
		{
			// Rotate by 1: 
			for(int i=0; i<order; i++)
			{  history[i]=history[i+1];  }
			// Now, follow the path history[0]..history[order-1]
			// and choose randomly at end. 
			FollowList *flist=&history[0]->follow;
			for(int wi=1; wi<order; wi++)
			{
				// Find history[wi] in flist. 
				for(int i=0; i<flist->nnext; i++)
				{
					if(flist->nextword[i].word!=history[wi]) continue;
					flist=&flist->nextword[i].follow;
					goto found;
				}
				assert(0);  // internal error...
				found:;
			}
			bool had_choice;
			int ni=flist->ChooseWord(&had_choice);
			history[order]=flist->nextword[ni].word;
			if(!history[order])
			{
				// END word chosen. 
				if(do_restart)
				{  printf("\n");  lpos=0;  goto restart;  }
				goto breakall;
			}
			if(_WriteWord(history[order],had_choice))  goto breakall;
		}
	}
	breakall:;
	
	printf("\n");  fflush(stdout);
	// nwords_written is a size_t, hence %zu. 
	fprintf(stderr,"%s: Done; Written %zu words; stopped by %s.\n",
		prg_name,nwords_written,
		(word_limit && nwords_written>=word_limit) ? "word limit" : 
			stop_signal ? "signal" : "EOF");
}


// Set up an empty Shaney with default parameters: order 1, no word 
// limit, unlimited line length, no restart, no choice marks. 
// _hash_size is passed on to the word hash. 
Shaney::Shaney(int _hash_size) : 
	hash(_hash_size),
	text()
{
	nwords_written=0;
	order=1;
	word_limit=0;
	do_restart=0;
	// Must be initialized as well: it is read in _WriteWord() and 
	// only ever set if the user requests it. 
	mark_choice=0;
	columns=-1;
	lpos=0;
}

// Nothing to do here explicitly; the members hash and text have 
// destructors of their own. 
Shaney::~Shaney()
{
}

//------------------------------------------------------------------------------

// Read a file, split it into words at whitespace and feed the words 
// to the passed Shaney. 
// file: path of the file to read; NULL to read from stdin. 
// Returns the error count (0 on success). 
static int ReadFile(Shaney *sh,const char *file)
{
	// Name to use in messages (file is NULL for stdin and must not be 
	// passed to %s then): 
	const char *fname=file ? file : "<stdin>";
	
	int fd=file ? open(file,O_RDONLY) : 0;
	if(fd<0)
	{
		fprintf(stderr,"%s: failed to open \"%s\": %s\n",
			prg_name,fname,strerror(errno));
		return(1);
	}
	
	// Length of read buffer. No word may be longer than 
	// this length. If you have trouble with that, increase 
	// it or implement auto-grow. 
	const size_t buflen=4096;
	char buf[buflen];
	size_t bufuse=0;
	
	int nerr=0;
	int caught_eof=0;
	while(!caught_eof)
	{
		// Read as much as we can: 
		while(!caught_eof)
		{
			size_t want=buflen-bufuse;
			if(!want) break;
			ssize_t rd;
			do
			{  rd=read(fd,buf+bufuse,want);  }
			while(rd<0 && errno==EINTR);
			if(rd<0)
			{
				fprintf(stderr,"%s: while reading \"%s\": %s\n",
					prg_name,fname,strerror(errno));
				++nerr;
				goto breakout;
			}
			if(!rd)
			{
				caught_eof=1;
				// Terminate a last word which is not followed by 
				// whitespace; otherwise it would be dropped. 
				// There is room for the space because want>0 here. 
				buf[bufuse++]=' ';
				break;
			}
			bufuse+=rd;
			tot_bytes_read+=rd;
		}
		
		// Replace all whitespace by normal space. 
		// (The ctype functions need an unsigned char value.) 
		for(char *c=buf,*cend=buf+bufuse; c<cend; c++)
		{
			if(isspace((unsigned char)*c) || !*c || *c=='\b' || *c==127)
			{  *c=' ';  }
		}
		
		// Parse as many words as we can: 
		// First, do the input conversion like eliminating punctuation 
		// chars, etc. 
		// This is done by replacing all those by space ' '. 
		#warning "Not yet implemented."
		
		// Then, parse the words: 
		char *c=buf,*cend=buf+bufuse;
		// First, skip all spaces until we find the first word: 
		while(c<cend && *c==' ') ++c;
		if(c>=cend)
		{  bufuse=0;  continue;  }
		// Actually parse the words: 
		for(;;)
		{
			char *word=c;
			// Find end of word: 
			while(c<cend && *c!=' ') ++c;
			if(c>=cend)  // Reached end of buffer.
			{
				// This cannot happen at EOF (a space was appended), so 
				// the buffer is completely filled here. If the 
				// unfinished word starts at the beginning of the buffer 
				// there is no room left to read the rest of it. 
				if(word==buf)
				{
					fprintf(stderr,"%s: oops, word too long (>%zu chars)\n",
						prg_name,buflen);
					++nerr;
					goto breakout;
				}
				// Move the unfinished word to the beginning of the 
				// buffer and read on. 
				bufuse=c-word;
				memmove(buf,word,bufuse);
				break;
			}
			// '\0'-terminate the word.  
			*c='\0';
			
			// Feed shaney: 
			sh->Feed(word);
			
			// Skip all whitespace: 
			++c;  // Skip the '\0'. 
			while(c<cend && *c==' ')  ++c;
			if(c>=cend)
			{  bufuse=0;  break;  }
		}
	}
	breakout:;
	
	// Only close what was opened here; stdin is left alone. 
	if(file)
	{  close(fd);  }
	return(nerr);
}


// Write the usage text to stderr and exit with status 0. 
static void _PrintHelp()
{
	// The only conversion in the text is the %s for the program name 
	// in the first line. 
	static const char usage[]=
		"USAGE: %s [options] [files] ...\n"
		"options:\n"
		"   -h --help     print this\n"
		"      --version  print version information\n"
		"   -T            text dump after reading\n"
		"   -s  (no sep)  do not separate files using EOF words, see below\n"
		"   -r  (restart) restart text production after reaching EOF word\n"
		"   -m  (mark)    write \"> \" mark where there was a choice on how\n"
		"                 to continue with the text\n"
		"   -o=NUM  (order)  set order to NUM, see below\n"
		"   -S=NUM  (seed)   set random seed to NUM\n"
		"   -l=NUM  (limit)  do not write more than NUM words (0 -> unlimited)\n"
		"   -c=NUM  (column) format for NUM columns on terminal\n"
		"files...  files to be read in; use \"-\" for stdin\n"
		"\n"
		"Theory of operation: This program generates text based upon statistics\n"
		"derived from input files. The input files are read in and split into\n"
		"words by cutting the text when whitespace is encountered (hence, \"he\"\n"
		"and \"he.\" are different words). Then, a table is computed which\n"
		"describes which word comes how often after a context of N words. This\n"
		"parameter N is called the \"order\" and can be tuned using -o=N.\n"
		"When reading file end, an EOF word is stored which means that the\n"
		"generated text ends as soon as an EOF word is found. If you use option\n"
		"-s, the EOF word is only inserted at the end of the last file.\n"
		"\n"
		"SHANEY - (c) 2003 by Wolfgang Wieser\n"
		"This program may be copied under the terms of the GNU GPL version 2.\n";
	
	fprintf(stderr,usage,prg_name);
	exit(0);
}

// Write the version line to stderr and exit with status 0. 
static void _PrintVersion()
{
	static const char version[]="3.0";
	fprintf(stderr,"%s version %s\n",prg_name,version);
	exit(0);
}


// Program entry: parse the options, read all input files, build the 
// statistics and produce text. 
// Returns 0 on success and 1 on usage or read errors. 
int main(int argc,char **arg)
{
	// Use the part of arg[0] after the last '/' as program name. 
	// strrchr() returns NULL if there is no '/' at all (program found 
	// via PATH); the complete arg[0] is the name then. 
	prg_name=strrchr(arg[0],'/');
	if(prg_name) ++prg_name;
	else prg_name=arg[0];
	
	int errors=0;
	int hash_size=-1;  // use default
	int dump_text=0;
	int seed=0;
	int nosep=0;
	
	Shaney shaney(hash_size);
	
	// First pass over the args: options only. Everything not starting 
	// with '-' and the lone "-" are input files handled further below. 
	for(int i=1; i<argc; i++)
	{
		if(*arg[i]!='-') continue;
		if(arg[i][1]=='\0') continue;
		// Parse args: 
		if(!strcmp(arg[i],"--help"))
		{  _PrintHelp();  }
		else if(!strcmp(arg[i],"--version"))
		{  _PrintVersion();  }
		else if(!strncmp(arg[i],"-o=",3))
		{  shaney.order=atoi(arg[i]+3);  }
		else if(!strncmp(arg[i],"-S=",3))
		{  seed=atoi(arg[i]+3);  }
		else if(!strncmp(arg[i],"-c=",3))
		{  shaney.columns=atoi(arg[i]+3);  }
		else if(!strncmp(arg[i],"-l=",3))
		{  shaney.word_limit=strtoul(arg[i]+3,NULL,10);  }
		// Otherwise: a group of single-letter flags. 
		else for(char *c=arg[i]+1; *c; c++) switch(*c)
		{
			case 'T':  dump_text=1;  break;
			case 'h':  _PrintHelp();  break;
			case 's':  nosep=1;  break;
			case 'r':  shaney.do_restart=1;  break;
			case 'm':  shaney.mark_choice=1;  break;
			default:
				fprintf(stderr,"%s: Illegal option '%c' in arg \"%s\".\n",
					prg_name,*c,arg[i]);
				++errors;
		}
	}
	
	if(shaney.order<=0)
	{  fprintf(stderr,"Cannot use order<=0 (%d)\n",shaney.order);  ++errors;  }
	
	if(argc==1)
	{
		fprintf(stderr,"%s: No input files. Try %s --help.\n",
			prg_name,prg_name);
		return(1);
	}
	
	if(errors) return(1);
	
	// Read in files: 
	for(int i=1; i<argc; i++)
	{
		if(*arg[i]!='-')
		{  errors+=ReadFile(&shaney,arg[i]);  }
		else if(arg[i][1]=='\0')
		{  errors+=ReadFile(&shaney,NULL);  }  // <-- read stdin
		else continue;
		if(!nosep)
		{
			// Normally, we separate the files, because it 
			// makes no sense to assume that the last word in 
			// the first text is the predecessor of the first 
			// word in the next text. 
			shaney.Feed(NULL);
		}
	}
	
	if(errors) return(1);
	
	shaney.FeedDone();
	
	if(dump_text)
	{  shaney.DumpText(stdout);  }
	
	// Okay, now we have all the information we need. 
	// The complete text flow is stored in text. 
	// For minimum memory footprint, all the words were hashed in hash. 
	
	// Create the follow tables/trees for the specified order. 
	shaney.ComputeStatistics();
	
	srand(seed);
	
	// Catch SIGINT so that text production can be stopped cleanly 
	// (the handler only sets a flag). 
	struct sigaction sact;
	memset(&sact,0,sizeof(sact));
	sigemptyset(&sact.sa_mask);
	sact.sa_handler=&sig_handler;
	sact.sa_flags=0;
	sigaction(SIGINT,&sact,NULL);
	
	shaney.ProduceText();
	
	return(0);
}
