You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
 
 
 
 
 
 

386 lines
8.2 KiB

/*
* $Id: text.c,v 1.6 2006/07/31 17:23:09 jms Exp $
*
* Revision History
* ===================
* $Log: text.c,v $
* Revision 1.6 2006/07/31 17:23:09 jms
* fix to parallelism problem
*
* Revision 1.5 2006/05/18 23:50:00 jms
* commit text generation change with larger buffer
*
* Revision 1.4 2006/05/16 16:26:51 jms
* remove calls to FAKE_V_STR
*
* Revision 1.3 2006/05/16 15:55:58 jms
* first cut to Meikel
*
* Revision 1.2 2005/01/03 20:08:59 jms
* change line terminations
*
* Revision 1.1.1.1 2004/11/24 23:31:47 jms
* re-establish external server
*
* Revision 1.1.1.1 2003/08/07 17:58:34 jms
* recreation after CVS crash
*
* Revision 1.2 2003/08/07 17:58:34 jms
* Convert RNG to 64bit space as preparation for new large scale RNG
*
* Revision 1.1.1.1 2003/04/03 18:54:21 jms
* initial checkin
*
*
*/
/*
* text.c --- pseudo text generator for use in DBGEN 2.0
*
* Defined Routines:
* dbg_text() -- select and translate a sentence form
*/
#ifdef TEXT_TEST
#define DECLARER
#endif /* TEST */
#include "config.h"
#include <stdlib.h>
#if (defined(_POSIX_)||!defined(WIN32)) /* Change for Windows NT */
#include <unistd.h>
#include <sys/wait.h>
#endif /* WIN32 */
#include <stdio.h> /* */
#include <limits.h>
#include <math.h>
#include <ctype.h>
#include <signal.h>
#include <string.h>
#include <errno.h>
#ifdef HP
#include <strings.h>
#endif
#if (defined(WIN32)&&!defined(_POSIX_))
#include <process.h>
#pragma warning(disable:4201)
#pragma warning(disable:4214)
#pragma warning(disable:4514)
#define WIN32_LEAN_AND_MEAN
#define NOATOM
#define NOGDICAPMASKS
#define NOMETAFILE
#define NOMINMAX
#define NOMSG
#define NOOPENFILE
#define NORASTEROPS
#define NOSCROLL
#define NOSOUND
#define NOSYSMETRICS
#define NOTEXTMETRIC
#define NOWH
#define NOCOMM
#define NOKANJI
#define NOMCX
#include <windows.h>
#pragma warning(default:4201)
#pragma warning(default:4214)
#endif
#define TEXT_POOL_SIZE (300 * 1024 * 1024) /* 300MiB */
#include "dss.h"
#include "dsstypes.h"
/*
 * txt_vp() --
 *     generate a verb phrase by
 *     1) selecting a verb phrase form
 *     2) parsing it to select parts of speech
 *     3) selecting appropriate words
 *     4) adding punctuation as required
 *
 * dest: output buffer. Every selected word is followed by an optional
 *       punctuation character and then a single blank. No bounds
 *       checking is done here, so the caller must supply enough room.
 *       The last byte written by this function is a blank; the phrase
 *       is not NUL-terminated here.
 * sd:   passed through unchanged to pick_str().
 *
 * Returns: number of bytes accounted for in dest (words, punctuation
 *          and the blanks that follow them)
 * Called By: txt_sentence()
 * Calls: pick_str()
 *
 * Note: strtok() is used to split the form, so this routine must not be
 * called while the caller is itself in the middle of a strtok() scan.
 */
static int
txt_vp(char *dest, int sd)
{
    char syntax[MAX_GRAMMAR_LEN + 1],
        *cptr,
        *parse_target;
    distribution *src;
    int i,
        res = 0;

    pick_str(&vp, sd, &syntax[0]);
    parse_target = syntax;
    while ((cptr = strtok(parse_target, " ")) != NULL)
    {
        /* every later call continues the scan started on syntax */
        parse_target = NULL;

        /* the first character of the token names the part of speech */
        switch (*cptr)
        {
        case 'D':
            src = &adverbs;
            break;
        case 'V':
            src = &verbs;
            break;
        case 'X':
            src = &auxillaries;
            break;
        default:
            src = NULL;
            break;
        }   /* end of POS switch statement */

        /*
         * An unrecognised token has no word list to draw from; skip it
         * rather than hand a NULL distribution to pick_str().
         */
        if (src == NULL)
            continue;

        /*
         * pick_str() is relied on to place the chosen word at dest; its
         * return value is used with DIST_MEMBER() to find how long that
         * word is so dest can be moved past it.
         */
        i = pick_str(src, sd, dest);
        i = (int)strlen(DIST_MEMBER(src, i));
        dest += i;
        res += i;

        /*
         * A second character in the token is miscellaneous filigree,
         * like punctuation. Store it first and advance afterwards (the
         * same order txt_np() uses) so the blank written below follows
         * it instead of overwriting it.
         */
        if (*(++cptr))
        {
            *dest = *cptr;
            dest += 1;
            res += 1;
        }

        *dest = ' ';
        dest++;
        res++;
    }   /* end of while loop */

    return(res);
}
/*
 * txt_np() --
 *     generate a noun phrase by
 *     1) selecting a noun phrase form
 *     2) parsing it to select parts of speech
 *     3) selecting appropriate words
 *     4) adding punctuation as required
 *
 * dest: output buffer. Every selected word is followed by an optional
 *       punctuation character and then a single blank. No bounds
 *       checking is done here, so the caller must supply enough room.
 *       The last byte written by this function is a blank; the phrase
 *       is not NUL-terminated here.
 * sd:   passed through unchanged to pick_str().
 *
 * Returns: number of bytes accounted for in dest (words, punctuation
 *          and the blanks that follow them)
 * Called By: txt_sentence()
 * Calls: pick_str()
 *
 * Note: strtok() is used to split the form, so this routine must not be
 * called while the caller is itself in the middle of a strtok() scan.
 */
static int
txt_np(char *dest, int sd)
{
    char syntax[MAX_GRAMMAR_LEN + 1],
        *cptr,
        *parse_target;
    distribution *src;
    int i,
        res = 0;

    pick_str(&np, sd, &syntax[0]);
    parse_target = syntax;
    while ((cptr = strtok(parse_target, " ")) != NULL)
    {
        /* every later call continues the scan started on syntax */
        parse_target = NULL;

        /* the first character of the token names the part of speech */
        switch (*cptr)
        {
        case 'A':
            src = &articles;
            break;
        case 'J':
            src = &adjectives;
            break;
        case 'D':
            src = &adverbs;
            break;
        case 'N':
            src = &nouns;
            break;
        default:
            src = NULL;
            break;
        }   /* end of POS switch statement */

        /*
         * An unrecognised token has no word list to draw from; skip it
         * rather than hand a NULL distribution to pick_str().
         */
        if (src == NULL)
            continue;

        /*
         * pick_str() is relied on to place the chosen word at dest; its
         * return value is used with DIST_MEMBER() to find how long that
         * word is so dest can be moved past it.
         */
        i = pick_str(src, sd, dest);
        i = (int)strlen(DIST_MEMBER(src, i));
        dest += i;
        res += i;

        /* a second character in the token is miscellaneous filigree, like punctuation */
        if (*(++cptr))
        {
            *dest = *cptr;
            dest += 1;
            res += 1;
        }

        *dest = ' ';
        dest++;
        res++;
    }   /* end of while loop */

    return(res);
}
/*
 * txt_sentence() --
 *     generate a sentence by
 *     1) selecting a sentence form
 *     2) parsing it to select parts of speech or phrase types
 *     3) selecting appropriate words
 *     4) adding punctuation as required
 *
 * dest: output buffer; receives a NUL-terminated sentence. No bounds
 *       checking is done here, so the caller must supply enough room.
 * sd:   passed through unchanged to pick_str(), txt_np() and txt_vp().
 *
 * Returns: the running byte count minus one. For a form that ends in a
 *          terminator this is the length of the generated sentence,
 *          because the terminator is written over the blank left by the
 *          preceding phrase. An empty form yields -1.
 * Called By: dbg_text()
 * Calls: pick_str(), txt_np(), txt_vp()
 */
static int
txt_sentence(char *dest, int sd)
{
    char syntax[MAX_GRAMMAR_LEN + 1],
        *cptr;
    int i,
        res = 0,
        len;

    pick_str(&grammar, sd, syntax);
    cptr = syntax;

    /*
     * The form is walked by hand instead of with strtok(): txt_np() and
     * txt_vp() run their own strtok() scans, and strtok() can only keep
     * track of one scan at a time.
     */
    for (;;)
    {
        while (*cptr == ' ')
            cptr++;
        if (*cptr == '\0')
            break;

        /*
         * Start every token at zero so that a character with no case
         * below cannot reuse the length of the previous phrase.
         */
        len = 0;
        switch (*cptr)
        {
        case 'V':
            len = txt_vp(dest, sd);
            break;
        case 'N':
            len = txt_np(dest, sd);
            break;
        case 'P':
            /* preposition, then " the " (5 bytes), then a noun phrase */
            i = pick_str(&prepositions, sd, dest);
            len = (int)strlen(DIST_MEMBER(&prepositions, i));
            strcpy((dest + len), " the ");
            len += 5;
            len += txt_np(dest + len, sd);
            break;
        case 'T':
            /*
             * Terminators should abut the previous word, so step back
             * over the blank the preceding phrase left behind. When
             * nothing has been written yet there is no blank and
             * stepping back would leave the buffer; count one extra
             * byte instead so the final --res still matches the text.
             */
            if (res > 0)
                --dest;
            else
                res++;
            i = pick_str(&terminators, sd, dest);
            len = (int)strlen(DIST_MEMBER(&terminators, i));
            break;
        default:
            /* not a known part of speech: nothing is generated */
            break;
        }   /* end of POS switch statement */
        dest += len;
        res += len;
        cptr++;

        /*
         * Miscellaneous filigree, like punctuation, directly after the
         * token letter. Store it at the current position, then move
         * past it in both the output and the form so it is neither
         * skipped over nor read again as a token.
         */
        if (*cptr && *cptr != ' ')
        {
            *dest = *cptr;
            dest += 1;
            res += 1;
            cptr++;
        }
    }

    *dest = '\0';
    return(--res);
}
/*
 * dbg_text() --
 *     copy a randomly chosen slice of pre-generated text into tgt.
 *
 *     On the first call a static pool of TEXT_POOL_SIZE bytes is filled
 *     with sentences from txt_sentence(), each followed by one blank;
 *     the sentence that would overflow the pool is cut off so the pool
 *     is filled exactly. Later calls reuse the pool.
 *
 * tgt: receives the text followed by a terminating NUL. It must have
 *      room for the drawn length plus one byte (presumably up to
 *      max + 1 bytes -- confirm RANDOM()'s upper bound is inclusive).
 * min: lower bound handed to RANDOM() for the text length
 * max: upper bound handed to RANDOM() for the text length; also limits
 *      the starting offset so the slice stays inside the pool
 * sd:  passed to RANDOM() for both draws
 */
void
dbg_text(char *tgt, int min, int max, int sd)
{
    static char szTextPool[TEXT_POOL_SIZE + 1];
    static int bInit = 0;
    DSS_HUGE slice_len = 0;
    DSS_HUGE slice_start;

    if (!bInit)
    {
        char sentence[MAX_SENT_LEN + 1];
        char *fill = szTextPool;        /* next free byte in the pool */
        DSS_HUGE filled = 0;            /* bytes of the pool used so far */
        int next_report = 0;            /* fill level of the next progress report */

        if (verbose > 0)
            fprintf(stderr, "\nPreloading text ... ");

        while (filled < TEXT_POOL_SIZE)
        {
            DSS_HUGE sent_len;
            DSS_HUGE room;

            if ((verbose > 0) && (filled > next_report))
            {
                next_report += 200000;
                fprintf(stderr, "%3.0f%%\b\b\b\b", (100.0 * filled)/TEXT_POOL_SIZE);
            }

            /* pool sentences are always generated with the fixed value 5 */
            sent_len = txt_sentence(sentence, 5);
            if (sent_len < 0)
            {
                INTERNAL_ERROR("Bad sentence formation");
            }

            room = TEXT_POOL_SIZE - filled;
            if (room < (sent_len + 1))
            {
                /* chop the new sentence off to match the length target */
                sentence[room] = '\0';
                strcpy(fill, sentence);
                fill += room;
                filled += room;
            }
            else
            {
                /* the entire sentence fits, together with its trailing blank */
                strcpy(fill, sentence);
                fill += sent_len;
                *fill = ' ';
                fill++;
                filled += sent_len + 1;
            }
        }

        *fill = '\0';
        bInit = 1;
        if (verbose > 0)
            fprintf(stderr, "\n");
    }

    /* draw the offset first, then the length; both come from sd */
    RANDOM(slice_start, 0, TEXT_POOL_SIZE - max, sd);
    RANDOM(slice_len, min, max, sd);
    strncpy(tgt, szTextPool + slice_start, (int)slice_len);
    tgt[slice_len] = '\0';
}
#ifdef TEXT_TEST
/*
 * Stand-alone test driver, compiled only when TEXT_TEST is defined.
 * It defines the distributions the generator refers to, fills them with
 * read_dist(), and then prints generated text until interrupted.
 */
tdef tdefs[1] = { NULL };
distribution nouns,
    verbs,
    adjectives,
    adverbs,
    auxillaries,
    terminators,
    articles,
    prepositions,
    grammar,
    np,
    vp;

/*
 * main() --
 *     load every distribution, then loop forever printing one block of
 *     text between '<' and '>' per iteration. Declared with an explicit
 *     return type and parameter list; implicit int is not valid C99.
 */
int
main(void)
{
    char prattle[401];  /* 400 is the largest length requested below, plus the NUL */

    verbose = 1;
    read_dist (env_config (DIST_TAG, DIST_DFLT), "nouns", &nouns);
    read_dist (env_config (DIST_TAG, DIST_DFLT), "verbs", &verbs);
    read_dist (env_config (DIST_TAG, DIST_DFLT), "adjectives", &adjectives);
    read_dist (env_config (DIST_TAG, DIST_DFLT), "adverbs", &adverbs);
    read_dist (env_config (DIST_TAG, DIST_DFLT), "auxillaries", &auxillaries);
    read_dist (env_config (DIST_TAG, DIST_DFLT), "terminators", &terminators);
    read_dist (env_config (DIST_TAG, DIST_DFLT), "articles", &articles);
    read_dist (env_config (DIST_TAG, DIST_DFLT), "prepositions", &prepositions);
    read_dist (env_config (DIST_TAG, DIST_DFLT), "grammar", &grammar);
    read_dist (env_config (DIST_TAG, DIST_DFLT), "np", &np);
    read_dist (env_config (DIST_TAG, DIST_DFLT), "vp", &vp);

    /* there is no exit condition: the driver runs until it is killed */
    while (1)
    {
        dbg_text(&prattle[0], 300, 400, 0);
        printf("<%s>\n", prattle);
    }

    return(0);  /* never reached */
}
#endif /* TEXT_TEST */