/* * $Id: text.c,v 1.6 2006/07/31 17:23:09 jms Exp $ * * Revision History * =================== * $Log: text.c,v $ * Revision 1.6 2006/07/31 17:23:09 jms * fix to parallelism problem * * Revision 1.5 2006/05/18 23:50:00 jms * commit text generation change with larger buffer * * Revision 1.4 2006/05/16 16:26:51 jms * remove calls to FAKE_V_STR * * Revision 1.3 2006/05/16 15:55:58 jms * first cut to Meikel * * Revision 1.2 2005/01/03 20:08:59 jms * change line terminations * * Revision 1.1.1.1 2004/11/24 23:31:47 jms * re-establish external server * * Revision 1.1.1.1 2003/08/07 17:58:34 jms * recreation after CVS crash * * Revision 1.2 2003/08/07 17:58:34 jms * Convery RNG to 64bit space as preparation for new large scale RNG * * Revision 1.1.1.1 2003/04/03 18:54:21 jms * initial checkin * * */ /* * text.c --- pseaudo text generator for use in DBGEN 2.0 * * Defined Routines: * dbg_text() -- select and translate a sentance form */ #ifdef TEXT_TEST #define DECLARER #endif /* TEST */ #include "config.h" #include #if (defined(_POSIX_)||!defined(WIN32)) /* Change for Windows NT */ #include #include #endif /* WIN32 */ #include /* */ #include #include #include #include #include #include #ifdef HP #include #endif #if (defined(WIN32)&&!defined(_POSIX_)) #include #pragma warning(disable:4201) #pragma warning(disable:4214) #pragma warning(disable:4514) #define WIN32_LEAN_AND_MEAN #define NOATOM #define NOGDICAPMASKS #define NOMETAFILE #define NOMINMAX #define NOMSG #define NOOPENFILE #define NORASTEROPS #define NOSCROLL #define NOSOUND #define NOSYSMETRICS #define NOTEXTMETRIC #define NOWH #define NOCOMM #define NOKANJI #define NOMCX #include #pragma warning(default:4201) #pragma warning(default:4214) #endif #define TEXT_POOL_SIZE (300 * 1024 * 1024) /* 300MiB */ #include "dss.h" #include "dsstypes.h" /* * txt_vp() -- * generate a verb phrase by * 1) selecting a verb phrase form * 2) parsing it to select parts of speech * 3) selecting appropriate words * 4) adding punctuation as required * * Returns: length of generated phrase * Called By: txt_sentence() * Calls: pick_str() */ static int txt_vp(char *dest, int sd) { char syntax[MAX_GRAMMAR_LEN + 1], *cptr, *parse_target; distribution *src; int i, res = 0; pick_str(&vp, sd, &syntax[0]); parse_target = syntax; while ((cptr = strtok(parse_target, " ")) != NULL) { src = NULL; switch(*cptr) { case 'D': src = &adverbs; break; case 'V': src = &verbs; break; case 'X': src = &auxillaries; break; } /* end of POS switch statement */ i = pick_str(src, sd, dest); i = (int)strlen(DIST_MEMBER(src, i)); dest += i; res += i; if (*(++cptr)) /* miscelaneous fillagree, like punctuation */ { dest += 1; res += 1; *dest = *cptr; } *dest = ' '; dest++; res++; parse_target = NULL; } /* end of while loop */ return(res); } /* * txt_np() -- * generate a noun phrase by * 1) selecting a noun phrase form * 2) parsing it to select parts of speech * 3) selecting appropriate words * 4) adding punctuation as required * * Returns: length of generated phrase * Called By: txt_sentence() * Calls: pick_str(), */ static int txt_np(char *dest, int sd) { char syntax[MAX_GRAMMAR_LEN + 1], *cptr, *parse_target; distribution *src; int i, res = 0; pick_str(&np, sd, &syntax[0]); parse_target = syntax; while ((cptr = strtok(parse_target, " ")) != NULL) { src = NULL; switch(*cptr) { case 'A': src = &articles; break; case 'J': src = &adjectives; break; case 'D': src = &adverbs; break; case 'N': src = &nouns; break; } /* end of POS switch statement */ i = pick_str(src, sd, dest); i = (int)strlen(DIST_MEMBER(src, i)); dest += i; res += i; if (*(++cptr)) /* miscelaneous fillagree, like punctuation */ { *dest = *cptr; dest += 1; res += 1; } *dest = ' '; dest++; res++; parse_target = NULL; } /* end of while loop */ return(res); } /* * txt_sentence() -- * generate a sentence by * 1) selecting a sentence form * 2) parsing it to select parts of speech or phrase types * 3) selecting appropriate words * 4) adding punctuation as required * * Returns: length of generated sentence * Called By: dbg_text() * Calls: pick_str(), txt_np(), txt_vp() */ static int txt_sentence(char *dest, int sd) { char syntax[MAX_GRAMMAR_LEN + 1], *cptr; int i, res = 0, len = 0; pick_str(&grammar, sd, syntax); cptr = syntax; next_token: /* I hate goto's, but can't seem to have parent and child use strtok() */ while (*cptr && *cptr == ' ') cptr++; if (*cptr == '\0') goto done; switch(*cptr) { case 'V': len = txt_vp(dest, sd); break; case 'N': len = txt_np(dest, sd); break; case 'P': i = pick_str(&prepositions, sd, dest); len = (int)strlen(DIST_MEMBER(&prepositions, i)); strcpy((dest + len), " the "); len += 5; len += txt_np(dest + len, sd); break; case 'T': i = pick_str(&terminators, sd, --dest); /*terminators should abut previous word */ len = (int)strlen(DIST_MEMBER(&terminators, i)); break; } /* end of POS switch statement */ dest += len; res += len; cptr++; if (*cptr && *cptr != ' ') /* miscelaneous fillagree, like punctuation */ { dest += 1; res += 1; *dest = *cptr; } goto next_token; done: *dest = '\0'; return(--res); } /* * dbg_text() -- * produce ELIZA-like text of random, bounded length, truncating the last * generated sentence as required */ void dbg_text(char *tgt, int min, int max, int sd) { DSS_HUGE hgLength = 0, hgOffset, wordlen = 0, s_len, needed; char sentence[MAX_SENT_LEN + 1], *cp; static char szTextPool[TEXT_POOL_SIZE + 1]; static int bInit = 0; int nLifeNoise = 0; if (!bInit) { cp = &szTextPool[0]; if (verbose > 0) fprintf(stderr, "\nPreloading text ... "); while (wordlen < TEXT_POOL_SIZE) { if ((verbose > 0) && (wordlen > nLifeNoise)) { nLifeNoise += 200000; fprintf(stderr, "%3.0f%%\b\b\b\b", (100.0 * wordlen)/TEXT_POOL_SIZE); } s_len = txt_sentence(sentence, 5); if ( s_len < 0) INTERNAL_ERROR("Bad sentence formation"); needed = TEXT_POOL_SIZE - wordlen; if (needed >= (s_len + 1)) /* need the entire sentence */ { strcpy(cp, sentence); cp += s_len; wordlen += s_len + 1; *(cp++) = ' '; } else /* chop the new sentence off to match the length target */ { sentence[needed] = '\0'; strcpy(cp, sentence); wordlen += needed; cp += needed; } } *cp = '\0'; bInit = 1; if (verbose > 0) fprintf(stderr, "\n"); } RANDOM(hgOffset, 0, TEXT_POOL_SIZE - max, sd); RANDOM(hgLength, min, max, sd); strncpy(&tgt[0], &szTextPool[hgOffset], (int)hgLength); tgt[hgLength] = '\0'; return; } #ifdef TEXT_TEST tdef tdefs[1] = { NULL }; distribution nouns, verbs, adjectives, adverbs, auxillaries, terminators, articles, prepositions, grammar, np, vp; main() { char prattle[401]; verbose = 1; read_dist (env_config (DIST_TAG, DIST_DFLT), "nouns", &nouns); read_dist (env_config (DIST_TAG, DIST_DFLT), "verbs", &verbs); read_dist (env_config (DIST_TAG, DIST_DFLT), "adjectives", &adjectives); read_dist (env_config (DIST_TAG, DIST_DFLT), "adverbs", &adverbs); read_dist (env_config (DIST_TAG, DIST_DFLT), "auxillaries", &auxillaries); read_dist (env_config (DIST_TAG, DIST_DFLT), "terminators", &terminators); read_dist (env_config (DIST_TAG, DIST_DFLT), "articles", &articles); read_dist (env_config (DIST_TAG, DIST_DFLT), "prepositions", &prepositions); read_dist (env_config (DIST_TAG, DIST_DFLT), "grammar", &grammar); read_dist (env_config (DIST_TAG, DIST_DFLT), "np", &np); read_dist (env_config (DIST_TAG, DIST_DFLT), "vp", &vp); while (1) { dbg_text(&prattle[0], 300, 400, 0); printf("<%s>\n", prattle); } return(0); } #endif /* TEST */