#! /usr/bin/perl
# Copyright (c) 2007 Michael H. Burkhardt, All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
# 1. Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
# 2. Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
# 3. All advertising materials mentioning features or use of this software
#    must display the following acknowledgement:
#      This product includes software developed by Michael H. Burkhardt
# 4. The name of Michael H. Burkhardt may not be used to endorse or promote
#    products derived from this software without specific prior written
#    permission.
#
# THIS SOFTWARE IS PROVIDED BY MICHAEL H. BURKHARDT ``AS IS'' AND
# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
# ARE DISCLAIMED. IN NO EVENT SHALL MICHAEL H. BURKHARDT BE LIABLE
# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
# OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
# HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
# OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
# SUCH DAMAGE.
use strict;
use warnings;
use Getopt::Std;

#
# USAGE AND OPTION PARSING
#

# Help text printed for -h and after any option error.
# NOTE(review): the original usage text was not recoverable from the source;
# this wording is reconstructed from the option handling below -- confirm.
my $USAGE = <<"END_USAGE";
Usage: $0 [-o outfile] [-e] [-s] [-r length] [-z size] [-h]
  -o outfile  name of the file to write (default: outfile.txt)
  -e          build records from pseudo-English words instead of
              uniformly random letters and digits
  -s          suppress the spaces between pseudo-English words
  -r length   record length in characters (default: 80); a negative
              value produces variable-length records of 1 to |length|
              characters
  -z size     amount of output: a bare number is a record count, a
              number followed by b, k, m or g is a size in bytes,
              kilobytes, megabytes or gigabytes (default: 100k)
  -h          print this message and exit
END_USAGE

# Multiplier applied to the numeric part of -z for each unit suffix.
my %BYTE_MULT = (
    "b" => 1,
    "k" => 1024,
    "m" => 1048576,
    "g" => 1073741824,
);

# getopts() returns false when it meets an unknown switch or a switch that
# is missing its argument; treat that as a usage error instead of going on.
my %opt;
getopts( 'o:er:z:sh', \%opt ) or die $USAGE;
die $USAGE if ( $opt{'h'} );

# Options are tested for definedness, not truth, so that "0" is honoured
# (e.g. "-z 0" really means zero records rather than "use the default").
my $OUTFILE =
  ( defined $opt{'o'} && length $opt{'o'} ) ? $opt{'o'} : "outfile.txt";
my $PSEUDO_ENGLISH  = $opt{'e'};
my $SUPPRESS_SPACES = $opt{'s'};

# Exactly one of $MAX_RECORDS / $MAX_LENGTH is defined at any time; the
# default is a byte limit of 100 KiB.
my $MAX_RECORDS = undef;
my $MAX_LENGTH  = 102400;
my $OUT_UNITS   = '';
if ( defined $opt{'z'} ) {

    # A decimal number (digits with at most one decimal point) followed by
    # an optional unit letter.
    if ( $opt{'z'} =~ /^(\d+(?:\.\d*)?|\.\d+)([bkmg]?)$/i ) {
        my $max_out = $1;
        $OUT_UNITS = $2;
        if ($OUT_UNITS) {
            $MAX_LENGTH  = $max_out * $BYTE_MULT{ lc($OUT_UNITS) };
            $MAX_RECORDS = undef;
        }
        else {
            $MAX_RECORDS = $max_out;
            $MAX_LENGTH  = undef;
        }
    }
    else {
        printf( "Error in -z option ('%s' is not valid)\n", $opt{'z'} );
        die $USAGE;
    }
}

# $VARIABLE is true when -r was negative; $RECORD_LENGTH is then the
# maximum record length rather than the fixed one.
my $VARIABLE      = undef;
my $RECORD_LENGTH = 80;
if ( defined $opt{'r'} ) {
    if ( $opt{'r'} =~ /^-?\d+$/ ) {
        $VARIABLE      = ( $opt{'r'} < 0 );
        $RECORD_LENGTH = abs( $opt{'r'} );
    }
    else {
        printf( "Error in -r option ('%s' is not valid)\n", $opt{'r'} );
        die $USAGE;
    }
}

# Echo the effective settings.
printf( "OUTFILE: %s\n",         $OUTFILE );
printf( "PSEUDO_ENGLISH: %s\n",  ( $PSEUDO_ENGLISH  ? "YES" : "NO" ) );
printf( "SUPPRESS_SPACES: %s\n", ( $SUPPRESS_SPACES ? "YES" : "NO" ) );
printf( "MAX_LENGTH: %d\n",  $MAX_LENGTH )  if ($MAX_LENGTH);
printf( "MAX_RECORDS: %f\n", $MAX_RECORDS ) if ($MAX_RECORDS);
printf( "OUT_UNITS: %s\n",       $OUT_UNITS );
printf( "VARIABLE: %s\n",        ( $VARIABLE ? "YES" : "NO" ) );
printf( "RECORD_LENGTH: %d\n",   $RECORD_LENGTH );

#
# GLOBAL VARIABLES
#

# Pool of characters that record bodies are drawn from.
my $MEGASTRING =
  ( $PSEUDO_ENGLISH ? init_pseudo_english() : init_random() );
my $MEGALEN = length($MEGASTRING);

# Weighted pools for the first and last letter of a pseudo-English word;
# a letter that is repeated more often is picked more often.
my $WORD_START =
"ttttttttttttttttaaaaaaaaaaaaaaaaiiiiiiiissssssssoooooooccccccmmmmffffppppwwwwbdeghjklnqruvxyz";
my $WORD_END =
"eeeeeeeeeeeeeeeeeeessssssssssssssdddddddddtttttttttnnnnnnnnyyyyyyyrrrrrrrooooolllllffffabcghijkmpqsuvwxyz";
my $sowlen = length($WORD_START);
my $eowlen = length($WORD_END);

my $BYTE_COUNT   = 0;
my $RECORD_COUNT = 0;

#
# MAIN PROGRAM
#

open( my $out, '>', $OUTFILE ) or die "Cannot open ${OUTFILE}: $!\n";

# Keep writing whole records until the active limit is reached.  With a
# byte limit the last record is written in full, so the file may exceed
# the limit by up to one record.
while (( $MAX_LENGTH && ( $BYTE_COUNT < $MAX_LENGTH ) )
    || ( $MAX_RECORDS && ( $RECORD_COUNT < $MAX_RECORDS ) ) )
{
    my $string = generate_record() . "\n";
    $BYTE_COUNT += length($string);
    $RECORD_COUNT++;
    print {$out} $string or die "Cannot write to ${OUTFILE}: $!\n";
}

# Buffered write errors (e.g. disk full) only surface at close time.
close($out) or die "Cannot close ${OUTFILE}: $!\n";

#
# END OF MAIN
#

# Build the weighted character pool used for the interior of
# pseudo-English words: each letter a..z repeated according to its entry
# in @freq, followed by the ten digits.
# Returns the pool as a single string.
sub init_pseudo_english {
    my @alphabet = qw/ a b c d e f g h i j k l m n o p q r s t u v w x y z /;
    my @freq     = qw/ 82 15 28 43 127 22 20 61 70 2 8 40 24 67 75 19 1 60 63
      91 28 10 24 2 20 1 /;

    my $mega = '';
    for my $i ( 0 .. $#alphabet ) {
        $mega .= $alphabet[$i] x $freq[$i];
    }
    $mega .= join( '', 1, 2, 3, 4, 5, 6, 7, 8, 9, 0 );
    return $mega;
}

# Return the unweighted character pool: lower-case letters and digits.
sub init_random {
    return "abcdefghijklmnopqrstuvwxyz0123456789";
}

# Produce one record (without the trailing newline) in the mode chosen on
# the command line.  In variable-length mode the record is cut to a random
# length between 1 and $RECORD_LENGTH characters.
sub generate_record {
    my $string = (
          $PSEUDO_ENGLISH
        ? generate_record_pseudo_english()
        : generate_record_random()
    );
    if ($VARIABLE) {
        $string = substr( $string, 0, int( rand() * $RECORD_LENGTH ) + 1 );
    }
    return $string;
}

# Produce a record of exactly $RECORD_LENGTH characters made of
# pseudo-words.  Each word is one character from $WORD_START, zero to five
# characters from $MEGASTRING, and one character from $WORD_END; words are
# separated by a single space unless $SUPPRESS_SPACES is set.  The final
# word is cut off where the record length is reached.
sub generate_record_pseudo_english {
    my $line = "";
    while ( length($line) < $RECORD_LENGTH ) {
        if ( length($line) > 0 && !$SUPPRESS_SPACES ) {
            $line .= " ";
        }
        my $wordlen = int( rand() * 6 );
        $line .= substr( $WORD_START, int( rand() * $sowlen ), 1 );
        for ( 1 .. $wordlen ) {
            $line .= substr( $MEGASTRING, int( rand() * $MEGALEN ), 1 );
        }
        $line .= substr( $WORD_END, int( rand() * $eowlen ), 1 );
    }
    return substr( $line, 0, $RECORD_LENGTH );
}

# Produce a record of exactly $RECORD_LENGTH characters, each drawn
# independently from $MEGASTRING.
sub generate_record_random {
    my $string = "";
    for ( 1 .. $RECORD_LENGTH ) {
        $string .= substr( $MEGASTRING, int( rand() * $MEGALEN ), 1 );
    }
    return $string;
}