#!/usr/bin/perl -w
# see usage string below
#
# Reads one or more token description files and writes three generated
# files that share a common base name:
#   <base>.h    enum TokenFlag, enum TokenType, and extern declarations
#   <base>.cc   tokenNameTable[], tokenNameTableSize, tokenFlagTable[]
#   <base>.ids  one "id : name [spelling];" line per token
#
# Each non-blank, non-comment input line must have the form
#   Enumerator, "spelling", :flags
# where 'flags' is scanned for the letters
#   m  -> TF_MULTISPELL    (also suppresses the spelling in the .ids file)
#   n  -> TF_NONSEPARATOR
#   p  -> TF_CPLUSPLUS
# Blank lines and '//' comment lines are copied to all outputs, but only
# after the first token line has been seen.

use strict;
use warnings;

# Print the command-line help text to stdout.
sub usage {
  print(<<"EOF");
usage: $0 [-o outname] file.tok [extension.tok [...]]

This script processes a master token description and produces
several files:
  - a .h file with the enumeration listing all the tokens
  - a .cc file with a table of spellings, and table of flags
  - a .ids file with grammar token names, ids, and aliases

The filenames are named with the same base as the input .tok
file, or with whatever base is specified with the -o option.
EOF
}

# Open $path with the given three-argument open() mode and return the
# lexical handle; dies with the same message format the script has
# always used.  The three-argument form keeps characters in $path
# (leading '>', trailing '|', whitespace) from being read as a mode.
sub openOrDie {
  my ($mode, $path) = @_;
  open(my $fh, $mode, $path) or die("cannot open $path: $!\n");
  return $fh;
}

my $baseName;

# remember the full invocation; it is quoted in every generated header
my $myCommand = "$0 " . join(' ', @ARGV);

# ---- command line ----
while (@ARGV != 0 && $ARGV[0] =~ m/^-/) {
  my $arg = shift @ARGV;
  if ($arg eq "-o") {
    # "-o" as the last argument used to leave the base name undefined
    if (@ARGV == 0) {
      print("option -o requires an argument\n");
      usage();
      exit(2);
    }
    $baseName = shift @ARGV;
  }
  else {
    print("unknown option: $arg\n");
    usage();
    exit(2);
  }
}

if (@ARGV < 1) {
  usage();
  exit(0);
}

# default base name: first input file, minus its extension
if (!defined($baseName) || $baseName eq "") {
  $baseName = $ARGV[0];
  # strip the extension, but only within the last path component, so
  # that "./tokens" or "dir.d/tokens" are left intact
  $baseName =~ s|\.[^./]*$||;
}

# ---- open the output files ----
my $hFile   = openOrDie('>', "$baseName.h");
my $ccFile  = openOrDie('>', "$baseName.cc");
my $idsFile = openOrDie('>', "$baseName.ids");

# ---- write the preambles ----

# include-guard macro: upper-case the header name, then turn every
# character that is not valid in a C identifier into '_'
my $latch = "$baseName.h";
$latch =~ tr|a-z|A-Z|;
$latch =~ tr|A-Z0-9_|_|c;

print {$hFile} <<"EOF";
// $baseName.h
// do not edit; this file automatically generated by
//   $myCommand

#ifndef $latch
#define $latch

// token flags
enum TokenFlag {
  TF_MULTISPELL   = 0x01,    // token has multiple spellings
  TF_NONSEPARATOR = 0x02,    // token is a nonseparator
  TF_CPLUSPLUS    = 0x04,    // token is a keyword in C++, but an identifier in C

  ALL_TOKEN_FLAGS = 0x07     // bitwise OR of above
};

enum TokenType {
EOF

print {$ccFile} <<"EOF";
// $baseName.cc
// do not edit; this file automatically generated by
//   $myCommand

#include "$baseName.h"      // this module; defines TokenFlag

char const * const tokenNameTable[] = {
EOF

# The flag table is emitted into the .cc file after tokenNameTable[] is
# closed, so its lines are accumulated here while the input is read.
my @flagLines = ();

print {$idsFile} <<"EOF";
// $baseName.ids
// do not edit; this file automatically generated by
//   $myCommand

// form:
//   <id> : <name> [<alias>] ;

EOF

# ---- process the input file(s) ----
# All files named on the command line are treated as one concatenated
# stream; token ids keep counting across file boundaries.
my $nextId = 0;
foreach my $inName (@ARGV) {
  my $lineNum = 0;
  my $inFile = openOrDie('<', $inName);

  while (defined(my $line = <$inFile>)) {
    $lineNum++;

    # blank lines and comment lines are copied to the output verbatim,
    # once we've seen the first line which is neither
    if ($line =~ m|^\s*$| or $line =~ m|^\s*//|) {
      if ($nextId) {
        # indent unindented comments
        if ($line =~ m|^//|) {
          $line = " " . $line;
        }
        print {$hFile} $line;
        print {$ccFile} $line;
        push @flagLines, $line;
        print {$idsFile} $line;
      }
      next;
    }

    # parse the line:  Enumerator, "spelling", :flags
    chomp($line);
    my ($enumerator, $spelling, $flags) =
      ($line =~ m|^\s*([a-zA-Z_0-9]+),\s*(\"[^\"]*\")\s*,\s*:(.*)$|);
    if (!defined($flags)) {
      die("$inName:$lineNum: malformed line\n");
    }

    # parse the flags
    my $multiSpell = ($flags =~ m|m|);
    my $nonsep     = ($flags =~ m|n|);
    my $cpp        = ($flags =~ m|p|);

    # emit to each file
    print {$hFile} (" $enumerator,\n");
    printf {$ccFile} (" %-40s // $enumerator\n", "$spelling,");

    # "0" is always present so the joined expression is never empty
    my @f = ("0");
    if ($multiSpell) { push @f, "TF_MULTISPELL"; }
    if ($nonsep)     { push @f, "TF_NONSEPARATOR"; }
    if ($cpp)        { push @f, "TF_CPLUSPLUS"; }
    push @flagLines, sprintf(" %-40s // $enumerator\n", join(' | ', @f) . ",");

    # multi-spelling tokens get no spelling alias in the .ids file
    printf {$idsFile} (" %3d : %-30s %s;\n",
                       $nextId, $enumerator, ($multiSpell ? "" : $spelling));

    $nextId++;
  }

  close($inFile) or die("error closing $inName: $!\n");
}

# ---- print the epilogues ----
print {$hFile} <<"EOF";
 NUM_TOKEN_TYPES

};  // enum TokenType

// map TokenType to its spelling or description
extern char const * const tokenNameTable[];
extern int const tokenNameTableSize;

// map TokenType to a bitwise OR of TokenFlags
extern unsigned char tokenFlagTable[];

#endif // $latch
EOF

my $flagsText = join('', @flagLines);
print {$ccFile} <<"EOF";
};  // tokenNameTable[]

// this is provided to allow a consistency check between the generated
// .h file and generated .cc file
int const tokenNameTableSize =
  sizeof(tokenNameTable) / sizeof(tokenNameTable[0]);

unsigned char tokenFlagTable[] = {
$flagsText};
EOF

# the IDS file has no epilogue

# ---- close the files ----
# buffered write errors only surface at close, so report them by name
close($hFile)   or die("error closing $baseName.h: $!\n");
close($ccFile)  or die("error closing $baseName.cc: $!\n");
close($idsFile) or die("error closing $baseName.ids: $!\n");

exit(0);