(* ML-Lex specification for the Tiger lexer.

   Start states
     INITIAL  ordinary program text: keywords, identifiers, integers,
              punctuation and operators.
     COMMENT  inside a comment.  Comments nest; commentDepth holds the
              number of currently open comment openers.
     STRING   inside a string literal.  The decoded contents are collected
              in stringAcc, and stringBeg remembers the position of the
              opening quote.

   Every rule carries an explicit start-state prefix so that comment and
   string bodies are never tokenized as ordinary program text.

   Token positions are half-open: the first position is yypos of the first
   character of the lexeme and the second is one past its last character.

   ML-Lex accepts no comments in the definitions and rules sections, so the
   rule groups are documented here:
     - whitespace and newlines are skipped; every newline is recorded in
       lineNum and linePos, including newlines in comments and string gaps
     - keyword rules precede the identifier rule, so a keyword wins the tie
       when both match a lexeme of the same length
     - two-character operators beat their one-character prefixes through
       longest match
     - string escapes: \a \b \f \n \r \t \v \\ \" \^c \ddd, plus a gap made
       of a backslash, formatting characters and a closing backslash, which
       contributes nothing to the string
     - numeric escapes in ML-Lex patterns are decimal, so form feed inside
       a string gap is written \012 *)

type pos = int
type svalue = Tokens.svalue
type ('a,'b) token = ('a,'b) Tokens.token
type lexresult = (svalue,pos) token

val lineNum = ErrorMsg.lineNum
val linePos = ErrorMsg.linePos

(* Number of comment openers that are still waiting for their closer. *)
val commentDepth = ref 0

(* Decoded contents of the string literal currently being read. *)
val stringAcc = ref ""

(* Position of the opening quote of the current string literal. *)
val stringBeg = ref 0

(* True between the opening and the closing quote of a string literal. *)
val inString = ref false

fun err (p1, p2) = ErrorMsg.error p1

(* Convert a list of decimal digit characters to an int.  Backslashes are
   skipped so that the lexeme of a \ddd escape can be passed unchanged.
   Raises Overflow when the value does not fit in an int. *)
fun stol chars =
  foldl (fn (#"\\", acc) => acc
          | (c, acc) => acc * 10 + (ord c - ord #"0"))
        0 chars

(* Append s to the string literal being accumulated. *)
fun accumulateString s = stringAcc := !stringAcc ^ s

(* Record a newline found at source position p. *)
fun newLine p = (lineNum := !lineNum + 1; linePos := p :: !linePos)

(* Record every newline contained in text, whose first character sits at
   source position startPos. *)
fun noteNewlines (text, startPos) =
  let
    fun scan ([], _) = ()
      | scan (#"\n" :: rest, p) = (newLine p; scan (rest, p + 1))
      | scan (_ :: rest, p) = scan (rest, p + 1)
  in
    scan (explode text, startPos)
  end

(* Called by the generated lexer at end of input.  Reports a comment or a
   string literal that is still open, resets that bookkeeping, and returns
   the EOF token positioned at the most recently recorded line start. *)
fun eof () =
  let
    val pos = hd (!linePos)
  in
    if !commentDepth > 0
    then (ErrorMsg.error pos "unclosed comment at end of file";
          commentDepth := 0)
    else ();
    if !inString
    then (ErrorMsg.error (!stringBeg) "unclosed string literal at end of file";
          inString := false)
    else ();
    Tokens.EOF (pos, pos)
  end

%%
alnum=[A-Za-z0-9_];
digit=[0-9];
%s COMMENT;
%s STRING;
%header (functor TigerLexFun(structure Tokens : Tiger_TOKENS));
%%
<INITIAL>"\n" => (newLine yypos; continue());
<INITIAL>" " => (continue());
<INITIAL>"\t" => (continue());

<INITIAL>{digit}+ => (let
                        val value = stol (explode yytext)
                                    handle Overflow =>
                                      (ErrorMsg.error yypos ("integer literal too large: " ^ yytext); 0)
                      in
                        Tokens.INT (value, yypos, yypos + size yytext)
                      end);

<INITIAL>"," => (Tokens.COMMA(yypos, yypos+1));
<INITIAL>"type" => (Tokens.TYPE(yypos, yypos+4));
<INITIAL>"var" => (Tokens.VAR(yypos, yypos+3));
<INITIAL>"function" => (Tokens.FUNCTION(yypos, yypos+8));
<INITIAL>"break" => (Tokens.BREAK(yypos, yypos+5));
<INITIAL>"of" => (Tokens.OF(yypos, yypos+2));
<INITIAL>"end" => (Tokens.END(yypos, yypos+3));
<INITIAL>"in" => (Tokens.IN(yypos, yypos+2));
<INITIAL>"nil" => (Tokens.NIL(yypos, yypos+3));
<INITIAL>"let" => (Tokens.LET(yypos, yypos+3));
<INITIAL>"do" => (Tokens.DO(yypos, yypos+2));
<INITIAL>"to" => (Tokens.TO(yypos, yypos+2));
<INITIAL>"for" => (Tokens.FOR(yypos, yypos+3));
<INITIAL>"while" => (Tokens.WHILE(yypos, yypos+5));
<INITIAL>"else" => (Tokens.ELSE(yypos, yypos+4));
<INITIAL>"then" => (Tokens.THEN(yypos, yypos+4));
<INITIAL>"if" => (Tokens.IF(yypos, yypos+2));
<INITIAL>"array" => (Tokens.ARRAY(yypos, yypos+5));
<INITIAL>[A-Za-z]{alnum}* => (Tokens.ID(yytext, yypos, yypos + size yytext));

<INITIAL>"|" => (Tokens.OR(yypos, yypos+1));
<INITIAL>"&" => (Tokens.AND(yypos, yypos+1));
<INITIAL>"=" => (Tokens.EQ(yypos, yypos+1));
<INITIAL>":=" => (Tokens.ASSIGN(yypos, yypos+2));
<INITIAL>":" => (Tokens.COLON(yypos, yypos+1));
<INITIAL>"+" => (Tokens.PLUS(yypos, yypos+1));
<INITIAL>"-" => (Tokens.MINUS(yypos, yypos+1));
<INITIAL>"*" => (Tokens.TIMES(yypos, yypos+1));
<INITIAL>"/" => (Tokens.DIVIDE(yypos, yypos+1));
<INITIAL>"<>" => (Tokens.NEQ(yypos, yypos+2));
<INITIAL>">" => (Tokens.GT(yypos, yypos+1));
<INITIAL>"<" => (Tokens.LT(yypos, yypos+1));
<INITIAL>">=" => (Tokens.GE(yypos, yypos+2));
<INITIAL>"<=" => (Tokens.LE(yypos, yypos+2));
<INITIAL>"[" => (Tokens.LBRACK(yypos, yypos+1));
<INITIAL>"{" => (Tokens.LBRACE(yypos, yypos+1));
<INITIAL>"(" => (Tokens.LPAREN(yypos, yypos+1));
<INITIAL>")" => (Tokens.RPAREN(yypos, yypos+1));
<INITIAL>"}" => (Tokens.RBRACE(yypos, yypos+1));
<INITIAL>"]" => (Tokens.RBRACK(yypos, yypos+1));
<INITIAL>"." => (Tokens.DOT(yypos, yypos+1));
<INITIAL>";" => (Tokens.SEMICOLON(yypos, yypos+1));

<INITIAL>"/*" => (YYBEGIN COMMENT; commentDepth := 1; continue());
<COMMENT>"/*" => (commentDepth := !commentDepth + 1; continue());
<COMMENT>"*/" => (commentDepth := !commentDepth - 1;
                  if !commentDepth = 0 then YYBEGIN INITIAL else ();
                  continue());
<COMMENT>"\n" => (newLine yypos; continue());
<COMMENT>. => (continue());

<INITIAL>"\"" => (YYBEGIN STRING;
                  stringAcc := "";
                  stringBeg := yypos;
                  inString := true;
                  continue());
<STRING>"\"" => (YYBEGIN INITIAL;
                 inString := false;
                 Tokens.STRING(!stringAcc, !stringBeg, yypos + 1));
<STRING>"\\a" => (accumulateString "\a"; continue());
<STRING>"\\b" => (accumulateString "\b"; continue());
<STRING>"\\f" => (accumulateString "\f"; continue());
<STRING>"\\n" => (accumulateString "\n"; continue());
<STRING>"\\r" => (accumulateString "\r"; continue());
<STRING>"\\t" => (accumulateString "\t"; continue());
<STRING>"\\v" => (accumulateString "\v"; continue());
<STRING>"\\\\" => (accumulateString "\\"; continue());
<STRING>"\\\"" => (accumulateString "\""; continue());
<STRING>"\\^"[@-Z] => (accumulateString (str (chr (ord (String.sub (yytext, 2)) - 64)));
                       continue());
<STRING>"\\^"[a-z] => (accumulateString (str (chr (ord (String.sub (yytext, 2)) - 96)));
                       continue());
<STRING>\\{digit}{3} => (let
                           val code = stol (explode yytext)
                         in
                           if code <= 255
                           then accumulateString (str (chr code))
                           else ErrorMsg.error yypos ("illegal ASCII escape " ^ yytext)
                         end;
                         continue());
<STRING>\\(\n|\t|\ |\012)+\\ => (noteNewlines (yytext, yypos); continue());
<STRING>"\n" => (ErrorMsg.error yypos "illegal newline in string literal";
                 newLine yypos;
                 continue());
<STRING>. => (accumulateString yytext; continue());

<INITIAL>. => (ErrorMsg.error yypos ("illegal character " ^ yytext); continue());