[八卦] 来个d语言的lex分析器

verystrange 2009-06-03
1.编译dlex: main.d是主文件,生成的可执行文件名设为dlex.exe
dlex.rar在这里下载:
http://verystrange.iteye.com/topics/download/5ea872bd-01f3-30f9-8bb9-a205060abeff
2.新建文件名称为test,内容如下:
=====================================
import std.stdio;
import std.stream;
/**
 * Test driver: runs the generated lexer over the file named by the first
 * command-line argument and prints one line per token.
 *
 * Params:
 *   args = command-line arguments; args[1] is the path of the file to scan.
 */
void main(char[][] args){
    // Without an input path there is nothing to scan; report usage instead
    // of failing on the out-of-range access to args[1].
    if (args.length < 2) {
        writefln("Usage: test <input-file>");
        return;
    }
    Stream file = new BufferedFile(args[1]);
    Yylex yy = new Yylex(file);
    Yytoken t;
    // Keep pulling tokens until yylex() returns null.
    while ((t = yy.yylex()) !is null){
        // Use a single explicit format string. With D1 writefln, a string
        // argument that is not consumed by a format specifier is itself
        // interpreted as a format string, so token text containing a
        // percent sign would otherwise corrupt or abort the output.
        writefln("Token #%s: %s (line %s)", t.m_index, t.m_text, t.m_line);
    }
}


/**
 * Helper routines used by the lexer actions: a runtime assertion and
 * error-message reporting.
 */
class Utility {
  /**
   * Throws when the given condition does not hold.
   *
   * Params:
   *   expr = condition that is expected to be true.
   * Throws: Error with the message "Error: Assertion failed." if expr is false.
   */
  public static void ASSERT
    (
     bool expr
     )
      {
        if (false == expr) {
          throw (new Error("Error: Assertion failed."));
        }
      }

  // Messages indexed by the E_* codes declared below.
  private const char[][] errorMsg = [
    "Error: Unmatched end-of-comment punctuation.",
    "Error: Unmatched start-of-comment punctuation.",
    "Error: Unclosed string.",
    "Error: Illegal character."
    ];

  public const int E_ENDCOMMENT = 0;
  public const int E_STARTCOMMENT = 1;
  public const int E_UNCLOSEDSTR = 2;
  public const int E_UNMATCHED = 3;

  /**
   * Prints the message that corresponds to an error code.
   *
   * Params:
   *   code = one of the E_* constants; used as an index into errorMsg.
   */
  public static void error
    (
     int code
     )
      {
        // Print only the message selected by code, not the whole table.
        writefln("%s", errorMsg[code]);
      }
}

/**
 * Value object describing one token returned by the lexer.
 */
class Yytoken {
  public int m_index;      // token code supplied by the matching rule
  public char[] m_text;    // private copy of the token text
  public int m_line;       // line value supplied by the matching rule
  public int m_charBegin;  // start offset supplied by the matching rule
  public int m_charEnd;    // end offset supplied by the matching rule

  /**
   * Stores the given values in the corresponding m_* fields.
   * The text is duplicated, so later changes to the caller's array do not
   * affect the stored token text.
   */
  this(int index, char[] text, int line, int charBegin, int charEnd)
  {
    this.m_index     = index;
    this.m_line      = line;
    this.m_charBegin = charBegin;
    this.m_charEnd   = charEnd;
    this.m_text      = text.dup;
  }
}

%%

%{
  // Current nesting depth of block comments: the comment rules increment it
  // on every comment opener and decrement it on every comment closer.
  private int comment_count = 0;
%}
%line
%char
%state COMMENT

ALPHA=[A-Za-z]
DIGIT=[0-9]
NONNEWLINE_WHITE_SPACE_CHAR=[\ \t\b]
WHITE_SPACE_CHAR=[\n\ \t\b\012\015]
STRING_TEXT=(\\\"|[^\n\"]|\\{WHITE_SPACE_CHAR}+\\)*
COMMENT_TEXT=([^/*\n]|[^*\n]"/"[^*\n]|[^/\n]"*"[^/\n]|"*"[^/\n]|"/"[^*\n])*


%%

<YYINITIAL> "," { return (new Yytoken(0,yytext(),yyline,yychar,yychar+1)); }
<YYINITIAL> ":" { return (new Yytoken(1,yytext(),yyline,yychar,yychar+1)); }
<YYINITIAL> ";" { return (new Yytoken(2,yytext(),yyline,yychar,yychar+1)); }
<YYINITIAL> "(" { return (new Yytoken(3,yytext(),yyline,yychar,yychar+1)); }
<YYINITIAL> ")" { return (new Yytoken(4,yytext(),yyline,yychar,yychar+1)); }
<YYINITIAL> "[" { return (new Yytoken(5,yytext(),yyline,yychar,yychar+1)); }
<YYINITIAL> "]" { return (new Yytoken(6,yytext(),yyline,yychar,yychar+1)); }
<YYINITIAL> "{" { return (new Yytoken(7,yytext(),yyline,yychar,yychar+1)); }
<YYINITIAL> "}" { return (new Yytoken(8,yytext(),yyline,yychar,yychar+1)); }
<YYINITIAL> "." { return (new Yytoken(9,yytext(),yyline,yychar,yychar+1)); }
<YYINITIAL> "+" { return (new Yytoken(10,yytext(),yyline,yychar,yychar+1)); }
<YYINITIAL> "-" { return (new Yytoken(11,yytext(),yyline,yychar,yychar+1)); }
<YYINITIAL> "*" { return (new Yytoken(12,yytext(),yyline,yychar,yychar+1)); }
<YYINITIAL> "/" { return (new Yytoken(13,yytext(),yyline,yychar,yychar+1)); }
<YYINITIAL> "=" { return (new Yytoken(14,yytext(),yyline,yychar,yychar+1)); }
<YYINITIAL> "<>" { return (new Yytoken(15,yytext(),yyline,yychar,yychar+2)); }
<YYINITIAL> "<"  { return (new Yytoken(16,yytext(),yyline,yychar,yychar+1)); }
<YYINITIAL> "<=" { return (new Yytoken(17,yytext(),yyline,yychar,yychar+2)); }
<YYINITIAL> ">"  { return (new Yytoken(18,yytext(),yyline,yychar,yychar+1)); }
<YYINITIAL> ">=" { return (new Yytoken(19,yytext(),yyline,yychar,yychar+2)); }
<YYINITIAL> "&"  { return (new Yytoken(20,yytext(),yyline,yychar,yychar+1)); }
<YYINITIAL> "|"  { return (new Yytoken(21,yytext(),yyline,yychar,yychar+1)); }
<YYINITIAL> ":=" { return (new Yytoken(22,yytext(),yyline,yychar,yychar+2)); }

<YYINITIAL> {NONNEWLINE_WHITE_SPACE_CHAR}+ { }

<YYINITIAL,COMMENT> \n { }
<YYINITIAL,COMMENT> \012 { }
<YYINITIAL,COMMENT> \015 { }
<YYINITIAL> "/*" { yybegin(COMMENT); comment_count = comment_count + 1; }

<COMMENT> "/*" { comment_count = comment_count + 1; }
<COMMENT> "*/" {
// One comment level has been closed.
comment_count = comment_count - 1;
// The depth must never become negative; ASSERT throws if it does.
Utility.ASSERT(comment_count >= 0);
// Return to normal scanning only when the outermost comment is closed.
if (comment_count == 0) {
    yybegin(YYINITIAL);
}
}
<COMMENT> {COMMENT_TEXT} { }

<YYINITIAL> \"{STRING_TEXT}\" {
// Drop the first and the last character of the match, i.e. the two quotes.
char[] str =  yytext[1..length - 1];

// Sanity check: exactly two characters were removed.
Utility.ASSERT(str.length == yytext.length - 2);
// Token code 40 carries the string contents without the quotes.
return (new Yytoken(40,str,yyline,yychar,yychar + str.length));
}
<YYINITIAL> \"{STRING_TEXT} {
// Only the opening quote was matched, so drop just the first character.
char[] str =  yytext[1..yytext.length];

// Report the unclosed string, then still hand a token back to the caller.
Utility.error(Utility.E_UNCLOSEDSTR);
// Sanity check: exactly one character was removed.
Utility.ASSERT(str.length == yytext.length - 1);
// Token code 41 marks a string that had no closing quote.
return (new Yytoken(41,str,yyline,yychar,yychar + str.length));
}
<YYINITIAL> {DIGIT}+ {
// Token code 42: a run of one or more digits; the end offset is the start
// offset plus the length of the matched text.
return (new Yytoken(42,yytext(),yyline,yychar,yychar + yytext.length));
}
<YYINITIAL> {ALPHA}({ALPHA}|{DIGIT}|_)* {
// Token code 43: a letter followed by any number of letters, digits or
// underscores.
return (new Yytoken(43,yytext(),yyline,yychar,yychar + yytext.length));
}
<YYINITIAL,COMMENT> . {
// Catch-all rule: any single character that no other rule matched.
// The matched text is passed as data to an explicit format string, because
// D1 writefln interprets a bare string argument as a format string and
// would fail when the illegal character is a percent sign.
writefln("Illegal character: <%s>", yytext());
Utility.error(Utility.E_UNMATCHED);
}
==========================================================
3.在cmd中运行dlex test生成test.d文件
4.编译test.d,生成test.exe文件
5.新建文件aaa.l,内容如下:
===========================================================
{ /* comment */ a := b & c; }
===========================================================
6.在cmd中运行test aaa.l,输出如下:
Token #7: { (line 0)
Token #43: a (line 0)
Token #22: := (line 0)
Token #43: b (line 0)
Token #20: & (line 0)
Token #43: c (line 0)
Token #2: ; (line 0)
Token #8: } (line 0)
7.说明:根据jlex改编,例子是根据jlex的sample改编.
另:我用的EditPlus编辑器保存的换行符似乎和\n不一样(可能是\r\n),所以对规则做了一点小小的改动(增加了对\015的匹配),使它可以正确解析用EditPlus编辑的文件.
Global site tag (gtag.js) - Google Analytics