79 lines
1.2 KiB
Python
79 lines
1.2 KiB
Python
import ply.lex as lex
|
|
|
|
|
|
# Keyword spelling -> token type.  t_ID looks identifiers up here, and the
# values are appended to `tokens`, so the order of the pairs is significant.
reserved = dict(
    [
        ("function", "FUNC"),
        ("program", "PROG"),
        ("while", "WHILE"),
        ("for", "FOR"),
        ("in", "IN"),
        ("if", "IF"),
        ("else", "ELSE"),
        ("int", "INT"),
        ("float", "FLOAT"),
    ]
)
|
|
|
|
# Extra lexer state entered between "/*" and "*/".  It is declared
# "exclusive", and the rules prefixed with t_mlcom_ / t_ANY_ are the ones
# written for it in this file.
states = (("mlcom", "exclusive"),)
|
|
|
|
# Every token type the lexer may produce: the hand-written rule names first,
# followed by one type per reserved word.
tokens = [
    "ID",
    "NUM",
    "GT",
    "LT",
    "COMMENT",
    "OPENCOM",
    "CLOSECOM",
    "DDOT",
] + list(reserved.values())

# Single characters handed back as-is (token type == the character itself).
literals = ",=*+-;{}[]()"
|
|
|
|
|
|
# Identifier / keyword rule.  The docstring below is the regex PLY matches
# with, so it must stay the first statement of the function.
def t_ID(t):
    r"[a-zA-Z_][a-zA-Z_0-9]*"
    # Reserved words share the identifier pattern; re-tag them with their own
    # token type and fall back to plain "ID" for everything else.
    token_type = reserved.get(t.value, "ID")
    t.type = token_type
    return t
|
|
|
|
# Numeric literal rule: digits with an optional fractional part.
# The docstring is the regex PLY matches with.
def t_NUM(t):
    r"\d+(\.\d+)?"
    text = t.value
    # A decimal point means the literal is a float; otherwise it is an int.
    t.value = float(text) if "." in text else int(text)
    return t
|
|
# ".." token.  The docstring is the regex PLY matches with; the token is
# returned unchanged.
def t_DDOT(t):
    r"\.\."
    return t
|
|
|
|
# ">" token.  The docstring is the regex PLY matches with; the token is
# returned unchanged.
def t_GT(t):
    r">"
    return t
|
|
|
|
# "<" token.  The docstring is the regex PLY matches with; the token is
# returned unchanged.
def t_LT(t):
    r"<"
    return t
|
|
|
|
# "/*" seen in the INITIAL state: switch the lexer into the 'mlcom' state and
# still hand the opener back as an OPENCOM token.
# The docstring is the regex PLY matches with.
def t_INITIAL_OPENCOM(t):
    r"\/\*"
    active_lexer = t.lexer
    active_lexer.begin('mlcom')
    return t
|
|
|
|
# "*/" seen in the 'mlcom' state: switch the lexer back to INITIAL and hand
# the terminator back as a CLOSECOM token.
# The docstring is the regex PLY matches with.
def t_mlcom_CLOSECOM(t):
    r"\*\/"
    active_lexer = t.lexer
    active_lexer.begin("INITIAL")
    return t
|
|
|
|
# Single-line "// ..." comment.  The docstring is the regex PLY matches with.
def t_COMMENT(t):
    r"\/\/.*"
    # Nothing is returned, so the matched text is dropped rather than
    # becoming a token.
    return None
|
|
|
|
# Body of a "/* ... */" comment (only active in the 'mlcom' state).
# The docstring is the regex PLY matches with.  It accepts either
#   * a run of characters that are not '*', or
#   * a run of '*' that is NOT immediately followed by '/'.
# The second alternative is the fix: with "[^*]+" alone, a '*' inside the
# comment (e.g. "/* a * b */") was not consumed by this rule and fell through
# to PLY's literal/error handling instead of being discarded.  The negative
# lookahead leaves the final "*/" for t_mlcom_CLOSECOM.
def t_mlcom_COMMENT(t):
    r"[^*]+|\*+(?!/)"
    # Nothing is returned, so comment text never becomes a token.
    return None
|
|
|
|
# Characters skipped between tokens in every state (the ANY prefix): space,
# tab and newline.  NOTE(review): newlines are skipped here and no rule in
# this file updates t.lexer.lineno, so token line numbers are presumably not
# tracked - confirm this is intended.
t_ANY_ignore = " \t\n"
|
|
|
|
# Error handler shared by every state (the ANY prefix): report the first
# character of the unmatched input, then advance past it so lexing continues.
def t_ANY_error(t):
    offending = t.value[0]
    print("Illegal character '%s'" % offending)
    t.lexer.skip(1)
|
|
|
|
# Build the lexer from the rules defined in this module.
lexer = lex.lex()

# Feed the sample program to the lexer.  The context manager closes the file
# handle once its contents have been read (the handle used to be left open).
with open("example2", "r") as f:
    lexer.input(f.read())

# Dump every token the lexer produces, one per line.
for tok in lexer:
    print(tok)
|