# PLY-based lexer for a small language with keywords, identifiers, numbers,
# `..` ranges, comparison operators and C-style comments.
#
# NOTE: PLY uses the docstring of every `t_*` function as that token's regular
# expression, so the rules below are documented with comments instead of
# docstrings. PLY also tries function rules in definition order, so the order
# of the functions in this file is significant.

import ply.lex as lex

# Keyword spelling -> token type. Identifiers matching a key are re-typed
# by t_ID.
reserved = {
    "function": "FUNC",
    "program": "PROG",
    "while": "WHILE",
    "for": "FOR",
    "in": "IN",
    "if": "IF",
    "else": "ELSE",
    "int": "INT",
    "float": "FLOAT",
}

# 'mlcom' is an exclusive state entered between `/*` and `*/`; only rules
# declared for that state (or for ANY) are active while inside it.
states = (
    ('mlcom', "exclusive"),
)

tokens = [
    "ID",
    "NUM",
    "GT",
    "LT",
    "COMMENT",
    "OPENCOM",
    "CLOSECOM",
    "DDOT",
] + list(reserved.values())

# Single-character tokens returned as-is by PLY.
literals = ",=*+-;{}[]()"


# Identifier or keyword: the token type is looked up in `reserved`,
# falling back to "ID".
def t_ID(t):
    r"[a-zA-Z_][a-zA-Z_0-9]*"
    t.type = reserved.get(t.value, "ID")
    return t


# Integer or decimal literal; the token value is converted to int or float.
def t_NUM(t):
    r"\d+(\.\d+)?"
    if "." in t.value:
        t.value = float(t.value)
    else:
        t.value = int(t.value)
    return t


# Range operator `..`.
def t_DDOT(t):
    r"\.\."
    return t


def t_GT(t):
    r">"
    return t


def t_LT(t):
    r"<"
    return t


# `/*` in the INITIAL state: switch to the 'mlcom' state and emit OPENCOM.
def t_INITIAL_OPENCOM(t):
    r"\/\*"
    t.lexer.begin('mlcom')
    return t


# `*/` inside a multi-line comment: return to INITIAL and emit CLOSECOM.
# Must stay defined before t_mlcom_COMMENT so it is tried first.
def t_mlcom_CLOSECOM(t):
    r"\*\/"
    t.lexer.begin("INITIAL")
    return t


# Single-line `// ...` comment (INITIAL state only); discarded.
def t_COMMENT(t):
    r"\/\/.*"
    pass


# Body of a multi-line comment; discarded. Besides runs of non-`*`
# characters, a `*` that does not start the closing `*/` is also consumed,
# so text such as `/* a * b */` no longer reports an illegal character.
def t_mlcom_COMMENT(t):
    r"[^*]+|\*(?!\/)"
    pass


# Whitespace is skipped in every state. Newlines are ignored here too, so
# no rule in this file advances the lexer's line number.
t_ANY_ignore = " \t\n"


# Error handler for every state: report the offending character and skip it.
def t_ANY_error(t):
    print("Illegal character '%s'" % t.value[0])
    t.lexer.skip(1)


lexer = lex.lex()

# Tokenize the sample input; the context manager closes the file even if
# reading fails.
with open("example2", "r") as f:
    lexer.input(f.read())

for tok in lexer:
    print(tok)