急求用C++编写一个简单的词法分析器!

2024-12-30 04:31:05
推荐回答(1个)
回答1:

#include "globals.h"
#include "util.h"
#include "scan.h"

/* States of the scanner's finite automaton driven by get_token(). */
typedef enum {
    START,              /* no character of the next token consumed yet */
    INID,               /* inside a run of letters */
    INNUM,              /* inside a run of digits */
    INCOMMENT,          /* inside a comment body */
    INCOMMENT_ASTERISK, /* inside a comment, last character was '*' */
    INDIV,              /* saw '/': division or the start of a comment */
    INLT,               /* saw '<': may be followed by '=' */
    INGT,               /* saw '>': may be followed by '=' */
    INASSIGN,           /* saw '=': may be followed by '=' */
    INNOT,              /* saw '!': must be followed by '=' */
    DONE                /* a complete token has been recognised */
} StateType;

/* Lexeme of the token most recently scanned; get_token() fills it and
 * NUL-terminates it. It has external linkage, so it is presumably declared
 * in scan.h for other modules -- confirm against the header. */
char token_string[MAXTOKENLEN+1];

/* Capacity, in bytes, of the line buffer below. */
#define BUFLEN 256

/* Text most recently read from the source file by get_next_char(). */
static char linebuf[BUFLEN];
/* Index in linebuf of the next character to hand out. */
static int linepos = 0;
/* Number of characters currently held in linebuf (its strlen). */
static int bufsize = 0;
/* Set to 1 once fgets() has failed; from then on every read yields EOF. */
static int eof = 0;

/*
 * get_next_char - fetch the next character of the source file.
 *
 * Characters are served from linebuf; when the buffer is exhausted a new
 * chunk is read with fgets(), lineno is advanced and, if echo_source is
 * set, the chunk is echoed to the listing with its line number.
 *
 * Returns the character as a non-negative int (unsigned char range), or
 * EOF once the input is exhausted.  The return type is int rather than
 * char so that EOF can never collide with a real byte such as 0xFF and is
 * still recognisable on platforms where plain char is unsigned.
 *
 * NOTE: lineno is bumped on every refill, so a physical line longer than
 * the buffer is counted once per chunk, and the failed read at end of
 * file bumps it one final time.
 */
static int get_next_char(void)
{
    if (eof)
        return EOF;

    if (linepos >= bufsize) {
        lineno++;
        /* fgets() already reserves room for the terminating NUL, so the
         * whole buffer can be offered to it. */
        if (!fgets(linebuf, sizeof linebuf, source)) {
            eof = 1;
            return EOF;
        }
        if (echo_source)
            fprintf(listing, "%4d: %s", lineno, linebuf);
        bufsize = (int)strlen(linebuf);
        linepos = 0;
    }

    /* Widen through unsigned char so bytes >= 0x80 stay positive. */
    return (unsigned char)linebuf[linepos++];
}
/*
 * unget_char - push the most recently read character back so that the
 * next get_next_char() call returns it again.  Does nothing once end of
 * file has been reached.
 */
static void unget_char()
{
    if (eof)
        return;
    --linepos;
}

/*
 * Table of reserved words and the token each one maps to.
 * The spellings are string literals, so they are held through a
 * pointer-to-const, and the table itself is never modified.
 */
static const struct {
    const char *str;    /* keyword spelling */
    TokenType tok;      /* token returned for that spelling */
} reserved_words[MAXRESERVED] = {
    {"if", IF},     {"else", ELSE},
    {"int", INT},   {"return", RETURN},
    {"void", VOID}, {"while", WHILE}
};

static TokenType reserved_lokkup(const char *s)
{
int i;

for ( i = 0; i if(!strcmp(reserved_words[i].str, s))
return reserved_words[i].tok;
}
return ID;
}

TokenType get_token()
{
int index = 0;
TokenType cur_token;
StateType state = START;
int save;

while(state != DONE) {
char c = get_next_char();
save = TRUE;
switch(state) {
case START:
if(isdigit(c))
state = INNUM;
else if(isalpha(c))
state = INID;
else if(c == '/')
state = INDIV;
else if(c == '<')
state = INLT;
else if(c == '>')
state = INGT;
else if(c == '=')
state = INASSIGN;
else if(c == '!')
state = INNOT;
else if(isblank(c) || c == '\n')
save = FALSE;
else {
state = DONE;
switch(c) {
case EOF:
save = FALSE;
cur_token = ENDFILE;
break;
case '+': cur_token = PLUS; break;
case '-': cur_token = MINUS; break;
case '*':
cur_token = MUL;
if (get_next_char() == '/') {
cur_token = ERROR;
} else unget_char();
break;
case ';': cur_token = SEMI; break;
case ',': cur_token = COMMA; break;
case '(': cur_token = LPAREN; break;
case ')': cur_token = RPAREN; break;
case '[': cur_token = LBRACKET; break;
case ']': cur_token = RBRACKET; break;
case '{': cur_token = LBRACE; break;
case '}': cur_token = RBRACE; break;
default:
cur_token = ERROR;
break;
}
}
break;
case INCOMMENT:
save = FALSE;
if(c == '*')
state = INCOMMENT_ASTERISK;
else if(c == EOF) {
cur_token = ERROR;
state = DONE;
}
break;
case INCOMMENT_ASTERISK:
save = FALSE;
if(c == '/') {
state = START;
index = 0;
}else if(c == '*'){
}else if(c == EOF) {
cur_token = ERROR;
state = DONE;
}else{
state = INCOMMENT;
}
break;
case INID:
if(!isalpha(c)) {
unget_char();
state = DONE;
save = FALSE;
cur_token = ID;
}
break;
case INNUM:
if(!isdigit(c)) {
unget_char();
state = DONE;
save = FALSE;
cur_token = NUM;
}
break;
case INDIV:
if(c == '*') {
state = INCOMMENT;
save = FALSE;
} else {
unget_char();
save = FALSE;
state = DONE;
cur_token = DIV;
}
break;
case INLT:
if(c == '=') {
state = DONE;
cur_token = LE;
} else {
unget_char();
state = DONE;
save = FALSE;
cur_token = LT;
}
break;
case INGT:
if(c == '=') {
state = DONE;
cur_token = GE;
} else {
unget_char();
save = FALSE;
state = DONE;
cur_token = GT;
}
break;
case INASSIGN:
if(c == '=') {
state = DONE;
cur_token = EQ;
} else {
unget_char();
save = FALSE;
state = DONE;
cur_token = ASSIGN;
}
break;
case INNOT:
state = DONE;
if(c == '=') {
cur_token = NEQ;
} else {
unget_char();
save = FALSE;
cur_token = ERROR;
}
break;
case DONE:
default:
fprintf(listing, "Scanner Bug: state = %d\n", state);
state = DONE;
cur_token = ERROR;
break;
}
if(save && (index < MAXTOKENLEN))
token_string[index++] = c;
if(state == DONE) {
token_string[index] = '\0';
if(cur_token == ID)
cur_token = reserved_lokkup(token_string);
}
}
if(trace_scan) {
fprintf(listing, "\t%d: ", lineno);
print_token(cur_token, token_string);
}
return cur_token;
}