测试文件:
运行结果:
单词种别码表:
源代码
lex.h
#ifndef LEX_H_INCLUDED
#define LEX_H_INCLUDED
#include <iostream>
#include <ctype.h>
#include <string>
#include <cstdio>
#include <stdlib.h>
using namespace std;
typedef struct /* 单词二元组的结构 */
{
int typenum; /* 种别码 */
string token; /* 自身字符串 */
} WORD;
WORD* scaner(FILE *fp); // 对文件扫描读取
int keyOrIdentifier(string token); // 判断是关键字还是标识符
#endif // LEX_H_INCLUDED
lex.cpp
#include "lex.h"
#define _KEY_WORD_END "waiting for your expanding" /* 定义关键字结束标志 */
string KEY_WORDS[]= {"main","return","if","else","for","include","printf","int","char",_KEY_WORD_END}; /*可扩充的关键字词组*/
int line_num=1;
/* 判断是标识符还是关键字*/
int keyOrIdentifier(string token)
{
int i=0;
while(KEY_WORDS[i]!=_KEY_WORD_END)
{
if(KEY_WORDS[i]==token)
{
return i+1;
}
i=i+1;
}
return 10;
}
WORD* scaner(FILE *fp)
{
char ch;
string token="";
WORD* myword=new WORD;
myword->typenum=0;
myword->token="";
ch=fgetc(fp);
/* 判断回车 */
if(int(ch)==10)
{
line_num++;
return (myword);
}
/* 判断空格 */
else if(isspace(ch))
{
return (myword);
}
/* 标识符及关键字 */
else if(isalpha(ch)) //如果首字符是字母
{
while(isalpha(ch)||isdigit(ch))
{
token=token+ch;
ch=fgetc(fp);
}
fseek( fp, -1, SEEK_CUR ); //当获取的字符既不是字母也不是数字,则指针依然指向该字符
myword->typenum=keyOrIdentifier(token); // 判断获取的字符串是关键字还是标识符,获取它对应的类型
myword->token=token;
return(myword);
}
/* 数字 */
else if(isdigit(ch))
{
while(isdigit(ch))
{
token=token+ch;
ch=fgetc(fp);
}
fseek( fp, 0, SEEK_CUR );
myword->typenum=20;
myword->token=token;
return(myword);
}
else
switch(ch)
{
/* 运算符 */
case '+':
ch=fgetc(fp);
if (ch=='+')
{
myword->typenum=16;
myword->token="++";
return(myword);
}
fseek( fp, 0, SEEK_CUR );
myword->typenum=12;
myword->token="+";
return(myword);
break;
case '-':
ch=fgetc(fp);
if (ch=='-')
{
myword->typenum=17;
myword->token="--";
return(myword);
}
fseek( fp, 0, SEEK_CUR );
myword->typenum=13;
myword->token="-";
return(myword);
break;
case '*':
myword->typenum=14;
myword->token="*";
return(myword);
break;
/* 除号和注释 */
case '/':
ch=fgetc(fp);
if (ch=='/')
{
ch=fgetc(fp);
while(ch!=10&&ch!=EOF) // 发现注释符号,则一直读取,直到遇到回车为止
{
ch=fgetc(fp);
}
if(int(ch)==10)
{
line_num++;
}
}
fseek( fp, 0, SEEK_CUR );
myword->typenum=15;
myword->token="/";
return(myword);
break;
case '(':
myword->typenum=21;
myword->token="(";
return(myword);
break;
case '=':
ch=fgetc(fp);
if (ch=='=')
{
myword->typenum=18;
myword->token="==";
return(myword);
}
fseek( fp, 0, SEEK_CUR );
myword->typenum=20;
myword->token="=";
return(myword);
break;
case '!':
ch=fgetc(fp);
if (ch=='=')
{
myword->typenum=19;
myword->token="!=";
return(myword);
}
fseek( fp, 0, SEEK_CUR );
myword->typenum=-1;
myword->token="ERROR";
return(myword);
break;
case '>':
ch=fgetc(fp);
if (ch=='=')
{
myword->typenum=37;
myword->token=">=";
return(myword);
}
fseek( fp, 0, SEEK_CUR );
myword->typenum=35;
myword->token=">";
return(myword);
break;
case '<':
ch=fgetc(fp);
if (ch=='=')
{
myword->typenum=38;
myword->token="<=";
return(myword);
}
fseek( fp, 0, SEEK_CUR );
myword->typenum=36;
myword->token="<";
return(myword);
break;
/* 界符 */
case ')':
myword->typenum=22;
myword->token=")";
return(myword);
break;
case '[':
myword->typenum=23;
myword->token="[";
return(myword);
break;
case ']':
myword->typenum=24;
myword->token="]";
return(myword);
break;
case '{':
myword->typenum=25;
myword->token="{";
return(myword);
break;
case '}':
myword->typenum=26;
myword->token="}";
return(myword);
break;
case ',':
myword->typenum=27;
myword->token=",";
return(myword);
break;
case ':':
myword->typenum=28;
myword->token=":";
return(myword);
break;
case ';':
myword->typenum=29;
myword->token=";";
return(myword);
break;
case '"':
myword->typenum=34;
myword->token="\"";
return(myword);
break;
/* 结束符*/
case EOF:
myword->typenum=1000;
myword->token="OVER";
return(myword);
break;
/* 错误*/
default:
myword->typenum=-1;
myword->token="ERROR";
cout<<"第"<<line_num<<"行发生了错误!"<<endl;
return(myword);
}
}
main.cpp
/*
输入:所给文法的源程序字符串
输出:二元组(syn,token 或 sum)构成的序列
其中,syn为单词种别码;token为存放的单词自身字符串;sum为整型常数
*/
#include "lex.h"
int main()
{
WORD* oneword=new WORD;
FILE *fp;
int over=1; //如果词法解析出现错误,则结束读取
if( (fp=fopen("input.txt","rt")) == NULL ){
cout<<"Cannot open file, press any key to exit!"<<endl;
}
while(over<1000&&over!=-1)
{
oneword=scaner(fp);
if(oneword->typenum<1000&&oneword->typenum>0)
cout<<"("<<oneword->typenum<<","<<oneword->token<<")"<<endl;
over=oneword->typenum;
}
fclose(fp);
system("pause");
return 0;
}