实验一 词法分析器+
要求:代码的高级功能
更多的关键字(运算符)
需要编写keywords.txt
更多的常数(科学计数法 浮点数 字符串常量)
需要重写analyzer
更多的功能(过滤无效字符、数值转换、宏展开、预包含处理)
需要重写analyzer
还有
出错位置没有行数
需要修改loadInput()逻辑
使其每读入一行,就进行语法分析处理
并且需要row行数属性来配合
下面实现C语言的词法分析
keywords.txt
除了32个关键字,还有用户预编译单词
有可能仍有遗漏
1 include
2 define
3 ifdef
4 ifndef
5 auto
6 break
7 case
8 char
9 const
10 continue
11 default
12 do
13 double
14 else
15 enum
16 extern
17 float
18 for
19 goto
20 if
21 int
22 long
23 register
24 return
25 short
26 signed
27 sizeof
28 static
29 struct
30 switch
31 typedef
32 union
33 unsigned
34 void
35 volatile
36 while
37 identifierList
38 constantList
operators.txt
新建一个运算符表
并加入34种运算符
注意:
虽然有重复的但是他们的功能不一样
需要根据上下文环境确定具体功能
1 #
2 $
3 ‘
4 “
5 \
6 {
7 }
8 ;
9 (
10 )
11 [
12 ]
13 ->
14 .
15 !
16 ~
17 ++
18 --
19 +
20 -
21 *
22 &
23 *
24 /
25 %
26 +
27 -
28 <<
29 >>
30 <
31 <=
32 >
33 >=
34 ==
35 !=
36 &
37 ^
38 |
39 &&
40 ||
41 ?
42 :
43 =
44 +=
45 -=
46 *=
47 /=
48 %=
49 <<=
50 >>=
51 &=
52 ^=
53 |=
54 ,
测试操作符表
package s1;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileReader;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
public class SeniorLexicalAnalyzer extends AbstractLexicalAnalyzer{
public List<Symbol> operatorList=new ArrayList<>();//运算符表
//创建关键字表 根据表3.1实现
{
File file = new File("operators.txt");
if (file.exists()) {
System.out.println("operators.txt文件存在");
System.out.println("读取文件,加载运算符表");
loadOperatorList();
} else {
System.out.println("operators.txt文件不存在");
System.out.println("采用默认的运算符表");
initOperatorList();
}
}
//读取operators.txt文件的内容加载到operatorList中
public void loadOperatorList() {
try {
BufferedReader reader = new BufferedReader(new FileReader("operators.txt"));
String line;
while ((line = reader.readLine()) != null) {
String[] arr = line.split("\\s+");
operatorList.add(new Symbol(Integer.parseInt(arr[0]), arr[1]));
}
reader.close();
} catch (IOException e) {
e.printStackTrace();
}
}
//默认的运算符表
public void initOperatorList(){
keywordList.add(new Symbol(1,"="));
keywordList.add(new Symbol(2,"+"));
keywordList.add(new Symbol(3,"*"));
keywordList.add(new Symbol(4,"**"));
keywordList.add(new Symbol(5,","));
keywordList.add(new Symbol(6,"("));
keywordList.add(new Symbol(7,")"));
}
//重写 状态转移图实现
@Override
public void analyzer() {
}
//测试词法分析器
private void testAnalyzers(){
}
//重写出错处理,跳过1下,继续分析
@Override
public void procError(){
System.out.println("词法分析出错"+"\n"+"出错位置为"+position);
System.out.println("跳过错误,继续分析");
position++;
if(ch!=EOF){
ch=' ';
}
}
//测试关键词表的赋值
private void testKeywordList(){
for (Symbol s:keywordList) {
System.out.println(s);
}
}
//测试运算符表的赋值
private void testOperatorList(){
for (Symbol s:operatorList) {
System.out.println(s);
}
}
public static void main(String[] args) {
SeniorLexicalAnalyzer lex = new SeniorLexicalAnalyzer();
//测试
lex.testKeywordList();
lex.testOperatorList();
//测试
// new SeniorLexicalAnalyzer().testAnalyzers();
}
}
重申:
关键词表中的code没有什么意义
只有单词表中的code有意义
所以运算符表中就没有设置code
测试结果
keywords.txt文件存在
读取文件,加载关键词表
operators.txt文件存在
读取文件,加载运算符表
Symbol{id=1, value='include', code=0}
Symbol{id=2, value='define', code=1}
Symbol{id=3, value='ifdef', code=2}
Symbol{id=4, value='ifndef', code=3}
Symbol{id=5, value='auto', code=4}
Symbol{id=6, value='break', code=5}
Symbol{id=7, value='case', code=6}
Symbol{id=8, value='char', code=7}
Symbol{id=9, value='const', code=8}
Symbol{id=10, value='continue', code=9}
Symbol{id=11, value='default', code=10}
Symbol{id=12, value='do', code=11}
Symbol{id=13, value='double', code=12}
Symbol{id=14, value='else', code=13}
Symbol{id=15, value='enum', code=14}
Symbol{id=16, value='extern', code=15}
Symbol{id=17, value='float', code=16}
Symbol{id=18, value='for', code=17}
Symbol{id=19, value='goto', code=18}
Symbol{id=20, value='if', code=19}
Symbol{id=21, value='int', code=20}
Symbol{id=22, value='long', code=21}
Symbol{id=23, value='register', code=22}
Symbol{id=24, value='return', code=23}
Symbol{id=25, value='short', code=24}
Symbol{id=26, value='signed', code=25}
Symbol{id=27, value='sizeof', code=26}
Symbol{id=28, value='static', code=27}
Symbol{id=29, value='struct', code=28}
Symbol{id=30, value='switch', code=29}
Symbol{id=31, value='typedef', code=30}
Symbol{id=32, value='union', code=31}
Symbol{id=33, value='unsigned', code=32}
Symbol{id=34, value='void', code=33}
Symbol{id=35, value='volatile', code=34}
Symbol{id=36, value='while', code=35}
Symbol{id=1, value='#', code=0}
Symbol{id=2, value='$', code=0}
Symbol{id=3, value='‘', code=0}
Symbol{id=4, value='“', code=0}
Symbol{id=5, value='\', code=0}
Symbol{id=6, value='{', code=0}
Symbol{id=7, value='}', code=0}
Symbol{id=8, value=';', code=0}
Symbol{id=9, value='(', code=0}
Symbol{id=10, value=')', code=0}
Symbol{id=11, value='[', code=0}
Symbol{id=12, value=']', code=0}
Symbol{id=13, value='->', code=0}
Symbol{id=14, value='.', code=0}
Symbol{id=15, value='!', code=0}
Symbol{id=16, value='~', code=0}
Symbol{id=17, value='++', code=0}
Symbol{id=18, value='--', code=0}
Symbol{id=19, value='+', code=0}
Symbol{id=20, value='-', code=0}
Symbol{id=21, value='*', code=0}
Symbol{id=22, value='&', code=0}
Symbol{id=23, value='*', code=0}
Symbol{id=24, value='/', code=0}
Symbol{id=25, value='%', code=0}
Symbol{id=26, value='+', code=0}
Symbol{id=27, value='-', code=0}
Symbol{id=28, value='<<', code=0}
Symbol{id=29, value='>>', code=0}
Symbol{id=30, value='<', code=0}
Symbol{id=31, value='<=', code=0}
Symbol{id=32, value='>', code=0}
Symbol{id=33, value='>=', code=0}
Symbol{id=34, value='==', code=0}
Symbol{id=35, value='!=', code=0}
Symbol{id=36, value='&', code=0}
Symbol{id=37, value='^', code=0}
Symbol{id=38, value='|', code=0}
Symbol{id=39, value='&&', code=0}
Symbol{id=40, value='||', code=0}
Symbol{id=41, value='?', code=0}
Symbol{id=42, value=':', code=0}
Symbol{id=43, value='=', code=0}
Symbol{id=44, value='+=', code=0}
Symbol{id=45, value='-=', code=0}
Symbol{id=46, value='*=', code=0}
Symbol{id=47, value='/=', code=0}
Symbol{id=48, value='%=', code=0}
Symbol{id=49, value='<<=', code=0}
Symbol{id=50, value='>>=', code=0}
Symbol{id=51, value='&=', code=0}
Symbol{id=52, value='^=', code=0}
Symbol{id=53, value='|=', code=0}
Symbol{id=54, value=',', code=0}
词法分析器的状态转移图
需知
C语言的标识符由字母、数字、下划线组成,并且首字母不能是数字(美元符号$也可以作为标识符)
C语言的常量
C语言中的运算符大全(内附优先级表)
状态转移图
代码实现
AbstractSeniorLexicalAnalyzer
高级词法分析器继承基础的词法分析器
另外
新定义了两个属性
List<String> inputList=new ArrayList<>();//输入串 按行
public static int row=1;//行号
新增对inputList初始化的方法
//初始化inputList
public List<String> initInputList(){
}
//读入input.txt到input
public List<String> loadInputList(){
}
//没有input.txt随机初始化input
public List<String> randomInputList(){
}
以及对ch的判断
设计思想
重写了analyzers方法
遍历inputList
借用了input属性
对其按行编译
analyzer()分析input
@Override
public void analyzers() {
for (String input : inputList) {
this.input=input;//给对象的input赋值
ch=' ';
analyzer();
row++;
position=0;
}
System.out.println("词法分析结束");
}
AbstractSeniorLexicalAnalyzer
package s1;
import java.io.*;
import java.util.ArrayList;
import java.util.List;
import java.util.Random;
/**
* 词法分析器抽象类
*/
public abstract class AbstractSeniorLexicalAnalyzer extends AbstractLexicalAnalyzer{
List<String> inputList=new ArrayList<>();//输入串 按行
public List<Symbol> operatorList=new ArrayList<>();//运算符表
protected AbstractSeniorLexicalAnalyzer() {
}
public AbstractSeniorLexicalAnalyzer(List<String> inputList) {
this.inputList = inputList;
}
public void setInputList(List<String> inputList) {
this.inputList = inputList;
}
//创建运算符表
{
File file = new File("operators.txt");
if (file.exists()) {
System.out.println("operators.txt文件存在");
System.out.println("读取文件,加载运算符表");
loadOperatorList();
} else {
System.out.println("operators.txt文件不存在");
System.out.println("采用默认的运算符表");
initOperatorList();
}
}
//读取operators.txt文件的内容加载到operatorList中
public void loadOperatorList() {
try {
BufferedReader reader = new BufferedReader(new FileReader("operators.txt"));
String line;
while ((line = reader.readLine()) != null) {
String[] arr = line.split("\\s+");
operatorList.add(new Symbol(Integer.parseInt(arr[0]), arr[1]));
}
reader.close();
} catch (IOException e) {
e.printStackTrace();
}
}
//默认的运算符表
public void initOperatorList(){
operatorList.add(new Symbol(1,"="));
operatorList.add(new Symbol(2,"+"));
operatorList.add(new Symbol(3,"*"));
operatorList.add(new Symbol(4,"**"));
operatorList.add(new Symbol(5,","));
operatorList.add(new Symbol(6,"("));
operatorList.add(new Symbol(7,")"));
}
public void toStringSymbolList(){
for (Symbol s:symbolList) {
// System.out.println(s.toStringIdCode());
System.out.println(s.toString());
}
}
public static int row=1;//行号
//其余函数
//当调用了getChar(positiop已经+1)之后 取到下一个字符 但不偏移扫描指针
public char getAfterChar() {
if(position<input.length()){
return input.charAt(position);
}
return EOF;
}
public boolean AfterCharisDigit(char ch) {
return ch >= '0' && ch <= '9';
}
public boolean is_(){
return ch =='_';
}
public boolean is$(){
return ch =='$';
}
public boolean isPlus(){
return ch =='+';
}
public boolean isSub(){
return ch =='-';
}
public boolean isPoint(){
return ch =='.';
}
public boolean isE(){
return ch =='E';
}
public boolean isD(){
return ch =='D';
}
public boolean isDigitEx0(){
return ch >'0' && ch<='9';
}
public boolean isDigitB0A7(){
return ch >='0' && ch<='7';
}
public boolean is0(){
return ch =='0';
}
public boolean isx(){
return ch=='x'||ch=='X';
}
public boolean isHex(){
return (ch>='0' && ch<='9')||(ch>='a'&&ch<='f')||(ch>='A'&&ch<='F');
}
public boolean escape(){
return ch=='n'||ch=='t'||ch=='b'||ch=='r'||ch=='f'||ch=='\\'||ch=='\''||ch=='"';
}
@Override
public void analyzers() {
for (String input : inputList) {
this.input=input;//给对象的input赋值
ch=' ';
analyzer();
row++;
position=0;
}
System.out.println("词法分析结束");
}
//状态转移图由子类实现
public abstract void analyzer();
//初始化inputList
public List<String> initInputList(){
File file = new File("input.txt");
if (file.exists()) {
System.out.println("input.txt文件存在");
System.out.println("读取文件,input");
return loadInputList();
} else {
System.out.println("input.txt文件不存在");
System.out.println("随机初始化input");
return randomInputList();
}
}
//读入input.txt到input
public List<String> loadInputList(){
List<String> inputList=new ArrayList<>();
try {
File file = new File("input.txt");
BufferedReader reader = new BufferedReader(new FileReader(file));
String line;
StringBuilder sb = new StringBuilder();
while ((line = reader.readLine()) != null) {
inputList.add(line);
}
reader.close();
} catch (IOException e) {
e.printStackTrace();
}
return inputList;
}
//没有input.txt随机初始化input
public List<String> randomInputList(){
List<String> inputList=new ArrayList<>();
final String ERROR="~~";//错误字符
Random rand=new Random();
List<Symbol> wordList=new ArrayList<>();//现有词表 (关键词+符号表)
wordList.addAll(keywordList);
wordList.addAll(operatorList);
for (int r = 0; r < 20; r++) {//共20行
StringBuilder input= new StringBuilder();
for(int i=0;i<10;i++){//每行10个
String value;
int index=rand.nextInt(wordList.size()+1);//故意+1 人为使其出现错误单词
if(index>=wordList.size()){
value=ERROR;//错误单词
}else{
value=wordList.get(index).getValue();
if(value.equals("constantList")){//常数
StringBuilder constant= new StringBuilder();
for(int j=0;j<3;j++){
constant.append(rand.nextInt(10));
}
value= constant.toString();
}else if("identifierList".equals(value)){//标识符
StringBuilder letter= new StringBuilder();
for(int j=0;j<3;j++){
char c=(char)(rand.nextInt(26)+'a');
letter.append(c);
}
value= letter.toString();
}
}
input.append(value).append(" ");
}
inputList.add(input.toString());
}
return inputList;
}
void testInitInputList(){
List<String> strings = initInputList();
for (String s :
strings) {
System.out.println(s);
}
}
public static void main(String[] args) {
SeniorLexicalAnalyzer lex=new SeniorLexicalAnalyzer();
lex.testInitInputList();
}
}
SeniorLexicalAnalyzer
继承AbstractLexicalAnalyzer
重写analyzer方法
对input进行分析
public void analyzer(){
}
重载procError(String msg)方法
错误处理
跳到空格或结尾处
//出错处理,跳过到空白处,继续分析
public void procError(String msg){
System.out.println("词法分析出错"+"\n"+"出错行号为"+row+"出错位置为"+position);
System.out.println("错误原因:"+msg);
System.out.println("跳过错误,继续分析");
while (!isEOF()&&!isSpace()){
getChar();
}
if(ch!=EOF){
ch=' ';
}
}
SeniorLexicalAnalyzer
package s1;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
/**
* 词法分析器类
*/
public class SeniorLexicalAnalyzer extends AbstractSeniorLexicalAnalyzer{
public SeniorLexicalAnalyzer() {
}
public SeniorLexicalAnalyzer(List<String> inputList) {
super(inputList);
}
//根据状态转移图实现
@Override
public void analyzer(){
while (!isEOF()){
int id,code;
strToken="";
getChar();//初始ch=' '
getBC();
if(isEOF()){//面对不用回退的状态
break;
}
if(isLetter()||is_()){//判断是否是标识符 1.1
concat();
getChar();
while ((isLetter()||isDigit()||is$()||is_())&&ch!=EOF){//1.2
concat();
getChar();
}
if(!isSpace()&&!isEOF()){
procError("标识符定义不正确");
continue;
}
retract();
id =reserve();
//0是标识符 不是0就是保留字
if(id ==0){
code=insertId(strToken);
Symbol symbol=keywordList.get(keywordList.indexOf(new Symbol("identifierList")));
//<id,value,code> 编码 符号 索引
int idId=symbol.getId();//标识符在关键字中的编码
symbolList.add(new Symbol(idId,strToken,code));
}else{
int keyCode= keywordList.indexOf(new Symbol(id,strToken));//关键字的索引
symbolList.add(new Symbol(id,strToken,keyCode));
}
}
else if((isPlus()||isSub()||isDigit()||isPoint())&&AfterCharisDigit(getAfterChar())){//浮点常数 2.1
if(isPlus()||isSub()){
concat();
getChar();
}
if(isDigit()){
concat();
getChar();
while (isDigit()){//2.2
concat();
getChar();
}
if (isPoint()) {
concat();
getChar();
while (isDigit()) {//2.4
concat();
getChar();
}
if (!(isD() || isE())) {
retract();
code = insertConst(strToken);
id = keywordList.get(keywordList.indexOf(new Symbol("constantList"))).getId();
symbolList.add(new Symbol(id, strToken, code));
return;
}
}
}else if (isPoint()) { //2.3
strToken+="0";
concat();
getChar();
if(isDigit()){
concat();
getChar();
}
while (isDigit()){//2.4
concat();
getChar();
}
if(!(isD()||isE())){//2.8
retract();
code=insertConst(strToken);
id=keywordList.get(keywordList.indexOf(new Symbol("constantList"))).getId();
symbolList.add(new Symbol(id,strToken,code));
return;
}
}
if(isE()||isD()){//2.5 之前
concat();
getChar();
if(isPlus()|| isSub()){//2.5
concat();
getChar();
if(isDigit()){//2.6
while (isDigit()){//2.7
concat();
getChar();
}
retract();//2.8
code=insertConst(strToken);
id=keywordList.get(keywordList.indexOf(new Symbol("constantList"))).getId();
symbolList.add(new Symbol(id,strToken,code));
return;
}
}
}else{
retract();
code=insertConst(strToken);
id=keywordList.get(keywordList.indexOf(new Symbol("constantList"))).getId();
symbolList.add(new Symbol(id,strToken,code));
return;
}
}
else if (isDigitEx0()){//十进制整型 3.1
if(isDigit()){
concat();
getChar();
while (isDigit()){
concat();
getChar();
}
retract();
code=insertConst(strToken);
id=keywordList.get(keywordList.indexOf(new Symbol("constantList"))).getId();
symbolList.add(new Symbol(id,strToken,code));
}
} else if (is0()) {//4.1
if (isDigitB0A7()){//八进制整数 4.2
concat();
getChar();
while (isDigitB0A7()){//4.3
concat();
getChar();
}
retract();
code=insertConst(strToken);
id=keywordList.get(keywordList.indexOf(new Symbol("constantList"))).getId();
symbolList.add(new Symbol(id,strToken,code));
}else if(isx()){//16进制整数
if(isHex()){//5.2
concat();
getChar();
while (isHex()){//5.3
concat();
getChar();
}//5.4
retract();
code=insertConst(strToken);
id=keywordList.get(keywordList.indexOf(new Symbol("constantList"))).getId();
symbolList.add(new Symbol(id,strToken,code));
}
}
} else if (ch=='\'') {//字符常量 6.1
concat();
getChar();
if(ch=='\\'){//转义字符6.3
concat();
getChar();
if(escape()){
concat();
}else {
procError("转义字符常量定义不正确");
continue;
}
}else{//具体字符 6.2
concat();
}
getChar();
if(ch!='\''){
procError("字符常量定义不正确");
continue;
}
concat();//6.5
code=insertConst(strToken);
id=keywordList.get(keywordList.indexOf(new Symbol("constantList"))).getId();
symbolList.add(new Symbol(id,strToken,code));
} else if (ch=='"') {//字符串常量 7.1
concat();
getChar();
while(ch!='"'&&!isEOF()){//7.2
concat();
getChar();//具体字符
}
if(isEOF()){
procError("字符串没有右\"");
continue;
}
concat();
code=insertConst(strToken);
id=keywordList.get(keywordList.indexOf(new Symbol("constantList"))).getId();
symbolList.add(new Symbol(id,strToken,code));
} else if (ch=='/') {//8.1
concat();
getChar();
if(ch=='='){// 12.1/=
concat();
code=operatorList.indexOf(new Symbol(strToken));
id=operatorList.get(code).getId()+ keywordList.size();
symbolList.add(new Symbol(id,strToken,code));
}else if (ch=='/'){//单行注释 8.2
concat();
getChar();
while (!isEOF()){
concat();
getChar();
}
System.out.println("注释内容:"+strToken);
}else if(ch=='*'){//多行注释 /* 功能不完善
concat();
getChar();
while (!(ch=='*'&&getAfterChar()=='/')){ //9.2 */
concat();
getChar();
}
concat();
getChar();
concat();
System.out.println("注释内容:"+strToken);
}else {// 12.2/
retract();
code=operatorList.indexOf(new Symbol(strToken));
id=operatorList.get(code).getId()+ keywordList.size();
symbolList.add(new Symbol(id,strToken,code));
}
} else if (ch=='{'||ch=='}'||ch=='('||ch==')'||ch=='['||ch==']') {//1-7
concat();
code=operatorList.indexOf(new Symbol(strToken));
id=operatorList.get(code).getId()+ keywordList.size();//全局id
symbolList.add(new Symbol(id,strToken,code));
} else if (ch=='='||ch=='!'||ch=='%') {//8= 15% 16!
concat();
getChar();
if(ch=='='){//8.1== 15.1%= 16.1!=
concat();
}else {//8.2= 15.2% 16.2!
retract();
}
code=operatorList.indexOf(new Symbol(strToken));
id=operatorList.get(code).getId()+ keywordList.size();
symbolList.add(new Symbol(id,strToken,code));
} else if (ch=='+') {
concat();
getChar();
if(ch=='+'||ch=='='){ //9.1++ 9.2+=
concat();
}else {
retract();
}
code=operatorList.indexOf(new Symbol(strToken));
id=operatorList.get(code).getId()+ keywordList.size();
symbolList.add(new Symbol(id,strToken,code));
} else if (ch=='-') {
concat();
getChar();
if(ch=='-'||ch=='='||ch=='>'){//10.1-- 10.2-= 10.3->
concat();
}else {
retract();
}
code=operatorList.indexOf(new Symbol(strToken));
id=operatorList.get(code).getId()+ keywordList.size();
symbolList.add(new Symbol(id,strToken,code));
} else if (ch=='*') {
concat();
getChar();
if(ch=='='){//11.1 *=
concat();
}else{//* 11.2
retract();
}
code=operatorList.indexOf(new Symbol(strToken));
id=operatorList.get(code).getId()+ keywordList.size();
symbolList.add(new Symbol(id,strToken,code));
} else if (ch=='.'||ch=='~') {//13 17
concat();
code=operatorList.indexOf(new Symbol(strToken));
id=operatorList.get(code).getId()+ keywordList.size();
symbolList.add(new Symbol(id,strToken,code));
} else if (ch=='&'||ch=='|') {//15 18
concat();
getChar();
if (ch=='&'){
concat();
} else if(ch=='='){//15.2 &=
concat();
}else{//& 15.3
retract();
}
code=operatorList.indexOf(new Symbol(strToken));
id=operatorList.get(code).getId()+ keywordList.size();
symbolList.add(new Symbol(id,strToken,code));
} else if (ch=='^') {//19
concat();
getChar();
if (ch=='='){//^=
concat();
} else{//^
retract();
}
code=operatorList.indexOf(new Symbol(strToken));
id=operatorList.get(code).getId()+ keywordList.size();
symbolList.add(new Symbol(id,strToken,code));
} else if (ch=='<') {//20
concat();
getChar();
if (ch=='<'){
concat();
getChar();
if(ch=='='){//<<=
concat();
}else {//<<
retract();
}
}else{//<
retract();
}
code=operatorList.indexOf(new Symbol(strToken));
id=operatorList.get(code).getId()+ keywordList.size();
symbolList.add(new Symbol(id,strToken,code));
} else if (ch=='>') {//21
concat();
getChar();
if (ch=='>'){
concat();
getChar();
if(ch=='='){//>>=
concat();
}else {//>>
retract();
}
}else{//>
retract();
}
code=operatorList.indexOf(new Symbol(strToken));
id=operatorList.get(code).getId()+ keywordList.size();
symbolList.add(new Symbol(id,strToken,code));
} else if(ch=='?'||ch==':'){
concat();
code=operatorList.indexOf(new Symbol(strToken));
id=operatorList.get(code).getId()+ keywordList.size();
symbolList.add(new Symbol(id,strToken,code));
} else if (ch==',') {
concat();
code=operatorList.indexOf(new Symbol(strToken));
id=operatorList.get(code).getId()+ keywordList.size();
symbolList.add(new Symbol(id,strToken,code));
}else{
procError("未识别的字符");//出错处理
}
}
}
//重写出错处理,跳过到空白处,继续分析
public void procError(){
System.out.println("词法分析出错"+"\n"+"出错行号为"+row+"出错位置为"+position);
System.out.println("跳过错误,继续分析");
while (!isEOF()&&!isSpace()){
getChar();
}
if(ch!=EOF){
ch=' ';
}
}
//出错处理,跳过到空白处,继续分析
public void procError(String msg){
System.out.println("词法分析出错"+"\n"+"出错行号为"+row+"出错位置为"+position);
System.out.println("错误原因:"+msg);
System.out.println("跳过错误,继续分析");
while (!isEOF()&&!isSpace()){
getChar();
}
if(ch!=EOF){
ch=' ';
}
}
//测试关键词表的赋值
private void testKeywordList(){
for (Symbol s:keywordList) {
System.out.println(s);
}
}
//测试运算符表的赋值
private void testOperatorList(){
for (Symbol s:operatorList) {
System.out.println(s);
}
}
//测试词法分析器
private void testAnalyzers(){
SeniorLexicalAnalyzer lex = new SeniorLexicalAnalyzer();
List<String> inputList=lex.initInputList();
System.out.println("输出input串");
inputList.forEach(System.out::println);
lex.setInputList(inputList);
lex.analyzers();
System.out.println("输出单词表");
lex.toStringSymbolList();
System.out.println("正在把单词表存入symbols.txt");
lex.storeSymbols();
}
public static void main(String[] args) {
SeniorLexicalAnalyzer lex = new SeniorLexicalAnalyzer();
//测试
// lex.testKeywordList();
// lex.testOperatorList();
//测试
lex.testAnalyzers();
}
}
相关文件
keywords.txt
1 include
2 define
3 ifdef
4 ifndef
5 auto
6 break
7 case
8 char
9 const
10 continue
11 default
12 do
13 double
14 else
15 enum
16 extern
17 float
18 for
19 goto
20 if
21 int
22 long
23 register
24 return
25 short
26 signed
27 sizeof
28 static
29 struct
30 switch
31 typedef
32 union
33 unsigned
34 void
35 volatile
36 while
37 constantList
38 identifierList
operators.txt
1 #
2 $
3 ‘
4 “
5 \
6 {
7 }
8 ;
9 (
10 )
11 [
12 ]
13 ->
14 .
15 !
16 ~
17 ++
18 --
19 +
20 -
21 *
22 &
23 *
24 /
25 %
26 +
27 -
28 <<
29 >>
30 <
31 <=
32 >
33 >=
34 ==
35 !=
36 &
37 ^
38 |
39 &&
40 ||
41 ?
42 :
43 =
44 +=
45 -=
46 *=
47 /=
48 %=
49 <<=
50 >>=
51 &=
52 ^=
53 |=
54 ,
input.txt
if else while do while dowhile
as123_123 _1$23 _12*23 _12_12
+12 -34 12E+10 12.3E-9 .123
123 12-12 12345 1232s12 12 23 12 12
'a' 't' 'tt'
'\n' '\\' '\'' '\"' '\d' '\'
"abc" "asd" "tt
//abc //as //bi
/*abc*/ /*a*sd*/ /*as1*/ /*as2*/
{} () [] []
= ==
+ ++ +=
- -- -= ->
* *=
/ /=
. .
% %=
&& & &=
< << <<=
> >> >>=
?: ?:
,
结果
程序输出
keywords.txt文件存在
读取文件,加载关键词表
operators.txt文件存在
读取文件,加载运算符表
keywords.txt文件存在
读取文件,加载关键词表
operators.txt文件存在
读取文件,加载运算符表
input.txt文件存在
读取文件,input
输出input串
if else while do while dowhile
as123_123 _1$23 _12*23 _12_12
+12 -34 12E+10 12.3E-9 .123
123 12-12 12345 1232s12 12 23 12 12
'a' 't' 'tt'
'\n' '\\' '\'' '\"' '\d' '\'
"abc" "asd" "tt
//abc //as //bi
/*abc*/ /*a*sd*/ /*as1*/ /*as2*/
{} () [] []
= ==
+ ++ +=
- -- -= ->
* *=
/ /=
. .
% %=
&& & &=
< << <<=
> >> >>=
?: ?:
,
词法分析出错
出错行号为2出错位置为26
错误原因:标识符定义不正确
跳过错误,继续分析
词法分析出错
出错行号为5出错位置为13
错误原因:字符常量定义不正确
跳过错误,继续分析
词法分析出错
出错行号为6出错位置为23
错误原因:转义字符常量定义不正确
跳过错误,继续分析
词法分析出错
出错行号为6出错位置为28
错误原因:字符常量定义不正确
跳过错误,继续分析
词法分析出错
出错行号为7出错位置为17
错误原因:字符串没有右"
跳过错误,继续分析
注释内容://abc //as //bi
注释内容:/*abc*/
注释内容:/*a*sd*/
注释内容:/*as1*/
注释内容:/*as2*/
词法分析结束
输出单词表
Symbol{id=20 value: if code=19}
Symbol{id=14 value: else code=13}
Symbol{id=36 value: while code=35}
Symbol{id=12 value: do code=11}
Symbol{id=36 value: while code=35}
Symbol{id=38 value: dowhile code=0}
Symbol{id=38 value: as123_123 code=1}
Symbol{id=38 value: _1$23 code=2}
Symbol{id=38 value: _12_12 code=3}
Symbol{id=37 value: +12 code=0}
Symbol{id=37 value: 123 code=1}
Symbol{id=37 value: 'a' code=2}
Symbol{id=37 value: 't' code=3}
Symbol{id=37 value: '\n' code=4}
Symbol{id=37 value: '\\' code=5}
Symbol{id=37 value: '\'' code=6}
Symbol{id=37 value: '\"' code=7}
Symbol{id=37 value: "abc" code=8}
Symbol{id=37 value: "asd" code=9}
Symbol{id=44 value: { code=5}
Symbol{id=45 value: } code=6}
Symbol{id=47 value: ( code=8}
Symbol{id=48 value: ) code=9}
Symbol{id=49 value: [ code=10}
Symbol{id=50 value: ] code=11}
Symbol{id=49 value: [ code=10}
Symbol{id=50 value: ] code=11}
Symbol{id=81 value: = code=42}
Symbol{id=72 value: == code=33}
Symbol{id=57 value: + code=18}
Symbol{id=55 value: ++ code=16}
Symbol{id=82 value: += code=43}
Symbol{id=58 value: - code=19}
Symbol{id=56 value: -- code=17}
Symbol{id=83 value: -= code=44}
Symbol{id=51 value: -> code=12}
Symbol{id=59 value: * code=20}
Symbol{id=84 value: *= code=45}
Symbol{id=62 value: / code=23}
Symbol{id=85 value: /= code=46}
Symbol{id=52 value: . code=13}
Symbol{id=52 value: . code=13}
Symbol{id=63 value: % code=24}
Symbol{id=86 value: %= code=47}
Symbol{id=77 value: && code=38}
Symbol{id=60 value: & code=21}
Symbol{id=89 value: &= code=50}
Symbol{id=68 value: < code=29}
Symbol{id=66 value: << code=27}
Symbol{id=87 value: <<= code=48}
Symbol{id=70 value: > code=31}
Symbol{id=67 value: >> code=28}
Symbol{id=88 value: >>= code=49}
Symbol{id=79 value: ? code=40}
Symbol{id=80 value: : code=41}
Symbol{id=79 value: ? code=40}
Symbol{id=80 value: : code=41}
Symbol{id=92 value: , code=53}
正在把单词表存入symbols.txt
Disconnected from the target VM, address: '127.0.0.1:1027', transport: 'socket'
Process finished with exit code 0
symbols.txt
Symbol{id=20 value: if code=19}
Symbol{id=14 value: else code=13}
Symbol{id=36 value: while code=35}
Symbol{id=12 value: do code=11}
Symbol{id=36 value: while code=35}
Symbol{id=38 value: dowhile code=0}
Symbol{id=38 value: as123_123 code=1}
Symbol{id=38 value: _1$23 code=2}
Symbol{id=38 value: _12_12 code=3}
Symbol{id=37 value: +12 code=0}
Symbol{id=37 value: 123 code=1}
Symbol{id=37 value: 'a' code=2}
Symbol{id=37 value: 't' code=3}
Symbol{id=37 value: '\n' code=4}
Symbol{id=37 value: '\\' code=5}
Symbol{id=37 value: '\'' code=6}
Symbol{id=37 value: '\"' code=7}
Symbol{id=37 value: "abc" code=8}
Symbol{id=37 value: "asd" code=9}
Symbol{id=44 value: { code=5}
Symbol{id=45 value: } code=6}
Symbol{id=47 value: ( code=8}
Symbol{id=48 value: ) code=9}
Symbol{id=49 value: [ code=10}
Symbol{id=50 value: ] code=11}
Symbol{id=49 value: [ code=10}
Symbol{id=50 value: ] code=11}
Symbol{id=81 value: = code=42}
Symbol{id=72 value: == code=33}
Symbol{id=57 value: + code=18}
Symbol{id=55 value: ++ code=16}
Symbol{id=82 value: += code=43}
Symbol{id=58 value: - code=19}
Symbol{id=56 value: -- code=17}
Symbol{id=83 value: -= code=44}
Symbol{id=51 value: -> code=12}
Symbol{id=59 value: * code=20}
Symbol{id=84 value: *= code=45}
Symbol{id=62 value: / code=23}
Symbol{id=85 value: /= code=46}
Symbol{id=52 value: . code=13}
Symbol{id=52 value: . code=13}
Symbol{id=63 value: % code=24}
Symbol{id=86 value: %= code=47}
Symbol{id=77 value: && code=38}
Symbol{id=60 value: & code=21}
Symbol{id=89 value: &= code=50}
Symbol{id=68 value: < code=29}
Symbol{id=66 value: << code=27}
Symbol{id=87 value: <<= code=48}
Symbol{id=70 value: > code=31}
Symbol{id=67 value: >> code=28}
Symbol{id=88 value: >>= code=49}
Symbol{id=79 value: ? code=40}
Symbol{id=80 value: : code=41}
Symbol{id=79 value: ? code=40}
Symbol{id=80 value: : code=41}
Symbol{id=92 value: , code=53}