自学内容网

编译原理——扫描器设计与实现

本文内容比较详细(包含跳过注释的处理),下面直接给出完整代码,并结合代码中的注释进行讲解。

#include<bits/stdc++.h>

using namespace std;

// Element count of a true array. Only valid on arrays whose size is known in
// this translation unit; it gives a meaningless result on a pointer.
#define ARRAY_LENGTH(arr) (sizeof(arr) / sizeof(arr[0]))

// Reserved words. A name-like token found in this list is reported as a
// keyword; every other name-like token is reported as an identifier.
string KEY_WORD[] = {
    "int",    "char",   "string",  "bool",      "float", "double",
    "true",   "false",  "return",  "if",        "else",  "while",
    "for",    "default", "do",     "public",    "static", "switch",
    "case",   "private", "protected"
};

// Delimiter characters. '/' is listed here as well so that the scanner can
// notice a possible comment start ("//" or "/*").
char BOUND_CHAR[] = {
    ',', ';',
    '(', ')',
    '{', '}',
    '[', ']',
    '\'', '"',
    '/'
};

// Index of the next unread character of the input being scanned.
int pos{0};

// Comment state of the scanner:
//   0 - not inside a comment (the default),
//   1 - inside a "//" comment,
//   2 - inside a "/* */" comment.
// Two non-zero values are needed because the two comment kinds end differently.
short in_annotation{0};

// Lexical categories of a token: keyword, identifier, constant, operator and
// delimiter, plus two bookkeeping kinds (comment marker and error).
// The numeric values are spelled out because they are what gets printed when
// a WordTypeKind is streamed to an output stream.
enum WordTypeKind {
    KEYWORD    = 0,
    IDENTIFIER = 1,
    CONSTANT   = 2,
    OPERATOR   = 3,
    DELIMITER  = 4,
    ANNOTATION = 5,
    ERROR      = 6
};

// One token produced by the scanner: its lexical category together with the
// characters that make it up.
struct WORD {
    // Category the token was classified as.
    WordTypeKind wordType;
    // Text of the token, built from the characters read from the input.
    std::string value;
};

// Reads the file named `fileName` and returns its text.
//
// The file is read line by line and every line is re-emitted followed by
// '\n', so the result always ends with a newline when the file is non-empty,
// even if the file itself did not.
//
// Returns an empty string (after printing a message to stderr) when the file
// cannot be opened; an empty file also yields an empty string.
string openFile(string fileName) {
    std::ifstream input(fileName);

    // Bail out early if the file could not be opened.
    if (!input.is_open()) {
        std::cerr << "无法打开文件!" << '\n';
        return "";
    }

    std::string text;
    for (std::string line; std::getline(input, line); ) {
        text.append(line);
        text.push_back('\n');
    }

    // Release the file handle before handing the text back.
    input.close();
    return text;
}

// Writes `content`, followed by a single newline, to the file named
// `fileName`, replacing any previous contents of that file.
//
// Returns true only when the file was opened AND all data was written and
// flushed successfully. On failure a message is printed to stderr and false
// is returned.
bool writeFile(string fileName, string content) {
    std::ofstream out(fileName);
    if (!out.is_open()) {
        std::cerr << "无法打开文件!" << '\n';
        return false;
    }

    out << content << '\n';

    // close() flushes the buffered data and sets failbit if that fails, so
    // the stream state must be checked after it rather than assumed good
    // (e.g. the disk may be full).
    out.close();
    if (out.fail()) {
        std::cerr << "写入文件失败!" << '\n';
        return false;
    }
    return true;
}

// Reports whether `word` is one of the reserved words listed in KEY_WORD.
// The comparison is an exact, case-sensitive string match.
bool isKeyWord(string word) {
    for (const string& keyword : KEY_WORD) {
        if (keyword == word) {
            return true;
        }
    }
    return false;
}

//分词器,将字符串分成最小单位(关键字,标识符,运算符,界符和常量),关键字我们可以自己根据使用的高级语句自定义
WORD getNextWord(string str) {
    string tempStr = "";//暂存这个单词
    WORD newWord;//返回的词
    newWord.wordType = ERROR;//方便后续退出循环
    while (pos < str.length() && std::isspace(str[pos])) {
        if(in_annotation == 1 && str[pos] == '\n') in_annotation = 0;//退出注释状态
        ++pos; // 跳过空白字符
    }
    if (pos >= str.length()) {
        return newWord; // 结束
    }
    if(pos < str.length()) {//不越界
        char c = str[pos++];
        tempStr += c;
        //标识符的命名规范:只能以字母或'_'开头
        if(isalpha(c) || c == '_') {//这个词是关键词或标识符
            // isalnum(str[pos])这个函数用来检查传递给它的字符是否是字母(isalpha)或者是数字(isdigit)
            while(pos < str.length() && (isalnum(str[pos]) || str[pos] == '_')) {
                tempStr += str[pos++];
            }
            //判断这个单词是标识符or关键字
            if(isKeyWord(tempStr)) {//是关键字
                newWord.wordType = KEYWORD;
            } else {//标识符
                newWord.wordType = IDENTIFIER;
            }
        } else if(isdigit(c)) {//数字开头只可能是常数,我们把所有数字读完
            while(pos < str.length() && isdigit(str[pos])) {
                tempStr += str[pos++];
            }
            newWord.wordType = CONSTANT;
        } else {//只可能是运算符或界符
            for(int i = 0; i < ARRAY_LENGTH(BOUND_CHAR); i++) {//是不是界符
                if(c == BOUND_CHAR[i]) {
                    //遇到'/'判断是不是注释和注释类型
                    if(c == '/' && pos < str.length() && str[pos] == '/') {//是'//'类型
                        in_annotation = 1; 
                        newWord.wordType = ANNOTATION;
                        pos++;
                        break;
                    } else if(c == '/' && pos < str.length() && str[pos] == '*') {//是'/**/'类型
                        in_annotation = 2;
                        newWord.wordType = ANNOTATION;
                        pos++;
                        break;
                    }
                    newWord.wordType = DELIMITER;
                    break;
                }
            }
            //是运算符,注意双目运算符(三目运算符我们就不考虑了)
            if(pos < str.length()) {//注意不要越界
                newWord.wordType = OPERATOR;
                char nextChar = str[pos];
                //特判一下'*/'的情况,因为这是第二种注释的退出标识
                if(c == '*' && nextChar == '/') {//退出注释状态,下面也不用看了
                    in_annotation = 0;
                    tempStr += nextChar;
                    newWord.wordType = ANNOTATION;
                    pos++;
                    newWord.value = tempStr;
                    return newWord;
                }
                //考虑一下所有的双目运算符
                if((c == '+' || c == '-' || c == '*' || c == '/' || c == '!' || c == '^' || c == '%' || c == '=' || c == '<' || c == '>') && nextChar == '=') {
                    tempStr += nextChar;//更新一下nextChar
                    pos++;
                }
                if(c == '+' && nextChar == '+') {
                    tempStr += nextChar;//更新一下nextChar
                    pos++;
                }
                if(c == '-' && nextChar == '-') {
                    tempStr += nextChar;//更新一下nextChar
                    pos++;
                }
                if(c == '&' && nextChar == '&') {
                    tempStr += nextChar;//更新一下nextChar
                    pos++;
                }
                if(c == '|' && nextChar == '|') {
                    tempStr += nextChar;//更新一下nextChar
                    pos++;
                }
                if(c == '<' && nextChar == '<') {
                    tempStr += nextChar;//更新一下nextChar
                    pos++;
                }
                if(c == '>' && nextChar == '>') {
                    tempStr += nextChar;//更新一下nextChar
                    pos++;
                }
            }
        }
        newWord.value = tempStr;
    }
    return newWord;
}

// Note: a newline character counts as one character of the input's length.

int main() {
    //先读取txt文件
    string fileName = "E:\\program\\bianyiyuanli\\1.txt";
    string str = openFile(fileName);
    WORD word;
    while((word = getNextWord(str)).wordType != ERROR) {
        if(word.wordType != ANNOTATION && in_annotation == 0){//我们只有在词语不是注释类型和不在注释状态才输出它
            std::cout << "[" << word.wordType << ",   " << word.value << "]" << std::endl;
        }
    }
    return 1;
}


原文地址:https://blog.csdn.net/release_lonely/article/details/142889380

免责声明:本站文章内容转载自网络资源,如本站内容侵犯了原著者的合法权益,可联系本站删除。更多内容请关注自学内容网(zxcms.com)!