Compilation Principle LL(1) Grammar

Compilation Principle LL1 Grammar

Following the previous introduction to lexical analysis, this post shows how to obtain an LL(1) grammar by eliminating left recursion and extracting left factors. The final result is posted first.

Specific implementation requirements are given here

1. Converting a convertible non-LL(1) grammar to LL(1) grammar takes two phases: 1) eliminating the left recursion of the grammar, 2) extracting the left factor to eliminate backtracking.
2. Algorithm for eliminating left recursion from a grammar:
1) Sort all non-terminals of grammar G
2) Perform the following sequence for each non-terminal Pi:
for (j = 1; j <= i-1; j++)
Substitute Pj's productions into every production of Pi that begins with Pj (if possible);
Eliminate the direct left recursion of Pi:
Pi —> Piα|β, where β does not start with Pi; the modified productions are:
Pi —> βPi′
Pi′—> αPi′|ε
3) Simplify the grammar obtained above.
3. The algorithm for extracting the left factor:
A —> δβ1|δβ2|...|δβn|γ1|γ2|...|γm
(where each γ does not start with δ)
So, you can rewrite these productions to
A —> δA′|γ1| γ2...|γm
A′—>β1|β2|...|βn
4. Using the above algorithm, an LL(1) grammar is constructed:
1) Read grammar from text file g.txt
2) Design the functions remove_left_recursion() and remove_left_gene(), which implement the left-recursion elimination and left-factor extraction algorithms respectively, and apply them to the grammar;
3) Tidy up the resulting grammar;
4) Output the grammar to a new text file newg.txt. The grammar is written one non-terminal per line, with the productions of the start symbol on the first line and the candidates of the same non-terminal separated by '|'.

Input data:
Edit a text file g.txt and type the following into the file:
S->Qc|c|cab;
Q->Rb|b;
R->Sa|a;

Correct results:
The output of this experiment is not unique. Different results may be obtained depending on the order in which the non-terminals are processed when eliminating left recursion, and on the names chosen for the new non-terminals. Here is only one possible result:
S->Qc|cT;
T->@|ab;//Replace with @ because epsilon cannot be output
Q->Rb|b;
R->bcaU|caU|cabaU|aU;
U->bcaU|@;

The following is implemented in C++ code

///////////////////////////////
/////Author:Jameslong
/////Date:4/20/2017
//////////////////////////////
#include<iostream>
#include <fstream>
#include<string>
#include<list>
using namespace std;

//Empty production list; scan() copies it into ListVf to open the very first rule
list<string> vf;
//The grammar: one inner list per rule; its first element is the left-hand non-terminator and the remaining elements are the candidates
list<list<string>> ListVf;

//Shared scratch iterators over the rules of ListVf
list<list<string>>::iterator it_i;
list<list<string>>::iterator it_j;
//Shared scratch iterators over the strings stored inside one rule
list<string>::iterator it_k;
list<string>::iterator it_m;

char ch;//Character most recently fetched by Getchar()
string str = "";//Symbol text collected so far (extended by Concat())
ifstream in;//File Input Stream
char buf[1024]; //Cache that receives the contents of the input file
char *p;//Read cursor into buf

//Load the file into the cache `buf` and leave the read cursor `p` at its start.
//The end of the data is marked by one byte holding the value of EOF, which is what
//Getchar() looks for. A file that cannot be opened yields an empty cache, and input
//that does not fit is truncated instead of being written past the end of the cache.
void infile(string filename){
    char *const last = buf + sizeof(buf) - 1;//Last slot is reserved for the end marker
    for (p = buf; p <= last; ++p)
    {
        *p = 0;
    }
    in.open(filename);
    p = buf;
    int c = EOF;//Kept as int so a data byte 0xFF is not mistaken for end of file while reading
    while (p < last && (c = in.get()) != EOF)
    {
        *p++ = static_cast<char>(c);
    }
    *p = static_cast<char>(EOF);//End marker, stored as a char exactly as before
    p = buf;//Move pointer to cached first address
}
//Close the input stream `in`
void closefile(){
    in.close();
}

//Gets the character that the current pointer in the cache refers to, and uses the return value to determine if the end of the cache, that is, the end of the file (the cache needs to be large enough)
int Getchar(){
    if (*p != EOF){
        ch = *p;
        p++;
        return 1;
    }
    else
        return 0;
}
//Skip blanks: while the current character is a space, tab (9), line feed (10),
//form feed (12) or carriage return (13), fetch the next one.
//The loop also stops when the input is exhausted: Getchar() leaves `ch` unchanged
//at the end of the input, so blanks at the end of the file would otherwise spin here forever.
void getBC(){
    while (ch == ' ' || ch == 9 || ch == 10 || ch == 12 || ch == 13){
        if (!Getchar())
            break;
    }
}
//Append the current character `ch` to the symbol text `str` being collected
void Concat(){
    str += ch;
}
//Return true when the current character `ch` is '|', the separator between candidates
bool isVerticalBar(){
    return ch =='|';
}
//Return true when the current character `ch` is ';', which ends a rule
bool isSemicolon(){
    return ch == ';';
}
//Report a failure by printing a generic error line to standard output
void procError(){
    cout << "Something is wrong..." << endl;
}

//Scan to get a list of grammars.
//Each call consumes one significant character and updates the grammar being built in ListVf:
//  '-'   the text collected so far becomes the left-hand side of the current rule,
//        and the character after '-' (the '>' of "->") is skipped
//  '|'   the text collected so far is stored as one finished candidate
//  ';'   the last candidate is stored and an empty list is opened for the next rule
//  else  the character is appended to the text being collected
//Returns 0 when no character is left, 1 otherwise.
int scan(){
    if (!Getchar()) return 0;
    getBC();
    if (ListVf.empty()){
        ListVf.push_back(vf);
    }
    if (ch == '-'){
        ListVf.back().push_back(str);
        str = "";
        Getchar();
        return 1;
    }
    if (isSemicolon()){
        ListVf.back().push_back(str);
        str = "";
        //Open the next rule with a temporary. The former `*new list<string>` copied a
        //heap object into ListVf and then lost the only pointer to it, leaking it.
        ListVf.push_back(list<string>());
    }
    else if (isVerticalBar()){
        ListVf.back().push_back(str);
        str = "";
    }
    else{
        Concat();
    }
    return 1;
}
//Display Grammar: print every rule of ListVf as "A->x | y | z;" followed by an empty line.
//An entirely empty list is skipped and a rule without candidates prints as "A->;",
//so the function never reads past the end of a rule.
void show(){
    for (list<list<string>>::const_iterator rule = ListVf.begin(); rule != ListVf.end(); ++rule){
        if (rule->empty()) continue;
        list<string>::const_iterator alt = rule->begin();
        cout << *alt << "->";
        const char *separator = "";//Nothing before the first candidate, " | " before every other one
        for (++alt; alt != rule->end(); ++alt){
            cout << separator << *alt;
            separator = " | ";
        }
        cout << ";"<<endl<<endl;
    }
}

//Eliminate Left Recursion
int removeLeftRecursion(){

    while (scan());
    ListVf.pop_back();
    cout << "----------------" << endl;
    cout << "Initial grammar:" << endl;
    show();//Show Initial Grammar
    cout << "----------------" << endl;


    list<string> vn;//Non-Terminator Set
    list<list<string>>::iterator it;
    for (it = ListVf.begin(); it != ListVf.end(); ++it){//Non-Terminator Set
        vn.push_back(it->front());
    }

    list<string>::iterator it_vn;
    cout << "Non-Terminator Set" << endl;
    for (it_vn = vn.begin(); it_vn != vn.end(); ++it_vn){
        cout << *it_vn << " ";
    }
    cout << endl<<"----------------" << endl;
    int i = 0;
    for (it_i = ListVf.begin(); it_i != ListVf.end(); ++it_i){
        for (it_j = ListVf.begin(); it_j != it_i; ++it_j){
            it_k = it_i->begin();
            for (it_k++; it_k != it_i->end();){
                string str1 = *it_k;
                string str2 = it_j->front(); 

                if (str1.substr(0,1)==str2){//If Pi = pja
                    it_m = it_j->begin();
                    for (it_m++; it_m != it_j->end(); it_m++){
                        it_i->insert(it_k,*it_m+str1.substr(1,str1.length()));
                    }
                    it_k = it_i->erase(it_k); 
                    for (int i = 0; i < it_j->size() - 1;i++){
                        it_k--;
                    }
                }
                else{
                    ++it_k;
                }
            }
        }
        bool flag = false;
        list<string>::iterator i = it_i->begin();
        string str2 = "";
        string str3 = "";
        for (i++; i != it_i->end();++i){
            if (i->substr(0,1) == it_i->front()){//Scanning to determine if there is R->Ra...
                string s = i->substr(1, i->size());
                *i = s;
                str2 = s;
                flag = true; break;
            }
        }
        it_i->unique();//Duplicate removal
        if (flag){//Eliminate direct left recursion if it exists
            list<string>::iterator j = it_i->begin();
            str3 = it_i->front() + "'";//Record R'
            for (j++; j != it_i->end(); j++){
                *j = *j + it_i->front() + "'";
            }
            list<string> la;//Add R'Generative to Grammar
            la.push_back(str3);
            la.push_back(str2+str3);
            la.push_back("@");
            ListVf.push_back(la);
        }
    }
    cout << "Grammar after eliminating left recursion:" << endl;
    show();//Show values after eliminating left recursion
    cout << "----------------" << endl;
    return 0;
}
//Return true for the marks that extend a symbol name: ' (used for A') and ^ (used for factored rules)
static bool isSymbolMark(char c){
    return c == '\'' || c == '^';
}

//Length of the grammar symbol of `alt` that starts at `pos`: one character plus any marks that follow it
static string::size_type symbolLengthAt(const string &alt, string::size_type pos){
    if (pos >= alt.size()) return 0;
    string::size_type len = 1;
    while (pos + len < alt.size() && isSymbolMark(alt[pos + len])) ++len;
    return len;
}

//Length of the longest prefix, counted in whole symbols, shared by every candidate referenced in `group`
static string::size_type sharedPrefixLength(const list<list<string>::iterator> &group){
    const string &first = *group.front();
    string::size_type prefix = 0;
    for (;;){
        const string::size_type len = symbolLengthAt(first, prefix);
        if (len == 0) return prefix;
        for (list<list<string>::iterator>::const_iterator member = group.begin(); member != group.end(); ++member){
            const string &other = **member;
            if (symbolLengthAt(other, prefix) != len || other.compare(prefix, len, first, prefix, len) != 0)
                return prefix;
        }
        prefix += len;
    }
}

//Build a non-terminator name from `base` and '^' marks that no rule of ListVf uses yet
static string freshFactorName(const string &base){
    string name = base + "^";
    for (bool taken = true; taken;){
        taken = false;
        for (list<list<string>>::const_iterator rule = ListVf.begin(); rule != ListVf.end(); ++rule){
            if (!rule->empty() && rule->front() == name){
                taken = true;
                break;
            }
        }
        if (taken) name += "^";
    }
    return name;
}

//Extract left factors.
//For every rule, candidates that open with the same symbol are grouped; the longest
//prefix common to the group (A -> d b1|d b2) is kept once, followed by a new
//non-terminator (A -> d A^), and the differing tails become the candidates of the
//new rule (A^ -> b1|b2), with @ standing for an empty tail. New rules are appended to
//ListVf and are factored in turn by the same loop. Prints the resulting grammar and returns 0.
int removeLeftGene(){

    for (list<list<string>>::iterator rule = ListVf.begin(); rule != ListVf.end(); ++rule){
        if (rule->empty()) continue;
        list<string>::iterator alt = rule->begin();
        for (++alt; alt != rule->end(); ++alt){
            const string::size_type lead = symbolLengthAt(*alt, 0);
            if (lead == 0 || *alt == "@") continue;//Empty and epsilon candidates have nothing to factor

            //Collect this candidate and every later one that opens with the same symbol
            list<list<string>::iterator> group;
            group.push_back(alt);
            list<string>::iterator other = alt;
            for (++other; other != rule->end(); ++other){
                if (symbolLengthAt(*other, 0) == lead && other->compare(0, lead, *alt, 0, lead) == 0)
                    group.push_back(other);
            }
            if (group.size() < 2) continue;

            const string::size_type prefix = sharedPrefixLength(group);
            list<string> factored;
            factored.push_back(freshFactorName(rule->front()));
            for (list<list<string>::iterator>::const_iterator member = group.begin(); member != group.end(); ++member){
                string rest = (*member)->substr(prefix);
                if (rest.empty()) rest = "@";
                bool known = false;//Identical tails are stored once
                list<string>::const_iterator seen = factored.begin();
                for (++seen; seen != factored.end(); ++seen){
                    if (*seen == rest){ known = true; break; }
                }
                if (!known) factored.push_back(rest);
            }

            //A single distinct tail means the group was only duplicates of one candidate:
            //the copies are removed below and no new rule is needed.
            if (factored.size() > 2){
                *alt = alt->substr(0, prefix) + factored.front();
                ListVf.push_back(factored);
            }
            //Erasing list nodes leaves `alt` (the first member) and all other iterators valid
            list<list<string>::iterator>::const_iterator member = group.begin();
            for (++member; member != group.end(); ++member){
                rule->erase(*member);
            }
        }
    }
    cout << "Grammar after extracting the left factor:" << endl;
    show();//Display grammar after extracting the left factor
    cout << "----------------" << endl;
    return 0;
}
//Entry point. Reads the grammar from the file named by the first command-line
//argument, or from "./test.txt" when none is given, eliminates left recursion and
//then extracts left factors; each step prints the grammar it produced.
//Left factoring is skipped when the first step reports a failure, and the
//status of the last step that ran becomes the exit code.
int main(int argc, char *argv[]){
    string filename = "./test.txt";
    if (argc > 1){
        filename = argv[1];
    }
    infile(filename);
    int status = removeLeftRecursion();
    if (status == 0){
        status = removeLeftGene();
    }
    closefile();
    return status;
}

This implementation relies mainly on the C++ list container, whose interface I am still not very familiar with. The C++ standard library will be studied and introduced in detail in a later post.

Posted by phpbaby2009 on Sun, 07 Jul 2019 09:06:41 -0700