mirror of
https://onedev.site.tesses.net/crosslang
synced 2026-04-17 14:07:03 +00:00
695 lines
22 KiB
C++
695 lines
22 KiB
C++
#include "CrossLang.hpp"
|
|
#include <iostream>
|
|
#include <sstream>
|
|
namespace Tesses::CrossLang
|
|
{
|
|
std::string EscapeString(std::string text,bool quote)
|
|
{
|
|
std::string str={};
|
|
if(quote) str.push_back('\"');
|
|
for(auto item : text)
|
|
{
|
|
if(item == '\\' || item == '\"' || item == '\'')
|
|
{
|
|
str.push_back('\\');
|
|
str.push_back(item);
|
|
}
|
|
else if(item == '\n')
|
|
{
|
|
str.push_back('\\');
|
|
str.push_back('n');
|
|
}
|
|
else if(item == '\r')
|
|
{
|
|
str.push_back('\\');
|
|
str.push_back('r');
|
|
}
|
|
else if(item == '\t')
|
|
{
|
|
str.push_back('\\');
|
|
str.push_back('t');
|
|
}
|
|
else if(item == '\f')
|
|
{
|
|
str.push_back('\\');
|
|
str.push_back('f');
|
|
}
|
|
else if(item == '\0')
|
|
{
|
|
str.push_back('\\');
|
|
str.push_back('0');
|
|
}
|
|
else if(item == '\b')
|
|
{
|
|
str.push_back('\\');
|
|
str.push_back('b');
|
|
}
|
|
else if(item == '\a')
|
|
{
|
|
str.push_back('\\');
|
|
str.push_back('a');
|
|
}
|
|
else if(item == '\v')
|
|
{
|
|
str.push_back('\\');
|
|
str.push_back('v');
|
|
}
|
|
else if(item == '\x1B')
|
|
{
|
|
str.push_back('\\');
|
|
str.push_back('e');
|
|
}
|
|
else if((uint8_t)item < 32 || (uint8_t)item > 126)
|
|
{
|
|
str.append("\\x");
|
|
str.push_back(Tesses::Framework::Http::HttpUtils::NibbleToHex(((uint8_t)item >> 4)&0x0F));
|
|
str.push_back(Tesses::Framework::Http::HttpUtils::NibbleToHex((uint8_t)item & 0x0F));
|
|
}
|
|
else
|
|
{
|
|
str.push_back(item);
|
|
}
|
|
}
|
|
|
|
if(quote) str.push_back('\"');
|
|
return str;
|
|
}
|
|
|
|
void LexTokenLineInfo::Subtract(size_t len)
|
|
{
|
|
this->offset -= len;
|
|
this->column -= len;
|
|
}
|
|
void LexTokenLineInfo::Add(int c)
|
|
{
|
|
this->offset++;
|
|
switch(c)
|
|
{
|
|
case ' ':
|
|
this->column++;
|
|
break;
|
|
case '\n':
|
|
this->column=1;
|
|
this->line++;
|
|
break;
|
|
case '\t':
|
|
this->column += 4;
|
|
break;
|
|
case '\r':
|
|
this->column++;
|
|
break;
|
|
default:
|
|
this->column++;
|
|
break;
|
|
}
|
|
}
|
|
|
|
int Lex(std::string filename, std::istream& strm, std::vector<LexToken>& tokens)
|
|
{
|
|
int _peeked=-1;
|
|
|
|
auto Read = [&_peeked,&strm]()->int {
|
|
if(_peeked > -1)
|
|
{
|
|
int _peek2 = _peeked;
|
|
_peeked=-1;
|
|
return _peek2;
|
|
}
|
|
uint8_t b;
|
|
|
|
strm.read((char*)&b,1);
|
|
if(strm.eof()) return -1;
|
|
return b;
|
|
};
|
|
|
|
auto Peek = [&_peeked,Read]()->int {
|
|
if(_peeked > -1) return _peeked;
|
|
_peeked = Read();
|
|
return _peeked;
|
|
};
|
|
|
|
int read;
|
|
int peek;
|
|
|
|
std::string buffer={};
|
|
|
|
LexTokenLineInfo lineInfo;
|
|
|
|
lineInfo.filename = filename;
|
|
lineInfo.column = 1;
|
|
lineInfo.line = 1;
|
|
lineInfo.offset = 0;
|
|
|
|
std::string whiteSpaceCharsBefore="";
|
|
|
|
auto Flush = [&buffer,&tokens,&lineInfo,&whiteSpaceCharsBefore]() -> void {
|
|
if(!buffer.empty())
|
|
{
|
|
LexToken token;
|
|
token.text = buffer;
|
|
token.whiteSpaceCharsBefore = whiteSpaceCharsBefore;
|
|
token.type = LexTokenType::Identifier;
|
|
token.lineInfo = lineInfo;
|
|
token.lineInfo.Subtract(buffer.size());
|
|
tokens.push_back(token);
|
|
buffer.clear();
|
|
|
|
whiteSpaceCharsBefore="";
|
|
}
|
|
};
|
|
|
|
auto Symbol = [&tokens,&lineInfo,&whiteSpaceCharsBefore](std::initializer_list<int> chrs)-> void {
|
|
LexToken token;
|
|
|
|
token.type = LexTokenType::Symbol;
|
|
token.lineInfo = lineInfo;
|
|
|
|
token.whiteSpaceCharsBefore=whiteSpaceCharsBefore;
|
|
|
|
token.text.reserve(chrs.size());
|
|
|
|
for(auto i : chrs)
|
|
token.text.push_back((char)i);
|
|
|
|
tokens.push_back(token);
|
|
|
|
whiteSpaceCharsBefore="";
|
|
};
|
|
|
|
auto ReadChr = [&lineInfo, &strm, Read]() -> std::pair<int,bool> {
|
|
int read=Read();
|
|
lineInfo.Add(read);
|
|
|
|
if(read == -1)
|
|
{
|
|
|
|
return std::pair<int,bool>(-1,false);
|
|
}
|
|
|
|
|
|
|
|
if(read == '\\')
|
|
{
|
|
read = Read();
|
|
lineInfo.Add(read);
|
|
if(read == -1)
|
|
{
|
|
return std::pair<int,bool>(-1,true);
|
|
}
|
|
else if(read == 'n')
|
|
{
|
|
return std::pair<int,bool>('\n',true);
|
|
}
|
|
else if(read == 'r')
|
|
{
|
|
return std::pair<int,bool>('\r',true);
|
|
}
|
|
else if(read == 'f')
|
|
{
|
|
return std::pair<int,bool>('\f',true);
|
|
}
|
|
else if(read == 'b')
|
|
{
|
|
return std::pair<int,bool>('\b',true);
|
|
}
|
|
else if(read == 'a')
|
|
{
|
|
return std::pair<int,bool>('\a',true);
|
|
}
|
|
else if(read == '0')
|
|
{
|
|
return std::pair<int,bool>('\0',true);
|
|
}
|
|
else if(read == 'v')
|
|
{
|
|
return std::pair<int,bool>('\v',true);
|
|
}
|
|
else if(read == 'e')
|
|
{
|
|
return std::pair<int,bool>('\x1B',true);
|
|
}
|
|
else if(read == 't')
|
|
{
|
|
return std::pair<int,bool>('\t',true);
|
|
}
|
|
else if(read == 'x')
|
|
{
|
|
int r1 = Read();
|
|
lineInfo.Add(r1);
|
|
if(r1 == -1)
|
|
{
|
|
return std::pair<int,bool>(-1,true);
|
|
}
|
|
int r2 = Read();
|
|
lineInfo.Add(r2);
|
|
if(r2 == -1)
|
|
{
|
|
return std::pair<int,bool>(-1,true);
|
|
}
|
|
|
|
uint8_t c = (uint8_t)std::stoi(std::string{(char)r1,(char)r2},nullptr,16);
|
|
|
|
return std::pair<int,bool>(c,true);
|
|
}
|
|
else
|
|
{
|
|
return std::pair<int,bool>(read,true);
|
|
}
|
|
}
|
|
else
|
|
{
|
|
return std::pair<int,bool>(read,false);
|
|
}
|
|
};
|
|
|
|
|
|
auto ParseString = [&lineInfo, &strm, Read, ReadChr,&tokens](bool interopolated)->int {
|
|
auto lI = lineInfo;
|
|
|
|
std::string b={};
|
|
|
|
|
|
auto rChr = ReadChr();
|
|
lineInfo.Add(rChr.first);
|
|
|
|
while(rChr.first != '\"' || rChr.second)
|
|
{
|
|
if(rChr.first == -1) return lineInfo.line;
|
|
|
|
b.push_back((char)rChr.first);
|
|
rChr = ReadChr();
|
|
lineInfo.Add(rChr.first);
|
|
}
|
|
|
|
if(interopolated)
|
|
{
|
|
int e = 0;
|
|
int escapeI = 0;
|
|
std::string b2 = {};
|
|
|
|
for(size_t i = 0; i< b.size();i++)
|
|
{
|
|
if(b[i] == '{')
|
|
{
|
|
if((i+1 < b.size() && b[i+1] != '{') || escapeI >= 1)
|
|
{
|
|
if(b2.size() > 0 && escapeI < 1)
|
|
{
|
|
if(e > 0)
|
|
{
|
|
LexToken _tkn;
|
|
_tkn.type = LexTokenType::Symbol;
|
|
_tkn.text = "+";
|
|
_tkn.lineInfo = lI;
|
|
tokens.push_back(_tkn);
|
|
}
|
|
LexToken _tkn2;
|
|
_tkn2.type = LexTokenType::String;
|
|
_tkn2.text = b2;
|
|
_tkn2.lineInfo = lI;
|
|
tokens.push_back(_tkn2);
|
|
b2.clear();
|
|
e++;
|
|
|
|
}
|
|
escapeI++;
|
|
if(escapeI > 1)
|
|
{
|
|
b2.push_back('{');
|
|
}
|
|
}
|
|
else
|
|
{
|
|
b2.push_back('{');
|
|
i++;
|
|
}
|
|
}
|
|
else if(b[i] == '}')
|
|
{
|
|
if(escapeI >= 1)
|
|
{
|
|
escapeI--;
|
|
if(b2.size() > 0 && escapeI == 0)
|
|
{
|
|
if(e > 0)
|
|
{
|
|
LexToken _tkn;
|
|
_tkn.type = LexTokenType::Symbol;
|
|
_tkn.text = "+";
|
|
_tkn.lineInfo = lI;
|
|
tokens.push_back(_tkn);
|
|
}
|
|
LexToken _tkn2;
|
|
_tkn2.type = LexTokenType::Symbol;
|
|
_tkn2.text = "(";
|
|
_tkn2.lineInfo = lI;
|
|
tokens.push_back(_tkn2);
|
|
std::stringstream strm2(b2,std::ios_base::in | std::ios_base::binary);
|
|
int res = Lex("lexGen", strm2, tokens);
|
|
if(res != 0) return res;
|
|
|
|
_tkn2.text = ")";
|
|
tokens.push_back(_tkn2);
|
|
|
|
_tkn2.text = ".";
|
|
tokens.push_back(_tkn2);
|
|
_tkn2.type = LexTokenType::Identifier;
|
|
_tkn2.text = "ToString";
|
|
tokens.push_back(_tkn2);
|
|
_tkn2.type = LexTokenType::Symbol;
|
|
_tkn2.text = "(";
|
|
tokens.push_back(_tkn2);
|
|
_tkn2.text = ")";
|
|
tokens.push_back(_tkn2);
|
|
b2.clear();
|
|
e++;
|
|
}
|
|
if(escapeI >= 1)
|
|
{
|
|
b2.push_back('}');
|
|
}
|
|
}
|
|
} else {
|
|
b2.push_back(b[i]);
|
|
}
|
|
}
|
|
if(b2.size() > 0)
|
|
{
|
|
if(escapeI > 0)
|
|
{
|
|
if(e > 0)
|
|
{
|
|
LexToken _tkn;
|
|
_tkn.type = LexTokenType::Symbol;
|
|
_tkn.text = "+";
|
|
_tkn.lineInfo = lI;
|
|
tokens.push_back(_tkn);
|
|
}
|
|
LexToken _tkn2;
|
|
_tkn2.type = LexTokenType::Symbol;
|
|
_tkn2.text = "(";
|
|
_tkn2.lineInfo = lI;
|
|
tokens.push_back(_tkn2);
|
|
std::stringstream strm2(b2,std::ios_base::in | std::ios_base::binary);
|
|
int res = Lex("lexGen", strm2, tokens);
|
|
if(res != 0) return res;
|
|
|
|
_tkn2.text = ")";
|
|
tokens.push_back(_tkn2);
|
|
|
|
_tkn2.text = ".";
|
|
tokens.push_back(_tkn2);
|
|
_tkn2.type = LexTokenType::Identifier;
|
|
_tkn2.text = "ToString";
|
|
tokens.push_back(_tkn2);
|
|
_tkn2.type = LexTokenType::Symbol;
|
|
_tkn2.text = "(";
|
|
tokens.push_back(_tkn2);
|
|
_tkn2.text = ")";
|
|
tokens.push_back(_tkn2);
|
|
b2.clear();
|
|
e++;
|
|
|
|
}
|
|
else
|
|
{
|
|
if(e > 0)
|
|
{
|
|
LexToken _tkn;
|
|
_tkn.type = LexTokenType::Symbol;
|
|
_tkn.text = "+";
|
|
_tkn.lineInfo = lI;
|
|
tokens.push_back(_tkn);
|
|
}
|
|
LexToken _tkn2;
|
|
_tkn2.type = LexTokenType::String;
|
|
_tkn2.text = b2;
|
|
_tkn2.lineInfo = lI;
|
|
tokens.push_back(_tkn2);
|
|
b2.clear();
|
|
e++;
|
|
}
|
|
}
|
|
} else {
|
|
LexToken _tkn2;
|
|
_tkn2.type = LexTokenType::String;
|
|
_tkn2.text = b;
|
|
_tkn2.lineInfo = lI;
|
|
tokens.push_back(_tkn2);
|
|
|
|
}
|
|
return 0;
|
|
};
|
|
|
|
while((read = Read()) != -1)
|
|
{
|
|
|
|
peek = Peek();
|
|
|
|
|
|
switch(read)
|
|
{
|
|
case '$':
|
|
if(peek == '\"')
|
|
{
|
|
Flush();
|
|
lineInfo.Add(Read());
|
|
int re = ParseString(true);
|
|
if(re != 0) return re;
|
|
}
|
|
else
|
|
{
|
|
buffer.push_back('$');
|
|
}
|
|
break;
|
|
case '\"':
|
|
{
|
|
Flush();
|
|
int re = ParseString(false);
|
|
if(re != 0) return re;
|
|
}
|
|
break;
|
|
case '\'':
|
|
{
|
|
Flush();
|
|
auto res = ReadChr();
|
|
if(res.first == -1) return lineInfo.line;
|
|
int r = Read();
|
|
lineInfo.Add(r);
|
|
if(r != '\'')
|
|
return lineInfo.line;
|
|
LexToken token;
|
|
token.text = {(char)(uint8_t)res.first};
|
|
token.lineInfo = lineInfo;
|
|
token.type = LexTokenType::Char;
|
|
|
|
tokens.push_back(token);
|
|
}
|
|
break;
|
|
case '#':
|
|
Flush();
|
|
while(true)
|
|
{
|
|
int r = Read();
|
|
lineInfo.Add(r);
|
|
if(r == '\n' || r == -1) break;
|
|
}
|
|
break;
|
|
case '/':
|
|
if(peek == '/')
|
|
{
|
|
Flush();
|
|
while(true)
|
|
{
|
|
int r = Read();
|
|
lineInfo.Add(r);
|
|
if(r == '\n' || r == -1) break;
|
|
}
|
|
}
|
|
else if(peek == '*')
|
|
{
|
|
Flush();
|
|
while(true)
|
|
{
|
|
int r = Read();
|
|
lineInfo.Add(r);
|
|
if(r == -1)
|
|
{
|
|
return lineInfo.line;
|
|
}
|
|
if(r == '*')
|
|
{
|
|
r = Read();
|
|
lineInfo.Add(r);
|
|
if(r == -1)
|
|
return lineInfo.line;
|
|
if(r == '/')
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
else if(peek == '^')
|
|
{
|
|
Flush();
|
|
lineInfo.Add(Read());
|
|
std::string str={};
|
|
while(true)
|
|
{
|
|
int r = Read();
|
|
lineInfo.Add(r);
|
|
if(r == -1)
|
|
{
|
|
return lineInfo.line;
|
|
}
|
|
if(r == '^')
|
|
{
|
|
r = Read();
|
|
lineInfo.Add(r);
|
|
if(r == -1)
|
|
return lineInfo.line;
|
|
if(r == '^')
|
|
{
|
|
str.push_back('^');
|
|
continue;
|
|
}
|
|
if(r == '/')
|
|
break;
|
|
str.push_back('^');
|
|
}
|
|
str.push_back((char)r);
|
|
}
|
|
LexToken token;
|
|
|
|
token.type = LexTokenType::Documentation;
|
|
token.lineInfo = lineInfo;
|
|
token.text = str;
|
|
tokens.push_back(token);
|
|
}
|
|
else if(peek == '=')
|
|
{
|
|
Flush();
|
|
lineInfo.Add(Read());
|
|
Symbol({read,peek});
|
|
}
|
|
else
|
|
{
|
|
Flush();
|
|
Symbol({read});
|
|
}
|
|
break;
|
|
case '<':
|
|
case '>':
|
|
case '?':
|
|
if(peek == read)
|
|
{
|
|
Flush();
|
|
lineInfo.Add(Read());
|
|
int peek2=Peek();
|
|
if(peek2 == '=')
|
|
{
|
|
lineInfo.Add(Read());
|
|
Symbol({read,peek,peek2});
|
|
}
|
|
else
|
|
{
|
|
Symbol({read,peek});
|
|
}
|
|
}
|
|
else if(peek == '=')
|
|
{
|
|
Flush();
|
|
lineInfo.Add(Read());
|
|
Symbol({read,peek});
|
|
}
|
|
else
|
|
{
|
|
Flush();
|
|
Symbol({read});
|
|
}
|
|
break;
|
|
case '+':
|
|
case '-':
|
|
case '|':
|
|
case '&':
|
|
if(peek == '=' || peek == read)
|
|
{
|
|
Flush();
|
|
lineInfo.Add(Read());
|
|
Symbol({read,peek});
|
|
}
|
|
else
|
|
{
|
|
Flush();
|
|
Symbol({read});
|
|
}
|
|
break;
|
|
case '=':
|
|
if(peek == '>')
|
|
{
|
|
Flush();
|
|
lineInfo.Add(Read());
|
|
Symbol({read,peek});
|
|
}
|
|
else if(peek == '=')
|
|
{
|
|
Flush();
|
|
lineInfo.Add(Read());
|
|
Symbol({read,peek});
|
|
}
|
|
else
|
|
{
|
|
Flush();
|
|
Symbol({read});
|
|
}
|
|
break;
|
|
case '^':
|
|
case '~':
|
|
case '!':
|
|
case '*':
|
|
case '%':
|
|
//*
|
|
//*=
|
|
if(peek == '=')
|
|
{
|
|
Flush();
|
|
lineInfo.Add(Read());
|
|
Symbol({read,peek});
|
|
}
|
|
else
|
|
{
|
|
Flush();
|
|
Symbol({read});
|
|
}
|
|
break;
|
|
case '(':
|
|
case ')':
|
|
case '[':
|
|
case ']':
|
|
case '{':
|
|
case '}':
|
|
case '.':
|
|
case ':':
|
|
case ';':
|
|
case ',':
|
|
Flush();
|
|
Symbol({read});
|
|
break;
|
|
case '\n':
|
|
case '\t':
|
|
case '\r':
|
|
case ' ':
|
|
Flush();
|
|
whiteSpaceCharsBefore += read;
|
|
|
|
break;
|
|
default:
|
|
buffer.push_back((char)read);
|
|
break;
|
|
}
|
|
|
|
lineInfo.Add(read);
|
|
}
|
|
Flush();
|
|
return 0;
|
|
}
|
|
} |