From ba7213e1847cc75bc93c5337ab4554298991f1f2 Mon Sep 17 00:00:00 2001 From: Mike Nolan Date: Fri, 23 Jan 2026 05:02:24 -0600 Subject: [PATCH] Implement meta tag --- include/CrossLang.hpp | 14 +- src/compiler/codegen.cpp | 367 +++++++++++++++++++++++++++++++++++++++ src/compiler/parser.cpp | 26 ++- src/vm/filereader.cpp | 186 +++++++++++++++++++- src/vm/vm.cpp | 50 ++++++ 5 files changed, 631 insertions(+), 12 deletions(-) diff --git a/include/CrossLang.hpp b/include/CrossLang.hpp index ee811b7..12fe616 100644 --- a/include/CrossLang.hpp +++ b/include/CrossLang.hpp @@ -871,8 +871,10 @@ class CodeGen { std::vector, std::vector>> chunks; std::vector,uint32_t>> funcs; std::vector classes; - + std::vector> meta; + SyntaxNode OptimizeNode(SyntaxNode n); + void WriteMetadataObject(std::vector& bytes, SyntaxNode n); void GenNode(std::vector& instructions, SyntaxNode n,int32_t scope, int32_t contscope, int32_t brkscope, int32_t contI, int32_t brkI); void GenPop(std::vector& instrs,SyntaxNode n); public: @@ -1264,6 +1266,11 @@ constexpr std::string_view NullCoalescingExpression = "nullCoalescingExpression" * @brief For debugging (store line info and filename) */ constexpr std::string_view LineNode="lineNode"; +/** + * @brief For storing generic metadata + */ +constexpr std::string_view MetadataStatement = "MetadataStatement"; + /** * @brief Advanced AST node * @@ -1532,7 +1539,7 @@ class GC { TObject value; }; - + class TDictionary; class TFile : public THeapObject { public: @@ -1547,6 +1554,7 @@ class GC { std::vector> dependencies; std::vector> tools; std::vector>> sections; + std::vector>> metadata; std::vector> resources; std::vector classes; std::string name; @@ -1564,6 +1572,8 @@ class GC { void Mark(); void EnsureCanRunInCrossLang(); + + TDictionary* DecodeMetadata(GCList& ls, size_t index); }; class TAssociativeArray : public THeapObject { diff --git a/src/compiler/codegen.cpp b/src/compiler/codegen.cpp index 567af5b..efd5efe 100644 --- a/src/compiler/codegen.cpp 
+++ b/src/compiler/codegen.cpp @@ -33,6 +33,350 @@ namespace Tesses::CrossLang } } + /* OptimizeNode: returns a simplified copy of n. For a two-operand AddExpression or SubExpression it optimizes both operands recursively and, when their held alternatives match one of the checked pairs, returns the combined value (some pairs cast one operand to double first). A two-operand CommaExpression is rebuilt from its optimized operands. An empty ScopeNode is renamed to NodeList; a non-empty one has each child optimized in place. Everything else, including operand pairs that match no check, is returned as received. NOTE(review): the template arguments of std::holds_alternative and std::get are absent from this copy of the patch, so which types each branch tests cannot be confirmed here. */SyntaxNode CodeGen::OptimizeNode(SyntaxNode n) + { + if(std::holds_alternative(n)) + { + auto& asn = std::get(n); + if(asn.nodeName == AddExpression && asn.nodes.size() == 2) + { + auto leftNode = OptimizeNode(asn.nodes[0]); + auto rightNode = OptimizeNode(asn.nodes[1]); + + + if(std::holds_alternative(leftNode) && std::holds_alternative(rightNode)) + { + return std::get(leftNode) + std::get(rightNode); + } + if(std::holds_alternative(leftNode) && std::holds_alternative(rightNode)) + { + return std::get(leftNode) + std::get(rightNode); + } + if(std::holds_alternative(leftNode) && std::holds_alternative(rightNode)) + { + return (double)std::get(leftNode) + std::get(rightNode); + } + if(std::holds_alternative(leftNode) && std::holds_alternative(rightNode)) + { + return std::get(leftNode) + (double)std::get(rightNode); + } + if(std::holds_alternative(leftNode) && std::holds_alternative(rightNode)) + { + return std::get(leftNode) + std::get(rightNode); + } + if(std::holds_alternative(leftNode) && std::holds_alternative(rightNode)) + { + return std::get(leftNode) + std::get(rightNode); + } + if(std::holds_alternative(leftNode) && std::holds_alternative(rightNode)) + { + return std::get(leftNode) + std::get(rightNode); + } + } + if(asn.nodeName == SubExpression && asn.nodes.size() == 2) + { + auto leftNode = OptimizeNode(asn.nodes[0]); + auto rightNode = OptimizeNode(asn.nodes[1]); + + + if(std::holds_alternative(leftNode) && std::holds_alternative(rightNode)) + { + return std::get(leftNode) - std::get(rightNode); + } + if(std::holds_alternative(leftNode) && std::holds_alternative(rightNode)) + { + return std::get(leftNode) - std::get(rightNode); + } + if(std::holds_alternative(leftNode) && std::holds_alternative(rightNode)) + { + return (double)std::get(leftNode) - std::get(rightNode); + } + if(std::holds_alternative(leftNode) && std::holds_alternative(rightNode)) + { + return
std::get(leftNode) - (double)std::get(rightNode); + } + } + if(asn.nodeName == CommaExpression && asn.nodes.size() == 2) + { + return AdvancedSyntaxNode::Create(CommaExpression,true, { + OptimizeNode(asn.nodes[0]), + OptimizeNode(asn.nodes[1]) + }); + } + if(asn.nodeName == ScopeNode) + { + if(asn.nodes.empty()) + { + asn.nodeName = NodeList; + return asn; + } + else + { + for(auto& item : asn.nodes) + { + item = OptimizeNode(item); + } + } + } + } + return n; + } + + /* + 0: false, + 1: true, + 2: null, + 3: Long, + 4: Double, + 5: Char + 6: String, + 7: List, + 8: Dictionary, + 9: ByteArray (embed), + 10: Stream (embedstrm), + 11: VFS (embeddir), + 12: ClosureOfEmbedStream (used by embeddir) + */ + + /* WriteMetadataObject: appends the encoding of n to bytes as a one-byte tag from the table above followed by its payload, written with the BitConverter From...BE helpers. Strings are stored as a 4-byte GetString index and embedded files as a 4-byte GetResource index. Lists and dictionaries write a 4-byte item count and then encode their items. A node that matches none of the cases is written as tag 2 (null). */void CodeGen::WriteMetadataObject(std::vector& bytes, SyntaxNode n) + { + if(std::holds_alternative(n)) + { + bytes.push_back(std::get(n) ? 1 : 0); + return; + } + if(std::holds_alternative(n)) + { + bytes.push_back(2); + return; + } + if(std::holds_alternative(n)) + { + auto num = std::get(n); + bytes.push_back(3); + size_t offset = bytes.size(); + bytes.resize(offset+8); + BitConverter::FromUint64BE(bytes[offset],num); + return; + } + if(std::holds_alternative(n)) + { + auto num = std::get(n); + bytes.push_back(4); + size_t offset = bytes.size(); + bytes.resize(offset+8); + BitConverter::FromDoubleBE(bytes[offset],num); + return; + } + if(std::holds_alternative(n)) + { + auto chr = std::get(n); + bytes.push_back(5); + bytes.push_back((uint8_t)chr); + return; + } + if(std::holds_alternative(n)) + { + auto& str = std::get(n); + bytes.push_back(6); + size_t offset = bytes.size(); + bytes.resize(offset+4); + BitConverter::FromUint32BE(bytes[offset], GetString(str)); + return; + } + if(std::holds_alternative(n)) + { + auto& asn = std::get(n); + if(asn.nodeName == ArrayExpression) + { + std::vector itms; + if(asn.nodes.size() > 0) + + GetFunctionArgs(itms,asn.nodes[0]); + bytes.push_back(7); + size_t offset = bytes.size(); + bytes.resize(offset+4); +
BitConverter::FromUint32BE(bytes[offset], (uint32_t)itms.size()); + for(auto& item : itms) + { + WriteMetadataObject(bytes,item); + } + return; + } + if(asn.nodeName == DictionaryExpression) + { + std::vector itms; + if(asn.nodes.size() > 0) + + GetFunctionArgs(itms,asn.nodes[0]); + bytes.push_back(8); + size_t offset = bytes.size(); + bytes.resize(offset+4); + BitConverter::FromUint32BE(bytes[offset], (uint32_t)itms.size()); + /* Each entry is a 4-byte key string index followed by an encoded value. A bare variable name becomes a key with a null value (tag 2); an assignment to a variable name becomes that key followed by the encoded right-hand side; any other node shape is written as key "__unknown" with a null value. NOTE(review): the count written above is itms.size(), but an item that fails the holds_alternative check below emits no bytes - confirm the count and the emitted entries cannot disagree. */for(auto& item : itms) + { + if(std::holds_alternative(item)) + { + auto tkn = std::get(item); + if(tkn.nodeName == GetVariableExpression && !tkn.nodes.empty()) + { + if(std::holds_alternative(tkn.nodes[0])) + { + size_t offset2 = bytes.size(); + bytes.resize(offset2+4); + BitConverter::FromUint32BE(bytes[offset2], GetString(std::get(tkn.nodes[0]))); + bytes.push_back(2); + + } + else + { + + size_t offset2 = bytes.size(); + bytes.resize(offset2+4); + BitConverter::FromUint32BE(bytes[offset2], GetString("__unknown")); + bytes.push_back(2); + + } + } + else if(tkn.nodeName == AssignExpression && tkn.nodes.size()==2 && std::holds_alternative(tkn.nodes[0])) + { + auto myTn = std::get(tkn.nodes[0]); + if(myTn.nodeName == GetVariableExpression && !myTn.nodes.empty()) + { + if(std::holds_alternative(myTn.nodes[0])) + { + size_t offset2 = bytes.size(); + bytes.resize(offset2+4); + BitConverter::FromUint32BE(bytes[offset2], GetString(std::get(myTn.nodes[0]))); + WriteMetadataObject(bytes,tkn.nodes[1]); + + } + else + { + + size_t offset2 = bytes.size(); + bytes.resize(offset2+4); + BitConverter::FromUint32BE(bytes[offset2], GetString("__unknown")); + bytes.push_back(2); + + } + } + else + { + + size_t offset2 = bytes.size(); + bytes.resize(offset2+4); + BitConverter::FromUint32BE(bytes[offset2], GetString("__unknown")); + bytes.push_back(2); + + } + } + else + { + + size_t offset2 = bytes.size(); + bytes.resize(offset2+4); + BitConverter::FromUint32BE(bytes[offset2], GetString("__unknown")); + bytes.push_back(2); + + } + } + } + return; + } +
if(asn.nodeName == EmbedExpression) + { + if(!asn.nodes.empty() && std::holds_alternative(asn.nodes[0])) + { + auto& filename = std::get(asn.nodes[0]); + bytes.push_back(9); + size_t offset = bytes.size(); + bytes.resize(offset+4); + BitConverter::FromUint32BE(bytes[offset], GetResource(std::make_shared(filename))); + return; + } + } + if(asn.nodeName == EmbedStreamExpression) + { + if(!asn.nodes.empty() && std::holds_alternative(asn.nodes[0])) + { + auto& filename = std::get(asn.nodes[0]); + bytes.push_back(10); + size_t offset = bytes.size(); + bytes.resize(offset+4); + BitConverter::FromUint32BE(bytes[offset], GetResource(std::make_shared(filename))); + return; + } + } + if(asn.nodeName == EmbedDirectoryExpression) + { + if(!asn.nodes.empty() && std::holds_alternative(asn.nodes[0])) + { + auto& filename = std::get(asn.nodes[0]); + bytes.push_back(11); + + + /* embedDir writes one directory as a dictionary (tag 8): a 4-byte entry count, then for each entry the 4-byte string index of its file name followed by either a nested directory or tag 12 with a 4-byte resource index. Only paths that embedFS reports as an existing directory or file are collected; a null embedFS or missing directory yields an empty dictionary. */std::function embedDir; + embedDir = [&](Tesses::Framework::Filesystem::VFSPath path)-> void { + bytes.push_back(8); + std::vector> entries; + if(embedFS != nullptr && embedFS->DirectoryExists(path)) + for(auto& item : embedFS->EnumeratePaths(path)) + { + if(embedFS->DirectoryExists(item)) + entries.emplace_back(item,true); + else if(embedFS->FileExists(item)) + entries.emplace_back(item,false); + + + /*GenNode(instructions,item.GetFileName(),scope,contscope,brkscope,contI,brkI); + if(embedFS->DirectoryExists(item)) + { + embedDir(item); + } + else if(embedFS->RegularFileExists(item)) + { + auto ce = AdvancedSyntaxNode::Create(ClosureExpression,true,{ + AdvancedSyntaxNode::Create(ParenthesesExpression,true,{}), + AdvancedSyntaxNode::Create(ReturnStatement,false,{ + AdvancedSyntaxNode::Create(EmbedStreamExpression,true,{item.ToString()}) + }) + }); + GenNode(instructions,ce,scope,contscope,brkscope,contI,brkI); + } + else { + instructions.push_back(new SimpleInstruction(PUSHUNDEFINED)); + } + + instructions.push_back(new SimpleInstruction(APPENDDICT));*/ + } + size_t offset = bytes.size(); +
bytes.resize(offset+4); + BitConverter::FromUint32BE(bytes[offset], (uint32_t)entries.size()); + for(auto& item : entries) + { + offset = bytes.size(); + bytes.resize(offset+4); + BitConverter::FromUint32BE(bytes[offset],GetString(item.first.GetFileName())); + if(item.second) + embedDir(item.first); + else { + bytes.push_back(12); + offset = bytes.size(); + bytes.resize(offset+4); + /* NOTE(review): repeats the resize just above with the same size. */bytes.resize(offset+4); + BitConverter::FromUint32BE(bytes[offset],GetResource(std::make_shared(item.first.ToString()))); + } + } + }; + embedDir(filename); + return; + } + } + } + + /* Fallback: anything not encoded above is written as null. */bytes.push_back(2); + } void CodeGen::Save(std::shared_ptr stream) { @@ -56,6 +400,10 @@ namespace Tesses::CrossLang GetString(tool.first); sections++; } + /* Count one section per metadata entry. */for(auto& meta : this->meta) + { + sections++; + } if(!this->icon.empty()) { this->GetResource(std::make_shared(this->icon)); @@ -246,6 +594,13 @@ namespace Tesses::CrossLang WriteInt(stream,this->GetResource(std::make_shared(this->icon))); } + /* Each metadata entry is written as the 4-byte name "META", its byte length, then its bytes. */for(auto& meta : this->meta) + { + memcpy(buffer,"META", 4); + Write(stream,buffer,4); + WriteInt(stream,(uint32_t)meta.size()); + Write(stream,meta.data(),meta.size()); + } } @@ -1380,6 +1735,17 @@ namespace Tesses::CrossLang instructions.push_back(new EmbedInstruction(GetResource(std::make_shared(filename)))); } + else if(adv.nodeName == MetadataStatement && adv.nodes.size() == 2 && std::holds_alternative(adv.nodes[0])) + { + /* Starts a new entry in this->meta: 4 bytes holding the GetString index of the name, followed by the encoding of the second child node. */auto& name = std::get(adv.nodes[0]); + + + auto& metabytes= this->meta.emplace_back(); + metabytes.resize(4); + BitConverter::FromUint32BE(metabytes[0],GetString(name)); + + WriteMetadataObject(metabytes, adv.nodes[1]); + } else if(adv.nodeName == EmbedStreamExpression && adv.nodes.size() == 1 && std::holds_alternative(adv.nodes[0])) { std::string filename = std::get(adv.nodes[0]); @@ -1446,6 +1812,7 @@ namespace Tesses::CrossLang } else if(adv.nodeName == ScopeNode) { + scope++; instructions.push_back(new SimpleInstruction(SCOPEBEGIN)); for(size_t i = 0; i < adv.nodes.size(); i++) diff
--git a/src/compiler/parser.cpp b/src/compiler/parser.cpp index 057b9d0..6aedb71 100644 --- a/src/compiler/parser.cpp +++ b/src/compiler/parser.cpp @@ -1456,7 +1456,7 @@ namespace Tesses::CrossLang EnsureSymbol("("); SyntaxNode list = ParseExpression(); SyntaxNode body = nullptr; - if(IsSymbol(":")) + if(IsSymbol(":") || IsIdentifier("in")) { item = list; list = ParseExpression(); @@ -1467,6 +1467,11 @@ namespace Tesses::CrossLang { body = ParseNode(); } + if(std::holds_alternative(item)) + item = AdvancedSyntaxNode::Create(DeclareExpression,true, { + AdvancedSyntaxNode::Create(GetVariableExpression,true,{"item"}) + }); + return AdvancedSyntaxNode::Create(EachStatement,false,{item,list,body}); } if(IsIdentifier("class")) @@ -1681,6 +1686,25 @@ namespace Tesses::CrossLang }); } } + /* "meta" followed by a string token parses as: the name string, "{", one expression, "}", and yields a MetadataStatement holding the name and a DictionaryExpression wrapping that expression. When the next token is not a string, a warning is printed and i is decremented. */if(IsIdentifier("meta")) + { + if(i >= tokens.size() || tokens[i].type != LexTokenType::String) + { + std::cout << "WARN: meta is a conditional keyword,\nif you suffix it with a string, it will be assumed to be a metadata tag" << std::endl; + i--; + } + else { + std::string name = tokens[i++].text; + EnsureSymbol("{"); + auto expr = ParseExpression(); + + + EnsureSymbol("}"); + + return AdvancedSyntaxNode::Create(MetadataStatement,false,{name, AdvancedSyntaxNode::Create(DictionaryExpression,true,{expr})}); + } + + } if(IsIdentifier("func")) { diff --git a/src/vm/filereader.cpp b/src/vm/filereader.cpp index dcb76af..38716e5 100644 --- a/src/vm/filereader.cpp +++ b/src/vm/filereader.cpp @@ -118,6 +118,167 @@ namespace Tesses::CrossLang throw VMException(errorMessage); } + /* DecodeMetadata: decodes the payload of metadata entry midx. Returns nullptr when midx is out of range, the payload is empty, its first byte is not 8 (the dictionary tag), or GetObjectHeap does not yield a dictionary from the decoded value. parseEnt reads one tagged value at index and advances index past it; a payload that ends early throws std::out_of_range and an unrecognised tag throws std::runtime_error. */TDictionary* TFile::DecodeMetadata(GCList& ls, size_t midx) + { + if(midx >= this->metadata.size()) return nullptr; + if(this->metadata[midx].second.empty()) return nullptr; + if(this->metadata[midx].second[0] != 8) return nullptr; + size_t index = 0; + auto& bytes = this->metadata[midx].second; + + std::function parseEnt; + parseEnt = [&]()->TObject { + if(index >= bytes.size()) throw std::out_of_range("Abrupt end of
metadata"); + switch(bytes[index++]) + { + case 0: + return false; + case 1: + return true; + case 2: + return nullptr; + case 3: + { + if(index + 8 <= bytes.size()) + { + auto val= BitConverter::ToUint64BE(bytes[index]); + index+=8; + /* The value read above is copied bit-for-bit into an int64_t. */int64_t val2; + memcpy(&val2,&val,sizeof(val)); + return val2; + } + else throw std::out_of_range("Abrupt end of metadata"); + } + break; + case 4: + { + if(index + 8 <= bytes.size()) + { + auto val= BitConverter::ToDoubleBE(bytes[index]); + index+=8; + return val; + } + else throw std::out_of_range("Abrupt end of metadata"); + } + break; + case 5: + + if(index + 1 <= bytes.size()) + { + return (char)bytes[index++]; + } + else throw std::out_of_range("Abrupt end of metadata"); + break; + case 6: + { + if(index + 4 <= bytes.size()) + { + auto val= BitConverter::ToUint32BE(bytes[index]); + index+=4; + return this->strings.at((size_t)val); + } else throw std::out_of_range("Abrupt end of metadata"); + + } + break; + case 7: + { + if(index + 4 <= bytes.size()) + { + auto val= BitConverter::ToUint32BE(bytes[index]); + index+=4; + std::vector items; + for(uint32_t i = 0; i < val; i++) + { + items.push_back(parseEnt()); + } + + return TList::Create(ls,items.begin(),items.end()); + } else throw std::out_of_range("Abrupt end of metadata"); + } + break; + /* Dictionary: a 4-byte entry count, then per entry a 4-byte index into this->strings for the key followed by one encoded value. */case 8: + if(index + 4 <= bytes.size()) + { + auto val= BitConverter::ToUint32BE(bytes[index]); + index+=4; + std::vector items; + + for(uint32_t i = 0; i < val; i++) + { + if(index + 4 <= bytes.size()) + { + auto val2= BitConverter::ToUint32BE(bytes[index]); + index+=4; + std::string& text=this->strings.at((size_t)val2); + items.emplace_back(text,parseEnt()); + } else throw std::out_of_range("Abrupt end of metadata"); + + } + + return TDictionary::Create(ls, items.begin(),items.end()); + }else throw std::out_of_range("Abrupt end of metadata"); + break; + case 9: + { + if(index + 4 <= bytes.size()) + { + auto val= BitConverter::ToUint32BE(bytes[index]); + index+=4; + auto ba =
TByteArray::Create(ls); + ba->data = this->resources.at((size_t)val); + return ba; + } else throw std::out_of_range("Abrupt end of metadata"); + } + break; + case 10: + { + if(index + 4 <= bytes.size()) + { + auto val= BitConverter::ToUint32BE(bytes[index]); + index+=4; + return std::make_shared(ls.GetGC(),this,val); + } else throw std::out_of_range("Abrupt end of metadata"); + } + break; + /* Tag 11 wraps the value that follows it: when that value is a dictionary it is passed to the object built here, otherwise Undefined() is returned. */case 11: + { + auto data = parseEnt(); + TDictionary* dict; + if(GetObjectHeap(data,dict)) + { + return std::make_shared(ls.GetGC(),dict); + } + else return Undefined(); + } + break; + /* Tag 12 yields an external method which, when called, builds an object from this file and the 4-byte index read here; this file is added to the method's watch list. */case 12: + { + if(index + 4 <= bytes.size()) + { + auto val= BitConverter::ToUint32BE(bytes[index]); + index+=4; + ls.GetGC()->BarrierBegin(); + auto em = TExternalMethod::Create(ls,"",{},[val,this](GCList& ls, std::vector args)->TObject { + return std::make_shared(ls.GetGC(),this,val); + }); + em->watch.push_back(this); + ls.GetGC()->BarrierEnd(); + + return em; + } else throw std::out_of_range("Abrupt end of metadata"); + } + break; + default: + throw std::runtime_error("Invalid metadata opcode"); + } + }; + + TDictionary* dict_res; + TObject ent = parseEnt(); + if(GetObjectHeap(ent, dict_res)) return dict_res; + return nullptr; + } + void TFile::Load(GC* gc, std::shared_ptr stream) { @@ -166,10 +327,8 @@ namespace Tesses::CrossLang } else if(strncmp(table_name,"RESO",4) == 0) //resources (using embed) { - std::vector data; - data.resize(tableLen); - Ensure(stream,data.data(), tableLen); - this->resources.push_back(data); + auto& data = this->resources.emplace_back(tableLen); + Ensure(stream,data.data(), data.size()); } else if(strncmp(table_name,"CHKS",4) == 0 && gc != nullptr) //chunks { @@ -268,13 +427,22 @@ namespace Tesses::CrossLang this->classes.push_back(cls); } } + else if(strncmp(table_name,"META",4) == 0) //structured metadata + { + if(tableLen > 4) + { + auto name = this->GetString(stream); + auto& data = this->metadata.emplace_back(name, std::vector(tableLen-4)); +
Ensure(stream,data.second.data(), tableLen-4); + } + else throw VMException("meta tag is not valid"); + + } else { - std::vector data; - data.resize(tableLen); - Ensure(stream,data.data(), tableLen); - std::string key(std::string(table_name), 4); - this->sections.push_back(std::pair>(key,data)); + auto& data = this->sections.emplace_back(std::string(table_name, 4), std::vector(tableLen)); + Ensure(stream,data.second.data(), tableLen); + } } diff --git a/src/vm/vm.cpp b/src/vm/vm.cpp index 86cc3e1..30090de 100644 --- a/src/vm/vm.cpp +++ b/src/vm/vm.cpp @@ -4328,7 +4328,31 @@ namespace Tesses::CrossLang { auto cls = dynamic_cast(obj); auto aArray=dynamic_cast(obj); auto ttask = dynamic_cast(obj); + auto file = dynamic_cast(obj); + if(file != nullptr) + { + if(key == "MetadataDecode") + { + int64_t index; + if(GetArgument(args,0, index) && (size_t)index < file->metadata.size()) + { + cse.back()->Push(gc,file->DecodeMetadata(ls,(size_t)index)); + return false; + } + } + if(key == "MetadataName") + { + int64_t index; + if(GetArgument(args,0, index) && (size_t)index < file->metadata.size()) + { + cse.back()->Push(gc,file->metadata.at((size_t)index).first); + return false; + } + } + cse.back()->Push(gc,Undefined()); + return false; + } if(ttask != nullptr) { if(key == "ContinueWith") @@ -6017,6 +6041,32 @@ namespace Tesses::CrossLang { cse.back()->Push(gc, list); return false; } + else if(key == "MetadataCount") + { + + cse.back()->Push(gc, (int64_t)file->metadata.size()); + return false; + } + else if(key == "Metadata") + { + TList* meta = TList::Create(ls); + gc->BarrierBegin(); + for(size_t i = 0; i < file->metadata.size(); i++) + { + meta->Add( + TDictionary::Create(ls, + { + TDItem("Name", file->metadata[i].first), + TDItem("Index",(int64_t)i) + } + ) + ); + } + gc->BarrierEnd(); + + cse.back()->Push(gc, meta); + return false; + } else if(key == "Sections") { TList* sections = TList::Create(ls);