Mercurial > hg > orthanc-stone
view Resources/CodeGeneration/stonegentool.py @ 498:6d62fc8a6988 bgo-commands-codegen
Web demonstrator for codegen ongoing work
author | bgo-osimis |
---|---|
date | Sun, 24 Feb 2019 13:23:14 +0100 |
parents | 8b6ceae45ba0 |
children | ce49eae4c887 |
line wrap: on
line source
import json import yaml import re import os import sys from jinja2 import Template from io import StringIO import time """ 1 2 3 4 5 6 7 12345678901234567890123456789012345678901234567890123456789012345678901234567890 """ # see https://stackoverflow.com/a/2504457/2927708 def trim(docstring): if not docstring: return '' # Convert tabs to spaces (following the normal Python rules) # and split into a list of lines: lines = docstring.expandtabs().splitlines() # Determine minimum indentation (first line doesn't count): indent = sys.maxsize for line in lines[1:]: stripped = line.lstrip() if stripped: indent = min(indent, len(line) - len(stripped)) # Remove indentation (first line is special): trimmed = [lines[0].strip()] if indent < sys.maxsize: for line in lines[1:]: trimmed.append(line[indent:].rstrip()) # Strip off trailing and leading blank lines: while trimmed and not trimmed[-1]: trimmed.pop() while trimmed and not trimmed[0]: trimmed.pop(0) # Return a single string: return '\n'.join(trimmed) class GenCode: def __init__(self): # file-wide preamble (#include directives, comment...) self.cppPreamble = StringIO() self.cppEnums = StringIO() self.cppStructs = StringIO() self.cppDispatcher = StringIO() self.cppHandler = StringIO() # file-wide preamble (module directives, comment...) self.tsPreamble = StringIO() self.tsEnums = StringIO() self.tsStructs = StringIO() self.tsDispatcher = StringIO() self.tsHandler = StringIO() def FlattenToFiles(self, outputDir): raise NotImplementedError() class JsonHelpers: """A set of utilities to perform JSON operations""" @staticmethod def removeCommentsFromJsonContent(string): """ Remove comments from a JSON file Comments are not allowed in JSON but, i.e., Orthanc configuration files contains C++ like comments that we need to remove before python can parse the file """ # remove all occurrence streamed comments (/*COMMENT */) from string string = re.sub(re.compile("/\*.*?\*/", re.DOTALL), "", string) # remove all occurrence singleline comments (//COMMENT\n ) from string string = re.sub(re.compile("//.*?\n"), "", string) return string @staticmethod def loadJsonWithComments(path): """ Reads a JSON file that may contain C++ like comments """ with open(path, "r") as fp: fileContent = fp.read() fileContent = JsonHelpers.removeCommentsFromJsonContent(fileContent) return json.loads(fileContent) def LoadSchemaFromJson(filePath): return JsonHelpers.loadJsonWithComments(filePath) def CanonToCpp(canonicalTypename): # C++: prefix map vector and string with std::map, std::vector and # std::string # replace int32 by int32_t # replace float32 by float # replace float64 by double retVal = canonicalTypename retVal = retVal.replace("map", "std::map") retVal = retVal.replace("vector", "std::vector") retVal = retVal.replace("string", "std::string") retVal = retVal.replace("int32", "int32_t") retVal = retVal.replace("float32", "float") retVal = retVal.replace("float64", "double") return retVal def CanonToTs(canonicalTypename): # TS: replace vector with Array and map with Map # string remains string # replace int32 by number # replace float32 by number # replace float64 by number retVal = canonicalTypename retVal = retVal.replace("map", "Map") retVal = retVal.replace("vector", "Array") retVal = retVal.replace("int32", "number") retVal = retVal.replace("float32", "number") retVal = retVal.replace("float64", "number") retVal = retVal.replace("bool", "boolean") return retVal def NeedsTsConstruction(enums, tsType): if tsType == 'boolean': return False elif tsType == 'number': return False elif tsType == 'string': return False else: enumNames = [] for enum in enums: enumNames.append(enum['name']) if tsType in enumNames: return False return True def NeedsCppConstruction(canonTypename): return False def RegisterTemplateFunction(template,func): """Makes a function callable by a jinja2 template""" template.globals[func.__name__] = func return func def MakeTemplate(templateStr): template = Template(templateStr) RegisterTemplateFunction(template,CanonToCpp) RegisterTemplateFunction(template,CanonToTs) RegisterTemplateFunction(template,NeedsTsConstruction) RegisterTemplateFunction(template,NeedsCppConstruction) return template def MakeTemplateFromFile(templateFileName): templateFile = open(templateFileName, "r") templateFileContents = templateFile.read() return MakeTemplate(templateFileContents) templateFile.close() def EatToken(sentence): """splits "A,B,C" into "A" and "B,C" where A, B and C are type names (including templates) like "int32", "TotoTutu", or "map<map<int32,vector<string>>,map<string,int32>>" """ if sentence.count("<") != sentence.count(">"): raise Exception( "Error in the partial template type list " + str(sentence) + "." + " The number of < and > do not match!" ) # the template level we're currently in templateLevel = 0 for i in range(len(sentence)): if (sentence[i] == ",") and (templateLevel == 0): return (sentence[0:i], sentence[i + 1 :]) elif sentence[i] == "<": templateLevel += 1 elif sentence[i] == ">": templateLevel -= 1 return (sentence, "") def SplitListOfTypes(typename): """Splits something like vector<string>,int32,map<string,map<string,int32>> in: - vector<string> - int32 map<string,map<string,int32>> This is not possible with a regex so """ stillStuffToEat = True tokenList = [] restOfString = typename while stillStuffToEat: firstToken, restOfString = EatToken(restOfString) tokenList.append(firstToken) if restOfString == "": stillStuffToEat = False return tokenList templateRegex = \ re.compile(r"([a-zA-Z0-9_]*[a-zA-Z0-9_]*)<([a-zA-Z0-9_,:<>]+)>") def ParseTemplateType(typename): """ If the type is a template like "SOMETHING<SOME<THING,EL<SE>>>", then it returns (true,"SOMETHING","SOME<THING,EL<SE>>") otherwise it returns (false,"","")""" # let's remove all whitespace from the type # split without argument uses any whitespace string as separator # (space, tab, newline, return or formfeed) typename = "".join(typename.split()) matches = templateRegex.match(typename) if matches == None: return (False, "", []) else: m = matches assert len(m.groups()) == 2 # we need to split with the commas that are outside of the # defined types. Simply splitting at commas won't work listOfDependentTypes = SplitListOfTypes(m.group(2)) return (True, m.group(1), listOfDependentTypes) def ComputeOrderFromTypeTree( ancestors, genOrder, shortTypename, schema): if shortTypename in ancestors: raise Exception( "Cyclic dependency chain found: the last of " + str(ancestors) + + " depends on " + str(shortTypename) + " that is already in the list." ) if not (shortTypename in genOrder): (isTemplate, _, dependentTypenames) = ParseTemplateType(shortTypename) if isTemplate: # if it is a template, it HAS dependent types... They can be # anything (primitive, collection, enum, structs..). # Let's process them! for dependentTypename in dependentTypenames: # childAncestors = ancestors.copy() NO TEMPLATE ANCESTOR!!! # childAncestors.append(typename) ComputeOrderFromTypeTree( ancestors, genOrder, dependentTypename, schema ) else: # If it is not template, we are only interested if it is a # dependency that we must take into account in the dep graph, # i.e., a struct. if IsShortStructType(shortTypename, schema): struct = schema[GetLongTypename(shortTypename, schema)] # The keys in the struct dict are the member names # The values in the struct dict are the member types for field in struct.keys(): # we fill the chain of dependent types (starting here) ancestors.append(shortTypename) ComputeOrderFromTypeTree( ancestors, genOrder, struct[field], schema) # don't forget to restore it! ancestors.pop() # now we're pretty sure our dependencies have been processed, # we can start marking our code for generation (it might # already have been done if someone referenced us earlier) if not shortTypename in genOrder: genOrder.append(shortTypename) # +-----------------------+ # | Utility functions | # +-----------------------+ def IsShortStructType(typename, schema): fullStructName = "struct " + typename return (fullStructName in schema) def GetLongTypename(shortTypename, schema): if shortTypename.startswith("enum "): raise RuntimeError('shortTypename.startswith("enum "):') enumName = "enum " + shortTypename isEnum = enumName in schema if shortTypename.startswith("struct "): raise RuntimeError('shortTypename.startswith("struct "):') structName = "struct " + shortTypename isStruct = ("struct " + shortTypename) in schema if isEnum and isStruct: raise RuntimeError('Enums and structs cannot have the same name') if isEnum: return enumName if isStruct: return structName def IsTypename(fullName): return (fullName.startswith("enum ") or fullName.startswith("struct ")) def IsEnumType(fullName): return fullName.startswith("enum ") def IsStructType(fullName): return fullName.startswith("struct ") def GetShortTypename(fullTypename): if fullTypename.startswith("struct "): return fullTypename[7:] elif fullTypename.startswith("enum"): return fullTypename[5:] else: raise RuntimeError \ ('fullTypename should start with either "struct " or "enum "') def CheckSchemaSchema(schema): if not "rootName" in schema: raise Exception("schema lacks the 'rootName' key") for name in schema.keys(): if (not IsEnumType(name)) and (not IsStructType(name)) and \ (name != 'rootName'): raise RuntimeError \ ('Type "' + str(name) + '" should start with "enum " or "struct "') # TODO: check enum fields are unique (in whole namespace) # TODO: check struct fields are unique (in each struct) # TODO: check that in the source schema, there are spaces after each colon # +-----------------------+ # | Main processing logic | # +-----------------------+ def ComputeRequiredDeclarationOrder(schema): # sanity check CheckSchemaSchema(schema) # we traverse the type dependency graph and we fill a queue with # the required struct types, in a bottom-up fashion, to compute # the declaration order # The genOrder list contains the struct full names in the order # where they must be defined. # We do not care about the enums here... They do not depend upon # anything and we'll handle them, in their original declaration # order, at the start genOrder = [] for fullName in schema.keys(): if IsStructType(fullName): realName = GetShortTypename(fullName) ancestors = [] ComputeOrderFromTypeTree(ancestors, genOrder, realName, schema) return genOrder def ProcessSchema(schema, genOrder): # sanity check CheckSchemaSchema(schema) # let's doctor the schema to clean it up a bit # order DOES NOT matter for enums, even though it's a list enums = [] for fullName in schema.keys(): if IsEnumType(fullName): # convert "enum Toto" to "Toto" typename = GetShortTypename(fullName) enum = {} enum['name'] = typename assert(type(schema[fullName]) == list) enum['fields'] = schema[fullName] # must be a list enums.append(enum) # now that the order has been established, we actually store\ # the structs in the correct order # the structs are like: # example = [ # { # "name": "Message1", # "fields": { # "someMember":"int32", # "someOtherMember":"vector<string>" # } # }, # { # "name": "Message2", # "fields": { # "someMember":"int32", # "someOtherMember22":"vector<Message1>" # } # } # ] structs = [] for i in range(len(genOrder)): # this is already the short name typename = genOrder[i] fieldDict = schema["struct " + typename] struct = {} struct['name'] = typename struct['fields'] = fieldDict structs.append(struct) templatingDict = {} templatingDict['enums'] = enums templatingDict['structs'] = structs templatingDict['rootName'] = schema['rootName'] return templatingDict # +-----------------------+ # | Write to files | # +-----------------------+ # def WriteStreamsToFiles(rootName: str, genc: Dict[str, StringIO]) \ # -> None: # pass def LoadSchema(fn): # latin-1 is a trick, when we do NOT care about NON-ascii chars but # we wish to avoid using a decoding error handler # (see http://python-notes.curiousefficiency.org/en/latest/python3/text_file_processing.html#files-in-an-ascii-compatible-encoding-best-effort-is-acceptable) # TL;DR: all 256 values are mapped to characters in latin-1 so the file # contents never cause an error. with open(fn, 'r', encoding='latin-1') as f: schemaText = f.read() assert(type(schemaText) == str) # ensure there is a space after each colon. Otherwise, dicts could be # erroneously recognized as an array of strings containing ':' for i in range(len(schemaText)-1): ch = schemaText[i] nextCh = schemaText[i+1] if ch == ':': if not (nextCh == ' ' or nextCh == '\n'): assert(False) schema = yaml.load(schemaText) return schema def GetTemplatingDictFromSchemaFilename(fn): obj = LoadSchema(fn) genOrder = ComputeRequiredDeclarationOrder(obj) templatingDict = ProcessSchema(obj, genOrder) return templatingDict # +-----------------------+ # | ENTRY POINT | # +-----------------------+ if __name__ == "__main__": import argparse parser = argparse.ArgumentParser( usage="""stonegentool.py [-h] [-o OUT_DIR] [-v] input_schema EXAMPLE: python stonegentool.py -o "generated_files/" """ + """ "mainSchema.yaml,App Specific Commands.json" """ ) parser.add_argument("input_schema", type=str, \ help="path to the schema file") parser.add_argument( "-o", "--out_dir", type=str, default=".", help="""path of the directory where the files will be generated. Default is current working folder""", ) parser.add_argument( "-v", "--verbosity", action="count", default=0, help="""increase output verbosity (0 == errors only, 1 == some verbosity, 2 == nerd mode""", ) args = parser.parse_args() schemaFile = args.input_schema outDir = args.out_dir tdico = GetTemplatingDictFromSchemaFilename(schemaFile) tsTemplateFile = \ os.path.join(os.path.dirname(__file__), 'template.in.ts') template = MakeTemplateFromFile(tsTemplateFile) renderedTsCode = template.render(**tdico) outputTsFile = os.path.join( \ outDir,str(tdico['rootName']) + "_generated.ts") with open(outputTsFile,"wt",encoding='utf8') as outFile: outFile.write(renderedTsCode) cppTemplateFile = \ os.path.join(os.path.dirname(__file__), 'template.in.h') template = MakeTemplateFromFile(cppTemplateFile) renderedCppCode = template.render(**tdico) outputCppFile = os.path.join( \ outDir, str(tdico['rootName']) + "_generated.hpp") with open(outputCppFile,"wt",encoding='utf8') as outFile: outFile.write(renderedCppCode) # def GenEnumDecl(genc: GenCode, fullName: str, schema: Dict) -> None: # """Writes the enumerations in genc""" # enumDict:Dict=schema[fullName] # # jinja2 template # j2cppEnum = Template(trim( # """ {{fullName}} # { # {% for key in enumDict.keys()%} # {{key}}, # {%endfor%} # }; # """)) # j2cppEnumR = j2cppEnum.render(locals()) # genc.cppEnums.write(j2cppEnumR) # j2tsEnum = Template(trim( # """ export {{fullName}} # { # {% for key in enumDict.keys()%} # {{key}}, # {%endfor%} # }; # """)) # j2cppEnumR = j2cppEnum.render(locals()) # genc.tsEnums.write(j2cppEnumR) # def GetSerializationCode(typename: str,valueName: str, tempName: str) # if IsPrimitiveType(typename) or IsTemplateCollection(typename): # # no need to write code for the primitive types or collections. # # It is handled in C++ by the template functions and in TS by # # the JSON.stringify code. # elif IsStructType(typename): # pass # def GenStructTypeDeclAndSerialize(genc: GenCode, type, schema) -> None: # ###### # # CPP # ###### # sampleCpp = """ struct Message1 # { # int32_t a; # std::string b; # EnumMonth0 c; # bool d; # }; # Json::Value StoneSerialize(const Message1& value) # { # Json::Value result(Json::objectValue); # result["a"] = StoneSerialize(value.a); # result["b"] = StoneSerialize(value.b); # result["c"] = StoneSerialize(value.c); # result["d"] = StoneSerialize(value.d); # return result; # } # """ # ###### # # TS # ###### # sampleTs = """ # { # export class Message1 { # a: number; # b: string; # c: EnumMonth0; # d: boolean; # public StoneSerialize(): string { # let container: object = {}; # container['type'] = 'Message1'; # container['value'] = this; # return JSON.stringify(container); # } # }; # } # """ # tsText: StringIO = StringIO() # cppText: StringIO = StringIO() # tsText.write("class %s\n" % typeDict["name"]) # tsText.write("{\n") # cppText.write("struct %s\n" % typeDict["name"]) # cppText.write("{\n") # """ # GenerateSerializationCode(typename,valueName) # primitives: # ----------- # int # jsonValue val(objectInt); # val.setValue("$name") # parent.add(("$name",$name) # double # ... # string # ... # collections: # ----------- # dict { } # serializeValue() # """ # for i in range(len(typeDict["fields"])): # field = typeDict["fields"][i] # name = field["name"] # tsType = GetTypeScriptTypenameFromCanonical(field["type"]) # tsText.write(" public %s %s;\n" % (tsType, name)) # cppType = GetCppTypenameFromCanonical(field["type"]) # cppText.write(" %s %s;\n" % (cppType, name)) # tsText.write("};\n\n") # cppText.write("};\n\n") # genc.tsStructs.write(tsText.getvalue()) # genc.cppStructs.write(cppText.getvalue()) # def GenerateCodeFromTsTemplate(genc) # +-----------------------+ # | CODE GENERATION | # +-----------------------+ # def GenPreambles(rootName: str, genc: GenCode) -> None: # cppPreambleT = Template(trim( # """// autogenerated by stonegentool on {{time.ctime()}} # // for module {{rootName}} # #include <cstdint> # #include <string> # #include <vector> # #include <map> # namespace {{rootName}} # { # Json::Value StoneSerialize(int32_t value) # { # Json::Value result(value); # return result; # } # Json::Value StoneSerialize(double value) # { # Json::Value result(value); # return result; # } # Json::Value StoneSerialize(bool value) # { # Json::Value result(value); # return result; # } # Json::Value StoneSerialize(const std::string& value) # { # // the following is better than # Json::Value result(value.data(),value.data()+value.size()); # return result; # } # template<typename T> # Json::Value StoneSerialize(const std::map<std::string,T>& value) # { # Json::Value result(Json::objectValue); # for (std::map<std::string, T>::const_iterator it = value.cbegin(); # it != value.cend(); ++it) # { # // it->first it->second # result[it->first] = StoneSerialize(it->second); # } # return result; # } # template<typename T> # Json::Value StoneSerialize(const std::vector<T>& value) # { # Json::Value result(Json::arrayValue); # for (size_t i = 0; i < value.size(); ++i) # { # result.append(StoneSerialize(value[i])); # } # return result; # } # """ # cppPreambleR = cppPreambleT.render(locals()) # genc.cppPreamble.write(cppPreambleR) # tsPreambleT = Template(trim( # """// autogenerated by stonegentool on {{time.ctime()}} # // for module {{rootName}} # namespace {{rootName}} # { # """ # tsPreambleR = tsPreambleT.render(locals()) # genc.tsPreamble.write(tsPreambleR) # def ComputeOrder_ProcessStruct( \ # genOrder: List[str], name:str, schema: Dict[str, str]) -> None: # # let's generate the code according to the # struct = schema[name] # if not IsStructType(name): # raise Exception(f'{typename} should start with "struct "')