texdict.py: get_data_from_texfile() cleaner rewrite attempt

2025-07-21 22:24:25 +02:00 · 2025-07-21 22:24:25 +02:00 · c08442f2ea
commit c08442f2ea
parent 64de297467
2 changed files with 143 additions and 12 deletions
--- a/texdict/texdict.py
+++ b/texdict/texdict.py
@ -45,8 +45,114 @@ def get_data_from_jsonfile(filename):
        verboseprint("INFO:", json.dumps(data, ensure_ascii=False))
        return json.dumps(data, ensure_ascii=False)
 def find_matching_parens(s, braces=None):
    """Return the index pairs of all matched braces in ``s``.

    Args:
        s: The string to scan.
        braces: Optional mapping of opening character to closing character.
            When omitted (or empty) only ``{`` / ``}`` are tracked.

    Returns:
        A list of ``[open_index, close_index]`` lists, ordered by the
        position of the closing character (inner pairs therefore come
        before the pair that encloses them).

    Raises:
        ValueError: If a closer appears with no opener pending, if a closer
            does not match the most recent opener, or if an opener is still
            pending when the end of ``s`` is reached.
    """
    openers = braces or {"{": "}"}
    closers = {v: k for k, v in openers.items()}
    stack = []
    result = []
    for i, c in enumerate(s):
        if c in openers:
            stack.append((c, i))
        elif c in closers:
            if not stack:
                raise ValueError(f"tried to close brace without an open at position {i}")
            opener, idx = stack.pop()
            # Validate before recording so a bad pair never enters the result.
            if opener != closers[c]:
                raise ValueError(f"mismatched brace at position {i}")
            result.append([idx, i])
    if stack:
        # Point at the outermost opener that was never closed; the loop
        # variable would only give the last index of the string.
        raise ValueError(f"no closing brace at position {stack[0][1]}")
    return result
 # READ TEX FILE
 def get_data_from_texfile(filename):
 #    # delimiters
 #    meaning_delim='\m '
 #    type_delim=['(',') ']
 #    description_delim=', '
 #    examples_delim='; '
 #    example_delim=', ' # what if a sentence contains a comma? maybe use '. ' instead -- but what if the example is a question?
 #    entry_command='\dictentrysorted'
 #    verboseprint("INFO: opening file:", filename)
 #    with open(filename) as infile:
 #        oldline=''
 #        i=0
 #        jsonstring='{ "entries" : [ '
 #        for line in infile:
 #            i+=1
 #            line=line.strip()
 #            line=line.split('%')[0]
 #            if line.startswith('%'):
 #                continue
 #            if line == '\n':
 #                continue
 #            if oldline != '':
 #                if line.startswith(entry_command):
 #                    raise ValueError(f"new entry starts at line {k}, but previous entry not closed!")
 #                else: 
 #                    line=oldline+line
 #            
 #            try:
 #                matches=find_matching_parens(line)
 #                split_indices=[
 #                    matches[0][0], matches[0][1],
 #                    matches[1][0], matches[1][1],
 #                    matches[-1][0], matches[-1][1]
 #                ]
 #                substrings=[line[start:end] for start, end in zip([0] + split_indices, split_indices + [None])]
 #                entry=substrings[1][1:]
 #                key=substring[3][1:]
 #                meanings=substrings[5][1:]
 #
 #                #end 
 #                oldline=''
 #            except:
 #                oldline=line
 #                continue
 #            #ENTRY
 #            jsonstring+='{"entry": "'
 #            jsonstring+=entry
 #            jsonstring+='", '
 #            #KEY
 #            jsonstring+='"key": "'
 #            jsonstring+=key
 #            jsonstring+='", meanings": [ '
 #            #MEANINGS
 #            j=0
 #            for meaning in meanings.split('\m'):
 #                j+=1
 #                #WORD TYPE
 #                #TODO what if no word type???
 #                type_matches=find_matching_parens(meaning, {'(':')'})
 #                type_split_indices=[0]
 #                meaning_substrings=[meaning[start:end] for start, end in zip([0] + type_split_indices, type_split_indices + [None])]
 #                word_type=meaning_substrings[1][1:]
 #                jsonstring+=' { "type": "'
 #                jsonstring+=word_type
 #                jsonstring+='", '
 #                # DESCRIPTION
 #                descriptions=meaning_substrings[2][2:].split(examples_delim)[0]
 #                jsonstring+='"description": "'
 #                jsonstring+=description
 #                jsonstring+='", '
 #                # EXAMPLES
 #                # only execute if try: 
 #                # except: "examples" : [] 
 #                examples_raw=meaning_substrings[2][2:].split(examples_delim)[1]
 #                examples=examples.split(example_delim)
 #                jsonstring+='"examples": ['
 #                k=0
 #                for example in examples:
 #                    k+=1
 #                    jsonstring+='"'
 #                    jsonstring+=example
 #                    jsonstring+='", '
 #                jsonstring+=']????'
    #OLD VERSION
    verboseprint("INFO: opening file:", filename)
    with open(filename) as infile:
        jsonstring='{ "entries" : [ '
@ -134,18 +240,19 @@ def json2tex(entries):
        entries_tex+="\n"
    return entries_tex
-def tex2json(entry):
+#def tex2json(entry):
-    entry_json=json.dumps(json.loads(entry), ensure_ascii=False)
+#    entry_json=json.dumps(json.loads(entry), ensure_ascii=False)
-    return entry_json
+#    return entry_json
-if informat == "json":
+if __name__ == "__main__":
-    data = get_data_from_jsonfile(inputfile)
+    if informat == "json":
-elif informat == 'tex':
+        data = get_data_from_jsonfile(inputfile)
-    data = get_data_from_texfile(inputfile)
+    elif informat == 'tex':
        data = get_data_from_texfile(inputfile)
-if outformat == 'json':
+    if outformat == 'json':
-    print(data)
+        print(data)
-elif outformat == 'tex':
+    elif outformat == 'tex':
-    print(json2tex(data))
+        print(json2tex(data))
-exit(0)
+    exit(0)
--- a/texdict/todo.md
+++ b/texdict/todo.md
@ -97,6 +97,21 @@ example.json
    ]
 }
 ```
 \glossary{entry}{key}{
    \m (type) description, description, description ; example, example 
    \m 
 }
 - tex2json
 ```
 def tex2json(dict_tex)
    meaning_delim='\m '
    type_delim=['(',') ']
    description_delim=', '
    examples_delim='; '
    example_delim=', '
    # how to cope with multiline entries? 
        - [find closing brackets](https://stackoverflow.com/q/63382152)
 ```
 - notes: 
    - `type` could also include gender/genus? 
    - where should I put info like 'go, went, gone' or plural forms for some words?
@ -105,3 +120,12 @@ example.json
    - `dialect/usage/gebrauch`
        - https://www.duden.de/hilfe/gebrauch
    - statt `description` = `meaning`? 
    - what about `see also`s and confer (cf.)
    - what about `aussprache`?
 ```
 \glossary{entry}{key}{
    \m (type) description, description, description ; example, example 
    \m 
 }
 - tex2json