texdict.py: first minimal example

2025-07-13 19:35:59 +02:00 · 2025-07-13 19:35:59 +02:00 · 18ef47c536
commit 18ef47c536
parent 4f283b0c7f
3 changed files with 155 additions and 0 deletions
--- a/texdict/example.json
+++ b/texdict/example.json
@@ -0,0 +1 @@
+{"entry": "asɛm", "key": "asem", "meanings": [ { "type": "noun", "description": "word", "examples": ["Wonim saa asɛm yi?", " Wo nim sa ara asɛm yi? " ]}, { "type": "noun", "description": "issue", "examples": ["Dɛn ne asɛm no? " ]}, { "type": "noun", "description": "news ", "examples": [ ]} ] }
--- a/texdict/example.tex
+++ b/texdict/example.tex
@@ -0,0 +1 @@
+\dictentrysorted{asɛm}{asem}{\m (n) word ; Wonim saa asɛm yi?, Wo nim sa ara asɛm yi? \m (n) issue ; Dɛn ne asɛm no? \m (n) news }
--- a/texdict/texdict.py
+++ b/texdict/texdict.py
@@ -0,0 +1,153 @@
+#!/usr/bin/env python3 
+
+import json
+import argparse 
+# Command-line interface: one positional input file plus optional format
+# overrides.  The input format defaults to the file extension, the output
+# format to json.
+parser = argparse.ArgumentParser(
+    prog='texdict',
+    description='convert LaTeX glossary to json',
+    epilog='onipa'
+)
+
+parser.add_argument('file', help='input file')
+parser.add_argument('-i', '--input', choices=('json', 'tex'), help='input format: json, tex (default: file extension)')
+parser.add_argument('-f', '--format', choices=('json', 'tex'), default='json', help='choose output format: json, tex (default: json)')
+parser.add_argument('-v', '--verbose', action='store_true', help='boolean verbose switch (default: false)')
+# TODO: planned options, not implemented yet
+#parser.add_argument('-e', '--entry', help='add a entry to a file')
+#parser.add_argument('-w', '--write', action='store_true', required=False, help='overwrite input file')
+#parser.add_argument('-o', '--output', help='write dictionary file to file')
+
+args = parser.parse_args()
+# verboseprint https://stackoverflow.com/a/5980173
+verboseprint = print if args.verbose else lambda *a, **k: None
+inputfile = args.file
+# An explicit -i wins; otherwise use the (case-insensitive) file extension.
+informat = args.input or inputfile.rsplit('.', 1)[-1].lower()
+verboseprint("input file format:", informat)
+outformat = args.format
+verboseprint("output file format:", outformat)
+
+
+#example= '''{
+#    "entry" : "asɛm", 
+#    "key": "asem", 
+#    "meanings": [
+#        { 
+#            "type" : "noun", 
+#            "description" : "word", 
+#            "examples" : ["Wonim saa asɛm yi?"]
+#         },{
+#            "type": "noun",
+#            "description": "news",
+#            "examples" : []
+#         }
+#     ]
+#}'''
+# KNOWN WORD TYPES
+# Long word-type names and the abbreviations used for them in the TeX source.
+shorttypes = {'noun': 'n'}
+# Reverse lookup (abbreviation -> long name), used when reading TeX.
+shorttypes_inversed = dict((short, full) for full, short in shorttypes.items())
+verboseprint("Word types:", shorttypes)
+#
+# READ JSON FILE
+def get_data_from_jsonfile(filename):
+    """Read a UTF-8 JSON file and return its content re-serialised as a JSON string."""
+    verboseprint("opening file:", filename)
+    with open(filename, encoding="utf-8") as infile:  # data is non-ASCII; do not rely on the locale
+        text = json.dumps(json.load(infile), ensure_ascii=False)
+    verboseprint(text)
+    return text
+
+# READ TEX FILE
+def _parse_tex_meaning(chunk):
+    """Split the text after one ``\\m`` marker into type, description, examples."""
+    verboseprint("Meaning:", chunk)
+    # "(t) " opens the meaning; a meaning without it gets an empty type.
+    wordtype, rest = '', chunk.lstrip()
+    if rest.startswith('('):
+        short_wordtype, _, rest = rest[1:].partition(') ')
+        wordtype = shorttypes_inversed.get(short_wordtype, short_wordtype)
+    # " ;" separates the description from the comma-separated examples.
+    description, _, tail = rest.partition(' ;')
+    tail = tail[1:] if tail.startswith(' ') else tail
+    verboseprint("examples:", tail)
+    examples = tail.split(',') if tail.strip() else []
+    return {"type": wordtype, "description": description, "examples": examples}
+
+def get_data_from_texfile(filename):
+    """Parse a ``\\dictentrysorted`` TeX file and return the entry as a JSON string.
+
+    Each entry line has the form
+    ``\\dictentrysorted{entry}{key}{\\m (t) description ; example, example}``
+    with one ``\\m`` group per meaning; ``t`` is a short word type that is
+    expanded through ``shorttypes_inversed`` (unknown ones are kept as they
+    are).  Lines that do not match, such as blank lines, are skipped.  Only
+    one entry is returned: if the file holds several, the last one wins.
+
+    The result is built with ``json.dumps`` so quotes and backslashes in the
+    text are escaped.  Whitespace around descriptions and examples is kept
+    exactly as the separators leave it.
+
+    Raises ``ValueError`` if the file contains no entry line.
+    """
+    import re  # local import: only this function needs it
+
+    entry_line = re.compile(r'\s*\\dictentrysorted\{([^}]*)\}\{([^}]*)\}\{(.*)\}\s*$')
+    parsed = None
+    verboseprint("opening file:", filename)
+    with open(filename, encoding="utf-8") as infile:
+        for line in infile:
+            match = entry_line.match(line)
+            if match is None:
+                verboseprint("skipping line:", line.rstrip())
+                continue
+            entry, key, body = match.groups()
+            # Text before the first \m marker is not part of any meaning.
+            chunks = body.split('\\m')[1:]
+            verboseprint("Meanings found:", len(chunks))
+            parsed = {"entry": entry, "key": key,
+                      "meanings": [_parse_tex_meaning(chunk) for chunk in chunks]}
+    if parsed is None:
+        raise ValueError("no \\dictentrysorted entry found in " + filename)
+    return json.dumps(parsed, ensure_ascii=False)
+
+def json2tex(entry):
+    """Render one JSON-encoded entry as a ``\\dictentrysorted{..}{..}{..}`` line.
+
+    Each meaning becomes ``\\m (type) description ; example, example``, the
+    layout get_data_from_texfile parses.  Unknown word types are emitted
+    unchanged and reported on stderr so stdout stays pure TeX.
+    """
+    import sys  # local import: only needed for the warning below
+    entry_json = json.loads(entry)
+    parts = ['\\dictentrysorted{' + entry_json['entry'] + '}{' + entry_json['key'] + '}{']
+    for meaning in entry_json['meanings']:
+        verboseprint(meaning)
+        wordtype = meaning['type']
+        if wordtype not in shorttypes:
+            print("VERBOSE: stderr: "+wordtype+" is not in the the known word type list!", file=sys.stderr)
+        parts.append('\\m (' + shorttypes.get(wordtype, wordtype) + ') ' + meaning['description'].strip() + ' ')
+        examples = [example.strip() for example in meaning['examples']]
+        if examples:
+            parts.append('; ' + ', '.join(examples) + ' ')
+    return ''.join(parts) + '}'
+
+def tex2json(entry):
+    """Return the JSON text *entry* re-serialised with non-ASCII characters kept."""
+    return json.dumps(json.loads(entry), ensure_ascii=False)
+
+# Load the entry as a JSON string.  An unsupported input format must stop
+# here, otherwise `data` would be undefined when it is printed below.
+if informat == "json":
+    data = get_data_from_jsonfile(inputfile)
+elif informat == 'tex':
+    data = get_data_from_texfile(inputfile)
+else:
+    raise SystemExit("unsupported input format: " + informat)
+
+print(json2tex(data) if outformat == 'tex' else data)
+raise SystemExit(0)
				`@ -0,0 +1 @@`
				`{"entry": "asɛm", "key": "asem", "meanings": [ { "type": "noun", "description": "word", "examples": ["Wonim saa asɛm yi?", " Wo nim sa ara asɛm yi? " ]}, { "type": "noun", "description": "issue", "examples": ["Dɛn ne asɛm no? " ]}, { "type": "noun", "description": "news ", "examples": [ ]} ] }`
				`@ -0,0 +1 @@`
				`\dictentrysorted{asɛm}{asem}{\m (n) word ; Wonim saa asɛm yi?, Wo nim sa ara asɛm yi? \m (n) issue ; Dɛn ne asɛm no? \m (n) news }`