texdict: add basic multi-entry capability

2025-07-20 20:06:05 +02:00 · 2025-07-20 20:06:05 +02:00 · 9066701bdd
commit 9066701bdd
parent 52f4d78fa3
6 changed files with 127 additions and 38 deletions
--- a/texdict/example.json
+++ b/texdict/example.json
@ -1 +1 @@
-{"entry": "asɛm", "key": "asem", "meanings": [ { "type": "noun", "description": "word", "examples": ["Wonim saa asɛm yi?", " Wo nim sa ara asɛm yi? " ]}, { "type": "noun", "description": "issue", "examples": ["Dɛn ne asɛm no? " ]}, { "type": "noun", "description": "news ", "examples": [ ]} ] }
+{ "entries" : [ {"entry": "asɛm", "key": "asem", "meanings": [ { "type": "noun", "description": "word", "examples": ["Wonim saa asɛm yi?", " Wo nim sa ara asɛm yi? " ]}, { "type": "noun", "description": "issue", "examples": ["Dɛn ne asɛm no? " ]}, { "type": "noun", "description": "news ", "examples": [ ]} ] } ] } 
--- a/texdict/example2.json
+++ b/texdict/example2.json
@ -0,0 +1,4 @@
+{ "entries" : [ 
+	{"entry": "asɛm", "key": "asem", "meanings": [ { "type": "noun", "description": "word", "examples": ["Wonim saa asɛm yi?", " Wo nim sa ara asɛm yi? " ]}, { "type": "noun", "description": "issue", "examples": ["Dɛn ne asɛm no? " ]}, { "type": "noun", "description": "news ", "examples": [ ]} ] },
+	{"entry": "asɛm", "key": "asem", "meanings": [ { "type": "noun", "description": "word", "examples": ["Wonim saa asɛm yi?", " Wo nim sa ara asɛm yi? " ]}, { "type": "noun", "description": "issue", "examples": ["Dɛn ne asɛm no? " ]}, { "type": "noun", "description": "news ", "examples": [ ]} ] } 
+] } 
--- a/texdict/example2.tex
+++ b/texdict/example2.tex
@ -0,0 +1,5 @@
+\dictentrysorted{asɛm}{asem}{\m (n) word ; Wonim saa asɛm yi? ;  Wo nim sa ara asɛm yi?  \m (n) issue ; Dɛn ne asɛm no?  \m (n) news  }
+\dictentrysorted{asɛm}{asem}{\m (n) word ; Wonim saa asɛm yi? ;  Wo nim sa ara asɛm yi?  \m (n) issue ; Dɛn ne asɛm no?  \m (n) news  }
+%\dictentrysorted{asɛm}{asem}{\m (n) word ; Wonim saa asɛm yi? ;  Wo nim sa ara asɛm yi?  \m (n) issue ; Dɛn ne asɛm no?  \m (n) news  }
+%testasdfasd
+
--- a/texdict/example5.tex
+++ b/texdict/example5.tex
@ -0,0 +1,8 @@
+
+\dictentrysorted{a}{a}{\w{n} which, who}
+%\dictentrysorted{aba}{aba}{ \m \w{v} \g{ba} \m \w{n} child, offspring}
+\dictentrysorted{ababawa}{ababawa}{\w{n}young woman}
+\dictentrysorted{abarima}{abarima}{\w{n}young man}
+\dictentrysorted{abe}{abe}{\w{n} palm}
+\dictentrysorted{abebisa}{abebisa}{\s{bisa}}
+%\dictentrysorted{abede}{abede}{\g{de} to be ripe \td{???}}
--- a/texdict/texdict.py
+++ b/texdict/texdict.py
@ -9,48 +9,57 @@ parser = argparse.ArgumentParser(
 )

 parser.add_argument('file', help='input file')
-parser.add_argument('-i', '--input', choices=('json', 'tex'), help='input format: json, tex, sqlite (default: file extension')
-parser.add_argument('-f', '--format', choices=('json', 'tex'), help='choose output format: json, tex, sqlite (default: json)')
+parser.add_argument('-i', '--input', choices=('json', 'tex'), help='input format: json, tex (default: json; sqlite might be added)')
+parser.add_argument('-o', '--output', choices=('json', 'tex'), help='output format: json, tex (default: json; sqlite might be added)')
 parser.add_argument('-v', '--verbose', action='store_true', help='boolean verbose switch (default: false)')  
 #parser.add_argument('-e', '--entry', help='add a entry to a file')
 #parser.add_argument('-o', '--output', help='write dictionary file to file')
 #parser.add_argument('-w', '--write', action='store_true', required=False, help='overwrite input file')

 args = parser.parse_args()
-# verboseprint https://stackoverflow.com/a/5980173
+# def verboseprint() https://stackoverflow.com/a/5980173
 verboseprint = print if args.verbose else lambda *a, **k: None
 inputfile=args.file
 informat=inputfile.split('.')[-1]
 #verboseprint(args.input)
 if args.input:
    informat=args.input
-verboseprint("input file format:", informat)
+verboseprint("INFO: input file format:", informat)
 outformat='json'
-if args.format:
-    outformat=args.format
-verboseprint("output file format:", outformat)
+if args.output:
+    outformat=args.output
+verboseprint("INFO: output file format:", outformat)

 # KNOWN WORD TYPES
 shorttypes={
        'noun':'n'
        }
 shorttypes_inversed= {v: k for k, v in shorttypes.items()}
-verboseprint("Word types:",shorttypes)
+verboseprint("INFO: Word types:",shorttypes)
 #
 # READ JSON FILE
 def get_data_from_jsonfile(filename):
-    verboseprint("opening file:", filename)
+    verboseprint("INFO: opening file:", filename)
    with open(filename) as infile:
        data = json.load(infile)
-        verboseprint(json.dumps(data, ensure_ascii=False))
+        verboseprint("INFO:", json.dumps(data, ensure_ascii=False))
        return json.dumps(data, ensure_ascii=False)

 # READ TEX FILE
 def get_data_from_texfile(filename):
-    verboseprint("opening file:", filename)
+    verboseprint("INFO: opening file:", filename)
    with open(filename) as infile:
+        jsonstring='{ "entries" : [ '
+        j=0
        for line in infile:
-            jsonstring='{"entry": "'
+            if line == '\n':
+                continue
+            if line.startswith('%'):
+                continue
+            if j!=0:
+                jsonstring+=', '
+            j+=1
+            jsonstring+='{"entry": "'
            #ENTRY
            jsonstring+=line.split('{')[1].split('}')[0]+'", '
            #KEY
@ -58,15 +67,15 @@ def get_data_from_texfile(filename):
            jsonstring+=line.split('{')[2].split('}')[0]+'", '
            #MEANINGS
            number_of_meanings=line.count('\m')
-            verboseprint("Meanings found:", number_of_meanings)
+            verboseprint("INFO: Meanings found:", number_of_meanings)
            jsonstring+='"meanings": [ '
            for i in range(number_of_meanings):
                #MEANING
-                verboseprint(i)
+                verboseprint("INFO: number of meanins", i)
                meaning=line.split("\m")[i+1][1:]
                if i == (number_of_meanings-1):
                    meaning=meaning[:-2]
-                verboseprint("Meaning:",meaning)
+                verboseprint("INFO: Meaning:",meaning)
                #TYPE
                #todo what if no type
                if i == 0:
@ -83,7 +92,7 @@ def get_data_from_texfile(filename):
                jsonstring+=', "examples": ['
                if ';' in meaning:
                    examples=meaning.split('; ')[1]
-                    verboseprint("examples:", examples)
+                    verboseprint("INFO: examples:", examples)
                    number_of_examples=len(examples.split(', '))
                    j=0 
                    for example in examples.split(','):
@ -96,10 +105,14 @@ def get_data_from_texfile(filename):
                #CLOSE MEANING
                jsonstring+='}'
            jsonstring+=' ] }'
+        jsonstring+='] }'
    return jsonstring

-def json2tex(entry):
-    entry_json=json.loads(entry)
+def json2tex(entries):
+    entries_json=json.loads(entries)
+    entries_tex=""
+    for entry_json in entries_json["entries"]:
+#        entry_json=json.loads(entry)
        entry_tex='\dictentrysorted{'+entry_json['entry']+'}{'+entry_json['key']+'}{'
        for meaning in entry_json['meanings']:
            #TYPE
@ -112,12 +125,14 @@ def json2tex(entry):
            #DESCRIPTION
            entry_tex+=meaning['description']+' '
            #EXAMPLES
-        verboseprint(meaning)
+            verboseprint("INFO:", meaning)
            if len(meaning['examples']) != 0: 
                for example in meaning['examples']:
                    entry_tex+='; '+example+' '
        entry_tex+='}'
-    return entry_tex
+        entries_tex+=entry_tex
+        entries_tex+="\n"
+    return entries_tex

 def tex2json(entry):
    entry_json=json.dumps(json.loads(entry), ensure_ascii=False)
--- a/texdict/todo.md
+++ b/texdict/todo.md
@ -1,2 +1,59 @@
 # TODO:
- change such that two entries can be converted
+
+- [ ] test with example5.tex
+- [ ] make tex parser cleaner and think more about structure of entries and levels 
+- [ ] how should the output of texdict.py -o tex be used?
+```
+\documentclass{standalone}
+%\usepackage{texdict}
+\begin{document}
+\input{output-from-textdict.tex}
+\end{document}
+```
+- [ ] if this becomes a dict package, how can I keep texdict.py and texdict.sty in sync? 
+    - [ ] texdict.py needs to be part of the texdict.sty package
+
+- [ ] refactor code according to the 12factor app methodology 
+    - [ ] https://12factor.net/de/
+- [ ] what about that every entry should only appear once? 
+- [ ] if input and output formats are the same, should it be converted to json anyway? 
+    - [ ] maybe not; instead add a --test flag for checking syntax correctness 
+
+---
+---
+
+- [x] test: 
+    - [x] $(./texdict.py example2.json -o tex)
+- [x] then also test:
+    - [x] $(./texdict.py example2.tex )
+
+
+
+
+
+- [x] change such that two entries can be converted
+- [x] json:
+```
+{[entry1,e2,e3]}
+{entries : [entry1, entry2, entry3]}
+```
+- [x] tex
+```
+\entry{1}
+\entry{2}
+\entry{3}
+```
+
+
+\documentclass{standalone}
+\begin{document}
+$\tau$e$\Xi$\textit{dict}\texttt{.py}
+\end{document}
+
+
+
+a poems with vim 
+a poems with him 
+
+a poems with vim vim
+a poems with vim vim