From 9066701bdd5c87370aab262312565c14d02ac083 Mon Sep 17 00:00:00 2001
From: "uni@bor.mac" <j.w.dorn@gmx.at>
Date: Sun, 20 Jul 2025 20:06:05 +0200
Subject: [PATCH] texdict: add basic multi-entry capability

---
 texdict/example.json  |  2 +-
 texdict/example2.json |  4 ++
 texdict/example2.tex  |  5 +++
 texdict/example5.tex  |  8 ++++
 texdict/texdict.py    | 87 +++++++++++++++++++++++++------------------
 texdict/todo.md       | 59 ++++++++++++++++++++++++++++-
 6 files changed, 127 insertions(+), 38 deletions(-)
 create mode 100644 texdict/example2.json
 create mode 100644 texdict/example2.tex
 create mode 100644 texdict/example5.tex

diff --git a/texdict/example.json b/texdict/example.json
index ccb3a12..0cabf51 100644
--- a/texdict/example.json
+++ b/texdict/example.json
@@ -1 +1 @@
-{"entry": "asɛm", "key": "asem", "meanings": [ { "type": "noun", "description": "word", "examples": ["Wonim saa asɛm yi?", " Wo nim sa ara asɛm yi? " ]}, { "type": "noun", "description": "issue", "examples": ["Dɛn ne asɛm no? " ]}, { "type": "noun", "description": "news ", "examples": [ ]} ] }
+{ "entries" : [ {"entry": "asɛm", "key": "asem", "meanings": [ { "type": "noun", "description": "word", "examples": ["Wonim saa asɛm yi?", " Wo nim sa ara asɛm yi? " ]}, { "type": "noun", "description": "issue", "examples": ["Dɛn ne asɛm no? " ]}, { "type": "noun", "description": "news ", "examples": [ ]} ] } ] } 
diff --git a/texdict/example2.json b/texdict/example2.json
new file mode 100644
index 0000000..d1493d3
--- /dev/null
+++ b/texdict/example2.json
@@ -0,0 +1,4 @@
+{ "entries" : [ 
+	{"entry": "asɛm", "key": "asem", "meanings": [ { "type": "noun", "description": "word", "examples": ["Wonim saa asɛm yi?", " Wo nim sa ara asɛm yi? " ]}, { "type": "noun", "description": "issue", "examples": ["Dɛn ne asɛm no? " ]}, { "type": "noun", "description": "news ", "examples": [ ]} ] },
+	{"entry": "asɛm", "key": "asem", "meanings": [ { "type": "noun", "description": "word", "examples": ["Wonim saa asɛm yi?", " Wo nim sa ara asɛm yi? " ]}, { "type": "noun", "description": "issue", "examples": ["Dɛn ne asɛm no? " ]}, { "type": "noun", "description": "news ", "examples": [ ]} ] } 
+] } 
diff --git a/texdict/example2.tex b/texdict/example2.tex
new file mode 100644
index 0000000..3171bd0
--- /dev/null
+++ b/texdict/example2.tex
@@ -0,0 +1,5 @@
+\dictentrysorted{asɛm}{asem}{\m (n) word ; Wonim saa asɛm yi? ;  Wo nim sa ara asɛm yi?  \m (n) issue ; Dɛn ne asɛm no?  \m (n) news  }
+\dictentrysorted{asɛm}{asem}{\m (n) word ; Wonim saa asɛm yi? ;  Wo nim sa ara asɛm yi?  \m (n) issue ; Dɛn ne asɛm no?  \m (n) news  }
+%\dictentrysorted{asɛm}{asem}{\m (n) word ; Wonim saa asɛm yi? ;  Wo nim sa ara asɛm yi?  \m (n) issue ; Dɛn ne asɛm no?  \m (n) news  }
+%testasdfasd
+
diff --git a/texdict/example5.tex b/texdict/example5.tex
new file mode 100644
index 0000000..6ee7980
--- /dev/null
+++ b/texdict/example5.tex
@@ -0,0 +1,8 @@
+
+\dictentrysorted{a}{a}{\w{n} which, who}
+%\dictentrysorted{aba}{aba}{ \m \w{v} \g{ba} \m \w{n} child, offspring}
+\dictentrysorted{ababawa}{ababawa}{\w{n}young woman}
+\dictentrysorted{abarima}{abarima}{\w{n}young man}
+\dictentrysorted{abe}{abe}{\w{n} palm}
+\dictentrysorted{abebisa}{abebisa}{\s{bisa}}
+%\dictentrysorted{abede}{abede}{\g{de} to be ripe \td{???}}
diff --git a/texdict/texdict.py b/texdict/texdict.py
index 3e01e34..3e7945d 100755
--- a/texdict/texdict.py
+++ b/texdict/texdict.py
@@ -9,48 +9,57 @@ parser = argparse.ArgumentParser(
 )
 
 parser.add_argument('file', help='input file')
-parser.add_argument('-i', '--input', choices=('json', 'tex'), help='input format: json, tex, sqlite (default: file extension')
-parser.add_argument('-f', '--format', choices=('json', 'tex'), help='choose output format: json, tex, sqlite (default: json)')
+parser.add_argument('-i', '--input', choices=('json', 'tex'), help='input format: json, tex (default: json; sqlite might be added)')
+parser.add_argument('-o', '--output', choices=('json', 'tex'), help='output format: json, tex (default: json; sqlite might be added)')
 parser.add_argument('-v', '--verbose', action='store_true', help='boolean verbose switch (default: false)')  
 #parser.add_argument('-e', '--entry', help='add a entry to a file')
 #parser.add_argument('-o', '--output', help='write dictionary file to file')
 #parser.add_argument('-w', '--write', action='store_true', required=False, help='overwrite input file')
 
 args = parser.parse_args()
-# verboseprint https://stackoverflow.com/a/5980173
+# def verboseprint() https://stackoverflow.com/a/5980173
 verboseprint = print if args.verbose else lambda *a, **k: None
 inputfile=args.file
 informat=inputfile.split('.')[-1]
 #verboseprint(args.input)
 if args.input:
     informat=args.input
-verboseprint("input file format:", informat)
+verboseprint("INFO: input file format:", informat)
 outformat='json'
-if args.format:
-    outformat=args.format
-verboseprint("output file format:", outformat)
+if args.output:
+    outformat=args.output
+verboseprint("INFO: output file format:", outformat)
 
 # KNOWN WORD TYPES
 shorttypes={
         'noun':'n'
         }
 shorttypes_inversed= {v: k for k, v in shorttypes.items()}
-verboseprint("Word types:",shorttypes)
+verboseprint("INFO: Word types:",shorttypes)
 #
 # READ JSON FILE
 def get_data_from_jsonfile(filename):
-    verboseprint("opening file:", filename)
+    verboseprint("INFO: opening file:", filename)
     with open(filename) as infile:
         data = json.load(infile)
-        verboseprint(json.dumps(data, ensure_ascii=False))
+        verboseprint("INFO:", json.dumps(data, ensure_ascii=False))
         return json.dumps(data, ensure_ascii=False)
 
 # READ TEX FILE
 def get_data_from_texfile(filename):
-    verboseprint("opening file:", filename)
+    verboseprint("INFO: opening file:", filename)
     with open(filename) as infile:
+        jsonstring='{ "entries" : [ '
+        j=0
         for line in infile:
-            jsonstring='{"entry": "'
+            if line == '\n':
+                continue
+            if line.startswith('%'):
+                continue
+            if j!=0:
+                jsonstring+=', '
+            j+=1
+            jsonstring+='{"entry": "'
             #ENTRY
             jsonstring+=line.split('{')[1].split('}')[0]+'", '
             #KEY
@@ -58,15 +67,15 @@ def get_data_from_texfile(filename):
             jsonstring+=line.split('{')[2].split('}')[0]+'", '
             #MEANINGS
             number_of_meanings=line.count('\m')
-            verboseprint("Meanings found:", number_of_meanings)
+            verboseprint("INFO: Meanings found:", number_of_meanings)
             jsonstring+='"meanings": [ '
             for i in range(number_of_meanings):
                 #MEANING
-                verboseprint(i)
+                verboseprint("INFO: number of meanins", i)
                 meaning=line.split("\m")[i+1][1:]
                 if i == (number_of_meanings-1):
                     meaning=meaning[:-2]
-                verboseprint("Meaning:",meaning)
+                verboseprint("INFO: Meaning:",meaning)
                 #TYPE
                 #todo what if no type
                 if i == 0:
@@ -83,7 +92,7 @@ def get_data_from_texfile(filename):
                 jsonstring+=', "examples": ['
                 if ';' in meaning:
                     examples=meaning.split('; ')[1]
-                    verboseprint("examples:", examples)
+                    verboseprint("INFO: examples:", examples)
                     number_of_examples=len(examples.split(', '))
                     j=0 
                     for example in examples.split(','):
@@ -96,28 +105,34 @@ def get_data_from_texfile(filename):
                 #CLOSE MEANING
                 jsonstring+='}'
             jsonstring+=' ] }'
+        jsonstring+='] }'
     return jsonstring
 
-def json2tex(entry):
-    entry_json=json.loads(entry)
-    entry_tex='\dictentrysorted{'+entry_json['entry']+'}{'+entry_json['key']+'}{'
-    for meaning in entry_json['meanings']:
-        #TYPE
-        wordtype=meaning['type']
-        if wordtype in shorttypes:
-            wordtype=shorttypes[wordtype]
-        else:
-            print("VERBOSE: stderr: "+wordtype+" is not in the the known word type list!")
-        entry_tex+='\m ('+wordtype+') '
-        #DESCRIPTION
-        entry_tex+=meaning['description']+' '
-        #EXAMPLES
-        verboseprint(meaning)
-        if len(meaning['examples']) != 0: 
-            for example in meaning['examples']:
-                entry_tex+='; '+example+' '
-    entry_tex+='}'
-    return entry_tex
+def json2tex(entries):
+    entries_json=json.loads(entries)
+    entries_tex=""
+    for entry_json in entries_json["entries"]:
+#        entry_json=json.loads(entry)
+        entry_tex='\dictentrysorted{'+entry_json['entry']+'}{'+entry_json['key']+'}{'
+        for meaning in entry_json['meanings']:
+            #TYPE
+            wordtype=meaning['type']
+            if wordtype in shorttypes:
+                wordtype=shorttypes[wordtype]
+            else:
+                print("VERBOSE: stderr: "+wordtype+" is not in the the known word type list!")
+            entry_tex+='\m ('+wordtype+') '
+            #DESCRIPTION
+            entry_tex+=meaning['description']+' '
+            #EXAMPLES
+            verboseprint("INFO:", meaning)
+            if len(meaning['examples']) != 0: 
+                for example in meaning['examples']:
+                    entry_tex+='; '+example+' '
+        entry_tex+='}'
+        entries_tex+=entry_tex
+        entries_tex+="\n"
+    return entries_tex
 
 def tex2json(entry):
     entry_json=json.dumps(json.loads(entry), ensure_ascii=False)
diff --git a/texdict/todo.md b/texdict/todo.md
index 9279d3e..f27ca99 100644
--- a/texdict/todo.md
+++ b/texdict/todo.md
@@ -1,2 +1,59 @@
 # TODO:
-- change such that two entries can be converted
+
+- [ ] test with example5.tex
+- [ ] make tex parser cleaner and think more about structure of entries and levels 
+- [ ] how should the output of texdict.py -o tex be used?
+```
+\documentclass{standalone}
+%\usepackage{texdict}
+\begin{document}
+\input{output-from-textdict.tex}
+\end{document}
+```
+- [ ] if dict package, how can I keep texdict.py and texdict.sty in sync? 
+    - [ ] texdict.py needs to be part of the texdict.sty package
+
+- [ ] refactor code according to the twelve-factor app methodology
+    - [ ] https://12factor.net/de/
+- [ ] what about that every entry should only appear once? 
+- [ ] if input and output formats are the same, should it still be converted to json anyway?
+    - [ ] maybe not and add a --test flag for testing the syntax correctness 
+
+---
+---
+
+- [x] test: 
+    - [x] $(./texdict.py example2.json -o tex)
+- [x] then also test:
+    - [x] $(./texdict.py example2.tex )
+
+
+
+
+
+- [x] change such that two entries can be converted
+- [x] json:
+```
+{[entry1,e2,e3]}
+{entries : [entry1, entry2, entry3]}
+```
+- [x] tex
+```
+\entry{1}
+\entry{2}
+\entry{3}
+```
+
+
+\documentclass{standalone}
+\begin{document}
+$\tau$e$\Xi$\textit{dict}\texttt{.py}
+\end{document}
+
+
+
+a poems with vim 
+a poems with him 
+
+a poems with vim vim
+a poems with vim vim