From 9066701bdd5c87370aab262312565c14d02ac083 Mon Sep 17 00:00:00 2001 From: "uni@bor.mac" Date: Sun, 20 Jul 2025 20:06:05 +0200 Subject: [PATCH] textdict: add basic mulit-entry capability --- texdict/example.json | 2 +- texdict/example2.json | 4 ++ texdict/example2.tex | 5 +++ texdict/example5.tex | 8 ++++ texdict/texdict.py | 87 +++++++++++++++++++++++++------------------ texdict/todo.md | 59 ++++++++++++++++++++++++++++- 6 files changed, 127 insertions(+), 38 deletions(-) create mode 100644 texdict/example2.json create mode 100644 texdict/example2.tex create mode 100644 texdict/example5.tex diff --git a/texdict/example.json b/texdict/example.json index ccb3a12..0cabf51 100644 --- a/texdict/example.json +++ b/texdict/example.json @@ -1 +1 @@ -{"entry": "asɛm", "key": "asem", "meanings": [ { "type": "noun", "description": "word", "examples": ["Wonim saa asɛm yi?", " Wo nim sa ara asɛm yi? " ]}, { "type": "noun", "description": "issue", "examples": ["Dɛn ne asɛm no? " ]}, { "type": "noun", "description": "news ", "examples": [ ]} ] } +{ "entries" : [ {"entry": "asɛm", "key": "asem", "meanings": [ { "type": "noun", "description": "word", "examples": ["Wonim saa asɛm yi?", " Wo nim sa ara asɛm yi? " ]}, { "type": "noun", "description": "issue", "examples": ["Dɛn ne asɛm no? " ]}, { "type": "noun", "description": "news ", "examples": [ ]} ] } ] } diff --git a/texdict/example2.json b/texdict/example2.json new file mode 100644 index 0000000..d1493d3 --- /dev/null +++ b/texdict/example2.json @@ -0,0 +1,4 @@ +{ "entries" : [ + {"entry": "asɛm", "key": "asem", "meanings": [ { "type": "noun", "description": "word", "examples": ["Wonim saa asɛm yi?", " Wo nim sa ara asɛm yi? " ]}, { "type": "noun", "description": "issue", "examples": ["Dɛn ne asɛm no? " ]}, { "type": "noun", "description": "news ", "examples": [ ]} ] }, + {"entry": "asɛm", "key": "asem", "meanings": [ { "type": "noun", "description": "word", "examples": ["Wonim saa asɛm yi?", " Wo nim sa ara asɛm yi? " ]}, { "type": "noun", "description": "issue", "examples": ["Dɛn ne asɛm no? " ]}, { "type": "noun", "description": "news ", "examples": [ ]} ] } +] } diff --git a/texdict/example2.tex b/texdict/example2.tex new file mode 100644 index 0000000..3171bd0 --- /dev/null +++ b/texdict/example2.tex @@ -0,0 +1,5 @@ +\dictentrysorted{asɛm}{asem}{\m (n) word ; Wonim saa asɛm yi? ; Wo nim sa ara asɛm yi? \m (n) issue ; Dɛn ne asɛm no? \m (n) news } +\dictentrysorted{asɛm}{asem}{\m (n) word ; Wonim saa asɛm yi? ; Wo nim sa ara asɛm yi? \m (n) issue ; Dɛn ne asɛm no? \m (n) news } +%\dictentrysorted{asɛm}{asem}{\m (n) word ; Wonim saa asɛm yi? ; Wo nim sa ara asɛm yi? \m (n) issue ; Dɛn ne asɛm no? \m (n) news } +%testasdfasd + diff --git a/texdict/example5.tex b/texdict/example5.tex new file mode 100644 index 0000000..6ee7980 --- /dev/null +++ b/texdict/example5.tex @@ -0,0 +1,8 @@ + +\dictentrysorted{a}{a}{\w{n} which, who} +%\dictentrysorted{aba}{aba}{ \m \w{v} \g{ba} \m \w{n} child, offspring} +\dictentrysorted{ababawa}{ababawa}{\w{n}young woman} +\dictentrysorted{abarima}{abarima}{\w{n}young man} +\dictentrysorted{abe}{abe}{\w{n} palm} +\dictentrysorted{abebisa}{abebisa}{\s{bisa}} +%\dictentrysorted{abede}{abede}{\g{de} to be ripe \td{???}} diff --git a/texdict/texdict.py b/texdict/texdict.py index 3e01e34..3e7945d 100755 --- a/texdict/texdict.py +++ b/texdict/texdict.py @@ -9,48 +9,57 @@ parser = argparse.ArgumentParser( ) parser.add_argument('file', help='input file') -parser.add_argument('-i', '--input', choices=('json', 'tex'), help='input format: json, tex, sqlite (default: file extension') -parser.add_argument('-f', '--format', choices=('json', 'tex'), help='choose output format: json, tex, sqlite (default: json)') +parser.add_argument('-i', '--input', choices=('json', 'tex'), help='input format: json, tex (default: json; sqlite might be added)') +parser.add_argument('-o', '--output', choices=('json', 'tex'), help='output format: json, tex (default: json; sqlite might be added)') parser.add_argument('-v', '--verbose', action='store_true', help='boolean verbose switch (default: false)') #parser.add_argument('-e', '--entry', help='add a entry to a file') #parser.add_argument('-o', '--output', help='write dictionary file to file') #parser.add_argument('-w', '--write', action='store_true', required=False, help='overwrite input file') args = parser.parse_args() -# verboseprint https://stackoverflow.com/a/5980173 +# def verboseprint() https://stackoverflow.com/a/5980173 verboseprint = print if args.verbose else lambda *a, **k: None inputfile=args.file informat=inputfile.split('.')[-1] #verboseprint(args.input) if args.input: informat=args.input -verboseprint("input file format:", informat) +verboseprint("INFO: input file format:", informat) outformat='json' -if args.format: - outformat=args.format -verboseprint("output file format:", outformat) +if args.output: + outformat=args.output +verboseprint("INFO: output file format:", outformat) # KNOWN WORD TYPES shorttypes={ 'noun':'n' } shorttypes_inversed= {v: k for k, v in shorttypes.items()} -verboseprint("Word types:",shorttypes) +verboseprint("INFO: Word types:",shorttypes) # # READ JSON FILE def get_data_from_jsonfile(filename): - verboseprint("opening file:", filename) + verboseprint("INFO: opening file:", filename) with open(filename) as infile: data = json.load(infile) - verboseprint(json.dumps(data, ensure_ascii=False)) + verboseprint("INFO:", json.dumps(data, ensure_ascii=False)) return json.dumps(data, ensure_ascii=False) # READ TEX FILE def get_data_from_texfile(filename): - verboseprint("opening file:", filename) + verboseprint("INFO: opening file:", filename) with open(filename) as infile: + jsonstring='{ "entries" : [ ' + j=0 for line in infile: - jsonstring='{"entry": "' + if line == '\n': + continue + if line.startswith('%'): + continue + if j!=0: + jsonstring+=', ' + j+=1 + jsonstring+='{"entry": "' #ENTRY jsonstring+=line.split('{')[1].split('}')[0]+'", ' #KEY @@ -58,15 +67,15 @@ def get_data_from_texfile(filename): jsonstring+=line.split('{')[2].split('}')[0]+'", ' #MEANINGS number_of_meanings=line.count('\m') - verboseprint("Meanings found:", number_of_meanings) + verboseprint("INFO: Meanings found:", number_of_meanings) jsonstring+='"meanings": [ ' for i in range(number_of_meanings): #MEANING - verboseprint(i) + verboseprint("INFO: number of meanins", i) meaning=line.split("\m")[i+1][1:] if i == (number_of_meanings-1): meaning=meaning[:-2] - verboseprint("Meaning:",meaning) + verboseprint("INFO: Meaning:",meaning) #TYPE #todo what if no type if i == 0: @@ -83,7 +92,7 @@ def get_data_from_texfile(filename): jsonstring+=', "examples": [' if ';' in meaning: examples=meaning.split('; ')[1] - verboseprint("examples:", examples) + verboseprint("INFO: examples:", examples) number_of_examples=len(examples.split(', ')) j=0 for example in examples.split(','): @@ -96,28 +105,34 @@ def get_data_from_texfile(filename): #CLOSE MEANING jsonstring+='}' jsonstring+=' ] }' + jsonstring+='] }' return jsonstring -def json2tex(entry): - entry_json=json.loads(entry) - entry_tex='\dictentrysorted{'+entry_json['entry']+'}{'+entry_json['key']+'}{' - for meaning in entry_json['meanings']: - #TYPE - wordtype=meaning['type'] - if wordtype in shorttypes: - wordtype=shorttypes[wordtype] - else: - print("VERBOSE: stderr: "+wordtype+" is not in the the known word type list!") - entry_tex+='\m ('+wordtype+') ' - #DESCRIPTION - entry_tex+=meaning['description']+' ' - #EXAMPLES - verboseprint(meaning) - if len(meaning['examples']) != 0: - for example in meaning['examples']: - entry_tex+='; '+example+' ' - entry_tex+='}' - return entry_tex +def json2tex(entries): + entries_json=json.loads(entries) + entries_tex="" + for entry_json in entries_json["entries"]: +# entry_json=json.loads(entry) + entry_tex='\dictentrysorted{'+entry_json['entry']+'}{'+entry_json['key']+'}{' + for meaning in entry_json['meanings']: + #TYPE + wordtype=meaning['type'] + if wordtype in shorttypes: + wordtype=shorttypes[wordtype] + else: + print("VERBOSE: stderr: "+wordtype+" is not in the the known word type list!") + entry_tex+='\m ('+wordtype+') ' + #DESCRIPTION + entry_tex+=meaning['description']+' ' + #EXAMPLES + verboseprint("INFO:", meaning) + if len(meaning['examples']) != 0: + for example in meaning['examples']: + entry_tex+='; '+example+' ' + entry_tex+='}' + entries_tex+=entry_tex + entries_tex+="\n" + return entries_tex def tex2json(entry): entry_json=json.dumps(json.loads(entry), ensure_ascii=False) diff --git a/texdict/todo.md b/texdict/todo.md index 9279d3e..f27ca99 100644 --- a/texdict/todo.md +++ b/texdict/todo.md @@ -1,2 +1,59 @@ # TODO: -- change such that two entries can be converted + +- [ ] test with example5.tex +- [ ] make tex parser cleaner and think more about structure of entries and levels +- [ ] how should the output of texdict.py -o tex be used? +``` +\documenttype{standalone} +%\usepackage{texdict} +\begin{document} +\input{output-from-textdict.tex} +\end{document} +``` +- [ ] if dict package, how can I keep texdict.py and texdict.sty in sync? + - [ ] textdict.py needs to be part of textdict.sty package + +- [ ] refactor code acording to 12factor app + - [ ] https://12factor.net/de/ +- [ ] what about that every entry should only appear once? +- [ ] if input and output are the same, should it be converted to json trotzdem? + - [ ] maybe not and add a --test flag for testing the syntax correctness + +--- +--- + +- [x] test: + - [x] $(./texdict.py example2.json -o tex`) +- [x] dann auch testen: + - [x] $(./texdict.py example2.tex ) + + + + + +- [x] change such that two entries can be converted +- [x] json: +``` +{[entry1,e2,e3]} +{entries : [entry1, entry2, entry3]} +``` +- [x] tex +``` +\entry{1} +\entry{2} +\entry{3} +``` + + +\documentclass{standalone} +\begin{document} +$\tau$e$\Xi$\textit{dict}\texttt{.py} +\end{document} + + + +a poems with vim +a poems with him + +a poems with vim vim +a poems with vim vim