From 18ef47c53601983f0d75289123ee09f7a9d6cee4 Mon Sep 17 00:00:00 2001 From: "uni@bor.mac" Date: Sun, 13 Jul 2025 19:35:59 +0200 Subject: [PATCH] textdict.py: first minimal example --- texdict/example.json | 1 + texdict/example.tex | 1 + texdict/texdict.py | 153 +++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 155 insertions(+) create mode 100644 texdict/example.json create mode 100644 texdict/example.tex create mode 100755 texdict/texdict.py diff --git a/texdict/example.json b/texdict/example.json new file mode 100644 index 0000000..ccb3a12 --- /dev/null +++ b/texdict/example.json @@ -0,0 +1 @@ +{"entry": "asɛm", "key": "asem", "meanings": [ { "type": "noun", "description": "word", "examples": ["Wonim saa asɛm yi?", " Wo nim sa ara asɛm yi? " ]}, { "type": "noun", "description": "issue", "examples": ["Dɛn ne asɛm no? " ]}, { "type": "noun", "description": "news ", "examples": [ ]} ] } diff --git a/texdict/example.tex b/texdict/example.tex new file mode 100644 index 0000000..768c107 --- /dev/null +++ b/texdict/example.tex @@ -0,0 +1 @@ +\dictentrysorted{asɛm}{asem}{\m (n) word ; Wonim saa asɛm yi?, Wo nim sa ara asɛm yi? \m (n) issue ; Dɛn ne asɛm no? 
#!/usr/bin/env python3
"""texdict - convert a LaTeX glossary entry file to JSON and back.

Input formats:
  tex  -- one ``\\dictentrysorted{<entry>}{<key>}{\\m (type) description
          [; example, example ...] ...}`` per line (see texdict/example.tex)
  json -- one JSON object {"entry", "key", "meanings": [...]}
          (see texdict/example.json)

Output formats: json (default) or tex.
"""

import argparse
import json
import sys

# KNOWN WORD TYPES: long name -> short tag used inside the TeX markup.
shorttypes = {
    'noun': 'n',
}
shorttypes_inversed = {v: k for k, v in shorttypes.items()}


def _noop(*args, **kwargs):
    """Do nothing; default stand-in for verbose printing."""


# Rebound to ``print`` by main() when --verbose is given.
# (verboseprint idiom: https://stackoverflow.com/a/5980173)
verboseprint = _noop


def get_data_from_jsonfile(filename):
    """Read one dictionary entry from *filename* (JSON) and return it
    re-serialized as a compact JSON string (non-ASCII kept literal)."""
    verboseprint("opening file:", filename)
    with open(filename, encoding='utf-8') as infile:
        data = json.load(infile)
    verboseprint(json.dumps(data, ensure_ascii=False))
    return json.dumps(data, ensure_ascii=False)


def _parse_meaning(chunk):
    """Parse one ``(type) description [; example, example ...]`` chunk
    (the text following a ``\\m`` marker) into a meaning dict.

    Unknown short type tags are kept as-is with a warning on stderr
    instead of raising KeyError.
    """
    short_wordtype = chunk.split('(', 1)[1].split(')', 1)[0]
    wordtype = shorttypes_inversed.get(short_wordtype)
    if wordtype is None:
        print("warning: unknown word type tag: " + short_wordtype,
              file=sys.stderr)
        wordtype = short_wordtype
    rest = chunk.split(') ', 1)[1]
    # A single ';' separates the description from the example list;
    # examples themselves are comma separated.
    description, _sep, tail = rest.partition(';')
    description = description.rstrip()
    examples = [e.strip() for e in tail.split(',') if e.strip()]
    return {'type': wordtype, 'description': description, 'examples': examples}


def get_data_from_texfile(filename):
    """Read ``\\dictentrysorted`` lines from *filename* and return the
    entry as a JSON string.

    NOTE: as in the original implementation, when the file contains
    several lines only the LAST line's entry is returned.
    """
    verboseprint("opening file:", filename)
    jsonstring = None
    with open(filename, encoding='utf-8') as infile:
        for line in infile:
            # \dictentrysorted{<entry>}{<key>}{ ... }
            entry = line.split('{')[1].split('}')[0]
            key = line.split('{')[2].split('}')[0]
            chunks = line.split(r'\m')[1:]
            verboseprint("Meanings found:", len(chunks))
            meanings = []
            for i, chunk in enumerate(chunks):
                chunk = chunk.lstrip()
                if i == len(chunks) - 1:
                    # The last chunk carries the closing brace of the
                    # outer TeX group -- strip it and trailing space.
                    chunk = chunk.rstrip()
                    if chunk.endswith('}'):
                        chunk = chunk[:-1].rstrip()
                verboseprint("Meaning:", chunk)
                meanings.append(_parse_meaning(chunk))
            data = {'entry': entry, 'key': key, 'meanings': meanings}
            # Build JSON with json.dumps instead of string concatenation:
            # the old manual comma bookkeeping could emit invalid JSON.
            jsonstring = json.dumps(data, ensure_ascii=False)
    return jsonstring


def json2tex(entry):
    """Render a JSON entry string as one ``\\dictentrysorted`` TeX line.

    Examples are emitted as ``; ex1, ex2`` -- the exact shape that
    get_data_from_texfile() parses, so json -> tex -> json round-trips.
    """
    entry_json = json.loads(entry)
    entry_tex = (r'\dictentrysorted{' + entry_json['entry'] + '}{'
                 + entry_json['key'] + '}{')
    for meaning in entry_json['meanings']:
        verboseprint(meaning)
        wordtype = meaning['type']
        if wordtype in shorttypes:
            wordtype = shorttypes[wordtype]
        else:
            # Tolerate unknown types but flag them on stderr (not stdout,
            # so the TeX output stays clean).
            print("warning: " + wordtype
                  + " is not in the known word type list!", file=sys.stderr)
        entry_tex += '\\m (' + wordtype + ') ' + meaning['description'] + ' '
        if meaning['examples']:
            entry_tex += '; ' + ', '.join(meaning['examples']) + ' '
    entry_tex += '}'
    return entry_tex


def tex2json(entry):
    """Normalize a JSON entry string by round-tripping through json."""
    return json.dumps(json.loads(entry), ensure_ascii=False)


def main():
    """CLI entry point: parse arguments, convert, print to stdout."""
    parser = argparse.ArgumentParser(
        prog='texdict',
        description='convert LaTeX glossary to json',
        epilog='onipa',
    )
    parser.add_argument('file', help='input file')
    parser.add_argument('-i', '--input', choices=('json', 'tex'),
                        help='input format: json, tex '
                             '(default: from file extension)')
    parser.add_argument('-f', '--format', choices=('json', 'tex'),
                        help='output format: json, tex (default: json)')
    parser.add_argument('-v', '--verbose', action='store_true',
                        help='verbose output (default: false)')
    args = parser.parse_args()

    global verboseprint
    if args.verbose:
        verboseprint = print

    inputfile = args.file
    informat = args.input or inputfile.split('.')[-1]
    verboseprint("input file format:", informat)
    outformat = args.format or 'json'
    verboseprint("output file format:", outformat)

    if informat == 'json':
        data = get_data_from_jsonfile(inputfile)
    elif informat == 'tex':
        data = get_data_from_texfile(inputfile)
    else:
        # Previously fell through with `data` undefined -> NameError.
        parser.error('unknown input format: ' + informat)

    if outformat == 'json':
        print(data)
    elif outformat == 'tex':
        print(json2tex(data))


if __name__ == '__main__':
    main()