texdict/todo.md: add

textdict.py: first minimal example
2025-07-13 19:39:20 +02:00 · 2025-07-13 19:35:59 +02:00
4 changed files with 140 additions and 0 deletions
--- a/texdict/example.json
+++ b/texdict/example.json
@ -0,0 +1 @@
 {"entry": "asɛm", "key": "asem", "meanings": [ { "type": "noun", "description": "word", "examples": ["Wonim saa asɛm yi?", " Wo nim sa ara asɛm yi? " ]}, { "type": "noun", "description": "issue", "examples": ["Dɛn ne asɛm no? " ]}, { "type": "noun", "description": "news ", "examples": [ ]} ] }
--- a/texdict/example.tex
+++ b/texdict/example.tex
@ -0,0 +1 @@
 \dictentrysorted{asɛm}{asem}{\m (n) word ; Wonim saa asɛm yi?, Wo nim sa ara asɛm yi? \m (n) issue ; Dɛn ne asɛm no? \m (n) news }
--- a/texdict/texdict.py
+++ b/texdict/texdict.py
@ -0,0 +1,136 @@
 #!/usr/bin/env python3 
 import json
 import argparse 
 # Command-line interface: one positional input file plus optional switches
 # for the input format, the output format and verbose tracing.
 parser = argparse.ArgumentParser(
    prog='texdict',
    description='convert LaTeX glossary to json',
    epilog='onipa'
 )
 parser.add_argument('file', help='input file')
 # The help texts list exactly the values accepted by ``choices``.
 parser.add_argument('-i', '--input', choices=('json', 'tex'), help='input format: json, tex (default: file extension)')
 parser.add_argument('-f', '--format', choices=('json', 'tex'), default='json', help='choose output format: json, tex (default: json)')
 parser.add_argument('-v', '--verbose', action='store_true', help='boolean verbose switch (default: false)')
 # Options that are kept commented out (not active):
 #parser.add_argument('-e', '--entry', help='add a entry to a file')
 #parser.add_argument('-o', '--output', help='write dictionary file to file')
 #parser.add_argument('-w', '--write', action='store_true', required=False, help='overwrite input file')
 args = parser.parse_args()
 # verboseprint https://stackoverflow.com/a/5980173
 # Acts like print() with --verbose and is a no-op otherwise.
 verboseprint = print if args.verbose else lambda *a, **k: None
 inputfile = args.file
 # An explicit --input wins; otherwise the format is taken from the file
 # extension (text after the last dot, compared case-insensitively).
 informat = args.input or inputfile.rsplit('.', 1)[-1].lower()
 if informat not in ('json', 'tex'):
    # Stop here with a usage message instead of failing later on an
    # undefined variable when no reader matches the format.
    parser.error("cannot determine input format of '{}': use --input json or --input tex".format(inputfile))
 verboseprint("input file format:", informat)
 # argparse supplies 'json' when --format is not given.
 outformat = args.format
 verboseprint("output file format:", outformat)
 # Known word types: long name -> abbreviation.
 shorttypes = dict(noun='n')
 # Reverse table for looking up the long name by its abbreviation.
 shorttypes_inversed = dict(zip(shorttypes.values(), shorttypes.keys()))
 verboseprint("Word types:", shorttypes)
 #
 # READ JSON FILE
 def get_data_from_jsonfile(filename):
    """Read a JSON file and return its content as a normalized JSON string.

    The file is parsed and serialized again, so the result uses the default
    ``json.dumps`` layout with non-ASCII characters written literally
    (``ensure_ascii=False``).

    Args:
        filename: path of the JSON file to read.

    Returns:
        The parsed content re-serialized as a JSON string.

    Raises:
        OSError: if the file cannot be opened.
        json.JSONDecodeError: if the file does not contain valid JSON.
    """
    verboseprint("opening file:", filename)
    # Read as UTF-8 explicitly: entries contain non-ASCII letters and the
    # default encoding of open() depends on the platform locale.
    with open(filename, encoding='utf-8') as infile:
        data = json.load(infile)
    # Serialize once and reuse the string for both the trace and the result.
    serialized = json.dumps(data, ensure_ascii=False)
    verboseprint(serialized)
    return serialized
 # READ TEX FILE
 def get_data_from_texfile(filename):
    r"""Read a ``\dictentrysorted`` line from a TeX file and return it as JSON.

    Each non-blank line is expected to look like
    ``\dictentrysorted{entry}{key}{\m (t) description ; example, example \m ...}``.
    If the file holds several such lines, only the last one is returned.

    Args:
        filename: path of the TeX file to read.

    Returns:
        A JSON string with the keys ``entry``, ``key`` and ``meanings``;
        every meaning has ``type``, ``description`` and ``examples``.

    Raises:
        OSError: if the file cannot be opened.
        ValueError: if the file has no entry line or a line is malformed.
    """
    verboseprint("opening file:", filename)
    entry = None
    # Read as UTF-8 explicitly: entries contain non-ASCII letters and the
    # default encoding of open() depends on the platform locale.
    with open(filename, encoding='utf-8') as infile:
        for line in infile:
            line = line.rstrip('\r\n')
            if not line.strip():
                # Blank lines (e.g. at the end of the file) carry no entry.
                continue
            entry = _parse_tex_entry(line)
    if entry is None:
        raise ValueError("no dictionary entry found in {}".format(filename))
    # Let json.dumps do the quoting so that quotes or backslashes inside the
    # text cannot produce invalid JSON.
    return json.dumps(entry, ensure_ascii=False)


 def _parse_tex_entry(line):
    r"""Turn one ``\dictentrysorted{...}{...}{...}`` line into an entry dict."""
    fields = line.split('{')
    if len(fields) < 3:
        raise ValueError("not a \\dictentrysorted line: {!r}".format(line))
    entry = fields[1].split('}')[0]
    key = fields[2].split('}')[0]
    # Everything after each \m marker is one meaning.
    chunks = line.split(r'\m')[1:]
    verboseprint("Meanings found:", len(chunks))
    meanings = []
    for i, chunk in enumerate(chunks):
        verboseprint(i)
        # Drop the single character that follows the \m marker.
        meaning = chunk[1:]
        if i == len(chunks) - 1:
            # The last meaning is followed by the brace that closes the
            # entry; cut at that brace whether or not a newline followed it.
            head, brace, _ = meaning.rpartition('}')
            if brace:
                meaning = head
        verboseprint("Meaning:", meaning)
        meanings.append(_parse_tex_meaning(meaning))
    return {"entry": entry, "key": key, "meanings": meanings}


 def _parse_tex_meaning(meaning):
    """Turn ``(t) description ; example, example`` into a meaning dict."""
    _, opening, after_opening = meaning.partition('(')
    short_wordtype, closing, _ = after_opening.partition(')')
    if not opening or not closing:
        raise ValueError("meaning without word type: {!r}".format(meaning))
    if short_wordtype in shorttypes_inversed:
        wordtype = shorttypes_inversed[short_wordtype]
    else:
        # Unknown abbreviations are passed through unchanged.
        verboseprint("unknown word type:", short_wordtype)
        wordtype = short_wordtype
    # The description runs from the end of the type to the ' ;' separator.
    _, _, text = meaning.partition(') ')
    description = text.split(' ;')[0]
    # Examples follow the first '; ' and are separated by commas; their
    # surrounding whitespace is kept as written.
    _, separator, examples_text = meaning.partition('; ')
    examples = []
    if separator and examples_text.strip():
        verboseprint("examples:", examples_text)
        examples = examples_text.split(',')
    return {"type": wordtype, "description": description, "examples": examples}
 def json2tex(entry):
    r"""Render one dictionary entry, given as a JSON string, as a TeX line.

    The result has the form
    ``\dictentrysorted{entry}{key}{\m (t) description ; example, example \m ...}``:
    one ``\m`` block per meaning and, when a meaning has examples, a single
    ``; `` followed by the examples separated by ``, ``.

    Args:
        entry: JSON string with the keys ``entry``, ``key`` and ``meanings``;
            every meaning needs ``type`` and ``description`` and may have
            ``examples``.

    Returns:
        The TeX source line as a string.

    Raises:
        json.JSONDecodeError: if *entry* is not valid JSON.
        KeyError: if a required key is missing.
    """
    # Local import: only the warning below needs sys.
    import sys

    entry_json = json.loads(entry)
    # Backslashes are doubled so that no invalid escape sequence is used.
    parts = ['\\dictentrysorted{' + entry_json['entry'] + '}{' + entry_json['key'] + '}{']
    for meaning in entry_json['meanings']:
        #TYPE
        wordtype = meaning['type']
        if wordtype in shorttypes:
            wordtype = shorttypes[wordtype]
        else:
            # Unknown types are written unchanged; the warning goes to
            # stderr so that it does not end up in the converted output.
            print("VERBOSE: stderr: " + wordtype + " is not in the known word type list!", file=sys.stderr)
        parts.append('\\m (' + wordtype + ') ')
        #DESCRIPTION
        parts.append(meaning['description'] + ' ')
        #EXAMPLES
        verboseprint(meaning)
        examples = meaning.get('examples') or []
        if examples:
            # One ';' introduces the whole example list.
            parts.append('; ' + ', '.join(examples) + ' ')
    parts.append('}')
    return ''.join(parts)
 def tex2json(entry):
    """Return the JSON string *entry* re-serialized in normalized form.

    The string is parsed and dumped again with ``ensure_ascii=False``, so
    non-ASCII characters are written literally.
    """
    parsed = json.loads(entry)
    return json.dumps(parsed, ensure_ascii=False)
 # Read the input file with the reader that matches the input format.
 if informat == "json":
    data = get_data_from_jsonfile(inputfile)
 elif informat == 'tex':
    data = get_data_from_texfile(inputfile)
 else:
    # Without this branch `data` would be undefined below; SystemExit with a
    # message prints it to stderr and exits with a non-zero status.
    raise SystemExit("texdict: unsupported input format: " + informat)
 # Write the entry to stdout in the requested output format.
 if outformat == 'json':
    print(data)
 elif outformat == 'tex':
    print(json2tex(data))
 # Exit with status 0 without relying on the `exit` helper, which is only
 # installed by the site module.
 raise SystemExit(0)
--- a/texdict/todo.md
+++ b/texdict/todo.md
@ -0,0 +1,2 @@
 # TODO:
 - change such that two entries can be converted
Author	SHA1	Message	Date
uni@bor.mac	52f4d78fa3	texdict/todo.md: add	2025-07-13 19:39:20 +02:00
uni@bor.mac	18ef47c536	textdict.py: first minimal example	2025-07-13 19:35:59 +02:00
		`@ -0,0 +1 @@`
							`{"entry": "asɛm", "key": "asem", "meanings": [ { "type": "noun", "description": "word", "examples": ["Wonim saa asɛm yi?", " Wo nim sa ara asɛm yi? " ]}, { "type": "noun", "description": "issue", "examples": ["Dɛn ne asɛm no? " ]}, { "type": "noun", "description": "news ", "examples": [ ]} ] }`
		`@ -0,0 +1 @@`
							`\dictentrysorted{asɛm}{asem}{\m (n) word ; Wonim saa asɛm yi?, Wo nim sa ara asɛm yi? \m (n) issue ; Dɛn ne asɛm no? \m (n) news }`
		`@ -0,0 +1,2 @@`
							`# TODO:`
							`- change such that two entries can be converted`