texdict.py: first minimal example

This commit is contained in:
uni@bor.mac 2025-07-13 19:35:59 +02:00
parent 4f283b0c7f
commit 18ef47c536
3 changed files with 155 additions and 0 deletions

1
texdict/example.json Normal file
View File

@ -0,0 +1 @@
{"entry": "asɛm", "key": "asem", "meanings": [ { "type": "noun", "description": "word", "examples": ["Wonim saa asɛm yi?", " Wo nim sa ara asɛm yi? " ]}, { "type": "noun", "description": "issue", "examples": ["Dɛn ne asɛm no? " ]}, { "type": "noun", "description": "news ", "examples": [ ]} ] }

1
texdict/example.tex Normal file
View File

@ -0,0 +1 @@
\dictentrysorted{asɛm}{asem}{\m (n) word ; Wonim saa asɛm yi?, Wo nim sa ara asɛm yi? \m (n) issue ; Dɛn ne asɛm no? \m (n) news }

153
texdict/texdict.py Executable file
View File

@ -0,0 +1,153 @@
#!/usr/bin/env python3
import json
import argparse
# Command-line interface. Arguments are parsed at module level because this
# file is run as a stand-alone script.
parser = argparse.ArgumentParser(
    prog='texdict',
    description='convert LaTeX glossary to json',
    epilog='onipa'
)
parser.add_argument('file', help='input file')
# Help texts only list the formats that `choices` actually accepts.
parser.add_argument(
    '-i', '--input', choices=('json', 'tex'),
    help='input format: json, tex (default: file extension)')
#parser.add_argument('-e', '--entry', help='add a entry to a file')
#parser.add_argument('-w', '--write', action='store_true', required=False, help='overwrite input file')
parser.add_argument(
    '-f', '--format', choices=('json', 'tex'), default='json',
    help='choose output format: json, tex (default: json)')
#parser.add_argument('-o', '--output', help='write dictionary file to file')
parser.add_argument('-v', '--verbose', action='store_true', help='boolean verbose switch (default: false)')
args = parser.parse_args()

# verboseprint https://stackoverflow.com/a/5980173
# Behaves like print() with --verbose and is a silent no-op otherwise.
verboseprint = print if args.verbose else lambda *a, **k: None

inputfile = args.file
# An explicit --input wins; otherwise the format is taken from the text after
# the last dot of the file name, compared case-insensitively so that
# "entry.JSON" is recognised as well.
if args.input:
    informat = args.input
else:
    informat = inputfile.rsplit('.', 1)[-1].lower()
verboseprint("input file format:", informat)

# argparse supplies the 'json' default, so no manual fallback is needed.
outformat = args.format
verboseprint("output file format:", outformat)
#example= '''{
# "entry" : "asɛm",
# "key": "asem",
# "meanings": [
# {
# "type" : "noun",
# "description" : "word",
# "examples" : ["Wonim saa asɛm yi?"]
# },{
# "type": "noun",
# "description": "news",
# "examples" : []
# }
# ]
#}'''
# KNOWN WORD TYPES
# Full word type (as stored in JSON) -> abbreviation (as written in TeX).
shorttypes = dict(noun='n')
# Reverse lookup: TeX abbreviation -> full word type.
shorttypes_inversed = dict(zip(shorttypes.values(), shorttypes.keys()))
verboseprint("Word types:", shorttypes)
#
# READ JSON FILE
def get_data_from_jsonfile(filename):
    """Load a JSON dictionary entry from *filename* and return it re-serialised.

    Args:
        filename: path of the JSON file to read.

    Returns:
        The file's content as a JSON string produced by
        ``json.dumps(..., ensure_ascii=False)``, so non-ASCII letters are kept
        as-is instead of being turned into ``\\uXXXX`` escapes.

    Raises:
        OSError: if the file cannot be opened.
        json.JSONDecodeError: if the file does not contain valid JSON.
    """
    verboseprint("opening file:", filename)
    # The entries contain non-ASCII letters; read as UTF-8 explicitly instead
    # of depending on the platform's default locale encoding.
    with open(filename, encoding='utf-8') as infile:
        data = json.load(infile)
    # Serialise once and reuse the result for both logging and returning.
    serialized = json.dumps(data, ensure_ascii=False)
    verboseprint(serialized)
    return serialized
# READ TEX FILE
def get_data_from_texfile(filename):
    r"""Read a ``\dictentrysorted`` entry from a TeX file and return it as JSON.

    Expected line layout::

        \dictentrysorted{entry}{key}{\m (type) description ; example, example \m ...}

    Only a single entry is returned: the first non-blank line of the file.
    Surrounding whitespace is trimmed from the description and the examples.

    Args:
        filename: path of the TeX file to read.

    Returns:
        A JSON string (``json.dumps(..., ensure_ascii=False)``) of an object
        with the keys ``entry``, ``key`` and ``meanings``; every meaning has
        the keys ``type``, ``description`` and ``examples``.

    Raises:
        OSError: if the file cannot be opened.
        ValueError: if the file has no non-blank line, or that line does not
            contain three brace-delimited arguments.
    """
    verboseprint("opening file:", filename)
    # The entries contain non-ASCII letters; do not depend on the locale.
    with open(filename, encoding='utf-8') as infile:
        for line in infile:
            if line.strip():
                # Serialising a dict (instead of concatenating strings) makes
                # sure quotes and backslashes in the text are escaped.
                return json.dumps(_parse_tex_entry(line), ensure_ascii=False)
    raise ValueError("no dictionary entry found in " + filename)


def _parse_tex_entry(line):
    r"""Turn one ``\dictentrysorted{entry}{key}{body}`` line into a dict."""
    stripped = line.strip()
    # parts: [macro name, "entry}", "key}", body including its closing brace]
    parts = stripped.split('{', 3)
    if len(parts) < 4:
        raise ValueError("not a \\dictentrysorted line: " + stripped)
    body = parts[3]
    # Drop the brace that closes the third argument. Working on the stripped
    # line makes this independent of whether the file ends with a newline.
    if body.endswith('}'):
        body = body[:-1]
    # Everything before the first \m marker is not a meaning.
    raw_meanings = body.split('\\m')[1:]
    verboseprint("Meanings found:", len(raw_meanings))
    return {
        'entry': parts[1].split('}')[0],
        'key': parts[2].split('}')[0],
        'meanings': [_parse_tex_meaning(raw) for raw in raw_meanings],
    }


def _parse_tex_meaning(raw):
    r"""Parse the text that follows one ``\m`` marker into a meaning dict."""
    text = raw.strip()
    verboseprint("Meaning:", text)
    # The "(type)" prefix is optional; without it the type is left empty.
    short_wordtype = ''
    if text.startswith('(') and ')' in text:
        short_wordtype, _, text = text[1:].partition(')')
        short_wordtype = short_wordtype.strip()
    # Abbreviations missing from the table are passed through unchanged
    # rather than aborting the conversion with a KeyError.
    wordtype = shorttypes_inversed.get(short_wordtype, short_wordtype)
    # The first ';' separates the description from the comma-separated examples.
    description, _, example_text = text.partition(';')
    verboseprint("examples:", example_text)
    examples = [part.strip() for part in example_text.split(',') if part.strip()]
    return {
        'type': wordtype,
        'description': description.strip(),
        'examples': examples,
    }
def json2tex(entry):
    r"""Render one dictionary entry (a JSON string) as a ``\dictentrysorted`` line.

    Output layout::

        \dictentrysorted{entry}{key}{\m (type) description ; example, example \m ...}

    Word types found in ``shorttypes`` are abbreviated; any other type is
    written unchanged and a warning is sent to stderr. Examples follow a
    single ``;`` and are separated by commas.

    Args:
        entry: JSON string of an object with the keys ``entry``, ``key`` and
            ``meanings``; each meaning needs ``type`` and ``description`` and
            may have ``examples`` (a list of strings).

    Returns:
        The TeX source of the entry as a single line without trailing newline.

    Raises:
        json.JSONDecodeError: if *entry* is not valid JSON.
        KeyError: if a required key is missing.
    """
    import sys  # local import: only needed for the warning below

    entry_json = json.loads(entry)
    # Backslashes are written as '\\' because '\d' and '\m' are not valid
    # escape sequences in a normal string literal.
    pieces = ['\\dictentrysorted{' + entry_json['entry'] + '}{' + entry_json['key'] + '}{']
    for meaning in entry_json['meanings']:
        verboseprint(meaning)
        #type
        wordtype = meaning['type']
        if wordtype in shorttypes:
            wordtype = shorttypes[wordtype]
        else:
            # stdout carries the generated TeX, so diagnostics go to stderr.
            print("warning: " + wordtype + " is not in the known word type list!",
                  file=sys.stderr)
        #description
        pieces.append('\\m (' + wordtype + ') ' + meaning['description'].strip() + ' ')
        #examples
        examples = [ex.strip() for ex in meaning.get('examples', []) if ex.strip()]
        if examples:
            pieces.append('; ' + ', '.join(examples) + ' ')
    pieces.append('}')
    return ''.join(pieces)
def tex2json(entry):
    """Return the JSON string *entry* re-serialised in normalised form.

    The input is parsed and dumped again with ``ensure_ascii=False``, so
    non-ASCII characters stay readable in the result.
    """
    parsed = json.loads(entry)
    return json.dumps(parsed, ensure_ascii=False)
# Load the entry as a JSON string using the reader for the input format.
if informat == "json":
    data = get_data_from_jsonfile(inputfile)
elif informat == 'tex':
    data = get_data_from_texfile(inputfile)
else:
    # Without this branch `data` stays undefined and the script dies with a
    # NameError further down; report the problem through argparse instead
    # (prints the usage message to stderr and exits with status 2).
    parser.error("unsupported input format: " + informat + " (use --input json or --input tex)")

# Write the entry to stdout in the requested output format.
if outformat == 'json':
    print(data)
elif outformat == 'tex':
    print(json2tex(data))
# `exit()` is a helper injected by the `site` module and may be missing;
# raising SystemExit needs no import and terminates with the same status.
raise SystemExit(0)