#!/usr/bin/env python3
r"""texdict: convert a LaTeX glossary to JSON and back.

A glossary entry in TeX form is a single line of the shape::

    \dictentrysorted{entry}{key}{\m (n) description ; example ; example }

where every ``\m`` starts one meaning, the parenthesised token is the
abbreviated word type (see ``shorttypes``) and everything after the first
``;`` is a list of examples.  The JSON form of the same entry is::

    {"entry": ..., "key": ..., "meanings": [
        {"type": ..., "description": ..., "examples": [...]}]}
"""
import argparse
import json
import re
import sys

# KNOWN WORD TYPES: long name (JSON side) -> abbreviation (TeX side).
shorttypes = {
    'noun': 'n',
}
shorttypes_inversed = {v: k for k, v in shorttypes.items()}

# Switched on by main() when --verbose is given; read by verboseprint().
_verbose = False

# First three brace groups of an entry line: {entry}{key}{meanings...}.
# The last group is greedy so it runs up to the final closing brace.
_ENTRY_RE = re.compile(r'\{([^}]*)\}\s*\{([^}]*)\}\s*\{(.*)\}')
# The \m macro itself, but not the start of a longer macro such as \mathbf.
_MEANING_SPLIT_RE = re.compile(r'\\m(?![A-Za-z])')
# Examples are accepted with either separator: json2tex() writes ';',
# hand-written glossaries use ','.
_EXAMPLE_SPLIT_RE = re.compile(r'[;,]')


def verboseprint(*args, **kwargs):
    """Behave like ``print`` in verbose mode and do nothing otherwise."""
    if _verbose:
        print(*args, **kwargs)


def _warn_unknown_type(wordtype):
    """Report a word type missing from ``shorttypes`` on stderr.

    stderr is used so the warning never mixes with the converted output
    that is written to stdout.
    """
    print(wordtype + " is not in the known word type list!", file=sys.stderr)


# READ JSON FILE
def get_data_from_jsonfile(filename):
    """Load *filename* as JSON and return it re-serialised as a JSON string.

    Raises ``OSError`` if the file cannot be opened and
    ``json.JSONDecodeError`` if it does not contain valid JSON.
    """
    verboseprint("opening file:", filename)
    with open(filename, encoding='utf-8') as infile:
        data = json.load(infile)
    text = json.dumps(data, ensure_ascii=False)
    verboseprint(text)
    return text


def _parse_meaning(chunk):
    """Turn the text following one ``\\m`` into a meaning dictionary."""
    text = chunk.strip()
    verboseprint("Meaning:", text)

    # TYPE: optional "(abbr)" prefix.  A meaning without it gets an empty
    # type instead of crashing the whole conversion.
    wordtype = ''
    if text.startswith('('):
        short, closed, rest = text[1:].partition(')')
        if closed:
            short = short.strip()
            text = rest
            if short in shorttypes_inversed:
                wordtype = shorttypes_inversed[short]
            else:
                # Keep unknown abbreviations verbatim, mirroring json2tex().
                if short:
                    _warn_unknown_type(short)
                wordtype = short

    # DESCRIPTION / EXAMPLES: split at the first ';'.
    description, _, examples_text = text.partition(';')
    examples = [
        example.strip()
        for example in _EXAMPLE_SPLIT_RE.split(examples_text)
        if example.strip()
    ]
    if examples:
        verboseprint("examples:", examples)

    return {
        'type': wordtype,
        'description': description.strip(),
        'examples': examples,
    }


def _parse_tex_entry(line):
    """Parse one glossary line; return ``None`` if it holds no entry."""
    match = _ENTRY_RE.search(line)
    if match is None:
        return None
    entry, key, body = match.groups()
    # Element 0 is whatever precedes the first \m and carries no meaning.
    chunks = _MEANING_SPLIT_RE.split(body)[1:]
    verboseprint("Meanings found:", len(chunks))
    return {
        'entry': entry,
        'key': key,
        'meanings': [_parse_meaning(chunk) for chunk in chunks],
    }


# READ TEX FILE
def get_data_from_texfile(filename):
    """Read a TeX glossary and return its entries as a JSON string.

    Lines that do not contain an entry (blank lines, comments) are skipped.
    A file with exactly one entry yields a JSON object, as before; a file
    with several entries yields a JSON array of such objects so that no
    entry is silently dropped.  Values are serialised with ``json.dumps``,
    so quotes and backslashes inside the glossary text are escaped.

    Raises ``OSError`` if the file cannot be opened.
    """
    verboseprint("opening file:", filename)
    entries = []
    with open(filename, encoding='utf-8') as infile:
        for line in infile:
            entry = _parse_tex_entry(line)
            if entry is None:
                if line.strip():
                    verboseprint("skipping line:", line.rstrip())
                continue
            entries.append(entry)
    data = entries[0] if len(entries) == 1 else entries
    return json.dumps(data, ensure_ascii=False)


def _entry_to_tex(entry_json):
    """Render one entry dictionary as a ``\\dictentrysorted`` line."""
    parts = [
        r'\dictentrysorted{' + entry_json['entry']
        + '}{' + entry_json['key'] + '}{'
    ]
    for meaning in entry_json['meanings']:
        # TYPE
        wordtype = meaning['type']
        if wordtype in shorttypes:
            wordtype = shorttypes[wordtype]
        elif wordtype:
            _warn_unknown_type(wordtype)
        parts.append(r'\m (' + wordtype + ') ')
        # DESCRIPTION
        parts.append(meaning['description'] + ' ')
        # EXAMPLES
        verboseprint(meaning)
        for example in meaning.get('examples', []):
            parts.append('; ' + example + ' ')
    parts.append('}')
    return ''.join(parts)


def json2tex(entry):
    """Convert a JSON string holding one entry (or a list) to TeX.

    A JSON object produces a single ``\\dictentrysorted`` line; a JSON
    array produces one line per element, joined with newlines.

    Raises ``json.JSONDecodeError`` for invalid JSON and ``KeyError`` if an
    entry lacks ``entry``, ``key``, ``meanings``, ``type`` or
    ``description``.
    """
    entry_json = json.loads(entry)
    if isinstance(entry_json, list):
        return '\n'.join(_entry_to_tex(item) for item in entry_json)
    return _entry_to_tex(entry_json)


def tex2json(entry):
    """Normalise a JSON string by parsing and re-serialising it."""
    entry_json = json.dumps(json.loads(entry), ensure_ascii=False)
    return entry_json


def _build_parser():
    """Create the command-line parser."""
    parser = argparse.ArgumentParser(
        prog='texdict',
        description='convert LaTeX glossary to json',
        epilog='onipa',
    )
    parser.add_argument('file', help='input file')
    parser.add_argument('-i', '--input', choices=('json', 'tex'),
                        help='input format: json, tex '
                             '(default: file extension)')
    parser.add_argument('-f', '--format', choices=('json', 'tex'),
                        help='choose output format: json, tex '
                             '(default: json)')
    parser.add_argument('-v', '--verbose', action='store_true',
                        help='boolean verbose switch (default: false)')
    # Not implemented yet:
    #parser.add_argument('-e', '--entry', help='add an entry to a file')
    #parser.add_argument('-o', '--output', help='write dictionary file to file')
    #parser.add_argument('-w', '--write', action='store_true', required=False, help='overwrite input file')
    return parser


def main(argv=None):
    """Run the converter; *argv* defaults to ``sys.argv[1:]``."""
    global _verbose

    parser = _build_parser()
    args = parser.parse_args(argv)
    _verbose = args.verbose

    inputfile = args.file
    # Default input format: whatever follows the last dot of the file name.
    informat = args.input or inputfile.rsplit('.', 1)[-1].lower()
    verboseprint("input file format:", informat)
    outformat = args.format or 'json'
    verboseprint("output file format:", outformat)
    verboseprint("Word types:", shorttypes)

    if informat == 'json':
        data = get_data_from_jsonfile(inputfile)
    elif informat == 'tex':
        data = get_data_from_texfile(inputfile)
    else:
        # Exits with status 2 and a usage message instead of a NameError.
        parser.error("cannot detect input format from %r; use --input"
                     % inputfile)

    if outformat == 'json':
        print(data)
    elif outformat == 'tex':
        print(json2tex(data))
    return 0


if __name__ == '__main__':
    sys.exit(main())