#!/usr/bin/env python3
"""texdict - convert a dictionary/glossary between LaTeX and JSON.

LaTeX side: one entry per line, written as

    \\dictentrysorted{ENTRY}{KEY}{\\m (TYPE) DESCRIPTION ; EX1, EX2 \\m ...}

where every ``\\m`` starts a meaning, ``(TYPE)`` is an abbreviated word type
(see ``shorttypes``) and the optional part after ``;`` is a comma separated
list of examples.  Blank lines and lines starting with ``%`` are ignored.

JSON side:

    {"entries": [{"entry": ..., "key": ...,
                  "meanings": [{"type": ..., "description": ...,
                                "examples": [...]}]}]}
"""
import argparse
import json
import re
import sys

# KNOWN WORD TYPES: long name (used in JSON) -> abbreviation (used in LaTeX)
shorttypes = {
    'noun': 'n',
}
shorttypes_inversed = {v: k for k, v in shorttypes.items()}

# Set by main() from the --verbose switch; read by verboseprint().
_verbose = False

# Three consecutive brace groups: {entry}{key}{meanings...}.  The last group
# is greedy so that braces inside descriptions/examples stay part of it.
_ENTRY_RE = re.compile(r'\{([^{}]*)\}\s*\{([^{}]*)\}\s*\{(.*)\}')
# The \m command itself, but not longer commands that merely start with \m
# (e.g. \mathbf).
_MEANING_SPLIT_RE = re.compile(r'\\m(?![A-Za-z])')
# Optional leading "(type)" of a meaning, followed by the rest of the text.
_TYPE_RE = re.compile(r'\(([^)]*)\)\s*(.*)', re.DOTALL)


def verboseprint(*args, **kwargs):
    """Print diagnostics, but only when the --verbose switch is on.

    Output goes to stderr unless an explicit ``file=`` is passed, so that
    the converted data written to stdout stays machine readable.
    """
    if _verbose:
        kwargs.setdefault('file', sys.stderr)
        print(*args, **kwargs)


def _warn(message):
    """Write a warning to stderr (always, independent of --verbose)."""
    print("WARNING: " + message, file=sys.stderr)


# READ JSON FILE
def get_data_from_jsonfile(filename):
    """Load *filename* as JSON and return it re-serialised as a JSON string.

    The file is read as UTF-8; non-ASCII characters are kept verbatim in the
    returned string.  Raises ``OSError`` if the file cannot be opened and
    ``json.JSONDecodeError`` if it is not valid JSON.
    """
    verboseprint("INFO: opening file:", filename)
    with open(filename, encoding='utf-8') as infile:
        data = json.load(infile)
    text = json.dumps(data, ensure_ascii=False)
    verboseprint("INFO:", text)
    return text


def _parse_meaning(raw):
    """Turn the text following one ``\\m`` into a meaning dict.

    Returns ``{"type", "description", "examples"}``.  A missing ``(type)``
    yields an empty type; an abbreviation that is not in
    ``shorttypes_inversed`` is kept as-is (with a warning), mirroring what
    json2tex() does for unknown long names.
    """
    text = raw.strip()
    verboseprint("INFO: Meaning:", text)

    wordtype = ''
    typed = _TYPE_RE.match(text)
    if typed:
        short_wordtype = typed.group(1).strip()
        text = typed.group(2)
        wordtype = shorttypes_inversed.get(short_wordtype)
        if wordtype is None:
            _warn(short_wordtype + " is not in the known word type list!")
            wordtype = short_wordtype

    # Everything before the first ';' is the description, the rest is a
    # comma separated example list (possibly absent).
    description, _, example_text = text.partition(';')
    examples = [ex.strip() for ex in example_text.split(',') if ex.strip()]
    if examples:
        verboseprint("INFO: examples:", examples)

    return {
        'type': wordtype,
        'description': description.strip(),
        'examples': examples,
    }


# READ TEX FILE
def get_data_from_texfile(filename):
    """Parse the LaTeX glossary *filename* and return it as a JSON string.

    Blank lines and ``%`` comment lines are ignored.  Lines that do not
    contain the three ``{...}`` groups of an entry are skipped with a warning
    on stderr.  The result is built as Python objects and serialised with
    ``json.dumps`` so that quotes and backslashes in the source are escaped
    correctly.  Raises ``OSError`` if the file cannot be opened.
    """
    verboseprint("INFO: opening file:", filename)
    entries = []
    with open(filename, encoding='utf-8') as infile:
        for lineno, line in enumerate(infile, start=1):
            stripped = line.strip()
            if not stripped or stripped.startswith('%'):
                continue

            found = _ENTRY_RE.search(stripped)
            if found is None:
                _warn("line %d is not a dictionary entry, skipped" % lineno)
                continue
            entry, key, body = found.groups()

            # The text before the first \m is not a meaning, drop it.
            raw_meanings = _MEANING_SPLIT_RE.split(body)[1:]
            verboseprint("INFO: Meanings found:", len(raw_meanings))

            entries.append({
                'entry': entry,
                'key': key,
                'meanings': [_parse_meaning(raw) for raw in raw_meanings],
            })
    return json.dumps({'entries': entries}, ensure_ascii=False)


def json2tex(entries):
    """Render the JSON string *entries* as LaTeX, one entry per line.

    Each entry becomes
    ``\\dictentrysorted{entry}{key}{\\m (type) description ; ex1, ex2 }``.
    Known long word types are abbreviated via ``shorttypes``; unknown ones
    are written unchanged and reported on stderr; an empty or missing type
    omits the ``(type)`` part.  Examples are written in the comma separated
    form that get_data_from_texfile() reads back.

    Raises ``json.JSONDecodeError`` for invalid JSON and ``KeyError`` if an
    entry lacks ``entry``, ``key``, ``meanings`` or a meaning lacks
    ``description``.
    """
    lines = []
    for entry_json in json.loads(entries)['entries']:
        parts = [
            r'\dictentrysorted{', entry_json['entry'],
            '}{', entry_json['key'], '}{',
        ]
        for meaning in entry_json['meanings']:
            verboseprint("INFO:", meaning)
            # TYPE
            wordtype = meaning.get('type', '')
            if wordtype in shorttypes:
                wordtype = shorttypes[wordtype]
            elif wordtype:
                _warn(wordtype + " is not in the known word type list!")
            parts.append(r'\m (' + wordtype + ') ' if wordtype else r'\m ')
            # DESCRIPTION
            parts.append(meaning['description'] + ' ')
            # EXAMPLES
            examples = meaning.get('examples') or []
            if examples:
                parts.append('; ' + ', '.join(examples) + ' ')
        parts.append('}\n')
        lines.append(''.join(parts))
    return ''.join(lines)


def tex2json(entry):
    """Return the JSON string *entry* re-serialised in normalised form."""
    return json.dumps(json.loads(entry), ensure_ascii=False)


def _build_parser():
    """Create the command line parser."""
    parser = argparse.ArgumentParser(
        prog='texdict',
        description='convert LaTeX glossary to json',
        epilog='onipa'
    )
    parser.add_argument('file', help='input file')
    parser.add_argument(
        '-i', '--input', choices=('json', 'tex'),
        help='input format: json, tex (default: taken from the file '
             'extension; sqlite might be added)')
    parser.add_argument(
        '-o', '--output', choices=('json', 'tex'),
        help='output format: json, tex (default: json; sqlite might be added)')
    parser.add_argument(
        '-v', '--verbose', action='store_true',
        help='boolean verbose switch (default: false)')
    # TODO: planned options: --entry (add an entry to a file),
    # --write (overwrite the input file), writing the result to a file.
    return parser


def main(argv=None):
    """Run the command line tool; *argv* defaults to ``sys.argv[1:]``.

    Reads the input file in the selected format and prints the converted
    data to stdout.  Returns the process exit status (0 on success); an
    unsupported input format ends the program through ``parser.error``.
    """
    global _verbose
    parser = _build_parser()
    args = parser.parse_args(argv)
    _verbose = args.verbose

    inputfile = args.file
    # Explicit --input wins; otherwise fall back to the file extension.
    informat = args.input or inputfile.rsplit('.', 1)[-1].lower()
    verboseprint("INFO: input file format:", informat)
    outformat = args.output or 'json'
    verboseprint("INFO: output file format:", outformat)
    verboseprint("INFO: Word types:", shorttypes)

    readers = {
        'json': get_data_from_jsonfile,
        'tex': get_data_from_texfile,
    }
    if informat not in readers:
        parser.error(
            "unsupported input format '%s' (use -i json or -i tex)" % informat)
    data = readers[informat](inputfile)

    if outformat == 'json':
        print(data)
    elif outformat == 'tex':
        print(json2tex(data))
    return 0


if __name__ == '__main__':
    sys.exit(main())