#!/usr/bin/env python3
"""texdict: convert a LaTeX glossary to JSON and back.

A glossary file holds one entry per line, shaped like::

    \\dictentrysorted{entry}{key}{\\m (n) description ; example ; example }

Lines that are blank or start with ``%`` are ignored.  Every ``\\m`` inside
the third brace group starts one meaning: a word type abbreviation in
parentheses, a description, and optional examples after ``;``.

The JSON representation is::

    {"entries": [{"entry": ..., "key": ...,
                  "meanings": [{"type": ..., "description": ...,
                                "examples": [...]}]}]}
"""
import argparse
import json
import os
import re
import sys

# KNOWN WORD TYPES: long name used in JSON -> abbreviation used in LaTeX.
shorttypes = {
    'noun': 'n',
}
shorttypes_inversed = {v: k for k, v in shorttypes.items()}

# The \m control sequence that opens a meaning.  The lookahead keeps longer
# commands that merely start with the same letters (\mbox, \mathbf, ...)
# from being mistaken for a meaning marker.
_MEANING_MARKER = re.compile(r'\\m(?![A-Za-z])')

# Switched on by main() when -v/--verbose is given.
_verbose = False


def verboseprint(*args, **kwargs):
    """Print like ``print()``, but only when verbose mode is enabled."""
    if _verbose:
        print(*args, **kwargs)


def build_parser():
    """Return the command-line argument parser of the script."""
    parser = argparse.ArgumentParser(
        prog='texdict',
        description='convert LaTeX glossary to json',
        epilog='onipa',
    )
    parser.add_argument('file', help='input file')
    parser.add_argument(
        '-i', '--input', choices=('json', 'tex'),
        help='input format: json, tex (default: taken from the file '
             'extension; sqlite might be added)')
    parser.add_argument(
        '-o', '--output', choices=('json', 'tex'),
        help='output format: json, tex (default: json; sqlite might be added)')
    parser.add_argument(
        '-v', '--verbose', action='store_true',
        help='boolean verbose switch (default: false)')
    # Options that were drafted but are not implemented:
    # parser.add_argument('-e', '--entry', help='add a entry to a file')
    # parser.add_argument('-w', '--write', action='store_true', required=False, help='overwrite input file')
    return parser


# READ JSON FILE
def get_data_from_jsonfile(filename):
    """Load a JSON glossary file and return it re-serialised as a string.

    Args:
        filename: path of the JSON file to read (decoded as UTF-8).

    Returns:
        The file content as a JSON string with non-ASCII characters kept.

    Raises:
        OSError: if the file cannot be opened.
        json.JSONDecodeError: if the file does not contain valid JSON.
    """
    verboseprint("INFO: opening file:", filename)
    with open(filename, encoding='utf-8') as infile:
        data = json.load(infile)
    text = json.dumps(data, ensure_ascii=False)
    verboseprint("INFO:", text)
    return text


def find_matching_parens(s, braces=None):
    """Return the index pairs of all matching braces in *s*.

    Args:
        s: the string to scan.
        braces: mapping of opening to closing character; defaults to
            ``{"{": "}"}``.

    Returns:
        A list of ``[open_index, close_index]`` pairs, in the order in which
        the braces are closed (inner pairs come before the enclosing pair).

    Raises:
        ValueError: if a brace is closed without being opened, closed by the
            wrong kind of brace, or never closed.
    """
    openers = braces or {"{": "}"}
    closers = {v: k for k, v in openers.items()}
    stack = []
    result = []
    for i, c in enumerate(s):
        if c in openers:
            stack.append([c, i])
        elif c in closers:
            if not stack:
                raise ValueError(f"tried to close brace without an open at position {i}")
            pair, idx = stack.pop()
            if pair != closers[c]:
                raise ValueError(f"mismatched brace at position {i}")
            result.append([idx, i])
    if stack:
        # Report where the unclosed brace was opened, not where the scan ended.
        raise ValueError(
            f"no closing brace for the brace opened at position {stack[-1][1]}")
    return result


def _top_level_groups(text):
    """Return the contents of every outermost ``{...}`` group in *text*."""
    groups = []
    last_end = -1
    # Sorted by opening index, a pair nested in an earlier group starts
    # before that group's closing brace and is skipped.
    for start, end in sorted(find_matching_parens(text)):
        if start > last_end:
            groups.append(text[start + 1:end])
            last_end = end
    return groups


def _parse_meaning(raw, lineno):
    """Turn the text following one ``\\m`` marker into a meaning dict."""
    text = raw.strip()
    close = text.find(')')
    if not text.startswith('(') or close == -1:
        raise ValueError(
            f"line {lineno}: meaning does not start with a '(type)': {text!r}")

    short_wordtype = text[1:close].strip()
    if short_wordtype in shorttypes_inversed:
        wordtype = shorttypes_inversed[short_wordtype]
    else:
        # Mirror json2tex(): unknown types are passed through unchanged so
        # that a conversion round trip keeps them.
        print(f"WARNING: line {lineno}: unknown word type abbreviation "
              f"{short_wordtype!r}, keeping it unchanged", file=sys.stderr)
        wordtype = short_wordtype

    # Everything up to the first ';' is the description; every later ';'
    # group holds examples, which may additionally be separated by ','.
    # NOTE: an example sentence containing a comma is therefore split.
    description, *example_groups = text[close + 1:].split(';')
    examples = []
    for group in example_groups:
        for example in group.split(','):
            example = example.strip()
            if example:
                examples.append(example)

    return {
        'type': wordtype,
        'description': description.strip(),
        'examples': examples,
    }


# READ TEX FILE
def get_data_from_texfile(filename):
    """Parse a LaTeX glossary file into the JSON representation.

    Each non-blank, non-comment line must contain at least three top-level
    brace groups: the entry, its sort key and the meanings.  The data is
    built as Python objects and serialised with ``json.dumps`` so that
    quotes and backslashes in the glossary are escaped correctly.

    Args:
        filename: path of the LaTeX file to read (decoded as UTF-8).

    Returns:
        A JSON string of the form ``{"entries": [...]}``.

    Raises:
        OSError: if the file cannot be opened.
        ValueError: if a line has unbalanced braces, fewer than three brace
            groups, or a meaning without a ``(type)`` prefix.
    """
    verboseprint("INFO: opening file:", filename)
    entries = []
    with open(filename, encoding='utf-8') as infile:
        for lineno, raw_line in enumerate(infile, start=1):
            line = raw_line.strip()
            if not line or line.startswith('%'):
                continue

            try:
                groups = _top_level_groups(line)
            except ValueError as err:
                raise ValueError(f"line {lineno}: {err}") from err
            if len(groups) < 3:
                raise ValueError(
                    f"line {lineno}: expected {{entry}}{{key}}{{meanings}}, "
                    f"found {len(groups)} brace group(s)")
            entry, key, body = groups[0], groups[1], groups[2]

            # Text in front of the first \m marker is not part of a meaning.
            chunks = _MEANING_MARKER.split(body)[1:]
            verboseprint("INFO: Meanings found:", len(chunks))
            meanings = []
            for chunk in chunks:
                verboseprint("INFO: Meaning:", chunk.strip())
                meanings.append(_parse_meaning(chunk, lineno))

            entries.append({'entry': entry, 'key': key, 'meanings': meanings})
    return json.dumps({'entries': entries}, ensure_ascii=False)


def json2tex(entries):
    """Render a JSON glossary string as LaTeX, one entry per line.

    Args:
        entries: JSON string of the form ``{"entries": [...]}``.

    Returns:
        The LaTeX source; every entry line ends with a newline.

    Raises:
        json.JSONDecodeError: if *entries* is not valid JSON.
        KeyError: if an entry lacks ``entry``, ``key`` or ``meanings``, or a
            meaning lacks ``type`` or ``description``.
    """
    entries_json = json.loads(entries)
    lines = []
    for entry_json in entries_json["entries"]:
        parts = ['\\dictentrysorted{' + entry_json['entry'] + '}{' + entry_json['key'] + '}{']
        for meaning in entry_json['meanings']:
            # TYPE
            wordtype = meaning['type']
            if wordtype in shorttypes:
                wordtype = shorttypes[wordtype]
            else:
                # Diagnostics must not end up in the generated LaTeX.
                print("VERBOSE: stderr: "+wordtype+" is not in the the known word type list!",
                      file=sys.stderr)
            parts.append('\\m (' + wordtype + ') ')
            # DESCRIPTION
            parts.append(meaning['description'] + ' ')
            # EXAMPLES
            verboseprint("INFO:", meaning)
            for example in meaning.get('examples', []):
                parts.append('; ' + example + ' ')
        parts.append('}')
        lines.append(''.join(parts) + '\n')
    return ''.join(lines)


def main(argv=None):
    """Run the converter; *argv* defaults to the process arguments."""
    global _verbose
    parser = build_parser()
    args = parser.parse_args(argv)
    _verbose = args.verbose

    inputfile = args.file
    # Without -i the format is taken from the file extension.
    informat = args.input or os.path.splitext(inputfile)[1].lstrip('.').lower()
    verboseprint("INFO: input file format:", informat)
    outformat = args.output or 'json'
    verboseprint("INFO: output file format:", outformat)
    verboseprint("INFO: Word types:", shorttypes)

    if informat == 'json':
        data = get_data_from_jsonfile(inputfile)
    elif informat == 'tex':
        data = get_data_from_texfile(inputfile)
    else:
        parser.error(
            f"cannot tell the input format of {inputfile!r}; use -i/--input")

    if outformat == 'json':
        print(data)
    elif outformat == 'tex':
        print(json2tex(data))
    return 0


if __name__ == "__main__":
    sys.exit(main())