def find_matching_parens(s, braces=None):
    """Locate every matched pair of delimiters in *s*.

    Args:
        s: Text to scan character by character.
        braces: Mapping of opening character to its closing character.
            Falls back to ``{"{": "}"}`` when omitted or empty.

    Returns:
        A list of ``[open_index, close_index]`` pairs, in the order the
        closing delimiters are encountered (an inner pair therefore comes
        before the pair that encloses it).

    Raises:
        ValueError: If a closer appears while nothing is open, if a closer
            does not belong to the most recently opened delimiter, or if an
            opener is never closed.
    """
    openers = braces or {"{": "}"}
    closers = {closer: opener for opener, closer in openers.items()}
    stack = []
    result = []

    for i, c in enumerate(s):
        # Openers are tested first, so a character that is both an opener
        # and a closer is always treated as an opener.
        if c in openers:
            stack.append((c, i))
        elif c in closers:
            if not stack:
                raise ValueError(f"tried to close brace without an open at position {i}")
            opener, idx = stack.pop()
            # Validate before recording so a bad pair never reaches result.
            if opener != closers[c]:
                raise ValueError(f"mismatched brace at position {i}")
            result.append([idx, i])

    if stack:
        # Report where the innermost still-open delimiter was opened; the
        # loop index would only ever point at the last character of s.
        unclosed_idx = stack[-1][1]
        raise ValueError(f"no closing brace for opener at position {unclosed_idx}")
    return result
+# entry_command='\dictentrysorted' +# verboseprint("INFO: opening file:", filename) +# with open(filename) as infile: +# oldline='' +# i=0 +# jsonstring='{ "entries" : [ ' +# for line in infile: +# i+=1 +# line=line.strip() +# line=line.split('%')[0] +# if line.startswith('%'): +# continue +# if line == '\n': +# continue +# if oldline != '': +# if line.startswith(entry_command): +# raise ValueError(f"new entry starts at line {k}, but previous entry not closed!") +# else: +# line=oldline+line +# +# try: +# matches=find_matching_parens(line) +# split_indices=[ +# matches[0][0], matches[0][1], +# matches[1][0], matches[1][1], +# matches[-1][0], matches[-1][1] +# ] +# substrings=[line[start:end] for start, end in zip([0] + split_indices, split_indices + [None])] +# entry=substrings[1][1:] +# key=substring[3][1:] +# meanings=substrings[5][1:] +# +# #end +# oldline='' +# except: +# oldline=line +# continue +# #ENTRY +# jsonstring+='{"entry": "' +# jsonstring+=entry +# jsonstring+='", ' +# #KEY +# jsonstring+='"key": "' +# jsonstring+=key +# jsonstring+='", meanings": [ ' +# #MEANINGS +# j=0 +# for meaning in meanings.split('\m'): +# j+=1 +# #WORD TYPE +# #TODO what if no word type??? 
if __name__ == "__main__":
    # Script entry point: read the dictionary in the input format, then print
    # it in the output format.
    # NOTE(review): informat, outformat and inputfile are defined elsewhere in
    # this file (not visible in this section) -- presumably from the command
    # line; confirm against the argument parsing code.
    if informat == "json":
        data = get_data_from_jsonfile(inputfile)
    elif informat == "tex":
        data = get_data_from_texfile(inputfile)
    else:
        # Without this branch `data` stays unbound and the failure only
        # shows up later as a NameError.
        raise SystemExit(f"ERROR: unsupported input format: {informat!r}")

    if outformat == "json":
        print(data)
    elif outformat == "tex":
        print(json2tex(data))
    else:
        # Fail loudly instead of printing nothing and reporting success.
        raise SystemExit(f"ERROR: unsupported output format: {outformat!r}")

    # exit() is a helper injected by the site module and is not guaranteed
    # to exist (e.g. under `python -S`); raising SystemExit always works.
    raise SystemExit(0)
+} +- tex2json +``` +def tex2json(dict_tex) + meaning_delim='\m ' + type_delim=['(',') '] + description_delim=', ' + examples_delim='; ' + example_delim=', ' + # how to cope with multiline entries? + - [find closing brackets](https://stackoverflow.com/q/63382152) +``` - notes: - `type` could also include gender/genus? - where should I put info like 'go, went, gone' or plural forms for some words? @@ -105,3 +120,12 @@ example.json - `dialect/usage/gebrauch` - https://www.duden.de/hilfe/gebrauch - statt `description` = `meaning`? + - what about `see also`s and confer (cf.)? + - what about `aussprache`? + +``` +\glossary{entry}{key}{ + \m (type) description, description, description ; example, example + \m +} +- tex2json