texdict.py: get_data_from_texfile() cleaner rewrite attempt
This commit is contained in:
parent
64de297467
commit
c08442f2ea
@ -45,8 +45,114 @@ def get_data_from_jsonfile(filename):
|
||||
verboseprint("INFO:", json.dumps(data, ensure_ascii=False))
|
||||
return json.dumps(data, ensure_ascii=False)
|
||||
|
||||
def find_matching_parens(s, braces=None):
    """Locate every matched pair of braces in ``s``.

    Args:
        s: The string to scan.
        braces: Optional mapping of opening character to closing character.
            When omitted (or empty) only curly braces ``{`` / ``}`` are
            tracked.

    Returns:
        A list of ``[open_index, close_index]`` pairs, ordered by the
        position of the closing character, so a nested pair appears before
        the pair that encloses it.

    Raises:
        ValueError: If a closing character appears while nothing is open, if
            a closing character does not match the most recently opened
            brace, or if an opened brace is never closed.
    """
    openers = braces or {"{": "}"}
    closers = {closer: opener for opener, closer in openers.items()}
    stack = []
    result = []

    for i, c in enumerate(s):
        if c in openers:
            stack.append((c, i))
        elif c in closers:
            if not stack:
                raise ValueError(f"tried to close brace without an open at position {i}")
            opener, opened_at = stack.pop()
            if opener != closers[c]:
                raise ValueError(f"mismatched brace at position {i}")
            result.append([opened_at, i])

    if stack:
        # Report where the unclosed brace was opened; the loop index would
        # only point at the last character of the input.
        _, opened_at = stack[-1]
        raise ValueError(f"no closing brace at position {opened_at}")
    return result
|
||||
|
||||
# READ TEX FILE
|
||||
def get_data_from_texfile(filename):
|
||||
# # delimiters
|
||||
# meaning_delim='\m '
|
||||
# type_delim=['(',') ']
|
||||
# description_delim=', '
|
||||
# examples_delim='; '
|
||||
# example_delim=', ' # what if a sentence contains a comma? maybe use '. ' instead -- but what if the example is a question?
|
||||
# entry_command='\dictentrysorted'
|
||||
# verboseprint("INFO: opening file:", filename)
|
||||
# with open(filename) as infile:
|
||||
# oldline=''
|
||||
# i=0
|
||||
# jsonstring='{ "entries" : [ '
|
||||
# for line in infile:
|
||||
# i+=1
|
||||
# line=line.strip()
|
||||
# line=line.split('%')[0]
|
||||
# if line.startswith('%'):
|
||||
# continue
|
||||
# if line == '\n':
|
||||
# continue
|
||||
# if oldline != '':
|
||||
# if line.startswith(entry_command):
|
||||
# raise ValueError(f"new entry starts at line {k}, but previous entry not closed!")
|
||||
# else:
|
||||
# line=oldline+line
|
||||
#
|
||||
# try:
|
||||
# matches=find_matching_parens(line)
|
||||
# split_indices=[
|
||||
# matches[0][0], matches[0][1],
|
||||
# matches[1][0], matches[1][1],
|
||||
# matches[-1][0], matches[-1][1]
|
||||
# ]
|
||||
# substrings=[line[start:end] for start, end in zip([0] + split_indices, split_indices + [None])]
|
||||
# entry=substrings[1][1:]
|
||||
# key=substring[3][1:]
|
||||
# meanings=substrings[5][1:]
|
||||
#
|
||||
# #end
|
||||
# oldline=''
|
||||
# except:
|
||||
# oldline=line
|
||||
# continue
|
||||
# #ENTRY
|
||||
# jsonstring+='{"entry": "'
|
||||
# jsonstring+=entry
|
||||
# jsonstring+='", '
|
||||
# #KEY
|
||||
# jsonstring+='"key": "'
|
||||
# jsonstring+=key
|
||||
# jsonstring+='", meanings": [ '
|
||||
# #MEANINGS
|
||||
# j=0
|
||||
# for meaning in meanings.split('\m'):
|
||||
# j+=1
|
||||
# #WORD TYPE
|
||||
# #TODO what if no word type???
|
||||
# type_matches=find_matching_parens(meaning, {'(':')'})
|
||||
# type_split_indices=[0]
|
||||
# meaning_substrings=[meaning[start:end] for start, end in zip([0] + type_split_indices, type_split_indices + [None])]
|
||||
# word_type=meaning_substrings[1][1:]
|
||||
# jsonstring+=' { "type": "'
|
||||
# jsonstring+=word_type
|
||||
# jsonstring+='", '
|
||||
# # DESCRIPTION
|
||||
# descriptions=meaning_substrings[2][2:].split(examples_delim)[0]
|
||||
# jsonstring+='"description": "'
|
||||
# jsonstring+=description
|
||||
# jsonstring+='", '
|
||||
# # EXAMPLES
|
||||
# # only execute if try:
|
||||
# # except: "examples" : []
|
||||
# examples_raw=meaning_substrings[2][2:].split(examples_delim)[1]
|
||||
# examples=examples.split(example_delim)
|
||||
# jsonstring+='"examples": ['
|
||||
# k=0
|
||||
# for example in examples:
|
||||
# k+=1
|
||||
# jsonstring+='"'
|
||||
# jsonstring+=example
|
||||
# jsonstring+='", '
|
||||
# jsonstring+=']????'
|
||||
|
||||
|
||||
#OLD VERSION
|
||||
verboseprint("INFO: opening file:", filename)
|
||||
with open(filename) as infile:
|
||||
jsonstring='{ "entries" : [ '
|
||||
@ -134,18 +240,19 @@ def json2tex(entries):
|
||||
entries_tex+="\n"
|
||||
return entries_tex
|
||||
|
||||
def tex2json(entry):
|
||||
entry_json=json.dumps(json.loads(entry), ensure_ascii=False)
|
||||
return entry_json
|
||||
#def tex2json(entry):
|
||||
# entry_json=json.dumps(json.loads(entry), ensure_ascii=False)
|
||||
# return entry_json
|
||||
|
||||
if informat == "json":
|
||||
if __name__ == "__main__":
|
||||
if informat == "json":
|
||||
data = get_data_from_jsonfile(inputfile)
|
||||
elif informat == 'tex':
|
||||
elif informat == 'tex':
|
||||
data = get_data_from_texfile(inputfile)
|
||||
|
||||
if outformat == 'json':
|
||||
if outformat == 'json':
|
||||
print(data)
|
||||
elif outformat == 'tex':
|
||||
elif outformat == 'tex':
|
||||
print(json2tex(data))
|
||||
|
||||
exit(0)
|
||||
exit(0)
|
||||
|
@ -97,6 +97,21 @@ example.json
|
||||
]
|
||||
}
|
||||
```
|
||||
\glossary{entry}{key}{
|
||||
\m (type) description, description, description ; example, example
|
||||
\m
|
||||
}
|
||||
- tex2json
|
||||
```
|
||||
def tex2json(dict_tex)
|
||||
meaning_delim='\m '
|
||||
type_delim=['(',') ']
|
||||
description_delim=', '
|
||||
examples_delim='; '
|
||||
example_delim=', '
|
||||
# how to cope with multiline entries?
|
||||
- [find closing brackets](https://stackoverflow.com/q/63382152)
|
||||
```
|
||||
- notes:
|
||||
- `type` could also include gender/genus?
|
||||
- where should I put info like 'go, went, gone' or plural forms for some words?
|
||||
@ -105,3 +120,12 @@ example.json
|
||||
- `dialect/usage/gebrauch`
|
||||
- https://www.duden.de/hilfe/gebrauch
|
||||
- use `meaning` instead of `description`?
|
||||
- what about `see also`s and confer (cf.) references?
|
||||
- what about `aussprache` (pronunciation)?
|
||||
|
||||
```
|
||||
\glossary{entry}{key}{
|
||||
\m (type) description, description, description ; example, example
|
||||
\m
|
||||
}
|
||||
- tex2json
|
||||
|
Loading…
Reference in New Issue
Block a user