texdict.py: get_data_from_texfile() cleaner rewrite attempt
This commit is contained in:
parent
64de297467
commit
c08442f2ea
@ -45,8 +45,114 @@ def get_data_from_jsonfile(filename):
|
|||||||
verboseprint("INFO:", json.dumps(data, ensure_ascii=False))
|
verboseprint("INFO:", json.dumps(data, ensure_ascii=False))
|
||||||
return json.dumps(data, ensure_ascii=False)
|
return json.dumps(data, ensure_ascii=False)
|
||||||
|
|
||||||
|
def find_matching_parens(s, braces=None):
    """Locate every matched pair of braces in *s*.

    Args:
        s: The string to scan.
        braces: Optional mapping of opening character to closing character.
            When omitted (or empty) only curly braces ``{`` / ``}`` are
            matched.

    Returns:
        A list of ``[open_index, close_index]`` pairs, ordered by the
        position of the closing brace (so inner pairs come before the pair
        that encloses them).

    Raises:
        ValueError: If a closing brace has no opener, if a closing brace
            does not match the most recent opener, or if an opener is never
            closed.
    """
    openers = braces or {"{": "}"}
    closers = {v: k for k, v in openers.items()}
    stack = []  # (opening character, index) of every opener still unclosed
    result = []

    for i, c in enumerate(s):
        if c in openers:
            stack.append((c, i))
        elif c in closers:
            if not stack:
                raise ValueError(f"tried to close brace without an open at position {i}")
            pair, idx = stack.pop()
            # Validate before recording, so a mismatched pair is never stored.
            if pair != closers[c]:
                raise ValueError(f"mismatched brace at position {i}")
            result.append([idx, i])

    if stack:
        # Report where the innermost unclosed brace was opened; the loop
        # variable only holds the last index of the string, which says
        # nothing about the location of the actual problem.
        unclosed_at = stack[-1][1]
        raise ValueError(f"no closing brace at position {unclosed_at}")
    return result
|
||||||
|
|
||||||
# READ TEX FILE
|
# READ TEX FILE
|
||||||
def get_data_from_texfile(filename):
|
def get_data_from_texfile(filename):
|
||||||
|
# # delimiters
|
||||||
|
# meaning_delim='\m '
|
||||||
|
# type_delim=['(',') ']
|
||||||
|
# description_delim=', '
|
||||||
|
# examples_delim='; '
|
||||||
|
# example_delim=', ' # what if a sentence contains a comma? maybe use '. ' instead -- but what if the example is a question?
|
||||||
|
# entry_command='\dictentrysorted'
|
||||||
|
# verboseprint("INFO: opening file:", filename)
|
||||||
|
# with open(filename) as infile:
|
||||||
|
# oldline=''
|
||||||
|
# i=0
|
||||||
|
# jsonstring='{ "entries" : [ '
|
||||||
|
# for line in infile:
|
||||||
|
# i+=1
|
||||||
|
# line=line.strip()
|
||||||
|
# line=line.split('%')[0]
|
||||||
|
# if line.startswith('%'):
|
||||||
|
# continue
|
||||||
|
# if line == '\n':
|
||||||
|
# continue
|
||||||
|
# if oldline != '':
|
||||||
|
# if line.startswith(entry_command):
|
||||||
|
# raise ValueError(f"new entry starts at line {k}, but previous entry not closed!")
|
||||||
|
# else:
|
||||||
|
# line=oldline+line
|
||||||
|
#
|
||||||
|
# try:
|
||||||
|
# matches=find_matching_parens(line)
|
||||||
|
# split_indices=[
|
||||||
|
# matches[0][0], matches[0][1],
|
||||||
|
# matches[1][0], matches[1][1],
|
||||||
|
# matches[-1][0], matches[-1][1]
|
||||||
|
# ]
|
||||||
|
# substrings=[line[start:end] for start, end in zip([0] + split_indices, split_indices + [None])]
|
||||||
|
# entry=substrings[1][1:]
|
||||||
|
# key=substring[3][1:]
|
||||||
|
# meanings=substrings[5][1:]
|
||||||
|
#
|
||||||
|
# #end
|
||||||
|
# oldline=''
|
||||||
|
# except:
|
||||||
|
# oldline=line
|
||||||
|
# continue
|
||||||
|
# #ENTRY
|
||||||
|
# jsonstring+='{"entry": "'
|
||||||
|
# jsonstring+=entry
|
||||||
|
# jsonstring+='", '
|
||||||
|
# #KEY
|
||||||
|
# jsonstring+='"key": "'
|
||||||
|
# jsonstring+=key
|
||||||
|
# jsonstring+='", meanings": [ '
|
||||||
|
# #MEANINGS
|
||||||
|
# j=0
|
||||||
|
# for meaning in meanings.split('\m'):
|
||||||
|
# j+=1
|
||||||
|
# #WORD TYPE
|
||||||
|
# #TODO what if no word type???
|
||||||
|
# type_matches=find_matching_parens(meaning, {'(':')'})
|
||||||
|
# type_split_indices=[0]
|
||||||
|
# meaning_substrings=[meaning[start:end] for start, end in zip([0] + type_split_indices, type_split_indices + [None])]
|
||||||
|
# word_type=meaning_substrings[1][1:]
|
||||||
|
# jsonstring+=' { "type": "'
|
||||||
|
# jsonstring+=word_type
|
||||||
|
# jsonstring+='", '
|
||||||
|
# # DESCRIPTION
|
||||||
|
# descriptions=meaning_substrings[2][2:].split(examples_delim)[0]
|
||||||
|
# jsonstring+='"description": "'
|
||||||
|
# jsonstring+=description
|
||||||
|
# jsonstring+='", '
|
||||||
|
# # EXAMPLES
|
||||||
|
# # only execute if try:
|
||||||
|
# # except: "examples" : []
|
||||||
|
# examples_raw=meaning_substrings[2][2:].split(examples_delim)[1]
|
||||||
|
# examples=examples.split(example_delim)
|
||||||
|
# jsonstring+='"examples": ['
|
||||||
|
# k=0
|
||||||
|
# for example in examples:
|
||||||
|
# k+=1
|
||||||
|
# jsonstring+='"'
|
||||||
|
# jsonstring+=example
|
||||||
|
# jsonstring+='", '
|
||||||
|
# jsonstring+=']????'
|
||||||
|
|
||||||
|
|
||||||
|
#OLD VERSION
|
||||||
verboseprint("INFO: opening file:", filename)
|
verboseprint("INFO: opening file:", filename)
|
||||||
with open(filename) as infile:
|
with open(filename) as infile:
|
||||||
jsonstring='{ "entries" : [ '
|
jsonstring='{ "entries" : [ '
|
||||||
@ -134,18 +240,19 @@ def json2tex(entries):
|
|||||||
entries_tex+="\n"
|
entries_tex+="\n"
|
||||||
return entries_tex
|
return entries_tex
|
||||||
|
|
||||||
def tex2json(entry):
|
#def tex2json(entry):
|
||||||
entry_json=json.dumps(json.loads(entry), ensure_ascii=False)
|
# entry_json=json.dumps(json.loads(entry), ensure_ascii=False)
|
||||||
return entry_json
|
# return entry_json
|
||||||
|
|
||||||
if informat == "json":
|
if __name__ == "__main__":
|
||||||
data = get_data_from_jsonfile(inputfile)
|
if informat == "json":
|
||||||
elif informat == 'tex':
|
data = get_data_from_jsonfile(inputfile)
|
||||||
data = get_data_from_texfile(inputfile)
|
elif informat == 'tex':
|
||||||
|
data = get_data_from_texfile(inputfile)
|
||||||
|
|
||||||
if outformat == 'json':
|
if outformat == 'json':
|
||||||
print(data)
|
print(data)
|
||||||
elif outformat == 'tex':
|
elif outformat == 'tex':
|
||||||
print(json2tex(data))
|
print(json2tex(data))
|
||||||
|
|
||||||
exit(0)
|
exit(0)
|
||||||
|
@ -97,6 +97,21 @@ example.json
|
|||||||
]
|
]
|
||||||
}
|
}
|
||||||
```
|
```
|
||||||
|
\glossary{entry}{key}{
|
||||||
|
\m (type) description, description, description ; example, example
|
||||||
|
\m
|
||||||
|
}
|
||||||
|
- tex2json
|
||||||
|
```
|
||||||
|
def tex2json(dict_tex)
|
||||||
|
meaning_delim='\m '
|
||||||
|
type_delim=['(',') ']
|
||||||
|
description_delim=', '
|
||||||
|
examples_delim='; '
|
||||||
|
example_delim=', '
|
||||||
|
# how to cope with multiline entries?
|
||||||
|
- [find closing brackets](https://stackoverflow.com/q/63382152)
|
||||||
|
```
|
||||||
- notes:
|
- notes:
|
||||||
- `type` could also include gender/genus?
|
- `type` could also include gender/genus?
|
||||||
- where should I put info like 'go, went, gone' or plural forms for some words?
|
- where should I put info like 'go, went, gone' or plural forms for some words?
|
||||||
@ -105,3 +120,12 @@ example.json
|
|||||||
- `dialect/usage/gebrauch`
|
- `dialect/usage/gebrauch`
|
||||||
- https://www.duden.de/hilfe/gebrauch
|
- https://www.duden.de/hilfe/gebrauch
|
||||||
- statt `description` = `meaning`?
|
- statt `description` = `meaning`?
|
||||||
|
- what about `see also`s and confer (cf.)?
|
||||||
|
- what about `aussprache`?
|
||||||
|
|
||||||
|
```
|
||||||
|
\glossary{entry}{key}{
|
||||||
|
\m (type) description, description, description ; example, example
|
||||||
|
\m
|
||||||
|
}
|
||||||
|
- tex2json
|
||||||
|
Loading…
Reference in New Issue
Block a user