Compare commits
No commits in common. "52f4d78fa39d4c26f73dbcbc38a614ae98371e5a" and "4f283b0c7fcaaead72e541ff31b10dda9f9e347e" have entirely different histories.
52f4d78fa3...4f283b0c7f
@@ -1 +0,0 @@
{"entry": "asɛm", "key": "asem", "meanings": [ { "type": "noun", "description": "word", "examples": ["Wonim saa asɛm yi?", " Wo nim sa ara asɛm yi? " ]}, { "type": "noun", "description": "issue", "examples": ["Dɛn ne asɛm no? " ]}, { "type": "noun", "description": "news ", "examples": [ ]} ] }
@@ -1 +0,0 @@
\dictentrysorted{asɛm}{asem}{\m (n) word ; Wonim saa asɛm yi?, Wo nim sa ara asɛm yi? \m (n) issue ; Dɛn ne asɛm no? \m (n) news }
@@ -1,136 +0,0 @@
#!/usr/bin/env python3

import json
import argparse
import sys

parser = argparse.ArgumentParser(
    prog='texdict',
    description='convert LaTeX glossary to json',
    epilog='onipa'
)

parser.add_argument('file', help='input file')
parser.add_argument('-i', '--input', choices=('json', 'tex'), help='input format: json, tex (default: file extension)')
parser.add_argument('-f', '--format', choices=('json', 'tex'), help='choose output format: json, tex (default: json)')
parser.add_argument('-v', '--verbose', action='store_true', help='boolean verbose switch (default: false)')
#parser.add_argument('-e', '--entry', help='add an entry to a file')
#parser.add_argument('-o', '--output', help='write dictionary file to file')
#parser.add_argument('-w', '--write', action='store_true', required=False, help='overwrite input file')

args = parser.parse_args()

# verboseprint https://stackoverflow.com/a/5980173
verboseprint = print if args.verbose else lambda *a, **k: None

inputfile = args.file
informat = inputfile.split('.')[-1]
#verboseprint(args.input)
if args.input:
    informat = args.input
verboseprint("input file format:", informat)

outformat = 'json'
if args.format:
    outformat = args.format
verboseprint("output file format:", outformat)

# KNOWN WORD TYPES
shorttypes = {
    'noun': 'n'
}
shorttypes_inversed = {v: k for k, v in shorttypes.items()}
verboseprint("Word types:", shorttypes)


# READ JSON FILE
def get_data_from_jsonfile(filename):
    verboseprint("opening file:", filename)
    with open(filename) as infile:
        data = json.load(infile)
    verboseprint(json.dumps(data, ensure_ascii=False))
    return json.dumps(data, ensure_ascii=False)


# READ TEX FILE
def get_data_from_texfile(filename):
    verboseprint("opening file:", filename)
    with open(filename) as infile:
        for line in infile:
            jsonstring = '{"entry": "'
            # ENTRY: first {...} group of \dictentrysorted
            jsonstring += line.split('{')[1].split('}')[0] + '", '
            # KEY: second {...} group (the sort key)
            jsonstring += '"key": "'
            jsonstring += line.split('{')[2].split('}')[0] + '", '
            # MEANINGS: one \m block per meaning
            number_of_meanings = line.count(r'\m')
            verboseprint("Meanings found:", number_of_meanings)
            jsonstring += '"meanings": [ '
            for i in range(number_of_meanings):
                # MEANING
                verboseprint(i)
                meaning = line.split(r'\m')[i + 1][1:]
                if i == (number_of_meanings - 1):
                    meaning = meaning[:-2]
                verboseprint("Meaning:", meaning)
                # TYPE
                # todo: what if no type
                if i == 0:
                    jsonstring += '{ "type": "'
                else:
                    jsonstring += ', { "type": "'
                short_wordtype = meaning.split("(")[1].split(")")[0]
                wordtype = shorttypes_inversed[short_wordtype]
                jsonstring += wordtype + '", '
                # DESCRIPTION
                jsonstring += '"description": "'
                jsonstring += meaning.split(") ")[1].split(' ;')[0] + '"'
                # EXAMPLES
                jsonstring += ', "examples": ['
                if ';' in meaning:
                    examples = meaning.split('; ')[1]
                    verboseprint("examples:", examples)
                    number_of_examples = len(examples.split(', '))
                    j = 0
                    for example in examples.split(','):
                        j += 1
                        if j == number_of_examples:
                            jsonstring += '"' + example + '"'
                        else:
                            jsonstring += '"' + example + '", '
                jsonstring += ' ]'
                # CLOSE MEANING
                jsonstring += '}'
            jsonstring += ' ] }'
            # only the first \dictentrysorted line is converted (see TODO)
            return jsonstring


def json2tex(entry):
    entry_json = json.loads(entry)
    entry_tex = '\\dictentrysorted{' + entry_json['entry'] + '}{' + entry_json['key'] + '}{'
    for meaning in entry_json['meanings']:
        # TYPE
        wordtype = meaning['type']
        if wordtype in shorttypes:
            wordtype = shorttypes[wordtype]
        else:
            print("VERBOSE: " + wordtype + " is not in the known word type list!", file=sys.stderr)
        entry_tex += '\\m (' + wordtype + ') '
        # DESCRIPTION
        entry_tex += meaning['description'] + ' '
        # EXAMPLES
        verboseprint(meaning)
        if len(meaning['examples']) != 0:
            for example in meaning['examples']:
                entry_tex += '; ' + example + ' '
    entry_tex += '}'
    return entry_tex


def tex2json(entry):
    entry_json = json.dumps(json.loads(entry), ensure_ascii=False)
    return entry_json


if informat == "json":
    data = get_data_from_jsonfile(inputfile)
elif informat == 'tex':
    data = get_data_from_texfile(inputfile)
else:
    parser.error("unknown input format: " + informat)

if outformat == 'json':
    print(data)
elif outformat == 'tex':
    print(json2tex(data))

sys.exit(0)
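For orientation, a minimal standalone sketch of the JSON-to-TeX mapping that json2tex performs, applied to a trimmed asɛm entry from the first hunk; the trimmed entry and the inline shorttypes table are illustrative assumptions, not files in either commit:

import json

# Illustrative only: a trimmed version of the asɛm entry from the first hunk.
entry = json.loads('{"entry": "asɛm", "key": "asem", "meanings": '
                   '[{"type": "noun", "description": "word", '
                   '"examples": ["Wonim saa asɛm yi?"]}]}')

shorttypes = {'noun': 'n'}  # mirrors the abbreviation table in the script

tex = '\\dictentrysorted{' + entry['entry'] + '}{' + entry['key'] + '}{'
for meaning in entry['meanings']:
    tex += '\\m (' + shorttypes.get(meaning['type'], meaning['type']) + ') '
    tex += meaning['description'] + ' '
    for example in meaning['examples']:
        tex += '; ' + example + ' '
tex += '}'

print(tex)
# \dictentrysorted{asɛm}{asem}{\m (n) word ; Wonim saa asɛm yi? }

Note that the stored .tex entry above separates multiple examples with commas after a single ';', while this concatenation prefixes every example with '; ', so converting back and forth is close but not byte-identical.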
@@ -1,2 +0,0 @@
# TODO:
- change so that two or more entries can be converted
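A hedged sketch of one way to do this, assuming the per-line parsing already inside get_data_from_texfile is factored out into a helper; parse_entry_line is only a placeholder name, not a function that exists in the commit:

# Sketch only: parse_entry_line stands in for the existing per-line parsing.
def get_data_from_texfile(filename):
    entries = []
    with open(filename) as infile:
        for line in infile:
            if line.startswith('\\dictentrysorted'):
                entries.append(parse_entry_line(line))
    return entries  # one JSON string per dictionary entry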