forked from AllenDowney/ThinkPython2
-
Notifications
You must be signed in to change notification settings - Fork 12
Expand file tree
/
Copy pathsplit_glossary.py
More file actions
executable file
·117 lines (90 loc) · 3.69 KB
/
split_glossary.py
File metadata and controls
executable file
·117 lines (90 loc) · 3.69 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
#!/usr/bin/env python3
"""
Generate consolidated RST glossary file and chapter include files from
the CSV glossary files.
"""
import glob
import os
import csv
import collections
import operator
import unicodedata
import string
from join_glossary import expected_entries
# Directory where the generated .rst / .txt output files are written.
RST_PATH = '../book/'
# Directory holding the per-chapter glossary CSV files.
GLOSSARY_DATA_PATH = '../data'
# Column names of the glossary CSV files, in on-disk order; also the
# field names of GlossaryEntry below.
FIELDS = 'term us_term br_term chapter order us_definition br_definition'.split()
# reStructuredText preamble of the consolidated glossary (Portuguese;
# runtime output text — do not translate or reflow).
GLOSSARY_HEAD = '''\
Glossário Consolidado
=====================
Este glossário é a união de todos os glossários dos capítulos.
Cada entrada está vinculada ao capítulo onde ela aparece, por exemplo:
*bug* :ref:`[1] <glossary01>`.
Note que alguns termos aparecem em mais de um capítulo, às vezes
com definições diferentes, de acordo com o contexto.
\n\n'''
# One glossary entry; one field per CSV column (see FIELDS).
GlossaryEntry = collections.namedtuple('GlossaryEntry', FIELDS)
def asciize(txt):
    """Return only the lowercase ASCII letters of *txt*.

    The text is decomposed first (NFD), so an accented letter such as
    'á' contributes its base letter 'a'; combining marks, digits,
    punctuation, whitespace and uppercase letters are all dropped.
    """
    keep = set(string.ascii_lowercase)
    decomposed = unicodedata.normalize('NFD', txt)
    return ''.join(filter(keep.__contains__, decomposed))
def master_order(entry):
    """Sort key for the consolidated glossary.

    Orders by the accent-free, case-folded term first and by chapter id
    second, joined with '|' (e.g. 'bug|01'), so identical terms from
    different chapters stay adjacent and in chapter order.
    """
    term_key = asciize(entry.term.casefold())
    return '{}|{}'.format(term_key, entry.chapter)
def read_glossary():
    """Read every chapter glossary CSV under GLOSSARY_DATA_PATH.

    Returns a tuple ``(master_glossary, glossaries)`` where
    ``master_glossary`` is a flat list of every GlossaryEntry and
    ``glossaries`` maps each chapter id to the list of entries that
    belong to that chapter.
    """
    master_glossary = []
    glossaries = collections.defaultdict(list)
    # sorted(): glob order is filesystem-dependent; make processing
    # order deterministic across runs and platforms.
    paths = sorted(glob.glob(os.path.join(GLOSSARY_DATA_PATH, '*.csv')))
    for path in paths:
        with open(path) as csvfile:
            # Field names are supplied explicitly, so the file's own
            # header row is data to DictReader and must be skipped.
            reader = csv.DictReader(csvfile, FIELDS)
            next(reader)  # skip header line
            for row in reader:
                # 'order' is used later as a numeric sort key.
                row['order'] = int(row['order'])
                entry = GlossaryEntry(**row)
                glossaries[row['chapter']].append(entry)
                master_glossary.append(entry)
    return master_glossary, glossaries
def formatted_head(entry):
    """Build the display heading for *entry*, pairing US and BR terms.

    Code-literal terms like '``for`` statement' get only the plain word
    italicized; plain terms are fully italicized. The adopted term comes
    first, with the other language's term in parentheses (omitted when
    there is no Brazilian term, marked by '-').
    """
    us = entry.us_term
    if '``' in us:
        # '``None``' stands alone; other code terms are 'code word' pairs.
        if us != '``None``':
            code, word = us.split()
            us = '{} *{}*'.format(code, word)
    else:
        us = '*{}*'.format(us)

    if entry.br_term == '-':
        # No Brazilian term exists: show only the US term.
        return us
    if entry.term == entry.us_term:
        # The US term was adopted; parenthesize the BR term.
        return '{} ({})'.format(us, entry.br_term)
    # A Brazilian term was adopted; parenthesize the US term.
    return '{} ({})'.format(entry.term, us)
def main():
    """Generate the consolidated glossary and per-chapter include files.

    Writes ``C-glossary.rst`` (all entries, alphabetical, each linked to
    its chapter) plus one ``glossary/<chapter>.txt`` include file per
    chapter, in the entry order given by each CSV. Progress is printed
    to stdout.
    """
    master_glossary, chapter_glossaries = read_glossary()

    # Consolidated glossary: alphabetical by accent-free term (master_order),
    # each entry cross-referenced to the chapter where it appears.
    out_path = os.path.join(RST_PATH, 'C-glossary.rst')
    with open(out_path, 'wt', encoding='utf-8') as out_file:
        out_file.write(GLOSSARY_HEAD)
        for entry in sorted(master_glossary, key=master_order):
            # '01' -> '1' for the visible link text.
            short_chapter = entry.chapter.lstrip('0')
            # Fall back to the US definition when no translation exists yet.
            definition = entry.br_definition.strip() or entry.us_definition.strip()
            out_file.write('{} :ref:`[{}] <glossary{}>`\n {}\n\n'
                           .format(formatted_head(entry), short_chapter,
                                   entry.chapter, definition))

    # Per-chapter include files, one per chapter listed in expected_entries.
    for chapter_id, entries_qty in expected_entries:
        # Sanity check: exactly one chapter .rst source must match this id.
        # NOTE(review): assert is stripped under `python -O`; acceptable
        # for a build script, but a raise would be sturdier.
        chapter_glob = os.path.join(RST_PATH, chapter_id + '*.rst')
        found = glob.glob(chapter_glob)
        assert len(found) == 1, 'found: {}'.format(len(found))
        out_path = os.path.join(RST_PATH, 'glossary', chapter_id + '.txt')
        glossary = chapter_glossaries[chapter_id]
        print(out_path)
        with open(out_path, 'wt', encoding='utf-8') as out_file:
            # Chapter files keep the CSV-defined entry order, not alphabetical.
            for entry in sorted(glossary, key=operator.attrgetter('order')):
                print('\t', entry)
                definition = entry.br_definition.strip() or entry.us_definition.strip()
                out_file.write('{}\n {}\n\n'.format(formatted_head(entry), definition))
        print()


if __name__ == '__main__':
    main()