X Tutup
Skip to content

Commit ee8e772

Browse files
committed
consolidated glossary: translated terms
1 parent a0a8d16 commit ee8e772

File tree

4 files changed

+87
-0
lines changed

4 files changed

+87
-0
lines changed

book/01-the-way.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -372,6 +372,7 @@ useful for many activities beyond programming. At the end of each
372372
chapter there is a section, like this one, with my suggestions for
373373
debugging. I hope they help!
374374

375+
375376
Glossary
376377
--------
377378

tools/build_glossary.py

Lines changed: 86 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,86 @@
1+
import glob
2+
import re
3+
import os
4+
import collections
5+
6+
# Pattern for a glossary heading: the literal "Gloss", 3 to 7 non-dash
# characters (the rest of the heading line, newline included), a run of
# 8 to 15 dashes (the underline) and a newline.  Group 1 captures
# everything that follows.
GLOSSARY = r'Gloss[^-]{3,7}-{8,15}\n(.*)'

# Variant used when nothing delimits the glossary: with DOTALL, group 1
# runs to the end of the text.
GLOSSARY_RE = re.compile(GLOSSARY, flags=re.DOTALL)

# Variant that requires another run of dashes after the glossary body.
# The capture is greedy, so group 1 extends as far as the last place in
# the text where 8 or more dashes can still be matched.
GLOSSARY_SECTION_RE = re.compile(GLOSSARY + r'-{8,15}', flags=re.DOTALL)
11+
12+
# Number of glossary entries expected in each chapter, as
# (chapter id, entry count) pairs.
expected_entries = [
    ('01', 21), ('02', 19), ('03', 22), ('04', 11), ('05', 15),
    ('06', 5), ('07', 8), ('08', 12), ('09', 3), ('10', 14),
    ('11', 20), ('12', 8), ('13', 6), ('14', 14), ('15', 9),
    ('16', 7), ('17', 9), ('18', 13), ('19', 5), ('B', 11),
]

# The same data keyed by chapter id, for direct lookup.
expected_entries_dic = {
    chapter_id: count for chapter_id, count in expected_entries
}
36+
37+
# One glossary entry: a "term:" line, then a definition that starts with
# at least one space and may span several lines (DOTALL).  The
# definition ends at the first blank line or at the end of the text;
# accepting end-of-text keeps the last entry from being dropped when the
# glossary is not followed by a blank line.
ENTRY_RE = re.compile(r'([^\n]+):\n[ ]+(.*?)(?:\n\n|\n?\Z)', re.DOTALL)

# A term paired with one Definition of it.
GlossaryEntry = collections.namedtuple('GlossaryEntry', 'term definition')

# Where a definition was found (chapter id and 1-based position within
# that chapter's glossary) and its text.
Definition = collections.namedtuple('Definition', 'chapter_id position text')
42+
43+
44+
def parse_entries(text, chapter_id):
    """Return the glossary entries that ENTRY_RE finds in *text*.

    Each match becomes a GlossaryEntry whose definition is a Definition
    holding *chapter_id*, the 1-based position of the match within
    *text*, and the definition text with every run of whitespace
    (line breaks included) collapsed to a single space.
    """
    return [
        GlossaryEntry(
            raw_term,
            Definition(chapter_id, index, ' '.join(raw_definition.split())),
        )
        for index, (raw_term, raw_definition)
        in enumerate(ENTRY_RE.findall(text), 1)
    ]
54+
55+
56+
def scan_files(*paths):
    """Print a consolidated glossary for the ``*.rst`` files in *paths*.

    Each argument is a directory.  For every ``*.rst`` file directly
    inside it, the chapter id is the part of the file name before the
    first ``-``.  Files in which neither GLOSSARY_SECTION_RE nor
    GLOSSARY_RE matches are skipped.  The entries parsed from each
    remaining file are grouped by term and their number is checked
    against ``expected_entries_dic``.

    Output is one ``|``-separated line per definition, with the fields
    term, chapter id, position and text.  Terms are ordered
    case-insensitively; when a term has several definitions, the term
    is replaced by a tab on every line after the first.

    Raises:
        KeyError: a file with a glossary has a chapter id that is not
            in ``expected_entries_dic``.
        AssertionError: the number of entries parsed from a file does
            not match the expected count; the argument is the tuple
            (chapter id, expected count, actual count).
    """
    entries = collections.defaultdict(list)
    for path in paths:
        # glob returns names in arbitrary order; sorting makes the first
        # reported mismatch and the order of case-insensitive ties
        # reproducible between runs.
        for name in sorted(glob.glob(os.path.join(path, '*.rst'))):
            chapter_id = os.path.basename(name).split('-')[0]

            with open(name, encoding='utf-8') as infile:
                rst = infile.read()

            # Prefer the variant delimited by a following run of dashes;
            # fall back to the variant that runs to the end of the file.
            gloss_match = (GLOSSARY_SECTION_RE.search(rst) or
                           GLOSSARY_RE.search(rst))
            if not gloss_match:
                continue

            new_entries = parse_entries(gloss_match.group(1), chapter_id)
            for term, definition in new_entries:
                entries[term].append(definition)

            # Raised explicitly rather than with `assert`, so the check
            # is not stripped when Python runs with -O.
            expected = expected_entries_dic[chapter_id]
            if expected != len(new_entries):
                raise AssertionError(
                    (chapter_id, expected, len(new_entries)))

    for term in sorted(entries, key=str.upper):
        definitions = sorted(entries[term])
        for i, (chapter_id, position, definition) in enumerate(definitions):
            # Only the first line of a group shows the term itself.
            label = term if i == 0 else '\t'
            print(label, chapter_id, position, definition, sep='|')
82+
83+
84+
if __name__ == '__main__':
    # Script entry point: every command-line argument is passed to
    # scan_files as one path.
    import sys

    cli_paths = sys.argv[1:]
    scan_files(*cli_paths)

tools/chapter-plan.ods

1 KB
Binary file not shown.

tools/glossary.ods

66.4 KB
Binary file not shown.

0 commit comments

Comments
 (0)
X Tutup