forked from AllenDowney/ThinkPython2
-
Notifications
You must be signed in to change notification settings - Fork 12
Expand file tree
/
Copy pathsplit_glossary.py
More file actions
executable file
·117 lines (90 loc) · 3.69 KB
/
split_glossary.py
File metadata and controls
executable file
·117 lines (90 loc) · 3.69 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
#!/usr/bin/env python3
"""
Generate consolidated RST glossary file and chapter include files from
the CSV glossary files.
"""
import glob
import os
import csv
import collections
import operator
import unicodedata
import string
from join_glossary import expected_entries
# Directory where the generated .rst / .txt output files are written.
RST_PATH = '../book/'
# Directory holding the per-chapter glossary CSV files.
GLOSSARY_DATA_PATH = '../data'
# Column names of the glossary CSV files, in on-disk order; also the
# field names of GlossaryEntry below.
FIELDS = 'term us_term br_term chapter order us_definition br_definition'.split()
# reStructuredText preamble of the consolidated glossary (Portuguese;
# runtime output text — do not translate or reflow).
GLOSSARY_HEAD = '''\
Glossário Consolidado
=====================
Este glossário é a união de todos os glossários dos capítulos.
Cada entrada está vinculada ao capítulo onde ela aparece, por exemplo:
*bug* :ref:`[1] <glossary01>`.
Note que alguns termos aparecem em mais de um capítulo, às vezes
com definições diferentes, de acordo com o contexto.
\n\n'''
# One glossary entry; one field per CSV column (see FIELDS).
GlossaryEntry = collections.namedtuple('GlossaryEntry', FIELDS)
def asciize(txt):
    """Return only the lowercase ASCII letters of *txt*.

    The text is decomposed first (NFD), so an accented letter such as
    'á' contributes its base letter 'a'; combining marks, digits,
    punctuation, whitespace and uppercase letters are all dropped.
    """
    keep = set(string.ascii_lowercase)
    decomposed = unicodedata.normalize('NFD', txt)
    return ''.join(filter(keep.__contains__, decomposed))
def master_order(entry):
    """Sort key for the consolidated glossary.

    Orders by the accent-free, case-folded term first and by chapter id
    second, joined with '|' (e.g. 'bug|01'), so identical terms from
    different chapters stay adjacent and in chapter order.
    """
    term_key = asciize(entry.term.casefold())
    return '{}|{}'.format(term_key, entry.chapter)
def read_glossary():
    """Read every chapter glossary CSV under GLOSSARY_DATA_PATH.

    Returns a tuple ``(master_glossary, glossaries)`` where
    ``master_glossary`` is a flat list of every GlossaryEntry and
    ``glossaries`` maps each chapter id to the list of entries that
    belong to that chapter.
    """
    master_glossary = []
    glossaries = collections.defaultdict(list)
    # sorted(): glob order is filesystem-dependent; make processing
    # order deterministic across runs and platforms.
    paths = sorted(glob.glob(os.path.join(GLOSSARY_DATA_PATH, '*.csv')))
    for path in paths:
        with open(path) as csvfile:
            # Field names are supplied explicitly, so the file's own
            # header row is data to DictReader and must be skipped.
            reader = csv.DictReader(csvfile, FIELDS)
            next(reader)  # skip header line
            for row in reader:
                # 'order' is used later as a numeric sort key.
                row['order'] = int(row['order'])
                entry = GlossaryEntry(**row)
                glossaries[row['chapter']].append(entry)
                master_glossary.append(entry)
    return master_glossary, glossaries
def formatted_head(entry):
    """Build the display heading for *entry*, pairing US and BR terms.

    Code-literal terms like '``for`` statement' get only the plain word
    italicized; plain terms are fully italicized. The adopted term comes
    first, with the other language's term in parentheses (omitted when
    there is no Brazilian term, marked by '-').
    """
    us = entry.us_term
    if '``' in us:
        # '``None``' stands alone; other code terms are 'code word' pairs.
        if us != '``None``':
            code, word = us.split()
            us = '{} *{}*'.format(code, word)
    else:
        us = '*{}*'.format(us)

    if entry.br_term == '-':
        # No Brazilian term exists: show only the US term.
        return us
    if entry.term == entry.us_term:
        # The US term was adopted; parenthesize the BR term.
        return '{} ({})'.format(us, entry.br_term)
    # A Brazilian term was adopted; parenthesize the US term.
    return '{} ({})'.format(entry.term, us)
def main():
    """Generate the consolidated glossary and per-chapter include files.

    Writes ``C-glossary.rst`` (all entries, alphabetical, each linked to
    its chapter) plus one ``glossary/<chapter>.txt`` include file per
    chapter, in the entry order given by each CSV. Progress is printed
    to stdout.
    """
    master_glossary, chapter_glossaries = read_glossary()

    # Consolidated glossary: alphabetical by accent-free term (master_order),
    # each entry cross-referenced to the chapter where it appears.
    out_path = os.path.join(RST_PATH, 'C-glossary.rst')
    with open(out_path, 'wt', encoding='utf-8') as out_file:
        out_file.write(GLOSSARY_HEAD)
        for entry in sorted(master_glossary, key=master_order):
            # '01' -> '1' for the visible link text.
            short_chapter = entry.chapter.lstrip('0')
            # Fall back to the US definition when no translation exists yet.
            definition = entry.br_definition.strip() or entry.us_definition.strip()
            out_file.write('{} :ref:`[{}] <glossary{}>`\n {}\n\n'
                           .format(formatted_head(entry), short_chapter,
                                   entry.chapter, definition))

    # Per-chapter include files, one per chapter listed in expected_entries.
    for chapter_id, entries_qty in expected_entries:
        # Sanity check: exactly one chapter .rst source must match this id.
        # NOTE(review): assert is stripped under `python -O`; acceptable
        # for a build script, but a raise would be sturdier.
        chapter_glob = os.path.join(RST_PATH, chapter_id + '*.rst')
        found = glob.glob(chapter_glob)
        assert len(found) == 1, 'found: {}'.format(len(found))
        out_path = os.path.join(RST_PATH, 'glossary', chapter_id + '.txt')
        glossary = chapter_glossaries[chapter_id]
        print(out_path)
        with open(out_path, 'wt', encoding='utf-8') as out_file:
            # Chapter files keep the CSV-defined entry order, not alphabetical.
            for entry in sorted(glossary, key=operator.attrgetter('order')):
                print('\t', entry)
                definition = entry.br_definition.strip() or entry.us_definition.strip()
                out_file.write('{}\n {}\n\n'.format(formatted_head(entry), definition))
        print()


if __name__ == '__main__':
    main()