forked from csev/py4e
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathutil.py
More file actions
69 lines (57 loc) · 1.98 KB
/
util.py
File metadata and controls
69 lines (57 loc) · 1.98 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
# https://pypi.org/project/python-youtube/
import math
import sys
import re
import hashlib
# 1.89 -> 00:00:01,890
def time2str(ticks):
    """Format a time offset in seconds as an SRT timestamp ``HH:MM:SS,mmm``.

    Args:
        ticks: Offset in seconds (int or float), e.g. ``1.89``.

    Returns:
        The timestamp string, e.g. ``"00:00:01,890"``. Hours are zero-padded
        to two digits and are not wrapped at 24.
    """
    # Work in whole milliseconds. Rounding (instead of truncating) keeps
    # inputs such as 1.005, whose float product with 1000 lands just below
    # 1005, from losing a millisecond. Deriving every field from the same
    # integer also lets a rounded-up fraction carry into the seconds.
    total_ms = int(round(ticks * 1000))
    total_seconds, frac = divmod(total_ms, 1000)
    total_minutes, seconds = divmod(total_seconds, 60)
    hh, mm = divmod(total_minutes, 60)
    return f"{hh:02}:{mm:02}:{seconds:02},{frac:03}"
# {'text': 'Hello everybody and welcome to chapter', 'start': 0.0, 'duration': 1.89}
# {'text': "one of Python for Everybody. I'm Charles", 'start': 1.89, 'duration': 1.92}
# 1
# 00:00:00,000 --> 00:00:01,890
# Hello everybody and welcome to chapter
#
# 2
# 00:00:01,890 --> 00:00:03,810
# one of Python for Everybody. I'm Charles
def caption2srt(captions):
    """Render a sequence of caption dicts as SRT text.

    Each caption is a dict with "text", "start" and "duration" keys. Cues
    are numbered from 1. Every cue ends at the start of the following cue;
    only the final cue ends at its own start plus duration.

    Returns the whole document as one string ('' when there are no captions).
    """
    last_index = len(captions) - 1
    pieces = []
    for idx, cue in enumerate(captions):
        body = cue["text"]
        begin = cue["start"]
        length = cue["duration"]
        # Stretch each cue up to the next one so there are no gaps.
        finish = captions[idx + 1]["start"] if idx < last_index else begin + length
        pieces.append(str(idx + 1) + "\n")
        pieces.append(time2str(begin) + ' --> ' + time2str(finish) + "\n")
        pieces.append(body + "\n")
        pieces.append("\n")
    return ''.join(pieces)
# https://stackoverflow.com/questions/92438/stripping-non-printable-characters-from-a-string-in-python
# keep only the characters for which str.isprintable() is true (newlines and tabs are dropped too)
def make_printable(a_string):
    """Return a_string with every non-printable character removed.

    A character is kept only when its ``isprintable()`` method is true, so
    control characters such as newlines and tabs are dropped as well.
    """
    return ''.join(ch for ch in a_string if ch.isprintable())
def hash_srt(srt):
    """Return the MD5 hex digest of a normalised copy of SRT text.

    Normalisation removes non-printable characters (via make_printable),
    removes every space, and strips each ":SS,mmm" run (colon, two digits,
    comma, one or more digits) before hashing.
    """
    squeezed = make_printable(srt).replace(' ', '')
    # Drop the seconds/milliseconds tail of each timestamp; presumably so
    # that small timing differences do not change the digest.
    coarse = re.sub(r':[0-9][0-9],[0-9]+', '', squeezed)
    return hashlib.md5(coarse.encode()).hexdigest()
def get_videoid(f):
    """Return the last whitespace-separated token of a file name.

    Every '.srt' occurrence is removed first. The final token is taken to
    be the video id. When the name has fewer than two tokens, '' is
    returned instead.
    """
    tokens = f.replace('.srt', '').split()
    return tokens[-1] if len(tokens) >= 2 else ''