forked from bear/python-twitter
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy path_file_cache.py
More file actions
161 lines (135 loc) · 5.46 KB
/
_file_cache.py
File metadata and controls
161 lines (135 loc) · 5.46 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
#!/usr/bin/env python
import errno
import os
import re
import tempfile
from hashlib import md5
class _FileCacheError(Exception):
"""Base exception class for FileCache related errors"""
class _FileCache(object):
DEPTH = 3
def __init__(self, root_directory=None):
self._InitializeRootDirectory(root_directory)
def Get(self, key):
path = self._GetPath(key)
if os.path.exists(path):
with open(path) as f:
return f.read()
else:
return None
def Set(self, key, data):
path = self._GetPath(key)
directory = os.path.dirname(path)
if not os.path.exists(directory):
os.makedirs(directory)
if not os.path.isdir(directory):
raise _FileCacheError('%s exists but is not a directory' % directory)
temp_fd, temp_path = tempfile.mkstemp()
temp_fp = os.fdopen(temp_fd, 'w')
temp_fp.write(data)
temp_fp.close()
if not path.startswith(self._root_directory):
raise _FileCacheError('%s does not appear to live under %s' %
(path, self._root_directory))
if os.path.exists(path):
os.remove(path)
os.rename(temp_path, path)
def Remove(self, key):
path = self._GetPath(key)
if not path.startswith(self._root_directory):
raise _FileCacheError('%s does not appear to live under %s' %
(path, self._root_directory ))
if os.path.exists(path):
os.remove(path)
def GetCachedTime(self, key):
path = self._GetPath(key)
if os.path.exists(path):
return os.path.getmtime(path)
else:
return None
def _GetUsername(self):
"""Attempt to find the username in a cross-platform fashion."""
try:
return os.getenv('USER') or \
os.getenv('LOGNAME') or \
os.getenv('USERNAME') or \
os.getlogin() or \
'nobody'
except (AttributeError, IOError, OSError):
return 'nobody'
def _GetTmpCachePath(self):
username = self._GetUsername()
cache_directory = 'python.cache_' + username
return os.path.join(tempfile.gettempdir(), cache_directory)
def _InitializeRootDirectory(self, root_directory):
if not root_directory:
root_directory = self._GetTmpCachePath()
root_directory = os.path.abspath(root_directory)
try:
os.mkdir(root_directory)
except OSError as e:
if e.errno == errno.EEXIST and os.path.isdir(root_directory):
# directory already exists
pass
else:
# exists but is a file, or no permissions, or...
raise
self._root_directory = root_directory
def _GetPath(self, key):
try:
hashed_key = md5(key.encode('utf-8')).hexdigest()
except TypeError:
hashed_key = md5.new(key).hexdigest()
return os.path.join(self._root_directory,
self._GetPrefix(hashed_key),
hashed_key)
def _GetPrefix(self, hashed_key):
return os.path.sep.join(hashed_key[0:_FileCache.DEPTH])
class ParseTweet(object):
# compile once on import
regexp = {"RT": "^RT", "MT": r"^MT", "ALNUM": r"(@[a-zA-Z0-9_]+)",
"HASHTAG": r"(#[\w\d]+)", "URL": r"([http://]?[a-zA-Z\d\/]+[\.]+[a-zA-Z\d\/\.]+)"}
regexp = dict((key, re.compile(value)) for key, value in list(regexp.items()))
def __init__(self, timeline_owner, tweet):
""" timeline_owner : twitter handle of user account. tweet - 140 chars from feed; object does all computation on construction
properties:
RT, MT - boolean
URLs - list of URL
Hashtags - list of tags
"""
self.Owner = timeline_owner
self.tweet = tweet
self.UserHandles = ParseTweet.getUserHandles(tweet)
self.Hashtags = ParseTweet.getHashtags(tweet)
self.URLs = ParseTweet.getURLs(tweet)
self.RT = ParseTweet.getAttributeRT(tweet)
self.MT = ParseTweet.getAttributeMT(tweet)
# additional intelligence
if ( self.RT and len(self.UserHandles) > 0 ): # change the owner of tweet?
self.Owner = self.UserHandles[0]
return
def __str__(self):
""" for display method """
return "owner %s, urls: %d, hashtags %d, user_handles %d, len_tweet %d, RT = %s, MT = %s" % (
self.Owner, len(self.URLs), len(self.Hashtags), len(self.UserHandles),
len(self.tweet), self.RT, self.MT)
@staticmethod
def getAttributeRT(tweet):
""" see if tweet is a RT """
return re.search(ParseTweet.regexp["RT"], tweet.strip()) is not None
@staticmethod
def getAttributeMT(tweet):
""" see if tweet is a MT """
return re.search(ParseTweet.regexp["MT"], tweet.strip()) is not None
@staticmethod
def getUserHandles(tweet):
""" given a tweet we try and extract all user handles in order of occurrence"""
return re.findall(ParseTweet.regexp["ALNUM"], tweet)
@staticmethod
def getHashtags(tweet):
""" return all hashtags"""
return re.findall(ParseTweet.regexp["HASHTAG"], tweet)
@staticmethod
def getURLs(tweet):
""" URL : [http://]?[\w\.?/]+"""
return re.findall(ParseTweet.regexp["URL"], tweet)