forked from bear/python-twitter
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy path_file_cache.py
More file actions
155 lines (130 loc) · 5.38 KB
/
_file_cache.py
File metadata and controls
155 lines (130 loc) · 5.38 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
from builtins import object
#!/usr/bin/env python
from hashlib import md5
import os
import re
import tempfile
class _FileCacheError(Exception):
    """Root of the exception hierarchy for file-cache failures."""
class _FileCache(object):
    """Minimal on-disk key/value cache.

    Every key is hashed with MD5.  The value is stored in a file named after
    the hex digest, nested ``DEPTH`` directory levels below the cache root
    (one level per leading digest character).
    """

    # Number of leading digest characters turned into nested directories.
    DEPTH = 3

    def __init__(self, root_directory=None):
        """Create a cache rooted at *root_directory*.

        Args:
            root_directory: Directory that holds the cache files.  When
                falsy, a per-user directory below the system temp directory
                is used instead.

        Raises:
            _FileCacheError: If the root path exists but is not a directory.
        """
        self._InitializeRootDirectory(root_directory)

    def Get(self, key):
        """Return the data stored for *key*, or None when nothing is cached."""
        path = self._GetPath(key)
        if not os.path.exists(path):
            return None
        with open(path) as f:
            return f.read()

    def Set(self, key, data):
        """Store *data* (text) under *key*, replacing any previous value.

        Raises:
            _FileCacheError: If the computed path is not below the cache
                root, or the target directory path is not a directory.
        """
        path = self._GetPath(key)
        # Validate first so a rejected path never leaves files behind.
        self._EnsureUnderRoot(path)
        directory = os.path.dirname(path)
        if not os.path.exists(directory):
            os.makedirs(directory)
        if not os.path.isdir(directory):
            raise _FileCacheError('%s exists but is not a directory' % directory)
        # Stage the data next to its destination: os.rename() may fail when
        # source and destination live on different filesystems, which happens
        # when the cache root is not on the same device as the temp directory.
        temp_fd, temp_path = tempfile.mkstemp(dir=directory)
        try:
            with os.fdopen(temp_fd, 'w') as temp_fp:
                temp_fp.write(data)
            # Some platforms refuse to rename onto an existing file.
            if os.path.exists(path):
                os.remove(path)
            os.rename(temp_path, path)
        except Exception:
            # Do not leave a half-written staging file in the cache tree.
            if os.path.exists(temp_path):
                os.remove(temp_path)
            raise

    def Remove(self, key):
        """Delete the cache entry for *key*; a missing entry is not an error.

        Raises:
            _FileCacheError: If the computed path is not below the cache root.
        """
        path = self._GetPath(key)
        self._EnsureUnderRoot(path)
        if os.path.exists(path):
            os.remove(path)

    def GetCachedTime(self, key):
        """Return the modification time of *key*'s cache file, or None.

        The value is whatever ``os.path.getmtime`` reports for the file.
        """
        path = self._GetPath(key)
        if not os.path.exists(path):
            return None
        return os.path.getmtime(path)

    def _EnsureUnderRoot(self, path):
        """Raise _FileCacheError unless *path* starts with the cache root."""
        if not path.startswith(self._root_directory):
            raise _FileCacheError('%s does not appear to live under %s' %
                                  (path, self._root_directory))

    def _GetUsername(self):
        """Attempt to find the username in a cross-platform fashion."""
        try:
            return (os.getenv('USER') or
                    os.getenv('LOGNAME') or
                    os.getenv('USERNAME') or
                    os.getlogin() or
                    'nobody')
        except (AttributeError, IOError, OSError):
            # os.getlogin() may be missing or may fail; fall back to a
            # fixed placeholder name.
            return 'nobody'

    def _GetTmpCachePath(self):
        """Return the default per-user cache directory in the temp dir."""
        username = self._GetUsername()
        cache_directory = 'python.cache_' + username
        return os.path.join(tempfile.gettempdir(), cache_directory)

    def _InitializeRootDirectory(self, root_directory):
        """Resolve, create if needed, and remember the cache root directory.

        Raises:
            _FileCacheError: If the path exists but is not a directory.
        """
        if not root_directory:
            root_directory = self._GetTmpCachePath()
        root_directory = os.path.abspath(root_directory)
        if not os.path.exists(root_directory):
            os.mkdir(root_directory)
        if not os.path.isdir(root_directory):
            raise _FileCacheError('%s exists but is not a directory' %
                                  root_directory)
        self._root_directory = root_directory

    def _GetPath(self, key):
        """Return the absolute file path used to store *key*.

        Text keys are encoded as UTF-8 before hashing; byte-string keys are
        hashed as they are.
        """
        if not isinstance(key, bytes):
            key = key.encode('utf-8')
        hashed_key = md5(key).hexdigest()
        return os.path.join(self._root_directory,
                            self._GetPrefix(hashed_key),
                            hashed_key)

    def _GetPrefix(self, hashed_key):
        """Return the nested directory prefix, e.g. ``a/b/c`` for ``abc...``."""
        return os.path.sep.join(hashed_key[0:_FileCache.DEPTH])
class ParseTweet(object):
    """Pull user handles, hashtags, URLs and RT/MT markers out of tweet text.

    All extraction happens in the constructor; the results are exposed as
    the attributes ``Owner``, ``tweet``, ``UserHandles``, ``Hashtags``,
    ``URLs``, ``RT`` and ``MT``.
    """

    # Compiled once, when the class body runs at import time.
    regexp = {
        "RT": r"^RT",
        "MT": r"^MT",
        "ALNUM": r"(@[a-zA-Z0-9_]+)",
        "HASHTAG": r"(#[\w\d]+)",
        # Optional http:// or https:// scheme followed by a dotted token.
        # The scheme must be a group: a character class here would match a
        # single character and drop the "http" from every captured URL.
        "URL": r"((?:https?://)?[a-zA-Z\d\/]+[\.]+[a-zA-Z\d\/\.]+)",
    }
    regexp = {key: re.compile(value) for key, value in regexp.items()}

    def __init__(self, timeline_owner, tweet):
        """Parse *tweet* and record the results on the instance.

        Args:
            timeline_owner: Twitter handle of the account whose timeline the
                tweet came from.
            tweet: The tweet text.

        Attributes set:
            RT, MT: Booleans, true when the stripped text starts with
                "RT" / "MT".
            URLs: List of URL-like strings found in the text.
            Hashtags: List of hashtags, including the leading "#".
            UserHandles: List of handles, including the leading "@", in
                order of occurrence.
            Owner: *timeline_owner*, unless the tweet is a retweet that
                mentions at least one handle.
        """
        self.Owner = timeline_owner
        self.tweet = tweet
        self.UserHandles = ParseTweet.getUserHandles(tweet)
        self.Hashtags = ParseTweet.getHashtags(tweet)
        self.URLs = ParseTweet.getURLs(tweet)
        self.RT = ParseTweet.getAttributeRT(tweet)
        self.MT = ParseTweet.getAttributeMT(tweet)
        # For a retweet, attribute the tweet to the first handle mentioned.
        if self.RT and self.UserHandles:
            self.Owner = self.UserHandles[0]

    def __str__(self):
        """Return a one-line summary of the parsed tweet for display."""
        return "owner %s, urls: %d, hashtags %d, user_handles %d, len_tweet %d, RT = %s, MT = %s" % (
            self.Owner, len(self.URLs), len(self.Hashtags),
            len(self.UserHandles), len(self.tweet), self.RT, self.MT)

    @staticmethod
    def getAttributeRT(tweet):
        """Return True when the stripped tweet text starts with "RT"."""
        return ParseTweet.regexp["RT"].search(tweet.strip()) is not None

    @staticmethod
    def getAttributeMT(tweet):
        """Return True when the stripped tweet text starts with "MT"."""
        return ParseTweet.regexp["MT"].search(tweet.strip()) is not None

    @staticmethod
    def getUserHandles(tweet):
        """Return every @handle in the tweet, in order of occurrence."""
        return ParseTweet.regexp["ALNUM"].findall(tweet)

    @staticmethod
    def getHashtags(tweet):
        """Return every #hashtag in the tweet, in order of occurrence."""
        return ParseTweet.regexp["HASHTAG"].findall(tweet)

    @staticmethod
    def getURLs(tweet):
        """Return every URL-like token in the tweet.

        A token qualifies when it contains at least one dot between runs of
        letters, digits or slashes; a leading http:// or https:// scheme is
        kept as part of the result.
        """
        return ParseTweet.regexp["URL"].findall(tweet)