forked from chris1610/pbpython
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathpanda_gform.py
More file actions
55 lines (51 loc) · 2.56 KB
/
panda_gform.py
File metadata and controls
55 lines (51 loc) · 2.56 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
"""
See http://pbpython.com/pandas-google-forms-part1.html for more details
and explanation of how to create the SECRETS_FILE
Purpose of this example is to pull google sheet data into
a pandas DataFrame.
"""
from __future__ import print_function
import gspread
from oauth2client.client import SignedJwtAssertionCredentials
import pandas as pd
import json
SCOPE = ["https://spreadsheets.google.com/feeds"]
SECRETS_FILE = "Pbpython-key.json"
SPREADSHEET = "PBPython User Survey (Responses)"
# Based on docs here - http://gspread.readthedocs.org/en/latest/oauth2.html
# Load in the secret JSON key (must be a service account)
json_key = json.load(open(SECRETS_FILE))
# Authenticate using the signed key
credentials = SignedJwtAssertionCredentials(json_key['client_email'],
json_key['private_key'], SCOPE)
gc = gspread.authorize(credentials)
print("The following sheets are available")
for sheet in gc.openall():
print("{} - {}".format(sheet.title, sheet.id))
# Open up the workbook based on the spreadsheet name
workbook = gc.open(SPREADSHEET)
# Get the first sheet
sheet = workbook.sheet1
# Extract all data into a dataframe
data = pd.DataFrame(sheet.get_all_records())
# Do some minor cleanups on the data
# Rename the columns to make it easier to manipulate
# The data comes in through a dictionary so we can not assume order stays the
# same so must name each column
column_names = {'Timestamp': 'timestamp',
'What version of python would you like to see used for the examples on the site?': 'version',
'How useful is the content on practical business python?': 'useful',
'What suggestions do you have for future content?': 'suggestions',
'How frequently do you use the following tools? [Python]': 'freq-py',
'How frequently do you use the following tools? [SQL]': 'freq-sql',
'How frequently do you use the following tools? [R]': 'freq-r',
'How frequently do you use the following tools? [Javascript]': 'freq-js',
'How frequently do you use the following tools? [VBA]': 'freq-vba',
'How frequently do you use the following tools? [Ruby]': 'freq-ruby',
'Which OS do you use most frequently?': 'os',
'Which python distribution do you primarily use?': 'distro',
'How would you like to be notified about new articles on this site?': 'notify'
}
data.rename(columns=column_names, inplace=True)
data.timestamp = pd.to_datetime(data.timestamp)
print(data.head())