forked from xdarov/PiscinePythonDataScience
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathlinks.py
More file actions
80 lines (69 loc) · 2.82 KB
/
links.py
File metadata and controls
80 lines (69 loc) · 2.82 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
import os, sys, urllib, requests, beautifulsoup, json, pytest, collections, functools, datetime, re
class Links:
    """
    Analyzing data from links.csv

    Every analysis method reads the CSV file given to the constructor, asks
    get_imdb() for the fields it needs and ranks the movies by one metric.
    Each call downloads one IMDB page per valid CSV record, so the methods
    are slow on large files.

    NOTE(review): the scraping helpers assume that every CSV record starts
    with ``movieId,imdbId`` and that the IMDB page shows each field as a text
    label immediately followed by its value. Neither can be verified from
    this file - confirm against real data and adjust the constants below.
    """

    # Field names understood by get_imdb(). 'Title' is read from the page's
    # <title> tag; every other name is searched for as a label on the page.
    _TITLE_FIELD = 'Title'
    _DIRECTOR_FIELD = 'Director'
    _BUDGET_FIELD = 'Budget'
    _GROSS_FIELD = 'Cumulative Worldwide Gross'
    _RUNTIME_FIELD = 'Runtime'

    # NOTE(review): URL shape, title suffix and headers are assumptions about
    # the IMDB site - confirm before relying on them.
    _IMDB_URL = 'https://www.imdb.com/title/tt{imdb_id}/'
    _TITLE_SUFFIX = ' - IMDb'
    _REQUEST_TIMEOUT = 10  # seconds
    _REQUEST_HEADERS = {
        'User-Agent': 'Mozilla/5.0',
        'Accept-Language': 'en-US,en;q=0.8',
    }

    def __init__(self, path_to_the_file):
        """
        Remember the location of the CSV file; nothing is read until one of
        the other methods is called.
        """
        self.path_to_the_file = path_to_the_file

    def get_file_data(self):
        """
        Lazily yield the raw lines (line endings included) of the CSV file.

        If the file cannot be opened or decoded, an error message is printed
        and the generator simply ends, so callers see an empty sequence.
        """
        try:
            with open(self.path_to_the_file, 'r', encoding='utf-8') as f:
                yield from f
        except (OSError, ValueError) as e:
            # UnicodeDecodeError is a ValueError, so bad bytes land here too.
            print(f"Error reading file - {e}")

    @staticmethod
    def get_imdb(list_of_movies, list_of_fields):
        """
        The method returns a list of lists [movieId, field1, field2, field3, ...] for the list of movies given as the argument (movieId).
        For example, [movieId, Director, Budget, Cumulative Worldwide Gross, Runtime].
        The values should be parsed from the IMDB webpages of the movies.
        Sort it by movieId descendingly.

        list_of_movies is an iterable of CSV lines; lines whose first two
        columns are not both numeric (blank lines, the header row, malformed
        records) are skipped. movieId is returned as an int; every field
        value is the raw text found on the page, or None when it is missing
        or the page could not be downloaded.
        """
        fields = list(list_of_fields)  # may be a one-shot iterable
        imdb_info = []
        for line in list_of_movies:
            columns = [column.strip() for column in line.split(',')]
            if len(columns) < 2:
                continue
            movie_id, imdb_id = columns[0], columns[1]
            if not (movie_id.isdecimal() and imdb_id.isdecimal()):
                continue
            page = Links._fetch_page(imdb_id)
            imdb_info.append([int(movie_id)] + Links._extract_fields(page, fields))
        imdb_info.sort(key=lambda row: row[0], reverse=True)
        return imdb_info

    @staticmethod
    def _fetch_page(imdb_id):
        """Download the IMDB page of one title; return '' if the request fails."""
        # Imported here so the parsing and ranking code works without network
        # access or the requests package.
        import requests

        # NOTE(review): ids are left-padded to 7 digits on the assumption that
        # the CSV may have lost leading zeros - confirm.
        url = Links._IMDB_URL.format(imdb_id=imdb_id.zfill(7))
        try:
            response = requests.get(
                url,
                headers=Links._REQUEST_HEADERS,
                timeout=Links._REQUEST_TIMEOUT,
            )
            response.raise_for_status()
        except requests.RequestException as e:
            print(f"Error fetching {url} - {e}")
            return ''
        return response.text

    @staticmethod
    def _extract_fields(page, list_of_fields):
        """
        Return the values of list_of_fields found in the HTML text page, in
        the same order; a field that cannot be found yields None.

        'Title' comes from the <title> tag. Any other field is matched
        case-insensitively against the page's text fragments and the fragment
        that follows the matching label is taken as its value.
        """
        import html

        def clean(fragment):
            # Decode entities and collapse runs of whitespace.
            return ' '.join(html.unescape(fragment).split())

        title = None
        title_match = re.search(r'<title[^>]*>(.*?)</title>', page, re.I | re.S)
        if title_match:
            title = clean(title_match.group(1))
            if title.endswith(Links._TITLE_SUFFIX):
                title = title[:-len(Links._TITLE_SUFFIX)]
            title = title or None

        # Comments are dropped first so text split only by comments stays in
        # one fragment; script/style bodies are not visible text.
        text = re.sub(r'<!--.*?-->', '', page, flags=re.S)
        text = re.sub(r'<(script|style)\b.*?</\1\s*>', ' ', text, flags=re.I | re.S)
        chunks = [clean(piece) for piece in re.split(r'<[^>]+>', text)]
        chunks = [chunk for chunk in chunks if chunk]

        values = []
        for field in list_of_fields:
            if field == Links._TITLE_FIELD:
                values.append(title)
                continue
            wanted = field.strip().casefold()
            value = None
            for label, following in zip(chunks, chunks[1:]):
                if label.rstrip(':').strip().casefold() == wanted:
                    value = following
                    break
            values.append(value)
        return values

    @staticmethod
    def _parse_amount(text):
        """Return the first number in text as an int (fraction dropped), or None."""
        if text is None:
            return None
        match = re.search(r'\d[\d,.]*', str(text))
        if not match:
            return None
        # Drop a trailing 1-2 digit fraction ("1,234.56" -> "1,234"); groups
        # of three digits are thousands separators and are kept.
        number = re.sub(r'[.,]\d{1,2}$', '', match.group())
        return int(re.sub(r'\D', '', number))

    @staticmethod
    def _parse_runtime(text):
        """
        Return a runtime in minutes, or None when text holds no number.

        Understands hour/minute notations such as '2 hours 28 minutes',
        '2h 28m' and '148 min'; a bare number is taken as minutes.
        """
        if text is None:
            return None
        text = str(text)
        hours = re.search(r'(\d+)\s*h', text, re.I)
        minutes = re.search(r'(\d+)\s*m', text, re.I)
        if hours or minutes:
            total = 60 * int(hours.group(1)) if hours else 0
            if minutes:
                total += int(minutes.group(1))
            return total
        bare = re.search(r'\d+', text)
        return int(bare.group()) if bare else None

    @staticmethod
    def _top_n(pairs, n):
        """
        Return a dict of the n (key, value) pairs with the largest values,
        inserted in descending value order. If a key occurs more than once
        only its largest value is kept; n <= 0 gives an empty dict.
        """
        ranked = sorted(pairs, key=lambda pair: pair[1], reverse=True)
        top = {}
        for key, value in ranked:
            if len(top) >= n:
                break
            top.setdefault(key, value)
        return top

    def top_directors(self, n):
        """
        The method returns a dict with top-n directors where the keys are directors and
        the values are numbers of movies created by them. Sort it by numbers descendingly.
        """
        rows = self.get_imdb(self.get_file_data(), [self._DIRECTOR_FIELD])
        counts = collections.Counter(row[1] for row in rows if row[1])
        return self._top_n(counts.items(), n)

    def most_expensive(self, n):
        """
        The method returns a dict with top-n movies where the keys are movie titles and
        the values are their budgets. Sort it by budgets descendingly.

        Movies without a title or a parsable budget are left out.
        """
        rows = self.get_imdb(
            self.get_file_data(), [self._TITLE_FIELD, self._BUDGET_FIELD]
        )
        budgets = []
        for _, title, budget_text in rows:
            budget = self._parse_amount(budget_text)
            if title and budget is not None:
                budgets.append((title, budget))
        return self._top_n(budgets, n)

    def most_profitable(self, n):
        """
        The method returns a dict with top-n movies where the keys are movie titles and
        the values are the difference between cumulative worldwide gross and budget.
        Sort it by the difference descendingly.

        Movies missing a title, a budget or a gross are left out.
        """
        rows = self.get_imdb(
            self.get_file_data(),
            [self._TITLE_FIELD, self._BUDGET_FIELD, self._GROSS_FIELD],
        )
        profits = []
        for _, title, budget_text, gross_text in rows:
            budget = self._parse_amount(budget_text)
            gross = self._parse_amount(gross_text)
            if title and budget is not None and gross is not None:
                profits.append((title, gross - budget))
        return self._top_n(profits, n)

    def longest(self, n):
        """
        The method returns a dict with top-n movies where the keys are movie titles and
        the values are their runtime. If there are more than one version – choose any.
        Sort it by runtime descendingly.

        Runtimes are expressed in minutes; movies without a title or a
        parsable runtime are left out.
        """
        rows = self.get_imdb(
            self.get_file_data(), [self._TITLE_FIELD, self._RUNTIME_FIELD]
        )
        runtimes = []
        for _, title, runtime_text in rows:
            runtime = self._parse_runtime(runtime_text)
            if title and runtime is not None:
                runtimes.append((title, runtime))
        return self._top_n(runtimes, n)

    def top_cost_per_minute(self, n):
        """
        The method returns a dict with top-n movies where the keys are movie titles and
        the values are the budgets divided by their runtime. The budgets can be in different currencies – do not pay attention to it.
        The values should be rounded to 2 decimals. Sort it by the division descendingly.

        Movies missing a title or a budget, or with a missing or zero
        runtime, are left out.
        """
        rows = self.get_imdb(
            self.get_file_data(),
            [self._TITLE_FIELD, self._BUDGET_FIELD, self._RUNTIME_FIELD],
        )
        costs = []
        for _, title, budget_text, runtime_text in rows:
            budget = self._parse_amount(budget_text)
            runtime = self._parse_runtime(runtime_text)
            # "not runtime" also rejects 0 and so avoids a division by zero.
            if not title or budget is None or not runtime:
                continue
            costs.append((title, round(budget / runtime, 2)))
        return self._top_n(costs, n)