-
Notifications
You must be signed in to change notification settings - Fork 38
Expand file tree
/
Copy pathsynthetic_data_1.py
More file actions
executable file
·48 lines (39 loc) · 1.54 KB
/
synthetic_data_1.py
File metadata and controls
executable file
·48 lines (39 loc) · 1.54 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
#!/usr/bin/env python3
"""
Generate a synthetic ride-sharing CSV with 10,000 rows.
Columns: source_city, destination_city, fare_usd, distance_km, driver_gender, ride_type, customer_name
"""
import csv
import random
from faker import Faker
# One fixed seed shared by both random sources used in this script:
# the stdlib ``random`` module and Faker.
_SEED = 42

fake = Faker()
random.seed(_SEED)
Faker.seed(_SEED)
# ---------- Configurable parameters ----------
# Number of data rows to generate (the header row is not counted).
NUM_ROWS = 10_000

# Path of the CSV file that gets written.
OUTFILE = "rides.csv"

# Pool of city names used for ride endpoints.
CITIES = [
    "New York",
    "Chicago",
    "San Francisco",
    "Boston",
    "Los Angeles",
    "Seattle",
    "Austin",
    "Denver",
    "Miami",
    "Atlanta",
    "Houston",
    "Phoenix",
]

# Allowed values for the driver_gender column.
GENDERS = [
    "Male",
    "Female",
]

# Allowed values for the ride_type column.
RIDE_TYPES = [
    "shared",
    "individual",
]
# ---------- CSV generation ----------
# Column names for the header row, in the order each data row is emitted.
_HEADER = [
    "source_city",
    "destination_city",
    "fare_usd",
    "distance_km",
    "driver_gender",
    "ride_type",
    "customer_name",
]


def _random_ride():
    """Return one data row as a list ordered like ``_HEADER``.

    Values are drawn in a fixed order (cities, fare, distance, gender,
    ride type, name) so that a seeded run always yields the same rows.
    """
    # Sampling two items without replacement guarantees origin != target.
    origin, target = random.sample(CITIES, 2)
    price = round(random.uniform(5, 120), 2)  # $5.00–$120.00
    km = round(random.uniform(1, 60), 2)  # 1–60 km
    return [
        origin,
        target,
        price,
        km,
        random.choice(GENDERS),
        random.choice(RIDE_TYPES),
        fake.name(),
    ]


# newline="" lets the csv module control line endings itself.
with open(OUTFILE, mode="w", newline="", encoding="utf-8") as out_file:
    csv_out = csv.writer(out_file)
    csv_out.writerow(_HEADER)
    # Rows are produced lazily, one per iteration, and written in order.
    csv_out.writerows(_random_ride() for _ in range(NUM_ROWS))

print(f"✅ Generated {NUM_ROWS} rows into {OUTFILE}")