CREATE TABLE fiscal_year_2021_2022."Oct_2021"
(ride_id VARCHAR,
rideable_type VARCHAR,
started_at timestamp without time zone,
ended_at timestamp without time zone,
start_station_name VARCHAR,
start_station_id VARCHAR,
end_station_name VARCHAR,
end_station_id VARCHAR,
start_lat double precision,
start_lng double precision,
end_lat double precision,
end_lng double precision,
member_casual VARCHAR);
SELECT * FROM fiscal_year_2021_2022."Oct_2021"
LIMIT 100
SELECT
DISTINCT rideable_type
FROM fiscal_year_2021_2022."Oct_2021";
INSERT INTO fiscal_year_2021_2022."1Q"
SELECT * FROM fiscal_year_2021_2022."Oct_2021";
INSERT INTO fiscal_year_2021_2022."1Q"
SELECT * FROM fiscal_year_2021_2022."Nov_2021";
INSERT INTO fiscal_year_2021_2022."1Q"
SELECT * FROM fiscal_year_2021_2022."Dec_2021"
ON CONFLICT DO NOTHING;
CREATE TABLE fiscal_year_2021_2022."total_rides" AS
(SELECT * FROM fiscal_year_2021_2022."1Q"
UNION ALL
SELECT * FROM fiscal_year_2021_2022."2Q"
UNION ALL
SELECT * FROM fiscal_year_2021_2022."3Q"
UNION ALL
SELECT * FROM fiscal_year_2021_2022."4Q");
Checked for NULL values
Example:
SELECT ride_id, rideable_type, started_at, ended_at, start_station_name, start_station_id, end_station_name, end_station_id, start_lat, start_lng, end_lat, end_lng, member_casual
FROM fiscal_year_2021_2022.total_rides
WHERE member_casual IS NULL;
ride_id IS NULL
rideable_type IS NULL
started_at IS NULL
ended_at IS NULL
start_station_name = 895032
start_station_id = 895032
end_station_name = 958227
end_station_id = 958227
start_lat IS NULL
start_lng IS NULL
end_lat is 5844
end_lng is 5844
member_casual IS NULLCREATE TABLE fiscal_year_2021_2022."total_rides_V2" AS
(SELECT *
FROM fiscal_year_2021_2022."total_rides"
WHERE start_station_name NOT LIKE '%NULL%'
AND start_station_id NOT LIKE '%NULL%'
AND end_station_name NOT LIKE '%NULL%'
AND end_station_id NOT LIKE '%NULL%'
AND end_lat NOT LIKE '%NULL%'
AND end_lng NOT LIKE '%NULL%');
SELECT ride_id FROM fiscal_year_2021_2022."total_rides_V2"
Checked for leading or trailing spaces from start_station_name and end_station_name
Example:
SELECT *
FROM fiscal_year_2021_2022."total_rides_V2"
WHERE start_station_name LIKE ' %' or start_station_name LIKE '% ';
UPDATE fiscal_year_2021_2022."total_rides_V2"
SET start_station_name = TRIM(start_station_name);
SELECT LENGTH(ride_id)
FROM fiscal_year_2021_2022."total_rides_V2";
SELECT ride_id
FROM fiscal_year_2021_2022."total_rides_V2"
WHERE LENGTH(ride_id) <> 16
CREATE TABLE fiscal_year_2021_2022."total_rides_V3" AS
(SELECT *
FROM fiscal_year_2021_2022."total_rides_V2"
WHERE start_station_name NOT LIKE '%Base - 2132 W Hubbard%'
AND start_station_name NOT LIKE '%Base - 2132 W Hubbard Warehouse%'
AND start_station_name NOT LIKE '%Hastings WH 2%'
AND start_station_name NOT LIKE '%DIVVY CASSETTE REPAIR MOBILE STATION%'
AND start_station_name NOT LIKE '%Throop/Hastings Mobile Station%'
AND start_station_name NOT LIKE '%Bissell St & Armitage Ave - Charging%'
AND start_station_name NOT LIKE '%Lincoln Ave & Roscoe St - Charging%'
AND start_station_name NOT LIKE '%Pawel Bialowas - Test- PBSC charging station%'
AND start_station_name NOT LIKE '%Wilton Ave & Diversey Pkwy - Charging%');
UPDATE fiscal_year_2021_2022."total_rides_V3"
SET end_station_name =
WHERE end_station_name NOT LIKE '%Base - 2132 W Hubbard%'
AND end_station_name NOT LIKE '%Base - 2132 W Hubbard Warehouse%'
AND end_station_name NOT LIKE '%Hastings WH 2%'
AND end_station_name NOT LIKE '%DIVVY CASSETTE REPAIR MOBILE STATION%'
AND end_station_name NOT LIKE '%Throop/Hastings Mobile Station%'
AND end_station_name NOT LIKE '%Bissell St & Armitage Ave - Charging%'
AND end_station_name NOT LIKE '%Lincoln Ave & Roscoe St - Charging%'
AND end_station_name NOT LIKE '%Pawel Bialowas - Test- PBSC charging station%'
AND end_station_name NOT LIKE '%Wilton Ave & Diversey Pkwy - Charging%';
SELECT COUNT(*) AS num_of_rows_to_delete
FROM fiscal_year_2021_2022."total_rides_V4"
WHERE start_station_name LIKE '%Temp%';
Example:
ALTER TABLE fiscal_year_2021_2022."total_rides_V3"
RENAME COLUMN ride_id to trip_id
ALTER TABLE fiscal_year_2021_2022."total_rides_V4" ADD COLUMN start_date date;
ALTER TABLE fiscal_year_2021_2022."total_rides_V4" ADD COLUMN start_time time;
UPDATE fiscal_year_2021_2022."total_rides_V4"
SET start_date = started_at :: date,
start_time = started_at :: time;
ALTER TABLE fiscal_year_2021_2022."total_rides_V5" ADD COLUMN month VARCHAR;
UPDATE fiscal_year_2021_2022."total_rides_V4"
SET month = TO_CHAR (start_date, 'Month');
ALTER TABLE fiscal_year_2021_2022."total_rides_V4" ADD COLUMN day VARCHAR;
UPDATE fiscal_year_2021_2022."total_rides_V4"
SET day = TO_CHAR (start_date, 'Day');
ALTER TABLE fiscal_year_2021_2022."total_rides_V4" ADD COLUMN trip_duration INTEGER;
UPDATE fiscal_year_2021_2022."total_rides_V4"
SET trip_duration = EXTRACT(EPOCH FROM (ended_at - started_at));
SELECT
MIN(trip_duration),
MAX(trip_duration)
FROM fiscal_year_2021_2022."total_rides_V4";
SELECT *
FROM fiscal_year_2021_2022."total_rides_V4"
WHERE trip_duration <= 60 or trip_duration >= 86400;
CREATE TABLE fiscal_year_2021_2022."total_rides_CLEAN" AS
SELECT *
FROM fiscal_year_2021_2022."total_rides_V4"
WHERE trip_duration > 60 AND trip_duration < 86400
ORDER BY trip_duration DESC;
SELECT user_type, COUNT(*) AS number_of_rides
FROM fiscal_year_2021_2022."total_rides_CLEAN"
GROUP BY user_type
ORDER BY user_type DESC
user_type total
"member" 2618743
"casual" 1779144SELECT user_type,
COUNT(user_type) AS total,
ROUND (COUNT(user_type) * 100.0 /
(SELECT COUNT(*)
FROM fiscal_year_2021_2022."total_rides_CLEAN")) AS percent
FROM fiscal_year_2021_2022."total_rides_CLEAN"
GROUP BY user_type
user_type total percent
"casual" 1779144 40
"member" 2618743 60SELECT month, user_type,
COUNT(*) AS total,
ROUND(COUNT(*) * 100.0 / (SELECT COUNT(*)
FROM fiscal_year_2021_2022."total_rides_CLEAN")) AS percent
FROM fiscal_year_2021_2022."total_rides_CLEAN"
GROUP BY month, user_type
ORDER BY month
Example:
"month" "user_type" "total" "percent"
"April" "casual" 90747 2
"April" "member" 177666 4
"August" "casual" 265563 6
"August" "member" 328365 7
"December" "casual" 44644 1
"December" "member" 129282 3SELECT month, user_type,
COUNT(*) AS total,
ROUND(COUNT(*) * 100.0 / (SELECT COUNT(*)
FROM fiscal_year_2021_2022."total_rides_CLEAN")) AS percent
FROM fiscal_year_2021_2022."total_rides_CLEAN"
GROUP BY month, user_type
ORDER BY CASE WHEN month='January' THEN 1
WHEN month='February' THEN 2
WHEN month='March' THEN 3
WHEN month='April' THEN 4
WHEN month='May' THEN 5
WHEN month='June' THEN 6
WHEN month='July' THEN 7
WHEN month='August' THEN 8
WHEN month='September' THEN 9
WHEN month='October' THEN 10
WHEN month='November' THEN 11
ELSE 12
END;
SELECT LENGTH (month)
FROM fiscal_year_2021_2022."total_rides_CLEAN"
GROUP BY month;
UPDATE fiscal_year_2021_2022."total_rides_CLEAN"
SET month = TRIM(month), day = TRIM (day);
"length"
5
6
8
8
7
4
4
5
3
8
7
9
"month" "user_type" "total" "percent"
"January" "member" 66554 2
"January" "casual" 12461 0
"February" "casual" 14950 0
"February" "member" 72656 2
"March" "casual" 66329 2
"March" "member" 146390 3
"April" "casual" 90747 2
"April" "member" 177666 4
"May" "casual" 216860 5
"May" "member" 277063 6
"June" "casual" 287406 7
"June" "member" 322107 7
"July" "casual" 306378 7
"July" "member" 324096 7
"August" "member" 328365 7
"August" "casual" 265563 6
"September" "member" 307658 7
"September" "casual" 217375 5
"October" "casual" 187206 4
"October" "member" 284038 6
"November" "member" 182868 4
"November" "casual" 69225 2
"December" "member" 129282 3
"December" "casual" 44644 1
SELECT day, user_type,
COUNT(*) AS total,
ROUND(COUNT(*) * 100.0 / (SELECT COUNT(*)
FROM fiscal_year_2021_2022."total_rides_CLEAN")) AS percent
FROM fiscal_year_2021_2022."total_rides_CLEAN"
GROUP BY day, user_type
ORDER BY CASE WHEN day = 'Sunday' THEN 1
WHEN day = 'Monday' THEN 2
WHEN day = 'Tuesday' THEN 3
WHEN day = 'Wednesday' THEN 4
WHEN day = 'Thursday' THEN 5
WHEN day = 'Friday' THEN 6
ELSE 7
END;
"day" "user_type" "total" "percent"
"Sunday" "casual" 309192 7
"Sunday" "member" 296518 7
"Monday" "casual" 206822 5
"Monday" "member" 366622 8
"Tuesday" "member" 420848 10
"Tuesday" "casual" 199917 5
"Wednesday" "member" 415557 9
"Wednesday" "casual" 203274 5
"Thursday" "casual" 221681 5
"Thursday" "member" 404778 9
"Friday" "member" 370376 8
"Friday" "casual" 255838 6
"Saturday" "casual" 382420 9
"Saturday" "member" 344044 8
SELECT ROUND(AVG(trip_duration/60)) AS average_ride_duration
FROM fiscal_year_2021_2022."total_rides_CLEAN"
SELECT user_type,
ROUND(AVG(trip_duration/60)) AS average_ride_duration,
MIN(trip_duration/60) AS MIN_ride_duration,
MAX(trip_duration/60) AS MAX_ride_duration
FROM fiscal_year_2021_2022."total_rides_CLEAN"
GROUP BY user_type
"user_type" "average_ride_duration" "min_ride_duration" "max_ride_duration"
"casual" 24 1 1439
"member" 12 1 1435
SELECT month, user_type,
COUNT(*) AS total,
ROUND(AVG(trip_duration/60)) AS average_ride_duration
FROM fiscal_year_2021_2022."total_rides_CLEAN"
GROUP BY month, user_type
ORDER BY CASE WHEN month='January' THEN 1
WHEN month='February' THEN 2
WHEN month='March' THEN 3
WHEN month='April' THEN 4
WHEN month='May' THEN 5
WHEN month='June' THEN 6
WHEN month='July' THEN 7
WHEN month='August' THEN 8
WHEN month='September' THEN 9
WHEN month='October' THEN 10
WHEN month='November' THEN 11
ELSE 12
END;
"month" "user_type" "total" "average_ride_duration"
"January" "member" 66554 10
"January" "casual" 12461 18
"February" "casual" 14950 21
"February" "member" 72656 10
"March" "casual" 66329 26
"March" "member" 146390 11
"April" "casual" 90747 25
"April" "member" 177666 11
"May" "casual" 216860 27
"May" "member" 277063 13
"June" "casual" 287406 25
"June" "member" 322107 13
"July" "casual" 306378 25
"July" "member" 324096 13
"August" "member" 328365 13
"August" "casual" 265563 23
"September" "member" 307658 12
"September" "casual" 217375 22
"October" "casual" 187206 24
"October" "member" 284038 12
"November" "member" 182868 11
"November" "casual" 69225 20
"December" "member" 129282 10
"December" "casual" 44644 20
SELECT day, user_type,
COUNT(*) AS total,
ROUND(AVG(trip_duration/60)) AS average_ride_duration
FROM fiscal_year_2021_2022."total_rides_CLEAN"
GROUP BY day, user_type
ORDER BY CASE WHEN day = 'Sunday' THEN 1
WHEN day = 'Monday' THEN 2
WHEN day = 'Tuesday' THEN 3
WHEN day = 'Wednesday' THEN 4
WHEN day = 'Thursday' THEN 5
WHEN day = 'Friday' THEN 6
ELSE 7
END;
"day" "user_type" "total" "average_ride_duration"
"Sunday" "casual" 309192 28
"Sunday" "member" 296518 14
"Monday" "casual" 206822 25
"Monday" "member" 366622 12
"Tuesday" "member" 420848 12
"Tuesday" "casual" 199917 22
"Wednesday" "member" 415557 12
"Wednesday" "casual" 203274 21
"Thursday" "casual" 221681 21
"Thursday" "member" 404778 12
"Friday" "member" 370376 12
"Friday" "casual" 255838 22
"Saturday" "casual" 382420 27
"Saturday" "member" 344044 14
SELECT bike_type,
COUNT(bike_type) AS total_rides,
ROUND(AVG(trip_duration/60)) AS average_ride_duration,
ROUND (COUNT(bike_type) * 100.0 /
(SELECT COUNT(*)
FROM fiscal_year_2021_2022."total_rides_CLEAN")) AS percent
FROM fiscal_year_2021_2022."total_rides_CLEAN"
GROUP BY bike_type
"bike_type" "total_rides" "average_ride_duration" "percent"
"classic_bike" 2695565 17 61
"docked_bike" 188124 48 4
"electric_bike" 1514198 14 34
SELECT user_type, bike_type,
COUNT(user_type) AS total,
ROUND(AVG(trip_duration/60)) AS average_ride_duration,
ROUND(COUNT(user_type) * 100.0 / (SELECT COUNT(*) FROM fiscal_year_2021_2022."total_rides_CLEAN")) AS percent
FROM fiscal_year_2021_2022."total_rides_CLEAN"
GROUP BY user_type, bike_type
"user_type" "bike_type" "total" "average_ride_duration" "percent"
"casual" "classic_bike" 925549 24 21
"casual" "docked_bike" 188124 48 4
"casual" "electric_bike" 665471 17 15
"member" "classic_bike" 1770016 13 40
"member" "electric_bike" 848727 11 19
SELECT user_type, COUNT (*) AS number_of_round_trips
FROM fiscal_year_2021_2022."total_rides_CLEAN"
WHERE start_station_id = end_station_id
GROUP BY user_type
"user_type" "number_of_round_trips"
"casual" 154171
"member" 72341
SELECT start_station_id AS most_used_station,
COUNT(*) AS num_trips,
ROUND(AVG(trip_duration)/60) AS duration_in_mins
FROM fiscal_year_2021_2022."total_rides_CLEAN"
GROUP BY
start_station_id
ORDER BY
num_trips DESC
LIMIT 6
"most_used_station" "num_trips" "duration_in_mins "
"13022" 70719 33
"13300" 39310 34
"LF-005" 37616 25
"13042" 37111 31
"TA1308000050" 35990 14
"13008" 33958 35
SELECT start_station_id, start_station_name
FROM fiscal_year_2021_2022."total_rides_CLEAN"
WHERE start_station_id IN ('13022', '13300', 'LF-005', '13042', 'TA1308000050', '13008')
GROUP BY start_station_id, start_station_name;
"start_station_id" "start_station_name"
"13008" "Millennium Park"
"13022" "Streeter Dr & Grand Ave"
"13042" "Michigan Ave & Oak St"
"13300" "DuSable Lake Shore Dr & Monroe St"
"LF-005" "DuSable Lake Shore Dr & North Blvd"
"TA1308000050" "Wells St & Concord Ln"
SELECT user_type, start_station_id AS most_used_station, start_station_name,
COUNT(start_station_id) AS num_trips,
ROUND(AVG(trip_duration)/60) AS duration_in_mins
FROM fiscal_year_2021_2022."total_rides_CLEAN"
WHERE user_type = 'casual'
GROUP BY
user_type, start_station_id, start_station_name
ORDER BY
num_trips DESC
LIMIT 6
"user_type" "most_used_station" "start_station_name" "num_trips" "duration_in_mins"
"casual" "13022" "Streeter Dr & Grand Ave" 54792 37
"casual" "13300" "DuSable Lake Shore Dr & Monroe St" 30270 37
"casual" "13008" "Millennium Park" 25080 41
"casual" "13042" "Michigan Ave & Oak St" 23659 37
"casual" "LF-005" "DuSable Lake Shore Dr & North Blvd" 22130 30
"casual" "15544" "Shedd Aquarium" 19293 31
SELECT user_type, start_station_id AS most_used_station, start_station_name,
COUNT(start_station_id) AS num_trips,
ROUND(AVG(trip_duration)/60) AS duration_in_mins
FROM fiscal_year_2021_2022."total_rides_CLEAN"
WHERE user_type = 'member'
GROUP BY
user_type, start_station_id, start_station_name
ORDER BY
num_trips DESC
LIMIT 6
"user_type" "most_used_station" "start_station_name" "num_trips" "duration_in_mins"
"member" "KA1503000043" "Kingsbury St & Kinzie St" 24567 9
"member" "TA1307000039" "Clark St & Elm St" 21451 12
"member" "TA1308000050" "Wells St & Concord Ln" 20645 12
"member" "WL-012" "Clinton St & Washington Blvd" 18654 11
"member" "TA1305000032" "Clinton St & Madison St" 18483 11
"member" "KA1504000135" "Wells St & Elm St" 18242 11
SELECT start_station_id AS most_used_station, start_station_name,
COUNT(start_station_id) AS num_trips,
ROUND(AVG(trip_duration)/60) AS duration_in_mins
FROM fiscal_year_2021_2022."total_rides_CLEAN"
GROUP BY
start_station_id, start_station_name
ORDER BY
num_trips ASC
LIMIT 6
"most_used_station" "start_station_name" "num_trips" "duration_in_mins"
"1032" "Public Rack - Kedvale Ave & 63rd St" 1 5
"1033" "Public Rack - Pulaski Rd & 65th St" 1 11
"1018" "Public Rack - Kostner Ave & Wrightwood Ave" 1 102
"1030" "Public Rack - Lawndale & 63rd St" 1 37
"1015" "Public Rack - Peterson Ave & Drake Ave" 1 25
"1034" "Public Rack - Kenneth Ave & 63rd St E" 1 8
"user_type" "most_used_station" "start_station_name" "num_trips" "duration_in_mins"
"casual" "1036" "Public Rack - Kedzie Ave & 60th St" 1 3
"casual" "1038" "Public Rack - Kedzie Ave & 62nd Pl" 1 6
"casual" "1032" "Public Rack - Kedvale Ave & 63rd St" 1 5
"casual" "1030" "Public Rack - Lawndale & 63rd St" 1 37
"casual" "1018" "Public Rack - Kostner Ave & Wrightwood Ave" 1 102
"casual" "1040" "Public Rack - Talman Ave & Pershing Rd" 1 40
"user_type" "most_used_station" "start_station_name" "num_trips" "duration_in_mins"
"member" "1034" "Public Rack - Kenneth Ave & 63rd St E" 1 8
"member" "1036" "Public Rack - Kedzie Ave & 60th St" 1 6
"member" "1033" "Public Rack - Pulaski Rd & 65th St" 1 11
"member" "1015" "Public Rack - Peterson Ave & Drake Ave" 1 25
"member" "1016" "Public Rack - Peterson Ave & Bernard Ave" 1 9
"member" "1039" "Public Rack - Kedzie Ave & 61st Pl W" 1 4