Cyclistic

Cargo las librerías que se necesitan

# Attach the packages used by the analysis.
# One call per line: two statements on the same line without a `;` between
# them do not parse.
library(tidyverse)
library(lubridate)
library(ggplot2)

Código para cargar la base de datos en R

# Load the twelve monthly Divvy trip files, March 2024 through February 2025.
# Each month stays in its own data frame (d1 ... d12, numbered by calendar
# month) because the checks further down inspect them one by one.
# `header = TRUE` and `sep = ","` restate the read.csv() defaults.
d3 <- read.csv("202403-divvy-tripdata.csv", header = TRUE, sep = ",")
d4 <- read.csv("202404-divvy-tripdata.csv", header = TRUE, sep = ",")
d5 <- read.csv("202405-divvy-tripdata.csv", header = TRUE, sep = ",")
d6 <- read.csv("202406-divvy-tripdata.csv", header = TRUE, sep = ",")
d7 <- read.csv("202407-divvy-tripdata.csv", header = TRUE, sep = ",")
d8 <- read.csv("202408-divvy-tripdata.csv", header = TRUE, sep = ",")
d9 <- read.csv("202409-divvy-tripdata.csv", header = TRUE, sep = ",")
d10 <- read.csv("202410-divvy-tripdata.csv", header = TRUE, sep = ",")
d11 <- read.csv("202411-divvy-tripdata.csv", header = TRUE, sep = ",")
d12 <- read.csv("202412-divvy-tripdata.csv", header = TRUE, sep = ",")
d1 <- read.csv("202501-divvy-tripdata.csv", header = TRUE, sep = ",")
d2 <- read.csv("202502-divvy-tripdata.csv", header = TRUE, sep = ",")

# Stack the months into a single table. rbind() on data frames matches
# columns by name, so a month with a different header makes it fail here.
total_datos <- rbind(d1, d2, d3, d4, d5, d6, d7, d8, d9, d10, d11, d12)

# Open the data viewer only when someone is there to look at it.
if (interactive()) {
  View(total_datos)
}

Reviso que los títulos de las columnas sean iguales

# Check that every monthly data frame has the same column names, in the same
# order.
# The original session printed colnames() for d3 ... d12 one at a time and
# compared them by eye; all ten printed the 13 names listed below. d1 and d2
# are included here as well so the check covers every month.
expected_cols <- c(
  "ride_id", "rideable_type", "started_at", "ended_at",
  "start_station_name", "start_station_id",
  "end_station_name", "end_station_id",
  "start_lat", "start_lng", "end_lat", "end_lng",
  "member_casual"
)

monthly_cols <- list(
  d1 = colnames(d1), d2 = colnames(d2), d3 = colnames(d3),
  d4 = colnames(d4), d5 = colnames(d5), d6 = colnames(d6),
  d7 = colnames(d7), d8 = colnames(d8), d9 = colnames(d9),
  d10 = colnames(d10), d11 = colnames(d11), d12 = colnames(d12)
)

# Named logical vector: TRUE where a month's header equals the expected one.
same_header <- vapply(
  monthly_cols,
  identical,
  y = expected_cols,
  FUN.VALUE = logical(1)
)
same_header

# Stop early if any month deviates instead of relying on visual inspection.
stopifnot(all(same_header))
# Inspect the structure of each monthly data frame.
str(d1)
str(d2)
str(d3)
str(d4)
str(d5)
str(d6)
str(d7)
str(d8)
str(d9)
str(d10)
str(d11)
str(d12)

# Summary of the output recorded in the original session:
# - Every data frame had 13 variables. ride_id, rideable_type, started_at,
#   ended_at, start_station_name, start_station_id, end_station_name,
#   end_station_id and member_casual were chr; start_lat, start_lng, end_lat
#   and end_lng were num.
# - Row counts:
#     d1  138689   d2  151880   d3  301687   d4  415025
#     d5  609493   d6  710721   d7  748962   d8  755639
#     d9  821276   d10 616281   d11 335075   d12 178372
# - started_at / ended_at are character strings, not date-times. The samples
#   from d3-d5 show whole seconds ("2024-03-05 18:33:11"); the samples from
#   d6-d12, d1 and d2 show milliseconds ("2024-06-11 17:20:06.289").
# - The station name/id samples from d3, d6, d7, d9 and d10 were empty
#   strings (""), so missing stations are stored as "" rather than NA.

# Add date-part columns derived from the trip start ----
# started_at is a character timestamp; as.Date() keeps its calendar date.
total_datos$date <- as.Date(total_datos$started_at)
total_datos$month <- format(total_datos$date, "%m")
total_datos$day <- format(total_datos$date, "%d")
total_datos$year <- format(total_datos$date, "%Y")
# "%A" produces weekday names in the session locale; the recorded run printed
# Spanish names such as "lunes".
total_datos$day_of_week <- format(total_datos$date, "%A")

# Add ride_length: trip duration in seconds ----
# units = "secs" is explicit because the default ("auto") picks the unit from
# the data, which would silently change the meaning of the numbers.
# NOTE(review): the character timestamps are parsed in the session time zone;
# pass `tz =` to difftime() if results must be identical across machines.
total_datos$ride_length <- as.numeric(
  difftime(total_datos$ended_at, total_datos$started_at, units = "secs")
)
is.numeric(total_datos$ride_length)
# Recorded in the original session: TRUE

# Remove bad rows ----
# Drop rides that start at "HQ QR" or have a negative duration.
# which() keeps only rows where the test is TRUE, so a row whose test is NA
# (e.g. an unparseable timestamp) is dropped instead of becoming an all-NA
# row, which is what indexing with an NA logical would produce.
is_valid <- !(total_datos$start_station_name == "HQ QR" |
  total_datos$ride_length < 0)
total_datos_v2 <- total_datos[which(is_valid), ]

if (interactive()) {
  View(total_datos_v2)
}

# Step 4: descriptive analysis ----
# Values after each call are the ones recorded in the original session
# (seconds).
mean(total_datos_v2$ride_length)
# 1027.102
median(total_datos_v2$ride_length)
# 580.602
max(total_datos_v2$ride_length)
# 93596
min(total_datos_v2$ride_length)
# 0
summary(total_datos_v2$ride_length)
#    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.
#     0.0   331.0   580.6  1027.1  1032.0 93596.0

# Compare ride_length (seconds) between members and casual riders.
# The `data =` form gives result columns named member_casual and ride_length
# instead of "total_datos_v2$...". Values after each call are the ones
# recorded in the original session.
aggregate(ride_length ~ member_casual, data = total_datos_v2, FUN = mean)
#   casual 1496.249
#   member  753.166

aggregate(ride_length ~ member_casual, data = total_datos_v2, FUN = median)
#   casual 721.346
#   member 518.938

aggregate(ride_length ~ member_casual, data = total_datos_v2, FUN = max)
#   casual 93596
#   member 93588

aggregate(ride_length ~ member_casual, data = total_datos_v2, FUN = min)
#   casual 0
#   member 0

# Compare the average ride_length (seconds) of members and casual riders for
# each day of the week.
aggregate(
  ride_length ~ member_casual + day_of_week,
  data = total_datos_v2,
  FUN = mean
)
# Recorded in the original session. Weekday names came out in Spanish and in
# alphabetical order because day_of_week is a plain character column.
#   day_of_week     casual     member
#   domingo      1753.3308   842.7955
#   jueves       1305.6601   724.6653
#   lunes        1428.6812   718.9007
#   martes       1270.0357   720.9285
#   miércoles    1326.6843   727.3092
#   sábado       1680.5909   834.3117
#   viernes      1455.3394   736.2591