This analysis examines the relationship between household vehicle ownership and travel distance using mobility tracking data.
library(readr)
library(dplyr)
library(ggplot2)
# Load the cleaned per-respondent survey table.
demographics <- read_csv(file = "demographics_clean.csv")
## Rows: 180 Columns: 43
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (28): _id, user_token, user_id, At_your_primary_job_do_you_ha, Which_be...
## dbl (14): data_ts, How_many_days_do_you_usually_w_001, Including_yourself_h...
## dttm (1): data_fmt_time
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Load the filtered trip-level records (one row per trip).
trips <- read_csv(file = "trips_filtered.csv")
## Rows: 37907 Columns: 16
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (6): user_token, user_id, data_start_loc_coordinates, data_end_loc_coor...
## dbl (8): data_duration_minutes, data_distance, data_distance_meters, data_d...
## dttm (2): data_start_fmt_time, data_end_fmt_time
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Keep a single survey row per user_id (distinct() retains the first row it
# encounters for each key) so the join below cannot multiply trip rows.
demographics_unique <- distinct(demographics, user_id, .keep_all = TRUE)

# Attach survey answers to every trip. A left join keeps all trips; trips
# whose user_id has no survey row get NA in the demographic columns.
data <- left_join(trips, demographics_unique, by = "user_id")
# Extract the two variables under study from the joined table.
distance <- data[["data_distance_miles"]]
vehicles <- data[["How_many_motor_vehicles_are_ow"]]

# Distribution of trip distance.
summary(distance)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.06236 0.69737 2.36948 4.58279 6.27473 68.86137
# Distribution of household vehicle count (NA count is reported by summary()).
summary(vehicles)
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 0.0000 0.0000 1.0000 0.6417 1.0000 3.0000 2049

# Explicit missing-value counts for each variable.
sum(is.na(distance))
## [1] 0
# NOTE(review): these NAs may be unmatched trips or unanswered survey
# items; the code here does not distinguish the two. Confirm if it matters.
sum(is.na(vehicles))
## [1] 2049
# Restrict to trips where both analysis variables are observed.
data_clean <- filter(
  data,
  !is.na(data_distance_miles) & !is.na(How_many_motor_vehicles_are_ow)
)

# Number of trips remaining after dropping incomplete rows.
nrow(data_clean)
## [1] 35858
# Histogram of trip distance for the complete-case trips.
with(
  data_clean,
  hist(
    data_distance_miles,
    main = "Distribution of Trip Distance",
    xlab = "Distance (Miles)"
  )
)
# Base-graphics scatter: household vehicle count (x) against trip distance (y).
with(
  data_clean,
  plot(
    How_many_motor_vehicles_are_ow,
    data_distance_miles,
    xlab = "Household Vehicles",
    ylab = "Trip Distance (Miles)",
    main = "Vehicle Ownership vs Trip Distance"
  )
)
# ggplot2 scatter of trip distance against household vehicle count.
#
# The vehicle count spans only 0-3 while data_clean holds tens of thousands
# of trips, so opaque points drawn at their exact x positions collapse into
# a few solid vertical stripes and hide where trips actually concentrate.
# Points are therefore spread horizontally and drawn semi-transparent:
#   * height = 0 leaves every trip distance (y) exactly as recorded;
#   * seed fixes the jitter so the figure is reproducible between runs.
ggplot(
  data_clean,
  aes(
    x = How_many_motor_vehicles_are_ow,
    y = data_distance_miles
  )
) +
  geom_point(
    position = position_jitter(width = 0.2, height = 0, seed = 1),
    alpha = 0.15
  ) +
  labs(
    title = "Relationship Between Vehicle Ownership and Trip Distance",
    x = "Number of Vehicles in Household",
    y = "Trip Distance (Miles)"
  )
# Pearson correlation between household vehicle count and trip distance,
# computed over individual trips.
# NOTE(review): vehicle count is a per-user survey answer repeated on each of
# that user's trips, so trips are not independent observations; treat this
# coefficient as descriptive only.
with(
  data_clean,
  cor(
    How_many_motor_vehicles_are_ow,
    data_distance_miles,
    method = "pearson"
  )
)
## [1] 0.1513782