# Setup: confirm the working directory, attach packages, and load the data.

# The CSV below is referenced by a relative path, so it is resolved against
# this directory.
getwd()
## [1] "/Users/victoriaecheverri/Desktop/UTSA Classes/Applied Quantitative Methods/My Class Stuff/Monday Class/victoriasustainabilitydata"

# readr supplies read_csv(), dplyr supplies filter()/mutate()/%>%, and pastecs
# supplies stat.desc(). Attach order matters: pastecs is attached last, so its
# first()/last() mask dplyr's (see the message below).
library(readr)
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(pastecs)
## 
## Attaching package: 'pastecs'
## The following objects are masked from 'package:dplyr':
## 
##     first, last

# Declare the column types explicitly instead of relying on type guessing.
# These are the types readr previously inferred for this file (two doubles and
# one date). Pinning them keeps the parse reproducible and turns a malformed
# value into a parsing warning rather than a silently re-typed column.
# Supplying col_types also suppresses the column-specification message.
bike_trend <- read_csv(
  "bike_trend.csv",
  col_types = cols(
    date = col_date(),
    Bicycling_Trips = col_double(),
    Rolling_Avg = col_double()
  )
)

For this assignment, I selected the variable Bicycling_Trips from the bike_trend.csv dataset. This variable measures the total number of bicycling trips recorded for each observation in the data. It is useful because it provides a direct measure of bicycling activity over time and helps describe patterns in transportation behavior.

# Descriptive statistics and distribution plots for Bicycling_Trips.
#
# readr, dplyr and pastecs are attached, and bike_trend is read from
# "bike_trend.csv", at the top of this file, so neither step is repeated here
# (repeating them re-read the file and re-printed the same messages).

# Summary statistics on the raw column. stat.desc() reports the count of
# missing values itself (nbr.na), so it is run before any filtering.
stat.desc(bike_trend$Bicycling_Trips)
##      nbr.val     nbr.null       nbr.na          min          max        range 
##  198.0000000    0.0000000    0.0000000    1.0000000   24.0000000   23.0000000 
##          sum       median         mean      SE.mean CI.mean.0.95          var 
## 1500.0000000    6.0000000    7.5757576    0.3509556    0.6921123   24.3876327 
##      std.dev     coef.var 
##    4.9383836    0.6518666

# Drop rows with a missing trip count before plotting. The summary above shows
# nbr.na = 0 for this file, so the filter is a safeguard rather than a change.
bike_trend_clean <- bike_trend %>%
  filter(!is.na(Bicycling_Trips))

# Distribution of the raw trip counts.
hist(bike_trend_clean$Bicycling_Trips,
     main = "Histogram of Bicycling Trips",
     xlab = "Bicycling Trips")

# The mean (7.58) sits above the median (6), which suggests a right-skewed
# distribution; a log transform compresses the upper tail. log1p(x) is the
# base R idiom for log(x + 1) and stays defined if a count of 0 ever appears.
bike_trend_clean <- bike_trend_clean %>%
  mutate(log_Bicycling_Trips = log1p(Bicycling_Trips))

# Distribution of the log-transformed trip counts.
hist(bike_trend_clean$log_Bicycling_Trips,
     main = "Histogram of Log-Transformed Bicycling Trips",
     xlab = "Log(Bicycling Trips + 1)")