library(readr)
library(tidyr)
library(ggplot2)
library(dplyr)
library(lubridate)

# First, let's examine the summary statistics of hourly bikeshare rentals for
# a few times in the day. This will give us an intuition for what normal usage
# looks like.

# Summarise rentals for a single hour of the day.
#
# data:    a data frame with `hour` and `rented_bikes` columns.
# at_hour: the value of `hour` to keep (compared with `==`).
#
# Returns a one-row summary with the min, max, mean and median of
# `rented_bikes` over the rows where `hour == at_hour`.
summarise_rentals_at_hour <- function(data, at_hour) {
  data |>
    filter(hour == at_hour) |>
    summarise(
      min_rented = min(rented_bikes),
      max_rented = max(rented_bikes),
      mean_rented = mean(rented_bikes),
      median_rented = median(rented_bikes)
    )
}

# Each call is a top-level expression, so its result is printed when the
# script is run interactively or knitted.
summarise_rentals_at_hour(SeoulBikeData, 8)

summarise_rentals_at_hour(SeoulBikeData, 12)

summarise_rentals_at_hour(SeoulBikeData, 16)

summarise_rentals_at_hour(SeoulBikeData, 19)

# We can tell from the summary statistics that commuting hours are not the
# peak hours for bikeshare. This is surprising: from an American point of view,
# I assumed that commuting hours would have the highest demand; however, 7pm
# (hour 19) is far busier than 8am, 12pm, or 4pm. This makes me wonder how
# demand varies throughout the day, and what the cultural and systemic drivers
# of this are.

# Next I will take the summary statistics of solar radiation at noon. This
# field describes the amount of solar energy per square meter. I'd like to
# know what values to expect.

# Noon-only summary of solar radiation: the smallest, largest, mean and median
# value of solar_radiation_mJ_m2 across the rows where hour == 12.
SeoulBikeData |>
  filter(hour == 12) |>
  summarise(
    min_rads = min(solar_radiation_mJ_m2),
    max_rads = max(solar_radiation_mJ_m2),
    mean_rads = mean(solar_radiation_mJ_m2),
    median_rads = median(solar_radiation_mJ_m2)
  )

# A typical amount of solar radiation at noon seems to be around 1.8 MJ/m^2;
# however, it has occasionally reached a high of 3.44. What caused this
# condition, how damaging is it to human health, and how does it fluctuate
# throughout the year?

# Next I will calculate value counts for seasons, holidays, and
# functioning_days to see how many observations fall in each category and
# check their unique values.

# table() counts rows per category, so these are observation counts rather
# than rental demand.
table(SeoulBikeData$seasons)
# Interestingly, the observation counts are flat across seasons.

table(SeoulBikeData$holiday)
# Not many holidays.

table(SeoulBikeData$functioning_day)
# Not many non-functional days.

# Question 1: How do temperature and precipitation affect bikeshare usage?
# Question 2: What is the expected decrease in riders per inch of snow?
# Question 3: What times of day have the greatest and least demand for
#             bikeshare?

# Mean number of rented bikes for each distinct value of hour
# (one output row per hour).
aggregate(
  rented_bikes ~ hour,
  data = SeoulBikeData,
  FUN = mean
)

# This information tells us that there is a spike in demand at 8am, a slight
# decrease in the following two hours, then a steady rise until peak hours
# between 5pm and 9pm, and finally a steady fall to lows between 4am and 5am.
# The large spike at 6pm may be caused by many people leaving work. The data
# shows that while commuting is an important driver of bikeshare demand, the
# evening hours exhibit heavy demand. The data also shows that there is still
# plenty of bikeshare activity happening during the night. This output would
# be important for bikeshare business operations and planning.

# Scatter plot of rented bikes against temperature, one point per row of
# SeoulBikeData, coloured by season.
# String literals use plain ASCII double quotes: typographic quotes are not
# string delimiters in R and make the script fail to parse.
ggplot(SeoulBikeData, mapping = aes(x = temp_c, y = rented_bikes)) +
  geom_point(mapping = aes(color = seasons)) +
  labs(
    title = "Scatter plot of Temperature and Bike Rentals by Season",
    x = "Temperature(celsius)",
    y = "Number of Rented Bikes"
  ) +
  theme_minimal()

# The scatter plot above shows a clear correlation between temperature and
# bikeshare usage. As expected, winter sees the fewest riders while summer has
# the most. However, when the temperature is very high, ridership decreases.

# Boxplot of rented bikes for each hour of the day. hour is converted to a
# factor so that each distinct hour gets its own box on a discrete x axis.
# String literals use plain ASCII double quotes: typographic quotes are not
# string delimiters in R and make the script fail to parse.
ggplot(SeoulBikeData, aes(x = as.factor(hour), y = rented_bikes)) +
  geom_boxplot() +
  labs(
    title = "Boxplot of Rented Bikes by Hour",
    x = "Hour",
    y = "Number of Rented Bikes"
  ) +
  theme_minimal()

# Unusable code below.
# SeoulBikeData <- SeoulBikeData |>
#   separate(date, into = c("day", "month", "year"), sep = "/") |>
#   mutate(
#     day = as.numeric(day),
#     month = as.numeric(month),
#     year = as.numeric(year)
#   )
#
# solar_month <- SeoulBikeData |>
#   group_by(month, seasons) |>
#   summarise(mean_solar_radiation = mean(solar_radiation_mJ_m2)) |>
#   arrange(as.numeric(month))
#
# solar_day <- SeoulBikeData |>
#   group_by(day, seasons) |>
#   summarise(mean_solar_radiation = mean(solar_radiation_mJ_m2)) |>
#   arrange(as.numeric(day))
#
#
# ggplot(solar_day, aes(x = as.numeric(day), y = mean_solar_radiation, color = seasons)) +
#   geom_point(size = 3) +
#   geom_line() +
#   labs(
#     title = "Seasonality of Solar Radiation",
#     x = "Day",
#     y = "Mean Solar Radiation (mJ/m²)",
#     color = "Season"
#   ) +
#   theme_minimal()