This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown see http://rmarkdown.rstudio.com.
When you click the **Knit** button, a document is generated that includes both the content and the output of any embedded R code chunks. You can embed an R code chunk like this:
# Load necessary libraries
library(ggplot2)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ forcats 1.0.0 ✔ stringr 1.5.0
## ✔ lubridate 1.9.2 ✔ tibble 3.2.1
## ✔ purrr 1.0.2 ✔ tidyr 1.3.0
## ✔ readr 2.1.4
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
# Load the dataset.
# The path is assembled with file.path() from a single directory variable so
# the data location only has to be changed in one place.
# No setwd() call: previously it ran *after* the file had already been read
# through an absolute path, so it had no effect on loading and only altered
# the session's working directory as a side effect.
data_dir <- "D:/dataset"
bike_data <- read.csv(file.path(data_dir, "bike.csv"))
# Inspect the first rows of the data
head(bike_data)
## Date Rented.Bike.Count Hour Temperature Humidity Wind.speed Visibility
## 1 1/12/2017 254 0 -5.2 37 2.2 2000
## 2 1/12/2017 204 1 -5.5 38 0.8 2000
## 3 1/12/2017 173 2 -6.0 39 1.0 2000
## 4 1/12/2017 107 3 -6.2 40 0.9 2000
## 5 1/12/2017 78 4 -6.0 36 2.3 2000
## 6 1/12/2017 100 5 -6.4 37 1.5 2000
## Dew.point.temperature Solar.Radiation Rainfall Snowfall Seasons Holiday
## 1 -17.6 0 0 0 Winter No Holiday
## 2 -17.6 0 0 0 Winter No Holiday
## 3 -17.7 0 0 0 Winter No Holiday
## 4 -17.6 0 0 0 Winter No Holiday
## 5 -18.6 0 0 0 Winter No Holiday
## 6 -18.7 0 0 0 Winter No Holiday
## Functioning.Day
## 1 Yes
## 2 Yes
## 3 Yes
## 4 Yes
## 5 Yes
## 6 Yes
# Show the structure of the data frame: number of observations and variables,
# plus each column's name, type and first few values
str(bike_data)
## 'data.frame': 8760 obs. of 14 variables:
## $ Date : chr "1/12/2017" "1/12/2017" "1/12/2017" "1/12/2017" ...
## $ Rented.Bike.Count : int 254 204 173 107 78 100 181 460 930 490 ...
## $ Hour : int 0 1 2 3 4 5 6 7 8 9 ...
## $ Temperature : num -5.2 -5.5 -6 -6.2 -6 -6.4 -6.6 -7.4 -7.6 -6.5 ...
## $ Humidity : int 37 38 39 40 36 37 35 38 37 27 ...
## $ Wind.speed : num 2.2 0.8 1 0.9 2.3 1.5 1.3 0.9 1.1 0.5 ...
## $ Visibility : int 2000 2000 2000 2000 2000 2000 2000 2000 2000 1928 ...
## $ Dew.point.temperature: num -17.6 -17.6 -17.7 -17.6 -18.6 -18.7 -19.5 -19.3 -19.8 -22.4 ...
## $ Solar.Radiation : num 0 0 0 0 0 0 0 0 0.01 0.23 ...
## $ Rainfall : num 0 0 0 0 0 0 0 0 0 0 ...
## $ Snowfall : num 0 0 0 0 0 0 0 0 0 0 ...
## $ Seasons : chr "Winter" "Winter" "Winter" "Winter" ...
## $ Holiday : chr "No Holiday" "No Holiday" "No Holiday" "No Holiday" ...
## $ Functioning.Day : chr "Yes" "Yes" "Yes" "Yes" ...
# Print per-column summaries: min, quartiles, median, mean and max for numeric
# columns; length, class and mode for character columns
summary(bike_data)
## Date Rented.Bike.Count Hour Temperature
## Length:8760 Min. : 0.0 Min. : 0.00 Min. :-17.80
## Class :character 1st Qu.: 191.0 1st Qu.: 5.75 1st Qu.: 3.50
## Mode :character Median : 504.5 Median :11.50 Median : 13.70
## Mean : 704.6 Mean :11.50 Mean : 12.88
## 3rd Qu.:1065.2 3rd Qu.:17.25 3rd Qu.: 22.50
## Max. :3556.0 Max. :23.00 Max. : 39.40
## Humidity Wind.speed Visibility Dew.point.temperature
## Min. : 0.00 Min. :0.000 Min. : 27 Min. :-30.600
## 1st Qu.:42.00 1st Qu.:0.900 1st Qu.: 940 1st Qu.: -4.700
## Median :57.00 Median :1.500 Median :1698 Median : 5.100
## Mean :58.23 Mean :1.725 Mean :1437 Mean : 4.074
## 3rd Qu.:74.00 3rd Qu.:2.300 3rd Qu.:2000 3rd Qu.: 14.800
## Max. :98.00 Max. :7.400 Max. :2000 Max. : 27.200
## Solar.Radiation Rainfall Snowfall Seasons
## Min. :0.0000 Min. : 0.0000 Min. :0.00000 Length:8760
## 1st Qu.:0.0000 1st Qu.: 0.0000 1st Qu.:0.00000 Class :character
## Median :0.0100 Median : 0.0000 Median :0.00000 Mode :character
## Mean :0.5691 Mean : 0.1487 Mean :0.07507
## 3rd Qu.:0.9300 3rd Qu.: 0.0000 3rd Qu.:0.00000
## Max. :3.5200 Max. :35.0000 Max. :8.80000
## Holiday Functioning.Day
## Length:8760 Length:8760
## Class :character Class :character
## Mode :character Mode :character
##
##
##
# Scatter plot of rented bike count against hour of day, with a straight-line
# (linear model) trend overlaid and no confidence band
ggplot(
  data = bike_data,
  mapping = aes(x = Hour, y = Rented.Bike.Count)
) +
  geom_point() +
  geom_smooth(method = "lm", se = FALSE)
## `geom_smooth()` using formula = 'y ~ x'
# Correlation coefficient between hourly rentals and temperature
# (cor() defaults to Pearson)
with(bike_data, cor(Rented.Bike.Count, Temperature))
## [1] 0.5385582
# Box plots of rented bike count, one box per season
ggplot(
  data = bike_data,
  mapping = aes(x = Seasons, y = Rented.Bike.Count)
) +
  geom_boxplot()
# Two-sample t-test of mean rentals on holidays vs. non-holidays.
# With default settings t.test() does not assume equal variances, so this is
# the Welch variant.
t.test(Rented.Bike.Count ~ Holiday, data = bike_data)
##
## Welch Two Sample t-test
##
## data: Rented.Bike.Count by Holiday
## t = -7.5973, df = 490.23, p-value = 1.545e-13
## alternative hypothesis: true difference in means between group Holiday and group No Holiday is not equal to 0
## 95 percent confidence interval:
## -271.1960 -159.7461
## sample estimates:
## mean in group Holiday mean in group No Holiday
## 499.7569 715.2280
# Additional analysis as needed...