R Markdown

This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown see http://rmarkdown.rstudio.com.

When you click the Knit button, a document will be generated that includes both the content and the output of any embedded R code chunks within the document. You can embed an R code chunk like this:

# Load necessary libraries
library(ggplot2)
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ forcats   1.0.0     ✔ stringr   1.5.0
## ✔ lubridate 1.9.2     ✔ tibble    3.2.1
## ✔ purrr     1.0.2     ✔ tidyr     1.3.0
## ✔ readr     2.1.4
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
# Load the dataset.
# The data directory is defined once and the full path is handed straight to
# read.csv(), so the working directory never has to be changed. (The previous
# setwd() call ran only after the file had already been read via an absolute
# path, so it played no part in loading the data.)
data_dir <- "D:/dataset"
bike_data <- read.csv(file.path(data_dir, "bike.csv"))

# Preview the first six rows of the data
bike_data %>% head(n = 6L)
##        Date Rented.Bike.Count Hour Temperature Humidity Wind.speed Visibility
## 1 1/12/2017               254    0        -5.2       37        2.2       2000
## 2 1/12/2017               204    1        -5.5       38        0.8       2000
## 3 1/12/2017               173    2        -6.0       39        1.0       2000
## 4 1/12/2017               107    3        -6.2       40        0.9       2000
## 5 1/12/2017                78    4        -6.0       36        2.3       2000
## 6 1/12/2017               100    5        -6.4       37        1.5       2000
##   Dew.point.temperature Solar.Radiation Rainfall Snowfall Seasons    Holiday
## 1                 -17.6               0        0        0  Winter No Holiday
## 2                 -17.6               0        0        0  Winter No Holiday
## 3                 -17.7               0        0        0  Winter No Holiday
## 4                 -17.6               0        0        0  Winter No Holiday
## 5                 -18.6               0        0        0  Winter No Holiday
## 6                 -18.7               0        0        0  Winter No Holiday
##   Functioning.Day
## 1             Yes
## 2             Yes
## 3             Yes
## 4             Yes
## 5             Yes
## 6             Yes
# Compact structure overview: dimensions plus the type of every column
bike_data %>% str()
## 'data.frame':    8760 obs. of  14 variables:
##  $ Date                 : chr  "1/12/2017" "1/12/2017" "1/12/2017" "1/12/2017" ...
##  $ Rented.Bike.Count    : int  254 204 173 107 78 100 181 460 930 490 ...
##  $ Hour                 : int  0 1 2 3 4 5 6 7 8 9 ...
##  $ Temperature          : num  -5.2 -5.5 -6 -6.2 -6 -6.4 -6.6 -7.4 -7.6 -6.5 ...
##  $ Humidity             : int  37 38 39 40 36 37 35 38 37 27 ...
##  $ Wind.speed           : num  2.2 0.8 1 0.9 2.3 1.5 1.3 0.9 1.1 0.5 ...
##  $ Visibility           : int  2000 2000 2000 2000 2000 2000 2000 2000 2000 1928 ...
##  $ Dew.point.temperature: num  -17.6 -17.6 -17.7 -17.6 -18.6 -18.7 -19.5 -19.3 -19.8 -22.4 ...
##  $ Solar.Radiation      : num  0 0 0 0 0 0 0 0 0.01 0.23 ...
##  $ Rainfall             : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ Snowfall             : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ Seasons              : chr  "Winter" "Winter" "Winter" "Winter" ...
##  $ Holiday              : chr  "No Holiday" "No Holiday" "No Holiday" "No Holiday" ...
##  $ Functioning.Day      : chr  "Yes" "Yes" "Yes" "Yes" ...
# Per-column summary: quartiles and mean for numeric columns,
# length/class/mode for character columns
bike_data %>% summary()
##      Date           Rented.Bike.Count      Hour        Temperature    
##  Length:8760        Min.   :   0.0    Min.   : 0.00   Min.   :-17.80  
##  Class :character   1st Qu.: 191.0    1st Qu.: 5.75   1st Qu.:  3.50  
##  Mode  :character   Median : 504.5    Median :11.50   Median : 13.70  
##                     Mean   : 704.6    Mean   :11.50   Mean   : 12.88  
##                     3rd Qu.:1065.2    3rd Qu.:17.25   3rd Qu.: 22.50  
##                     Max.   :3556.0    Max.   :23.00   Max.   : 39.40  
##     Humidity       Wind.speed      Visibility   Dew.point.temperature
##  Min.   : 0.00   Min.   :0.000   Min.   :  27   Min.   :-30.600      
##  1st Qu.:42.00   1st Qu.:0.900   1st Qu.: 940   1st Qu.: -4.700      
##  Median :57.00   Median :1.500   Median :1698   Median :  5.100      
##  Mean   :58.23   Mean   :1.725   Mean   :1437   Mean   :  4.074      
##  3rd Qu.:74.00   3rd Qu.:2.300   3rd Qu.:2000   3rd Qu.: 14.800      
##  Max.   :98.00   Max.   :7.400   Max.   :2000   Max.   : 27.200      
##  Solar.Radiation     Rainfall          Snowfall         Seasons         
##  Min.   :0.0000   Min.   : 0.0000   Min.   :0.00000   Length:8760       
##  1st Qu.:0.0000   1st Qu.: 0.0000   1st Qu.:0.00000   Class :character  
##  Median :0.0100   Median : 0.0000   Median :0.00000   Mode  :character  
##  Mean   :0.5691   Mean   : 0.1487   Mean   :0.07507                     
##  3rd Qu.:0.9300   3rd Qu.: 0.0000   3rd Qu.:0.00000                     
##  Max.   :3.5200   Max.   :35.0000   Max.   :8.80000                     
##    Holiday          Functioning.Day   
##  Length:8760        Length:8760       
##  Class :character   Class :character  
##  Mode  :character   Mode  :character  
##                                       
##                                       
## 
# Scatter plot of rented bike count against hour of day, overlaid with a
# straight-line (linear model) fit drawn without its confidence band
bike_data %>%
  ggplot(mapping = aes(x = Hour, y = Rented.Bike.Count)) +
  geom_point() +
  geom_smooth(method = "lm", se = FALSE)
## `geom_smooth()` using formula = 'y ~ x'

# Correlation (cor() default: Pearson) between rental count and temperature
with(bike_data, cor(Rented.Bike.Count, Temperature))
## [1] 0.5385582
# Compare the distribution of rentals across seasons with one box per season
bike_data %>%
  ggplot(mapping = aes(x = Seasons, y = Rented.Bike.Count)) +
  geom_boxplot()

# Two-sample t-test of rental counts on holidays vs. non-holidays.
# var.equal is left at its default, so this is the Welch (unequal-variance)
# variant of the test.
t.test(Rented.Bike.Count ~ Holiday, data = bike_data)
## 
##  Welch Two Sample t-test
## 
## data:  Rented.Bike.Count by Holiday
## t = -7.5973, df = 490.23, p-value = 1.545e-13
## alternative hypothesis: true difference in means between group Holiday and group No Holiday is not equal to 0
## 95 percent confidence interval:
##  -271.1960 -159.7461
## sample estimates:
##    mean in group Holiday mean in group No Holiday 
##                 499.7569                 715.2280
# Additional analysis as needed...