R Markdown

This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown see http://rmarkdown.rstudio.com.

When you click the Knit button, a document is generated that includes both the content and the output of any R code chunks embedded in the document. You can embed an R code chunk like this:

library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(ggplot2)
## Warning: package 'ggplot2' was built under R version 4.3.2
# Location of the bike-rental CSV. Defaults to the original hard-coded path;
# set the BIKE_DATA_PATH environment variable to run the analysis elsewhere.
data_path <- Sys.getenv("BIKE_DATA_PATH", unset = "D:/dataset/db1bike.csv")
if (!file.exists(data_path)) {
  # Fail early with a clear message instead of read.csv's connection error.
  stop("Bike data file not found: ", data_path, call. = FALSE)
}
data <- read.csv(data_path)

# Explore the structure of the dataset (column names, types, first values).
# NOTE(review): Date is read as character; parse it with as.Date() and an
# explicit format before any date arithmetic — confirm day/month order.
str(data)
## 'data.frame':    199 obs. of  14 variables:
##  $ Date                 : chr  "01-12-2017" "01-12-2017" "01-12-2017" "01-12-2017" ...
##  $ Rented_Bike_Count    : int  254 204 173 107 78 100 181 460 930 490 ...
##  $ Hour                 : int  0 1 2 3 4 5 6 7 8 9 ...
##  $ Temperature          : num  -5.2 -5.5 -6 -6.2 -6 -6.4 -6.6 -7.4 -7.6 -6.5 ...
##  $ Humidity             : int  37 38 39 40 36 37 35 38 37 27 ...
##  $ Wind_speed           : num  2.2 0.8 1 0.9 2.3 1.5 1.3 0.9 1.1 0.5 ...
##  $ Visibility           : int  2000 2000 2000 2000 2000 2000 2000 2000 2000 1928 ...
##  $ Dew.point.temperature: num  -17.6 -17.6 -17.7 -17.6 -18.6 -18.7 -19.5 -19.3 -19.8 -22.4 ...
##  $ Solar.Radiation      : num  0 0 0 0 0 0 0 0 0.01 0.23 ...
##  $ Rainfall             : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ Snowfall             : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ Seasons              : chr  "Winter" "Winter" "Winter" "Winter" ...
##  $ Holiday              : chr  "No Holiday" "No Holiday" "No Holiday" "No Holiday" ...
##  $ Functioning.Day      : chr  "Yes" "Yes" "Yes" "Yes" ...
# Summary statistics
# Numeric columns report min / quartiles / mean / max; character columns
# report only length, class and mode.
summary(object = data)
##      Date           Rented_Bike_Count      Hour       Temperature    
##  Length:199         Min.   : 13.0     Min.   : 0.0   Min.   :-8.100  
##  Class :character   1st Qu.:191.5     1st Qu.: 5.0   1st Qu.:-3.700  
##  Mode  :character   Median :341.0     Median :11.0   Median :-0.800  
##                     Mean   :335.5     Mean   :11.2   Mean   :-0.601  
##                     3rd Qu.:430.5     3rd Qu.:17.0   3rd Qu.: 3.050  
##                     Max.   :937.0     Max.   :23.0   Max.   : 8.000  
##     Humidity       Wind_speed      Visibility     Dew.point.temperature
##  Min.   :21.00   Min.   :0.000   Min.   :  66.0   Min.   :-22.40       
##  1st Qu.:37.00   1st Qu.:0.800   1st Qu.: 873.5   1st Qu.:-15.60       
##  Median :51.00   Median :1.500   Median :1808.0   Median : -7.90       
##  Mean   :57.02   Mean   :1.673   Mean   :1412.0   Mean   : -8.95       
##  3rd Qu.:79.00   3rd Qu.:2.450   3rd Qu.:2000.0   3rd Qu.: -3.10       
##  Max.   :96.00   Max.   :5.800   Max.   :2000.0   Max.   :  3.80       
##  Solar.Radiation     Rainfall          Snowfall         Seasons         
##  Min.   :0.0000   Min.   :0.00000   Min.   :0.00000   Length:199        
##  1st Qu.:0.0000   1st Qu.:0.00000   1st Qu.:0.00000   Class :character  
##  Median :0.0000   Median :0.00000   Median :0.00000   Mode  :character  
##  Mean   :0.2103   Mean   :0.02714   Mean   :0.09548                     
##  3rd Qu.:0.2350   3rd Qu.:0.00000   3rd Qu.:0.00000                     
##  Max.   :1.2200   Max.   :2.50000   Max.   :1.00000                     
##    Holiday          Functioning.Day   
##  Length:199         Length:199        
##  Class :character   Class :character  
##  Mode  :character   Mode  :character  
##                                       
##                                       
## 
# Check for missing values
# Count the NA entries in every column of `data`; a column total of 0 means
# that column is complete.
colSums(is.na(data))
##                  Date     Rented_Bike_Count                  Hour 
##                     0                     0                     0 
##           Temperature              Humidity            Wind_speed 
##                     0                     0                     0 
##            Visibility Dew.point.temperature       Solar.Radiation 
##                     0                     0                     0 
##              Rainfall              Snowfall               Seasons 
##                     0                     0                     0 
##               Holiday       Functioning.Day 
##                     0                     0
# Visualize data
# Distribution of the hour-of-day values recorded in the dataset.
hist(data$Hour)

# Rental counts grouped by hour of day. In a boxplot formula the numeric
# response goes on the left of `~` and the grouping variable on the right;
# the reverse (Hour ~ Rented_Bike_Count) draws one box per distinct rental
# count, i.e. nearly one box per observation.
boxplot(Rented_Bike_Count ~ Hour, data = data,
        xlab = "Hour", ylab = "Rented bike count")

# Correlation analysis
# Two-sided Pearson test of the linear association between hour of day and
# the number of rented bikes (both settings are the cor.test defaults).
cor.test(
  x = data$Hour,
  y = data$Rented_Bike_Count,
  alternative = "two.sided",
  method = "pearson"
)
## 
##  Pearson's product-moment correlation
## 
## data:  data$Hour and data$Rented_Bike_Count
## t = 7.7106, df = 197, p-value = 6.056e-13
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.3669733 0.5816255
## sample estimates:
##      cor 
## 0.481487
# Linear regression
# NOTE(review): this models Hour as the response with rental count as a
# predictor. If the goal is to explain rentals, the roles are probably
# reversed (Rented_Bike_Count ~ Hour + Temperature) — confirm intent before
# interpreting the coefficients.
fit <- lm(
  formula = Hour ~ Rented_Bike_Count + Temperature,
  data = data
)
summary(fit) # coefficient table, R-squared and overall F-test
## 
## Call:
## lm(formula = Hour ~ Rented_Bike_Count + Temperature, data = data)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -12.986  -3.572  -0.707   3.070  15.122 
## 
## Coefficients:
##                   Estimate Std. Error t value Pr(>|t|)    
## (Intercept)       5.953089   0.881600   6.753 1.61e-10 ***
## Rented_Bike_Count 0.016295   0.002262   7.205 1.22e-11 ***
## Temperature       0.364873   0.106421   3.429  0.00074 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 5.994 on 196 degrees of freedom
## Multiple R-squared:  0.2753, Adjusted R-squared:  0.2679 
## F-statistic: 37.23 on 2 and 196 DF,  p-value: 1.976e-14
# 95% confidence intervals for the coefficients of `fit`
confint(object = fit, level = 0.95)
##                       2.5 %     97.5 %
## (Intercept)       4.2144502 7.69172869
## Rented_Bike_Count 0.0118348 0.02075537
## Temperature       0.1549948 0.57475081
# ANOVA
# Temperature is a numeric column, so it enters the model as a single
# one-degree-of-freedom regression term rather than as a grouping factor.
fit2 <- aov(formula = Rented_Bike_Count ~ Temperature, data = data)
summary(fit2)
##              Df  Sum Sq Mean Sq F value Pr(>F)  
## Temperature   1  217110  217110    6.09 0.0144 *
## Residuals   197 7023320   35651                 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# K-means clustering (k = 3) on three weather features.
# The columns are selected by name rather than by position (5:7) so that a
# change in the CSV column order cannot silently switch the clustered features.
# NOTE(review): the features are on very different scales (Visibility reaches
# 2000 while Wind_speed stays below 6), so Visibility likely dominates the
# distance computation; consider scale() — doing so would change the results.
set.seed(123) # fixed seed so the random initial centres are reproducible
cluster_features <- c("Humidity", "Wind_speed", "Visibility")
clusters <- kmeans(data[, cluster_features], centers = 3)

# Mean temperature within each k-means cluster.
# Prepend the cluster assignment as a `cluster` column, then average
# Temperature per cluster label.
clustered <- cbind(cluster = clusters$cluster, data)
aggregate(Temperature ~ cluster, data = clustered, FUN = mean)
##   cluster Temperature
## 1       1  -0.3707317
## 2       2   2.9255319
## 3       3  -2.1792793