This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown see http://rmarkdown.rstudio.com.
When you click the **Knit** button, a document will be generated that includes both the content and the output of any embedded R code chunks within the document. You can embed an R code chunk like this:
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.2 ✔ readr 2.1.4
## ✔ forcats 1.0.0 ✔ stringr 1.5.0
## ✔ ggplot2 3.4.4 ✔ tibble 3.2.1
## ✔ lubridate 1.9.2 ✔ tidyr 1.3.0
## ✔ purrr 1.0.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(readr)
library(knitr)
library(ggplot2)
library(ggpubr)
library(modelr)
library(car)
## Loading required package: carData
##
## Attaching package: 'car'
##
## The following object is masked from 'package:dplyr':
##
## recode
##
## The following object is masked from 'package:purrr':
##
## some
library(broom)
##
## Attaching package: 'broom'
##
## The following object is masked from 'package:modelr':
##
## bootstrap
# Read the bike rental CSV into a data frame and print its column structure.
# NOTE(review): this is an absolute Windows path, so the script only runs where
# that file exists at exactly this location.
bike_data <- read.csv(file = "D:/dataset/db1bike.csv")
# head(bike_data)  # uncomment to preview the first rows
str(object = bike_data)
## 'data.frame': 199 obs. of 14 variables:
## $ Date : chr "01-12-2017" "01-12-2017" "01-12-2017" "01-12-2017" ...
## $ Rented_Bike_Count : int 254 204 173 107 78 100 181 460 930 490 ...
## $ Hour : int 0 1 2 3 4 5 6 7 8 9 ...
## $ Temperature : num -5.2 -5.5 -6 -6.2 -6 -6.4 -6.6 -7.4 -7.6 -6.5 ...
## $ Humidity : int 37 38 39 40 36 37 35 38 37 27 ...
## $ Wind_speed : num 2.2 0.8 1 0.9 2.3 1.5 1.3 0.9 1.1 0.5 ...
## $ Visibility : int 2000 2000 2000 2000 2000 2000 2000 2000 2000 1928 ...
## $ Dew.point.temperature: num -17.6 -17.6 -17.7 -17.6 -18.6 -18.7 -19.5 -19.3 -19.8 -22.4 ...
## $ Solar.Radiation : num 0 0 0 0 0 0 0 0 0.01 0.23 ...
## $ Rainfall : num 0 0 0 0 0 0 0 0 0 0 ...
## $ Snowfall : num 0 0 0 0 0 0 0 0 0 0 ...
## $ Seasons : chr "Winter" "Winter" "Winter" "Winter" ...
## $ Holiday : chr "No Holiday" "No Holiday" "No Holiday" "No Holiday" ...
## $ Functioning.Day : chr "Yes" "Yes" "Yes" "Yes" ...
# Mean rented-bike count for each distinct Visibility value (one output row
# per distinct value).
aggregate(Rented_Bike_Count ~ Visibility, data = bike_data, FUN = mean)
## Visibility Rented_Bike_Count
## 1 66 204.0000
## 2 119 62.0000
## 3 129 22.0000
## 4 130 41.0000
## 5 148 137.0000
## 6 168 149.0000
## 7 173 560.5000
## 8 175 83.0000
## 9 180 205.0000
## 10 194 303.0000
## 11 201 350.0000
## 12 206 65.0000
## 13 207 112.0000
## 14 209 193.0000
## 15 216 285.0000
## 16 218 102.0000
## 17 221 186.0000
## 18 227 218.0000
## 19 228 190.0000
## 20 231 152.0000
## 21 233 222.0000
## 22 235 165.0000
## 23 240 198.0000
## 24 244 85.0000
## 25 247 385.0000
## 26 253 239.5000
## 27 257 200.0000
## 28 265 415.0000
## 29 278 13.0000
## 30 293 144.0000
## 31 309 422.0000
## 32 311 321.0000
## 33 321 273.0000
## 34 327 443.0000
## 35 331 145.0000
## 36 349 333.0000
## 37 358 545.0000
## 38 371 200.0000
## 39 390 495.0000
## 40 399 598.0000
## 41 422 563.0000
## 42 433 621.0000
## 43 485 227.0000
## 44 628 333.0000
## 45 702 498.0000
## 46 793 484.0000
## 47 830 228.0000
## 48 833 431.0000
## 49 914 32.0000
## 50 954 937.0000
## 51 990 308.0000
## 52 994 328.0000
## 53 1011 117.0000
## 54 1060 76.0000
## 55 1167 89.0000
## 56 1178 70.0000
## 57 1202 79.0000
## 58 1210 244.0000
## 59 1221 167.0000
## 60 1224 356.0000
## 61 1256 262.0000
## 62 1261 381.0000
## 63 1265 323.0000
## 64 1269 507.0000
## 65 1276 146.0000
## 66 1296 342.0000
## 67 1309 334.0000
## 68 1324 440.0000
## 69 1352 366.0000
## 70 1380 398.0000
## 71 1406 388.0000
## 72 1434 552.0000
## 73 1436 219.0000
## 74 1446 354.0000
## 75 1448 377.0000
## 76 1469 442.0000
## 77 1487 328.0000
## 78 1492 395.0000
## 79 1516 261.0000
## 80 1518 802.0000
## 81 1558 795.0000
## 82 1559 780.0000
## 83 1565 239.0000
## 84 1585 359.0000
## 85 1595 310.0000
## 86 1620 390.0000
## 87 1666 432.0000
## 88 1687 405.0000
## 89 1697 489.0000
## 90 1728 355.0000
## 91 1741 379.0000
## 92 1767 394.0000
## 93 1776 387.0000
## 94 1780 606.0000
## 95 1799 379.0000
## 96 1808 78.0000
## 97 1817 471.0000
## 98 1823 393.0000
## 99 1844 389.0000
## 100 1853 158.0000
## 101 1862 117.0000
## 102 1864 142.0000
## 103 1887 259.0000
## 104 1888 509.0000
## 105 1891 572.0000
## 106 1893 233.0000
## 107 1902 230.0000
## 108 1906 350.0000
## 109 1913 426.0000
## 110 1918 131.0000
## 111 1928 490.0000
## 112 1934 87.0000
## 113 1936 215.5000
## 114 1938 391.0000
## 115 1941 84.0000
## 116 1945 618.0000
## 117 1953 589.0000
## 118 1955 468.0000
## 119 1959 334.0000
## 120 1962 375.5000
## 121 1963 354.0000
## 122 1966 416.0000
## 123 1967 268.0000
## 124 1968 412.0000
## 125 1974 462.0000
## 126 1975 289.3333
## 127 1977 362.0000
## 128 1978 99.0000
## 129 1982 58.0000
## 130 1983 237.0000
## 131 1992 169.0000
## 132 1996 339.0000
## 133 1999 56.0000
## 134 2000 387.0893
# Fit rented-bike count against Visibility with aov() and print the ANOVA table.
# NOTE(review): Visibility is an integer column, so it enters the model as a
# single numeric term (1 degree of freedom), not as a grouping factor --
# confirm that a one-slope fit rather than a between-group comparison is
# what is intended here.
anova_model <- aov(formula = Rented_Bike_Count ~ Visibility, data = bike_data)
summary(object = anova_model)
## Df Sum Sq Mean Sq F value Pr(>F)
## Visibility 1 361960 361960 10.37 0.0015 **
## Residuals 197 6878469 34916
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Scatter plot of rented-bike count against temperature with a fitted
# straight-line (lm) smoother overlaid.
ggplot(
  data = bike_data,
  mapping = aes(x = Temperature, y = Rented_Bike_Count)
) +
  geom_point() +
  geom_smooth(method = "lm")
## `geom_smooth()` using formula = 'y ~ x'
# Simple linear regression of rented-bike count on Temperature, followed by
# the coefficient table and fit statistics.
temp_model <- lm(formula = Rented_Bike_Count ~ Temperature, data = bike_data)
summary(object = temp_model)
##
## Call:
## lm(formula = Rented_Bike_Count ~ Temperature, data = bike_data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -355.11 -145.19 2.26 94.41 651.52
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 340.410 13.531 25.158 <2e-16 ***
## Temperature 8.148 3.302 2.468 0.0144 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 188.8 on 197 degrees of freedom
## Multiple R-squared: 0.02999, Adjusted R-squared: 0.02506
## F-statistic: 6.09 on 1 and 197 DF, p-value: 0.01445
# Draw the lm diagnostic plots for temp_model on a 2 x 2 grid.
# par() returns the previous settings, so keep them and restore afterwards;
# otherwise every later base-graphics plot in the session stays in the
# 2 x 2 layout.
old_par <- par(mfrow = c(2, 2))
plot(temp_model)
par(old_par)
# Multiple linear regression of rented-bike count on Temperature and
# Visibility together, followed by the coefficient table and fit statistics.
multi_model <- lm(
  formula = Rented_Bike_Count ~ Temperature + Visibility,
  data = bike_data
)
summary(object = multi_model)
##
## Call:
## lm(formula = Rented_Bike_Count ~ Temperature + Visibility, data = bike_data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -329.78 -100.51 -17.25 67.39 660.18
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 177.75636 31.12463 5.711 4.11e-08 ***
## Temperature 19.09835 3.61523 5.283 3.37e-07 ***
## Visibility 0.11985 0.02098 5.712 4.10e-08 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 175.3 on 196 degrees of freedom
## Multiple R-squared: 0.1684, Adjusted R-squared: 0.1599
## F-statistic: 19.84 on 2 and 196 DF, p-value: 1.418e-08