library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.3 ✔ readr 2.1.4
## ✔ forcats 1.0.0 ✔ stringr 1.5.0
## ✔ ggplot2 3.4.4 ✔ tibble 3.2.1
## ✔ lubridate 1.9.3 ✔ tidyr 1.3.0
## ✔ purrr 1.0.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(ISLR2)
## Warning: package 'ISLR2' was built under R version 4.3.3
library(gtsummary)
library(broom)
#EXPLORE DATA
head(Bikeshare)
## season mnth day hr holiday weekday workingday weathersit temp atemp hum
## 1 1 Jan 1 0 0 6 0 clear 0.24 0.2879 0.81
## 2 1 Jan 1 1 0 6 0 clear 0.22 0.2727 0.80
## 3 1 Jan 1 2 0 6 0 clear 0.22 0.2727 0.80
## 4 1 Jan 1 3 0 6 0 clear 0.24 0.2879 0.75
## 5 1 Jan 1 4 0 6 0 clear 0.24 0.2879 0.75
## 6 1 Jan 1 5 0 6 0 cloudy/misty 0.24 0.2576 0.75
## windspeed casual registered bikers
## 1 0.0000 3 13 16
## 2 0.0000 8 32 40
## 3 0.0000 5 27 32
## 4 0.0000 3 10 13
## 5 0.0000 0 1 1
## 6 0.0896 0 1 1
glimpse(Bikeshare)
## Rows: 8,645
## Columns: 15
## $ season <dbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,…
## $ mnth <fct> Jan, Jan, Jan, Jan, Jan, Jan, Jan, Jan, Jan, Jan, Jan, Jan,…
## $ day <dbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,…
## $ hr <fct> 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 1…
## $ holiday <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ weekday <dbl> 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,…
## $ workingday <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ weathersit <fct> clear, clear, clear, clear, clear, cloudy/misty, clear, cle…
## $ temp <dbl> 0.24, 0.22, 0.22, 0.24, 0.24, 0.24, 0.22, 0.20, 0.24, 0.32,…
## $ atemp <dbl> 0.2879, 0.2727, 0.2727, 0.2879, 0.2879, 0.2576, 0.2727, 0.2…
## $ hum <dbl> 0.81, 0.80, 0.80, 0.75, 0.75, 0.75, 0.80, 0.86, 0.75, 0.76,…
## $ windspeed <dbl> 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0896, 0.0000, 0.0…
## $ casual <dbl> 3, 8, 5, 3, 0, 0, 2, 1, 1, 8, 12, 26, 29, 47, 35, 40, 41, 1…
## $ registered <dbl> 13, 32, 27, 10, 1, 1, 0, 2, 7, 6, 24, 30, 55, 47, 71, 70, 5…
## $ bikers <dbl> 16, 40, 32, 13, 1, 1, 2, 3, 8, 14, 36, 56, 84, 94, 106, 110…
summary(Bikeshare)
## season mnth day hr
## Min. :1.000 May : 744 Min. : 1.0 16 : 365
## 1st Qu.:2.000 July : 744 1st Qu.: 94.0 17 : 365
## Median :3.000 Oct : 743 Median :185.0 12 : 364
## Mean :2.514 Dec : 741 Mean :184.4 13 : 364
## 3rd Qu.:3.000 Aug : 731 3rd Qu.:275.0 14 : 364
## Max. :4.000 March : 730 Max. :365.0 15 : 364
## (Other):4212 (Other):6459
## holiday weekday workingday weathersit
## Min. :0.00000 Min. :0.000 Min. :0.0000 clear :5645
## 1st Qu.:0.00000 1st Qu.:1.000 1st Qu.:0.0000 cloudy/misty :2218
## Median :0.00000 Median :3.000 Median :1.0000 light rain/snow: 781
## Mean :0.02765 Mean :3.013 Mean :0.6837 heavy rain/snow: 1
## 3rd Qu.:0.00000 3rd Qu.:5.000 3rd Qu.:1.0000
## Max. :1.00000 Max. :6.000 Max. :1.0000
##
## temp atemp hum windspeed
## Min. :0.0200 Min. :0.0000 Min. :0.0000 Min. :0.0000
## 1st Qu.:0.3200 1st Qu.:0.3182 1st Qu.:0.4900 1st Qu.:0.1045
## Median :0.5000 Median :0.4848 Median :0.6500 Median :0.1940
## Mean :0.4891 Mean :0.4690 Mean :0.6434 Mean :0.1912
## 3rd Qu.:0.6600 3rd Qu.:0.6212 3rd Qu.:0.8100 3rd Qu.:0.2836
## Max. :0.9600 Max. :1.0000 Max. :1.0000 Max. :0.8507
##
## casual registered bikers
## Min. : 0.0 Min. : 0.0 Min. : 1.0
## 1st Qu.: 3.0 1st Qu.: 26.0 1st Qu.: 31.0
## Median : 14.0 Median : 90.0 Median :109.0
## Mean : 28.6 Mean :115.2 Mean :143.8
## 3rd Qu.: 38.0 3rd Qu.:168.0 3rd Qu.:211.0
## Max. :272.0 Max. :567.0 Max. :651.0
##
ggplot(Bikeshare,aes(y=bikers,x=temp))+geom_point(alpha=0.05)+geom_smooth(method="glm",se=FALSE)+facet_wrap(~weathersit)+theme_minimal()
## `geom_smooth()` using formula = 'y ~ x'
it is poisson and not linear regression because as we look at the scatter plot the heteroscadasticity violated.u can see the funnel shape .errors increasing instead of being constant ## visualize
ggplot(Bikeshare,aes(y=bikers,x=temp))+geom_point(alpha=0.05)+geom_smooth(method="glm",se=FALSE,method.args=list(family="poisson"))+facet_wrap(~weathersit)+theme_minimal()
## `geom_smooth()` using formula = 'y ~ x'
ggplot(Bikeshare,aes(y=bikers,x=temp,color=weathersit))+geom_point(alpha=0.05)+geom_smooth(method="glm",se=FALSE,method.args=list(family="poisson"))+facet_wrap(~workingday)+scale_color_brewer(palette="Dark2")+ggtitle("visualization of poisson regression")
## `geom_smooth()` using formula = 'y ~ x'
#model
model<-glm(bikers~temp+workingday+weathersit,data=Bikeshare,family = "poisson")
model%>%broom::tidy()%>%gt::gt()
| term | estimate | std.error | statistic | p.value |
|---|---|---|---|---|
| (Intercept) | 3.885009924 | 0.003230805 | 1202.489746 | 0.000000e+00 |
| temp | 2.129054163 | 0.004788834 | 444.587172 | 0.000000e+00 |
| workingday | -0.008888259 | 0.001943032 | -4.574427 | 4.775243e-06 |
| weathersitcloudy/misty | -0.042218993 | 0.002131712 | -19.805205 | 2.684687e-87 |
| weathersitlight rain/snow | -0.432089576 | 0.004022716 | -107.412409 | 0.000000e+00 |
| weathersitheavy rain/snow | -0.760994643 | 0.166681086 | -4.565573 | 4.981322e-06 |
when all the variables are zero ,number of bikers is expo(3.89) appr 48.9
For each additional unit of temp,the bikers increases by expo(2.13) appr 8.4
on working days,bikers decrease by expon(-.1) appr 0.99
in cloudy/misty bikers decrease by expon (-0.1) aprox 0.96
model %>%
tbl_regression(exponentiate = TRUE) %>%
add_global_p()
| Characteristic | IRR1 | 95% CI1 | p-value |
|---|---|---|---|
| temp | 8.41 | 8.33, 8.49 | <0.001 |
| workingday | 0.99 | 0.99, 1.0 | <0.001 |
| weathersit | <0.001 | ||
| clear | — | — | |
| cloudy/misty | 0.96 | 0.95, 0.96 | |
| light rain/snow | 0.65 | 0.64, 0.65 | |
| heavy rain/snow | 0.47 | 0.33, 0.64 | |
| 1 IRR = Incidence Rate Ratio, CI = Confidence Interval | |||