library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.3     ✔ readr     2.1.4
## ✔ forcats   1.0.0     ✔ stringr   1.5.0
## ✔ ggplot2   3.4.4     ✔ tibble    3.2.1
## ✔ lubridate 1.9.3     ✔ tidyr     1.3.0
## ✔ purrr     1.0.2     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(ISLR2)
## Warning: package 'ISLR2' was built under R version 4.3.3
library(gtsummary)
library(broom)

#EXPLORE DATA

head(Bikeshare)
##   season mnth day hr holiday weekday workingday   weathersit temp  atemp  hum
## 1      1  Jan   1  0       0       6          0        clear 0.24 0.2879 0.81
## 2      1  Jan   1  1       0       6          0        clear 0.22 0.2727 0.80
## 3      1  Jan   1  2       0       6          0        clear 0.22 0.2727 0.80
## 4      1  Jan   1  3       0       6          0        clear 0.24 0.2879 0.75
## 5      1  Jan   1  4       0       6          0        clear 0.24 0.2879 0.75
## 6      1  Jan   1  5       0       6          0 cloudy/misty 0.24 0.2576 0.75
##   windspeed casual registered bikers
## 1    0.0000      3         13     16
## 2    0.0000      8         32     40
## 3    0.0000      5         27     32
## 4    0.0000      3         10     13
## 5    0.0000      0          1      1
## 6    0.0896      0          1      1
glimpse(Bikeshare)
## Rows: 8,645
## Columns: 15
## $ season     <dbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,…
## $ mnth       <fct> Jan, Jan, Jan, Jan, Jan, Jan, Jan, Jan, Jan, Jan, Jan, Jan,…
## $ day        <dbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,…
## $ hr         <fct> 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 1…
## $ holiday    <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ weekday    <dbl> 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,…
## $ workingday <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ weathersit <fct> clear, clear, clear, clear, clear, cloudy/misty, clear, cle…
## $ temp       <dbl> 0.24, 0.22, 0.22, 0.24, 0.24, 0.24, 0.22, 0.20, 0.24, 0.32,…
## $ atemp      <dbl> 0.2879, 0.2727, 0.2727, 0.2879, 0.2879, 0.2576, 0.2727, 0.2…
## $ hum        <dbl> 0.81, 0.80, 0.80, 0.75, 0.75, 0.75, 0.80, 0.86, 0.75, 0.76,…
## $ windspeed  <dbl> 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0896, 0.0000, 0.0…
## $ casual     <dbl> 3, 8, 5, 3, 0, 0, 2, 1, 1, 8, 12, 26, 29, 47, 35, 40, 41, 1…
## $ registered <dbl> 13, 32, 27, 10, 1, 1, 0, 2, 7, 6, 24, 30, 55, 47, 71, 70, 5…
## $ bikers     <dbl> 16, 40, 32, 13, 1, 1, 2, 3, 8, 14, 36, 56, 84, 94, 106, 110…
summary(Bikeshare)
##      season           mnth           day              hr      
##  Min.   :1.000   May    : 744   Min.   :  1.0   16     : 365  
##  1st Qu.:2.000   July   : 744   1st Qu.: 94.0   17     : 365  
##  Median :3.000   Oct    : 743   Median :185.0   12     : 364  
##  Mean   :2.514   Dec    : 741   Mean   :184.4   13     : 364  
##  3rd Qu.:3.000   Aug    : 731   3rd Qu.:275.0   14     : 364  
##  Max.   :4.000   March  : 730   Max.   :365.0   15     : 364  
##                  (Other):4212                   (Other):6459  
##     holiday           weekday        workingday               weathersit  
##  Min.   :0.00000   Min.   :0.000   Min.   :0.0000   clear          :5645  
##  1st Qu.:0.00000   1st Qu.:1.000   1st Qu.:0.0000   cloudy/misty   :2218  
##  Median :0.00000   Median :3.000   Median :1.0000   light rain/snow: 781  
##  Mean   :0.02765   Mean   :3.013   Mean   :0.6837   heavy rain/snow:   1  
##  3rd Qu.:0.00000   3rd Qu.:5.000   3rd Qu.:1.0000                         
##  Max.   :1.00000   Max.   :6.000   Max.   :1.0000                         
##                                                                           
##       temp            atemp             hum           windspeed     
##  Min.   :0.0200   Min.   :0.0000   Min.   :0.0000   Min.   :0.0000  
##  1st Qu.:0.3200   1st Qu.:0.3182   1st Qu.:0.4900   1st Qu.:0.1045  
##  Median :0.5000   Median :0.4848   Median :0.6500   Median :0.1940  
##  Mean   :0.4891   Mean   :0.4690   Mean   :0.6434   Mean   :0.1912  
##  3rd Qu.:0.6600   3rd Qu.:0.6212   3rd Qu.:0.8100   3rd Qu.:0.2836  
##  Max.   :0.9600   Max.   :1.0000   Max.   :1.0000   Max.   :0.8507  
##                                                                     
##      casual        registered        bikers     
##  Min.   :  0.0   Min.   :  0.0   Min.   :  1.0  
##  1st Qu.:  3.0   1st Qu.: 26.0   1st Qu.: 31.0  
##  Median : 14.0   Median : 90.0   Median :109.0  
##  Mean   : 28.6   Mean   :115.2   Mean   :143.8  
##  3rd Qu.: 38.0   3rd Qu.:168.0   3rd Qu.:211.0  
##  Max.   :272.0   Max.   :567.0   Max.   :651.0  
## 

why poisson and not linear regression

ggplot(Bikeshare,aes(y=bikers,x=temp))+geom_point(alpha=0.05)+geom_smooth(method="glm",se=FALSE)+facet_wrap(~weathersit)+theme_minimal()
## `geom_smooth()` using formula = 'y ~ x'

it is poisson and not linear regression because as we look at the scatter plot the heteroscadasticity violated.u can see the funnel shape .errors increasing instead of being constant ## visualize

ggplot(Bikeshare,aes(y=bikers,x=temp))+geom_point(alpha=0.05)+geom_smooth(method="glm",se=FALSE,method.args=list(family="poisson"))+facet_wrap(~weathersit)+theme_minimal()
## `geom_smooth()` using formula = 'y ~ x'

ggplot(Bikeshare,aes(y=bikers,x=temp,color=weathersit))+geom_point(alpha=0.05)+geom_smooth(method="glm",se=FALSE,method.args=list(family="poisson"))+facet_wrap(~workingday)+scale_color_brewer(palette="Dark2")+ggtitle("visualization of poisson regression")
## `geom_smooth()` using formula = 'y ~ x'

#model

model<-glm(bikers~temp+workingday+weathersit,data=Bikeshare,family = "poisson")
model%>%broom::tidy()%>%gt::gt()
term estimate std.error statistic p.value
(Intercept) 3.885009924 0.003230805 1202.489746 0.000000e+00
temp 2.129054163 0.004788834 444.587172 0.000000e+00
workingday -0.008888259 0.001943032 -4.574427 4.775243e-06
weathersitcloudy/misty -0.042218993 0.002131712 -19.805205 2.684687e-87
weathersitlight rain/snow -0.432089576 0.004022716 -107.412409 0.000000e+00
weathersitheavy rain/snow -0.760994643 0.166681086 -4.565573 4.981322e-06

interpretation

when all the variables are zero ,number of bikers is expo(3.89) appr 48.9

For each additional unit of temp,the bikers increases by expo(2.13) appr 8.4

on working days,bikers decrease by expon(-.1) appr 0.99

in cloudy/misty bikers decrease by expon (-0.1) aprox 0.96

model %>%
tbl_regression(exponentiate = TRUE) %>%
add_global_p()
Characteristic IRR1 95% CI1 p-value
temp 8.41 8.33, 8.49 <0.001
workingday 0.99 0.99, 1.0 <0.001
weathersit

<0.001
    clear
    cloudy/misty 0.96 0.95, 0.96
    light rain/snow 0.65 0.64, 0.65
    heavy rain/snow 0.47 0.33, 0.64
1 IRR = Incidence Rate Ratio, CI = Confidence Interval

please ask for interpretation