bikedata$Start.Date<- as.Date(bikedata$Start.Date,
format="%d/%m/%Y %H:%M",tz="UTC")
s<- bikedata
y<- count(s)
y$weekdays<- weekdays(y$Start.Date)
y$day <- as.numeric(format(y$Start.Date, "%j"))
colnames(y)[4]<- "day"
y$weekdays<- revalue(y$weekdays, c('Monday'='1','Tuesday'='2','Wednesday'='3','Thursday'='4',
'Friday'='5','Saturday'='6','Sunday'='7'))
y$weekdays<- as.integer(y$weekdays)
head(y,10)
## Start.Date freq weekdays day
## 1 2017-01-01 6536 7 1
## 2 2017-01-02 11979 1 2
## 3 2017-01-03 19622 2 3
## 4 2017-01-04 22144 3 4
## 5 2017-01-05 23594 4 5
## 6 2017-01-06 18975 5 6
## 7 2017-01-07 13881 6 7
## 8 2017-01-08 13538 7 8
## 9 2017-01-09 38091 1 9
## 10 2017-01-10 28106 2 10
ggplot(y,aes(x=y$Start.Date,y=y$freq))+geom_path()

gam8<- mgcv ::gam(y$freq ~ s(as.numeric(y$Start.Date),bs = "cc", k = 300) +
s(y$weekdays,bs = "ps", k = 7)+
ti(as.numeric(y$Start.Date),y$weekdays, k= c(7,7),
bs = c("cc", "ps")),
data = y,
family = "poisson")
dispersiontest(gam8)
##
## Overdispersion test
##
## data: gam8
## z = 9.6193, p-value < 2.2e-16
## alternative hypothesis: true dispersion is greater than 1
## sample estimates:
## dispersion
## 10.32033
#############################################################
gam9 <- mgcv ::gam(y$freq ~ s(as.numeric(y$Start.Date),bs = "cc", k = 290) + ####using
s(y$weekdays,bs = "ps", k = 7)+
ti(as.numeric(y$Start.Date),y$weekdays, k= c(7,7),
bs = c("cc", "ps")),
data = y,
family = "quasipoisson")
acf(gam9$residuals)

pacf(gam9$residuals)

############################################################################
gam15<- mgcv ::gamm(y$freq ~ s(as.numeric(y$Start.Date),bs = "cc", k = 290) + ######using
s(y$weekdays,bs = "ps", k = 7)+
t2(as.numeric(y$Start.Date),y$weekdays, k= c(7,7),
bs = c("cc", "ps"),full = T),
data = y,
family = "quasipoisson")
##
## Maximum number of PQL iterations: 20
## iteration 1
## iteration 2
## iteration 3
acf(resid(gam15$lme))

pacf(resid(gam15$lme))

#################################################################
gam14<- gamm(y$freq ~ s(as.numeric(y$Start.Date),bs = "cc", k = 290) +
s(y$weekdays,bs = "ps", k = 7)+
t2(as.numeric(y$Start.Date),y$weekdays, k= c(7,7),
bs = c("cc", "ps"),full = T),
data = y,
family = "quasipoisson",
correlation = corARMA(form = ~ 1|day, q = 1))
##
## Maximum number of PQL iterations: 20
## iteration 1
## iteration 2
## iteration 3
############################################################################
gam10<- mgcv ::gam(y$freq ~ s(day,bs = "cc", k = 300) + ###################################using
s(y$weekdays,bs = "ps", k = 7)+
ti(as.numeric(y$Start.Date),y$weekdays, k= c(7,7),
bs = c("cc", "ps")),
data = y,
family = "gaussian")
par(mfrow = c(2,2))
AIC(gam9,gam10,gam14$lme,gam15$lme)
## df AIC
## gam9 82.46330 NA
## gam10 99.91576 6619.1186
## gam14$lme 8.00000 -202.1755
## gam15$lme 7.00000 -204.1755
summary(gam9)$r.sq
## [1] 0.7291145
summary(gam10)$r.sq
## [1] 0.7623387
plot(gam9, shade = TRUE)
plot(gam10, shade = TRUE)

plot(gam14$gam, shade = TRUE)

plot(gam15$gam, shade = TRUE)

datas <- data.frame(exp(predict(gam14$gam)),y$Start.Date,y$freq,gam9$fitted.values,
gam10$fitted.values,exp(predict(gam15$gam)))
plot1<- ggplot(data=datas,aes(x=datas$y.Start.Date,y=datas$y.freq))+
geom_line()+
geom_line(data=datas,aes(x=datas$y.Start.Date,y=datas$gam9.fitted.values),
col="red")+ggtitle("gam quasipoisson")
plot2<- ggplot(data=datas,aes(x=datas$y.Start.Date,y=datas$y.freq))+
geom_line()+
geom_line(data=datas,aes(x=datas$y.Start.Date,y=datas$gam10.fitted.values),
col="red")+ggtitle("gaussian")
plot3<-ggplot(data=datas,aes(x=datas$y.Start.Date,y=datas$y.freq))+
geom_line()+
geom_line(data=datas,aes(x=datas$y.Start.Date,y=datas$exp.predict.gam14.gam..),col="red")+
ggtitle("gammquasipoisson one autoerror")
plot4<- ggplot(data=datas,aes(x=datas$y.Start.Date,y=datas$y.freq))+
geom_line()+
geom_line(data=datas,aes(x=datas$y.Start.Date,y=datas$exp.predict.gam15.gam..),col="red")+
ggtitle("gammquasipoisson two autoerror")
grid.arrange(plot1,plot2,plot3,plot4)
