library(forecast) #Forecasting
## Registered S3 method overwritten by 'xts':
## method from
## as.zoo.xts zoo
## Registered S3 method overwritten by 'quantmod':
## method from
## as.zoo.data.frame zoo
## Registered S3 methods overwritten by 'forecast':
## method from
## fitted.fracdiff fracdiff
## residuals.fracdiff fracdiff
library(fpp2) #modeling
## Loading required package: ggplot2
## Loading required package: fma
## Loading required package: expsmooth
library(mice)
## Warning: package 'mice' was built under R version 3.6.1
## Loading required package: lattice
##
## Attaching package: 'mice'
## The following objects are masked from 'package:base':
##
## cbind, rbind
library(data.table)
library(readr)
library(dplyr)
## Warning: package 'dplyr' was built under R version 3.6.1
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:data.table':
##
## between, first, last
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(lubridate)
##
## Attaching package: 'lubridate'
## The following objects are masked from 'package:data.table':
##
## hour, isoweek, mday, minute, month, quarter, second, wday,
## week, yday, year
## The following object is masked from 'package:base':
##
## date
library(tidyr)
##
## Attaching package: 'tidyr'
## The following object is masked from 'package:mice':
##
## complete
# Load the raw CSV inputs ----
# Column types are guessed by readr. The guesses printed the last time this
# script was knitted were:
#   feature files:           city = character, week_start_date = date,
#                            every other column = double
#   label / submission file: city = character,
#                            year, weekofyear, total_cases = double

# Weekly feature tables for the training and the test period.
train <- read_csv("Data/dengue_features_train.csv")
test <- read_csv("Data/dengue_features_test.csv")

# Case counts for the training weeks (city, year, weekofyear, total_cases).
labels <- read_csv("Data/dengue_labels_train.csv")

# Submission template; it was parsed with the same four columns as `labels`.
submission <- read_csv("Data/submission_format.csv")
#Feature Engineering ----
#### Dates ####

# Append the calendar helper columns and the vegetation index to a weekly
# feature table and return the extended table.
#
# Columns are appended in this order (later code selects columns by position,
# so the order must not change):
#   yearWeek  - "<year>-<weekofyear>" label
#   month     - month of week_start_date as a labelled factor
#   month_num - month of week_start_date as a number
#   vegIndex  - sum of the four NDVI readings of that row, NAs ignored
#
# vegIndex is computed strictly per row. The previous implementation used
# group_by(yearWeek) + sum(), which added up the NDVI values of *every* row
# sharing a year-week label (i.e. rows of different cities were pooled and
# received the same value) and left the table grouped afterwards.
add_date_features <- function(df) {
  df$yearWeek <- paste(df$year, df$weekofyear, sep = "-")
  df$month <- lubridate::month(df$week_start_date, label = TRUE)
  df$month_num <- lubridate::month(df$week_start_date)

  ndvi_cols <- c("ndvi_ne", "ndvi_nw", "ndvi_se", "ndvi_sw")
  # A row whose four readings are all NA gets 0, as sum(..., na.rm = TRUE) did.
  df$vegIndex <- unname(rowSums(df[, ndvi_cols], na.rm = TRUE))

  df
}

#train
train <- add_date_features(train)
#test
test <- add_date_features(test)
#Exploratory Data Analysis ----
# Attach the case counts to the feature rows. all = TRUE makes this a full
# outer join: a key found in only one of the two tables is kept, with NA in
# the columns that come from the other table.
train <- merge(
  train,
  labels,
  by = c("city", "year", "weekofyear"),
  all = TRUE
)

# Removing week 53 from data
train <- filter(train, weekofyear != 53)

# One table per city, each sorted in place by (year, weekofyear).
# NOTE(review): this assumes the year/weekofyear labels are in chronological
# order; week_start_date may be a safer sort key - confirm against the data.
train.sj <- filter(train, city == "sj")
setorder(train.sj, year, weekofyear)

train.iq <- filter(train, city == "iq")
setorder(train.iq, year, weekofyear)
###Data imputation####
# Column positions that are held out of the imputation and re-attached
# afterwards. NOTE(review): given the merge and the feature-engineering steps
# these look like city, week_start_date, yearWeek and month - confirm with
# names(train.sj) before changing any upstream column.
held_out <- c(1, 4, 25, 26)

# San Juan: `m` is not set, so mice uses its default number of imputations;
# the 5th completed data set is kept. (The last knitted run reported
# "Number of logged events: 439" for this call.)
train.sj.cat <- train.sj[, held_out]
train.sj.cont <- train.sj[, -held_out]
imp_sj <- mice(
  train.sj.cont,
  defaultMethod = "pmm",
  printFlag = FALSE,
  seed = 500
)
train.sj <- cbind(train.sj.cat, mice::complete(imp_sj, 5))

# Iquitos: 10 imputations are drawn, and again the 5th one is kept.
# (The last knitted run reported "Number of logged events: 101".)
train.iq.cat <- train.iq[, held_out]
train.iq.cont <- train.iq[, -held_out]
imp_iq <- mice(
  train.iq.cont,
  m = 10,
  defaultMethod = "pmm",
  printFlag = FALSE,
  seed = 500
)
train.iq <- cbind(train.iq.cat, mice::complete(imp_iq, 5))
####Test Data Setup####
#Imputation
# Split the test features per city and sort each table in place.
test.sj <- filter(test, city == "sj")
setorder(test.sj, year, weekofyear)

test.iq <- filter(test, city == "iq")
setorder(test.iq, year, weekofyear)

# Same held-out / imputed column split as used for the training tables.
test.cat.sj <- test.sj[, c(1, 4, 25, 26)]
test.cont.sj <- test.sj[, -c(1, 4, 25, 26)]
test.cat.iq <- test.iq[, c(1, 4, 25, 26)]
test.cont.iq <- test.iq[, -c(1, 4, 25, 26)]

# San Juan: default number of imputations, 5th completed data set kept.
# (Last knitted run: "Number of logged events: 447".)
newTrain <- mice(
  test.cont.sj,
  defaultMethod = "pmm",
  printFlag = FALSE,
  seed = 500
)
completeData <- mice::complete(newTrain, 5)
test.sj <- cbind(data.table(test.cat.sj), completeData)

# Iquitos: 5 imputations; complete() is called without an index here, so its
# default completed data set is used rather than the 5th.
# NOTE(review): this differs from the other three imputations - confirm it is
# intentional. (Last knitted run: "Number of logged events: 1".)
newTrain <- mice(
  test.cont.iq,
  m = 5,
  defaultMethod = "pmm",
  printFlag = FALSE,
  seed = 500
)
completeData <- mice::complete(newTrain)
test.iq <- cbind(data.table(test.cat.iq), completeData)
# Weekly case counts per city as ts objects with 52 periods per year. They
# are used further down to generate the Fourier regressors.
#
# Only the start is fixed; the end follows from the number of rows. With a
# hard-coded `end` (previously c(2010, 23) and c(2008, 14)) ts() silently
# truncates or recycles the data as soon as the row count changes, and
# fourier() would then no longer return one row per training row.
iquitos <- ts(train.iq$total_cases, start = c(2000, 26), frequency = 52)
sanJuan <- ts(train.sj$total_cases, start = c(1990, 18), frequency = 52)
#### NNetar ####
#San Juan
# Training regressors: every column of train.sj except positions 1-4, 19 and
# 29, followed by the Fourier terms (K = 6) of the San Juan series.
# NOTE(review): positions 1-4 are the held-out columns re-attached after
# imputation and 29 appears to be total_cases (the target); confirm which
# column position 19 refers to.
myXreg <- as.matrix(train.sj[, c(-1, -2, -3, -4, -19, -29)])
myXreg <- cbind(myXreg, fourier(sanJuan, K = 6))

# Future regressors: same column selection on the test table (which is
# indexed without position 29) plus Fourier terms continued for one row per
# test week.
test.fourier <- fourier(sanJuan, K = 6, h = nrow(test.sj))
myXreg.test <- as.matrix(test.sj[, c(-1, -2, -3, -4, -19)])
myXreg.test <- cbind(myXreg.test, test.fourier)

# The model is fitted on the plain case-count vector, not on `sanJuan`.
fit.nnetar.sj <- nnetar(
  train.sj$total_cases,
  xreg = myXreg,
  lambda = "auto",
  size = 20
)

# The horizon is the number of San Juan test rows, matching myXreg.test.
# It used to be nrow(test), which counts the rows of both cities; the
# forecast horizon is taken from the regressor matrix, so that value was
# only misleading.
plot(forecast(fit.nnetar.sj, h = nrow(test.sj), xreg = myXreg.test))

#Iquitos
# Training regressors: every column of train.iq except positions 1-4, 19 and
# 29, followed by the Fourier terms (K = 6) of the Iquitos series.
# NOTE(review): positions 1-4 are the held-out columns re-attached after
# imputation and 29 appears to be total_cases (the target); confirm which
# column position 19 refers to.
myXreg <- as.matrix(train.iq[, c(-1, -2, -3, -4, -19, -29)])
myXreg <- cbind(myXreg, fourier(iquitos, K = 6))

# Future regressors: same column selection on the test table (which is
# indexed without position 29) plus Fourier terms continued for one row per
# test week.
test.fourier <- fourier(iquitos, K = 6, h = nrow(test.iq))
myXreg.test <- as.matrix(test.iq[, c(-1, -2, -3, -4, -19)])
myXreg.test <- cbind(myXreg.test, test.fourier)

# The model is fitted on the plain case-count vector, not on `iquitos`.
fit.nnetar.iq <- nnetar(
  train.iq$total_cases,
  xreg = myXreg,
  lambda = "auto",
  size = 20
)

# The horizon is the number of Iquitos test rows, matching myXreg.test.
# It used to be nrow(test), which counts the rows of both cities; the
# forecast horizon is taken from the regressor matrix, so that value was
# only misleading.
plot(forecast(fit.nnetar.iq, h = nrow(test.iq), xreg = myXreg.test))
