Import Library

knitr::opts_chunk$set(echo = TRUE)
# Packages this notebook depends on.
packages <- c("dplyr", "forecast", "ggplot2", "reshape2", "readxl")

# Install any package that is not yet available, then attach it.
# requireNamespace() only checks availability; library() does the attaching
# and stops with an error if the package still cannot be loaded.
for (p in packages) {
  if (!requireNamespace(p, quietly = TRUE)) {
    install.packages(p)
  }
  library(p, character.only = TRUE)
}

Glimpse of Data

df <- read_excel( "./Singapore_Residents_by_Planning.xlsx")

-
/
                                                                                                                        
head(df)

Filter only Punggol PA & Clean Data

punggol_PA <- df %>%
  filter(`Planning Area` == "Punggol") %>%
  filter(`Subzone` != "Total")  %>%
   filter(`Age Group` != "Total") 
str(punggol_PA)
Classes 'tbl_df', 'tbl' and 'data.frame':   1197 obs. of  13 variables:
 $ Planning Area   : chr  "Punggol" "Punggol" "Punggol" "Punggol" ...
 $ Subzone         : chr  "Coney Island" "Coney Island" "Coney Island" "Coney Island" ...
 $ Age Group       : chr  "0 - 4" "0 - 4" "0 - 4" "0 - 4" ...
 $ Type of Dwelling: chr  "Total" "Total HDB^" "1- and 2-Room Flats*" "3-Room Flats" ...
 $ 2011            : chr  "-" "-" "-" "-" ...
 $ 2012            : chr  "-" "-" "-" "-" ...
 $ 2013            : chr  "-" "-" "-" "-" ...
 $ 2014            : chr  "-" "-" "-" "-" ...
 $ 2015            : chr  "-" "-" "-" "-" ...
 $ 2016            : chr  "-" "-" "-" "-" ...
 $ 2017            : chr  "-" "-" "-" "-" ...
 $ 2018            : chr  "-" "-" "-" "-" ...
 $ 2019            : chr  "-" "-" "-" "-" ...
punggol_PA$`2011`[punggol_PA$`2011` == "-" | is.na(punggol_PA$`2011`)] <- 0
punggol_PA$`2012`[punggol_PA$`2012` == "-" | is.na(punggol_PA$`2012`)] <- 0
punggol_PA$`2013`[punggol_PA$`2013` == "-" | is.na(punggol_PA$`2013`)] <- 0
punggol_PA$`2014`[punggol_PA$`2014` == "-" | is.na(punggol_PA$`2014`)] <- 0
punggol_PA$`2015`[punggol_PA$`2015` == "-" | is.na(punggol_PA$`2015`)] <- 0
punggol_PA$`2016`[punggol_PA$`2016` == "-" | is.na(punggol_PA$`2016`)] <- 0
punggol_PA$`2017`[punggol_PA$`2017` == "-" | is.na(punggol_PA$`2017`)] <- 0
punggol_PA$`2018`[punggol_PA$`2018` == "-" | is.na(punggol_PA$`2018`)] <- 0
punggol_PA$`2019`[punggol_PA$`2019` == "-" | is.na(punggol_PA$`2019`)] <- 0
punggol_PA[,5:13] <- punggol_PA[,5:13] %>% mutate_if(is.character, as.numeric)
punggol_PA <-punggol_PA[ , c(1, 2, 3,5:13)]
tail(punggol_PA,20)

Group By PA,SZ, AGE GROUP

punggol_PA_aggregated <- punggol_PA %>% 
  group_by(`Planning Area`, `Subzone`, `Age Group`) %>% 
  summarize_all(sum)

Classifying age groups based on age group classification

#age groups classification
economic_active <- c('25 - 29','30 - 34','35 - 39','40 - 44','45 - 49', '50 - 54', '55 - 59', '60 - 64')
younger <- c('0 - 4','10 - 14','15 - 19','20 - 24')
#age groups: Aged (0-24) economic active, younger group
punggol_PA_aggregated$`Economic Group` <- ifelse(punggol_PA_aggregated$`Age Group` %in% economic_active, 'Economic  Active', ifelse(punggol_PA_aggregated$`Age Group` %in% younger, 'Younger Group', 'Aged Group'))
                  
tail(punggol_PA_aggregated,20)
NA

Reselect fields and grouping

younger_age_group <-  punggol_PA_aggregated %>% filter(`Economic Group` == "Younger Group")
younger_age_group$colname <- paste(younger_age_group$Subzone,younger_age_group$`Age Group`)
younger_age_group <-younger_age_group[ , c(14,4:12)]
younger_age_group <- younger_age_group %>% 
  group_by(`colname`) %>% 
  summarize_all(sum)
reverse<-data.frame(t(younger_age_group))
colnames(reverse) <- unlist(reverse[row.names(reverse)=='colname',])
reverse$Year <- rownames(reverse)
reverse<-reverse[-1,]
rownames(reverse) <- NULL
reverse[,1:28] <- lapply(reverse, function(x) as.numeric(as.character(x)))
provided 29 variables to replace 28 variables
#Overall diagram
meltdf <- melt(reverse,id="Year")
ggplot(meltdf,aes(x=Year,y=value,colour=variable,group=variable)) + geom_line()

Matilda’s: Younger Age Group Analysis

meltdf <- melt(reverse[, c(5:8,29)],id="Year")
ggplot(meltdf,aes(x=Year,y=value,colour=variable,group=variable)) + geom_line()

Northshore’s: Younger Age Group Analysis

meltdf <- melt(reverse[, c(9:12,29)],id="Year")
ggplot(meltdf,aes(x=Year,y=value,colour=variable,group=variable)) + geom_line()

1 Punggol Field’s: Younger Age Group Analysis

meltdf <- melt(reverse[, c(17:20,29)],id="Year")
ggplot(meltdf,aes(x=Year,y=value,colour=variable,group=variable)) + geom_line()

2 Punggol Town Centre: Younger Age Group Analysis

meltdf <- melt(reverse[, c(21:24,29)],id="Year")
ggplot(meltdf,aes(x=Year,y=value,colour=variable,group=variable)) + geom_line()

3 Waterway East: Younger Age Group Analysis

meltdf <- melt(reverse[, c(25:28,29)],id="Year")
ggplot(meltdf,aes(x=Year,y=value,colour=variable,group=variable)) + geom_line()

punggol_PA_aggregated <-punggol_PA_aggregated[ , c(2,13,4:12)]
punggol_PA_aggregated <- punggol_PA_aggregated %>% 
  group_by(`Subzone`, `Economic Group`) %>% 
  summarize_all(sum)
reverse<-data.frame(t(punggol_PA_aggregated))
reverse
#Conney
conney_grp<-reverse[1:3]
conney_grp<-conney_grp[-1,]
colnames(conney_grp) <- unlist(conney_grp[row.names(conney_grp)=='Economic Group',])
conney_grp$Year <- rownames(conney_grp)
conney_grp<-conney_grp[-1,]
rownames(conney_grp) <- NULL
conney_grp$`Aged Group`<-as.numeric(levels(conney_grp$`Aged Group`)[conney_grp$`Aged Group`])
conney_grp$`Economic  Active`<-as.numeric(levels(conney_grp$`Economic  Active`)[conney_grp$`Economic  Active`])
conney_grp$`Younger Group`<-as.numeric(levels(conney_grp$`Younger Group`)[conney_grp$`Younger Group`])
#Matilda
matilda_grp<-reverse[4:6]
matilda_grp<-matilda_grp[-1,]
colnames(matilda_grp) <- unlist(matilda_grp[row.names(matilda_grp)=='Economic Group',])
matilda_grp$Year <- rownames(matilda_grp)
matilda_grp<-matilda_grp[-1,]
rownames(matilda_grp) <- NULL
matilda_grp$`Aged Group`<-as.numeric(levels(matilda_grp$`Aged Group`)[matilda_grp$`Aged Group`])
matilda_grp$`Economic  Active`<-as.numeric(levels(matilda_grp$`Economic  Active`)[matilda_grp$`Economic  Active`])
matilda_grp$`Younger Group`<-as.numeric(levels(matilda_grp$`Younger Group`)[matilda_grp$`Younger Group`])
#Northshore
northsore_group<-reverse[7:9]
northsore_group<-northsore_group[-1,]
colnames(northsore_group) <- unlist(northsore_group[row.names(northsore_group)=='Economic Group',])
northsore_group$Year <- rownames(northsore_group)
northsore_group<-northsore_group[-1,]
rownames(northsore_group) <- NULL
northsore_group$`Aged Group`<-as.numeric(levels(northsore_group$`Aged Group`)[northsore_group$`Aged Group`])
northsore_group$`Economic  Active`<-as.numeric(levels(northsore_group$`Economic  Active`)[northsore_group$`Economic  Active`])
northsore_group$`Younger Group`<-as.numeric(levels(northsore_group$`Younger Group`)[northsore_group$`Younger Group`])
#Punggol Canal
punggol_canal_group<-reverse[10:12]
punggol_canal_group<-punggol_canal_group[-1,]
colnames(punggol_canal_group) <- unlist(punggol_canal_group[row.names(punggol_canal_group)=='Economic Group',])
punggol_canal_group$Year <- rownames(punggol_canal_group)
punggol_canal_group<-punggol_canal_group[-1,]
rownames(punggol_canal_group) <- NULL
punggol_canal_group$`Aged Group`<-as.numeric(levels(punggol_canal_group$`Aged Group`)[punggol_canal_group$`Aged Group`])
punggol_canal_group$`Economic  Active`<-as.numeric(levels(punggol_canal_group$`Economic  Active`)[punggol_canal_group$`Economic  Active`])
punggol_canal_group$`Younger Group`<-as.numeric(levels(punggol_canal_group$`Younger Group`)[punggol_canal_group$`Younger Group`])
#Punggol Field
punggol_field_group <-reverse[13:15]
punggol_field_group<-punggol_field_group[-1,]
colnames(punggol_field_group) <- unlist(punggol_field_group[row.names(punggol_field_group)=='Economic Group',])
punggol_field_group$Year <- rownames(punggol_field_group)
punggol_field_group<-punggol_field_group[-1,]
rownames(punggol_field_group) <- NULL
punggol_field_group$`Aged Group`<-as.numeric(levels(punggol_field_group$`Aged Group`)[punggol_field_group$`Aged Group`])
punggol_field_group$`Economic  Active`<-as.numeric(levels(punggol_field_group$`Economic  Active`)[punggol_field_group$`Economic  Active`])
punggol_field_group$`Younger Group`<-as.numeric(levels(punggol_field_group$`Younger Group`)[punggol_field_group$`Younger Group`])
#Punggol Town Centre
punggol_tc_group <-reverse[16:18]
punggol_tc_group<-punggol_tc_group[-1,]
colnames(punggol_tc_group) <- unlist(punggol_tc_group[row.names(punggol_tc_group)=='Economic Group',])
punggol_tc_group$Year <- rownames(punggol_tc_group)
punggol_tc_group<-punggol_tc_group[-1,]
rownames(punggol_tc_group) <- NULL
punggol_tc_group$`Aged Group`<-as.numeric(levels(punggol_tc_group$`Aged Group`)[punggol_tc_group$`Aged Group`])
punggol_tc_group$`Economic  Active`<-as.numeric(levels(punggol_tc_group$`Economic  Active`)[punggol_tc_group$`Economic  Active`])
punggol_tc_group$`Younger Group`<-as.numeric(levels(punggol_tc_group$`Younger Group`)[punggol_tc_group$`Younger Group`])
#Waterway East
waterway_east_group <-reverse[19:21]
waterway_east_group<-waterway_east_group[-1,]
colnames(waterway_east_group) <- unlist(waterway_east_group[row.names(waterway_east_group)=='Economic Group',])
waterway_east_group$Year <- rownames(waterway_east_group)
waterway_east_group<-waterway_east_group[-1,]
rownames(waterway_east_group) <- NULL
waterway_east_group$`Aged Group`<-as.numeric(levels(waterway_east_group$`Aged Group`)[waterway_east_group$`Aged Group`])
waterway_east_group$`Economic  Active`<-as.numeric(levels(waterway_east_group$`Economic  Active`)[waterway_east_group$`Economic  Active`])
waterway_east_group$`Younger Group`<-as.numeric(levels(waterway_east_group$`Younger Group`)[waterway_east_group$`Younger Group`])

Matilda’s Current Population

# Line chart of Matilda's population per economic group and year.
# The colour aesthetic is mapped to the legend text itself, so each line always
# carries its own label. Previously placeholder strings ("steelblue", "green",
# "darkred") were relabelled by position; discrete scales sort their levels
# alphabetically, which swapped the Younger and Aged labels in the legend.
# group = 1 joins the points into one line per layer on the discrete Year axis.
ggplot(matilda_grp, aes(x = Year, group = 1)) +
  geom_line(aes(y = `Younger Group`, color = "Younger Group (Below 25)")) +
  geom_line(aes(y = `Economic  Active`, color = "Economic Active (25-64)")) +
  geom_line(aes(y = `Aged Group`, color = "Aged Group (Above 64)")) +
  scale_color_discrete(
    name = "Population Group",
    # breaks fixes the legend order: youngest to oldest
    breaks = c("Younger Group (Below 25)", "Economic Active (25-64)", "Aged Group (Above 64)")
  ) +
  labs(title = "Matilda's Current Population",
       x = "Year", y = "Total Population")

Northshore’s Current Population

# Line chart of Northshore's population per economic group and year.
# The colour aesthetic is mapped to the legend text itself, so each line always
# carries its own label. Previously placeholder strings ("steelblue", "green",
# "darkred") were relabelled by position; discrete scales sort their levels
# alphabetically, which swapped the Younger and Aged labels in the legend.
# group = 1 joins the points into one line per layer on the discrete Year axis.
ggplot(northsore_group, aes(x = Year, group = 1)) +
  geom_line(aes(y = `Younger Group`, color = "Younger Group (Below 25)")) +
  geom_line(aes(y = `Economic  Active`, color = "Economic Active (25-64)")) +
  geom_line(aes(y = `Aged Group`, color = "Aged Group (Above 64)")) +
  scale_color_discrete(
    name = "Population Group",
    # breaks fixes the legend order: youngest to oldest
    breaks = c("Younger Group (Below 25)", "Economic Active (25-64)", "Aged Group (Above 64)")
  ) +
  labs(title = "Northshore's Current Population",
       x = "Year", y = "Total Population")

Punggol Canal’s Current Population

# Line chart of Punggol Canal's population per economic group and year.
# The colour aesthetic is mapped to the legend text itself, so each line always
# carries its own label. Previously placeholder strings ("steelblue", "green",
# "darkred") were relabelled by position; discrete scales sort their levels
# alphabetically, which swapped the Younger and Aged labels in the legend.
# group = 1 joins the points into one line per layer on the discrete Year axis.
ggplot(punggol_canal_group, aes(x = Year, group = 1)) +
  geom_line(aes(y = `Younger Group`, color = "Younger Group (Below 25)")) +
  geom_line(aes(y = `Economic  Active`, color = "Economic Active (25-64)")) +
  geom_line(aes(y = `Aged Group`, color = "Aged Group (Above 64)")) +
  scale_color_discrete(
    name = "Population Group",
    # breaks fixes the legend order: youngest to oldest
    breaks = c("Younger Group (Below 25)", "Economic Active (25-64)", "Aged Group (Above 64)")
  ) +
  labs(title = "Punggol Canal's Current Population",
       x = "Year", y = "Total Population")

Punggol Field’s Current Population

# Line chart of Punggol Field's population per economic group and year.
# The colour aesthetic is mapped to the legend text itself, so each line always
# carries its own label. Previously placeholder strings ("steelblue", "green",
# "darkred") were relabelled by position; discrete scales sort their levels
# alphabetically, which swapped the Younger and Aged labels in the legend.
# group = 1 joins the points into one line per layer on the discrete Year axis.
ggplot(punggol_field_group, aes(x = Year, group = 1)) +
  geom_line(aes(y = `Younger Group`, color = "Younger Group (Below 25)")) +
  geom_line(aes(y = `Economic  Active`, color = "Economic Active (25-64)")) +
  geom_line(aes(y = `Aged Group`, color = "Aged Group (Above 64)")) +
  scale_color_discrete(
    name = "Population Group",
    # breaks fixes the legend order: youngest to oldest
    breaks = c("Younger Group (Below 25)", "Economic Active (25-64)", "Aged Group (Above 64)")
  ) +
  labs(title = "Punggol Field's Current Population",
       x = "Year", y = "Total Population")

Punggol Town Centre’s Current Population

# Line chart of Punggol Town Centre's population per economic group and year.
# The colour aesthetic is mapped to the legend text itself, so each line always
# carries its own label. Previously placeholder strings ("steelblue", "green",
# "darkred") were relabelled by position; discrete scales sort their levels
# alphabetically, which swapped the Younger and Aged labels in the legend.
# group = 1 joins the points into one line per layer on the discrete Year axis.
ggplot(punggol_tc_group, aes(x = Year, group = 1)) +
  geom_line(aes(y = `Younger Group`, color = "Younger Group (Below 25)")) +
  geom_line(aes(y = `Economic  Active`, color = "Economic Active (25-64)")) +
  geom_line(aes(y = `Aged Group`, color = "Aged Group (Above 64)")) +
  scale_color_discrete(
    name = "Population Group",
    # breaks fixes the legend order: youngest to oldest
    breaks = c("Younger Group (Below 25)", "Economic Active (25-64)", "Aged Group (Above 64)")
  ) +
  labs(title = "Punggol Town Centre's Current Population",
       x = "Year", y = "Total Population")

Waterway East’s Current Population

# Line chart of Waterway East's population per economic group and year.
# The colour aesthetic is mapped to the legend text itself, so each line always
# carries its own label. Previously placeholder strings ("steelblue", "green",
# "darkred") were relabelled by position; discrete scales sort their levels
# alphabetically, which swapped the Younger and Aged labels in the legend.
# group = 1 joins the points into one line per layer on the discrete Year axis.
ggplot(waterway_east_group, aes(x = Year, group = 1)) +
  geom_line(aes(y = `Younger Group`, color = "Younger Group (Below 25)")) +
  geom_line(aes(y = `Economic  Active`, color = "Economic Active (25-64)")) +
  geom_line(aes(y = `Aged Group`, color = "Aged Group (Above 64)")) +
  scale_color_discrete(
    name = "Population Group",
    # breaks fixes the legend order: youngest to oldest
    breaks = c("Younger Group (Below 25)", "Economic Active (25-64)", "Aged Group (Above 64)")
  ) +
  labs(title = "Waterway East's Current Population",
       x = "Year", y = "Total Population")

5 Forecasting Matilda’s Population

https://stackoverflow.com/questions/47852567/arima-forecast-keep-getting-error-data-must-be-of-a-vector-type-was-null https://stackoverflow.com/questions/53099289/error-in-array-data-must-be-of-a-vector-type-was-null-in-r

Developing an ARIMA model for Matilda’s population forecast. Assumption: the data is stationary, which means that trend and seasonality have been removed due to a consistent demand.

#predict aged group
matilda_grp$`Aged Group`
[1]  2680  3850  5890  9000 11430 14450 17320 20760 23130
matilda_forecast_AG <- ts(matilda_grp$`Aged Group`,start = c(2011,1),  end=c(2019, 1),frequency = 1)
plot(matilda_forecast_AG)

matilda_ARIMAfit_AG <- auto.arima(matilda_forecast_AG)
pred_AG <- forecast(matilda_ARIMAfit_AG, h = 5)
plot(pred_AG)

pred_AG_df <- data.frame(pred_AG)
#predict economic active
matilda_grp$`Economic  Active`
[1] 12560 21230 34360 48880 56200 65590 71980 81070 86090
matilda_forecast_EA <- ts(matilda_grp$`Economic  Active`,start = c(2011,1),  end=c(2019, 1),frequency = 1)
plot(matilda_forecast_EA)

matilda_ARIMAfit_EA <- auto.arima(matilda_forecast_EA)
pred_EA <- forecast(matilda_ARIMAfit_EA, h = 5)
plot(pred_EA)

pred_EA_df <- data.frame(pred_EA)

5.1 Matilda’s Forecast Data Table

#predict younger gen
matilda_grp$`Younger Group`
[1]  4730  7690 12320 18150 21760 26570 29670 32780 34310
matilda_forecast_YG <- ts(matilda_grp$`Younger Group`,start = c(2011,1),  end=c(2019, 1),frequency = 1)
plot(matilda_forecast_YG)

matilda_ARIMAfit_YG <- auto.arima(matilda_forecast_YG)
pred_YG <- forecast(matilda_ARIMAfit_YG, h = 5)
plot(pred_YG)

pred_YG_df <- data.frame(pred_YG)
#add ARIMA prediction to current 
matilda_grp[nrow(matilda_grp) + 1,] <-  c(pred_AG_df[1,]$Point.Forecast, pred_EA_df[1,]$Point.Forecast, pred_YG_df[1,]$Point.Forecast, 2020)
matilda_grp[nrow(matilda_grp) + 1,] <-  c(pred_AG_df[2,]$Point.Forecast, pred_EA_df[2,]$Point.Forecast, pred_YG_df[2,]$Point.Forecast, 2021)
matilda_grp[nrow(matilda_grp) + 1,] <-  c(pred_AG_df[3,]$Point.Forecast, pred_EA_df[3,]$Point.Forecast, pred_YG_df[3,]$Point.Forecast, 2022)
matilda_grp[nrow(matilda_grp) + 1,] <-  c(pred_AG_df[4,]$Point.Forecast, pred_EA_df[4,]$Point.Forecast, pred_YG_df[4,]$Point.Forecast, 2023)
matilda_grp[nrow(matilda_grp) + 1,] <-  c(pred_AG_df[5,]$Point.Forecast, pred_EA_df[5,]$Point.Forecast, pred_YG_df[5,]$Point.Forecast, 2024)
matilda_grp

6 Forecasting Northshore’s Population

An ARIMA(0,0,0) model with zero mean is white noise, which means that the errors are uncorrelated across time.

#predict aged group
northsore_group$`Aged Group`
[1] 100 100 120 120 120 120 100  80 120
northsore_forecast_AG <- ts(northsore_group$`Aged Group`,start = c(2011,1),  end=c(2019, 1),frequency = 1)
plot(northsore_forecast_AG)

northsore_ARIMAfit_AG <- auto.arima(northsore_forecast_AG)
pred_AG <- forecast(northsore_ARIMAfit_AG, h = 5)
plot(pred_AG)

pred_AG_df <- data.frame(pred_AG)
#predict economic active
northsore_group$`Economic  Active`
[1] 320 300 360 330 360 340 340 300 340
northsore_forecast_EA <- ts(northsore_group$`Economic  Active`,start = c(2011,1),  end=c(2019, 1),frequency = 1)
plot(northsore_forecast_EA)

northsore_ARIMAfit_EA <- auto.arima(northsore_forecast_EA)
pred_EA <- forecast(northsore_ARIMAfit_EA, h = 5)
plot(pred_EA)

pred_EA_df <- data.frame(pred_EA)
#predict younger gen
northsore_group$`Younger Group`
[1] 120 140 140 140 160 180 160 140 140
northsore_forecast_YG <- ts(northsore_group$`Younger Group`,start = c(2011,1),  end=c(2019, 1),frequency = 1)
plot(northsore_forecast_YG)

northsore_ARIMAfit_YG <- auto.arima(northsore_forecast_YG)
pred_YG <- forecast(northsore_ARIMAfit_YG, h = 5)
plot(pred_YG)

pred_YG_df <- data.frame(pred_YG)

6.1 Northshore’s Forecast Data Table

northsore_group[nrow(northsore_group) + 1,] <-  c(pred_AG_df[1,]$Point.Forecast, pred_EA_df[1,]$Point.Forecast, pred_YG_df[1,]$Point.Forecast, 2020)
northsore_group[nrow(northsore_group) + 1,] <-  c(pred_AG_df[2,]$Point.Forecast, pred_EA_df[2,]$Point.Forecast, pred_YG_df[2,]$Point.Forecast, 2021)
northsore_group[nrow(northsore_group) + 1,] <-  c(pred_AG_df[3,]$Point.Forecast, pred_EA_df[3,]$Point.Forecast, pred_YG_df[3,]$Point.Forecast, 2022)
northsore_group[nrow(northsore_group) + 1,] <-  c(pred_AG_df[4,]$Point.Forecast, pred_EA_df[4,]$Point.Forecast, pred_YG_df[4,]$Point.Forecast, 2023)
northsore_group[nrow(northsore_group) + 1,] <-  c(pred_AG_df[5,]$Point.Forecast, pred_EA_df[5,]$Point.Forecast, pred_YG_df[5,]$Point.Forecast, 2024)
northsore_group

7 Forecasting Punggol Field’s Population

#predict aged group
punggol_field_group$`Aged Group`
[1] 18200 19080 20340 20930 20970 21550 22710 22540 22990
pf_forecast_AG <- ts(punggol_field_group$`Aged Group`,start = c(2011,1),  end=c(2019, 1),frequency = 1)
plot(pf_forecast_AG)

pf_ARIMAfit_AG <- auto.arima(pf_forecast_AG)
pred_AG <- forecast(pf_ARIMAfit_AG, h = 5)
plot(pred_AG)

pred_AG_df <- data.frame(pred_AG)
#predict economic active
punggol_field_group$`Economic  Active`
[1] 77910 77670 79190 78800 77340 79070 82710 81940 82080
pf_forecast_EA <- ts(punggol_field_group$`Economic  Active`,start = c(2011,1),  end=c(2019, 1),frequency = 1)
plot(pf_forecast_EA)

pf_ARIMAfit_EA <- auto.arima(pf_forecast_EA)
pred_EA <- forecast(pf_ARIMAfit_EA, h = 5)
plot(pred_EA)

pred_EA_df <- data.frame(pred_EA)
#predict younger gen
punggol_field_group$`Younger Group`
[1] 31950 31790 31950 31600 31400 32620 35050 35400 35710
pf_forecast_YG <- ts(punggol_field_group$`Younger Group`,start = c(2011,1),  end=c(2019, 1),frequency = 1)
plot(pf_forecast_YG)

pf_ARIMAfit_YG <- auto.arima(pf_forecast_YG)
pred_YG <- forecast(pf_ARIMAfit_YG, h = 5)
plot(pred_YG)

pred_YG_df <- data.frame(pred_YG)

7.1 Punggol Field’s Forecast Data Table

punggol_field_group[nrow(punggol_field_group) + 1,] <-  c(pred_AG_df[1,]$Point.Forecast, pred_EA_df[1,]$Point.Forecast, pred_YG_df[1,]$Point.Forecast, 2020)
punggol_field_group[nrow(punggol_field_group) + 1,] <-  c(pred_AG_df[2,]$Point.Forecast, pred_EA_df[2,]$Point.Forecast, pred_YG_df[2,]$Point.Forecast, 2021)
punggol_field_group[nrow(punggol_field_group) + 1,] <-  c(pred_AG_df[3,]$Point.Forecast, pred_EA_df[3,]$Point.Forecast, pred_YG_df[3,]$Point.Forecast, 2022)
punggol_field_group[nrow(punggol_field_group) + 1,] <-  c(pred_AG_df[4,]$Point.Forecast, pred_EA_df[4,]$Point.Forecast, pred_YG_df[4,]$Point.Forecast, 2023)
punggol_field_group[nrow(punggol_field_group) + 1,] <-  c(pred_AG_df[5,]$Point.Forecast, pred_EA_df[5,]$Point.Forecast, pred_YG_df[5,]$Point.Forecast, 2024)
punggol_field_group

8 Forecasting Punggol Town Centre’s Population

#predict aged group
punggol_tc_group$`Aged Group`
[1]  850 1170 1490 1890 2770 4430 5780 7470 9140
tc_forecast_AG <- ts(punggol_tc_group$`Aged Group`,start = c(2011,1),  end=c(2019, 1),frequency = 1)
plot(tc_forecast_AG)

tc_ARIMAfit_AG <- auto.arima(tc_forecast_AG)
pred_AG <- forecast(tc_ARIMAfit_AG, h = 5)
plot(pred_AG)

pred_AG_df <- data.frame(pred_AG)
#predict economic active
punggol_tc_group$`Economic  Active`
[1]  6750  8320  8770  9380 14100 21990 27390 31840 37340
tc_forecast_EA <- ts(punggol_tc_group$`Economic  Active`,start = c(2011,1),  end=c(2019, 1),frequency = 1)
plot(tc_forecast_EA)

tc_ARIMAfit_EA <- auto.arima(tc_forecast_EA)
pred_EA <- forecast(tc_ARIMAfit_EA, h = 5)
plot(pred_EA)

pred_EA_df <- data.frame(pred_EA)
#predict younger gen
punggol_tc_group$`Younger Group`
[1]  2290  3020  3450  3700  5340  8500 10560 12450 14980
tc_forecast_YG <- ts(punggol_tc_group$`Younger Group`,start = c(2011,1),  end=c(2019, 1),frequency = 1)
plot(tc_forecast_YG)

tc_ARIMAfit_YG <- auto.arima(tc_forecast_YG)
pred_YG <- forecast(tc_ARIMAfit_YG, h = 5)
plot(pred_YG)

pred_YG_df <- data.frame(pred_YG)

8.1 Punggol Town Centre’s Forecast Data Table

punggol_tc_group[nrow(punggol_tc_group) + 1,] <-  c(pred_AG_df[1,]$Point.Forecast, pred_EA_df[1,]$Point.Forecast, pred_YG_df[1,]$Point.Forecast, 2020)
punggol_tc_group[nrow(punggol_tc_group) + 1,] <-  c(pred_AG_df[2,]$Point.Forecast, pred_EA_df[2,]$Point.Forecast, pred_YG_df[2,]$Point.Forecast, 2021)
punggol_tc_group[nrow(punggol_tc_group) + 1,] <-  c(pred_AG_df[3,]$Point.Forecast, pred_EA_df[3,]$Point.Forecast, pred_YG_df[3,]$Point.Forecast, 2022)
punggol_tc_group[nrow(punggol_tc_group) + 1,] <-  c(pred_AG_df[4,]$Point.Forecast, pred_EA_df[4,]$Point.Forecast, pred_YG_df[4,]$Point.Forecast, 2023)
punggol_tc_group[nrow(punggol_tc_group) + 1,] <-  c(pred_AG_df[5,]$Point.Forecast, pred_EA_df[5,]$Point.Forecast, pred_YG_df[5,]$Point.Forecast, 2024)
punggol_tc_group

9 Forecasting Waterway East’s Population

#predict aged group
we_forecast_AG <- ts(waterway_east_group$`Aged Group`,start = c(2011,1),  end=c(2019, 1),frequency = 1)
we_ARIMAfit_AG <- auto.arima(we_forecast_AG)
pred_AG <- forecast(we_ARIMAfit_AG, h = 5)
plot(pred_AG)

pred_AG_df <- data.frame(pred_AG)
#predict economic active
waterway_east_group$`Economic  Active`
[1] 22130 31840 41830 43790 51280 65140 72910 82700 85560
we_forecast_EA <- ts(waterway_east_group$`Economic  Active`,start = c(2011,1),  end=c(2019, 1),frequency = 1)
we_ARIMAfit_EA <- auto.arima(we_forecast_EA)
pred_EA <- forecast(we_ARIMAfit_EA, h = 5)
plot(pred_EA)

pred_EA_df <- data.frame(pred_EA)
#predict younger gen
waterway_east_group$`Younger Group`
[1]  8320 11580 15240 16340 20200 25720 29450 33950 35310
we_forecast_YG <- ts(waterway_east_group$`Younger Group`,start = c(2011,1),  end=c(2019, 1),frequency = 1)
we_ARIMAfit_YG <- auto.arima(we_forecast_YG)
pred_YG <- forecast(we_ARIMAfit_YG, h = 5)
plot(pred_YG)

pred_YG_df <- data.frame(pred_YG)
waterway_east_group[nrow(waterway_east_group) + 1,] <-  c(pred_AG_df[1,]$Point.Forecast, pred_EA_df[1,]$Point.Forecast, pred_YG_df[1,]$Point.Forecast, 2020)
waterway_east_group[nrow(waterway_east_group) + 1,] <-  c(pred_AG_df[2,]$Point.Forecast, pred_EA_df[2,]$Point.Forecast, pred_YG_df[2,]$Point.Forecast, 2021)
waterway_east_group[nrow(waterway_east_group) + 1,] <-  c(pred_AG_df[3,]$Point.Forecast, pred_EA_df[3,]$Point.Forecast, pred_YG_df[3,]$Point.Forecast, 2022)
waterway_east_group[nrow(waterway_east_group) + 1,] <-  c(pred_AG_df[4,]$Point.Forecast, pred_EA_df[4,]$Point.Forecast, pred_YG_df[4,]$Point.Forecast, 2023)
waterway_east_group[nrow(waterway_east_group) + 1,] <-  c(pred_AG_df[5,]$Point.Forecast, pred_EA_df[5,]$Point.Forecast, pred_YG_df[5,]$Point.Forecast, 2024)
waterway_east_group
waterway_east_group

9.1 Assumption: Areas like Coney Island & Punggol Canal will be treated as empty regions.

punggol_canal_group[nrow(punggol_canal_group) + 1,] <-  c(0,0,0, 2020)
punggol_canal_group[nrow(punggol_canal_group) + 1,] <-  c(0,0,0, 2021)
punggol_canal_group[nrow(punggol_canal_group) + 1,] <-  c(0,0,0, 2022)
punggol_canal_group[nrow(punggol_canal_group) + 1,] <-  c(0,0,0, 2023)
punggol_canal_group[nrow(punggol_canal_group) + 1,] <-  c(0,0,0, 2024)
conney_grp[nrow(conney_grp) + 1,] <-  c(0,0,0, 2020)
conney_grp[nrow(conney_grp) + 1,] <-  c(0,0,0, 2021)
conney_grp[nrow(conney_grp) + 1,] <-  c(0,0,0, 2022)
conney_grp[nrow(conney_grp) + 1,] <-  c(0,0,0, 2023)
conney_grp[nrow(conney_grp) + 1,] <-  c(0,0,0, 2024)

10 Merging dataframes and adding back subzone values for data aggregation to be done on QGIS

conney_grp$Subzone <- "Coney Island"
punggol_canal_group$Subzone <- "Punggol Canal"
matilda_grp$Subzone <- "Matilda"
punggol_field_group$Subzone <-"Punggol Field"
waterway_east_group$Subzone <- "Waterway East"
punggol_tc_group$Subzone <- "Punggol Town Centre"
northsore_group$Subzone <-  "Northshore"
combined_forecast <- rbind(punggol_tc_group, waterway_east_group,punggol_field_group,matilda_grp,punggol_canal_group,conney_grp, northsore_group)
combined_forecast

11 Write data to CSV

write.csv(combined_forecast,'punggol_pop_predicted.csv')
---
title: 'Punggol Forecast Population Analysis'
output:
  html_notebook:
    number_section: yes
    theme: flatly
    toc: yes
    toc_float: yes
  html_document:
    code_folding: hide
    df_print: paged
    toc: yes
  pdf_document:
    toc: yes
  word_document:
    toc: yes
---

Import Library
```{r setup, include=TRUE, eval=TRUE,message=FALSE, warning=FALSE}
knitr::opts_chunk$set(echo = TRUE)
```


```{r}
# Packages this notebook depends on.
packages <- c("dplyr", "forecast", "ggplot2", "reshape2", "readxl")

# Install any package that is not yet available, then attach it.
# requireNamespace() only checks availability; library() does the attaching
# and stops with an error if the package still cannot be loaded.
for (p in packages) {
  if (!requireNamespace(p, quietly = TRUE)) {
    install.packages(p)
  }
  library(p, character.only = TRUE)
}
```

Glimpse of Data
```{r}
# Load the workbook (path is relative to the notebook's working directory)
# and preview its first six rows.
df <- read_excel(path = "./Singapore_Residents_by_Planning.xlsx")
head(df, n = 6)
```

Filter only Punggol PA & Clean Data
```{r}
# Keep only Punggol rows at subzone / age-band level. The sheet carries "Total"
# roll-up rows for Subzone and Age Group; they are dropped so they are not
# added on top of their own components later.
# `Type of Dwelling` is laid out the same way: a "Total" row and a "Total HDB^"
# subtotal sit next to the individual dwelling types. Keeping every dwelling
# row and summing them afterwards counted each resident more than once, so
# only the "Total" dwelling row is retained.
punggol_PA <- df %>%
  filter(
    `Planning Area` == "Punggol",
    `Subzone` != "Total",
    `Age Group` != "Total",
    `Type of Dwelling` == "Total"
  )
str(punggol_PA)

# The yearly columns arrive as text because empty cells are written as "-".
# Treat "-" and NA as zero, then convert every year column to numeric.
year_cols <- as.character(2011:2019)
punggol_PA[year_cols] <- lapply(punggol_PA[year_cols], function(counts) {
  counts[counts == "-" | is.na(counts)] <- 0
  as.numeric(counts)
})

# Drop `Type of Dwelling`: keep the three key columns followed by the years.
punggol_PA <- punggol_PA[, c("Planning Area", "Subzone", "Age Group", year_cols)]
tail(punggol_PA, 20)
```


Group By PA,SZ, AGE GROUP
```{r}
# One row per (Planning Area, Subzone, Age Group); every remaining column
# (the yearly counts) is summed within that combination.
punggol_PA_aggregated <- summarize_all(
  group_by(punggol_PA, `Planning Area`, `Subzone`, `Age Group`),
  sum
)
```

Classifying age groups based on age group classification
```{r}
#age groups classification
# Five-year age bands as labelled in the source sheet. Any band that appears in
# neither vector falls through to 'Aged Group' in the ifelse() below.
economic_active <- c('25 - 29','30 - 34','35 - 39','40 - 44','45 - 49', '50 - 54', '55 - 59', '60 - 64')
# '5 - 9' used to be missing from this list, which silently classified children
# aged 5-9 as 'Aged Group'.
# NOTE(review): the label is assumed to follow the same "lo - hi" pattern as
# the other bands - confirm against the sheet.
younger <- c('0 - 4','5 - 9','10 - 14','15 - 19','20 - 24')

#age groups: younger (0-24), economic active (25-64), aged (65 and above)
# 'Economic  Active' (two spaces) is the exact label later chunks look up.
punggol_PA_aggregated$`Economic Group` <- ifelse(punggol_PA_aggregated$`Age Group` %in% economic_active, 'Economic  Active', ifelse(punggol_PA_aggregated$`Age Group` %in% younger, 'Younger Group', 'Aged Group'))

tail(punggol_PA_aggregated,20)

```

Reselect fields and grouping
```{r}
# Younger-group rows only, keyed by "<Subzone> <Age Group>".
younger_age_group <-  punggol_PA_aggregated %>% filter(`Economic Group` == "Younger Group")
younger_age_group$colname <- paste(younger_age_group$Subzone,younger_age_group$`Age Group`)
# Column 14 is the new key, columns 4-12 are the yearly counts.
younger_age_group <-younger_age_group[ , c(14,4:12)]
younger_age_group <- younger_age_group %>% 
  group_by(`colname`) %>% 
  summarize_all(sum)


# Transpose so that every "<Subzone> <Age Group>" key becomes a column and
# every year becomes a row.
reverse<-data.frame(t(younger_age_group))
colnames(reverse) <- unlist(reverse[row.names(reverse)=='colname',])
reverse$Year <- rownames(reverse)
reverse<-reverse[-1,]
rownames(reverse) <- NULL
# Convert every population column to numeric; Year stays as text.
# The columns are picked by name rather than as 1:28, so the replacement always
# has exactly as many columns as it overwrites, however many age bands exist.
pop_cols <- setdiff(names(reverse), "Year")
reverse[pop_cols] <- lapply(reverse[pop_cols], function(x) as.numeric(as.character(x)))

# Names of the columns of `df` that belong to `subzone`, followed by "Year".
# Relies on the "<Subzone> <Age Group>" naming built above; the trailing space
# keeps "Punggol Field" from matching other "Punggol ..." subzones.
subzone_cols <- function(df, subzone) {
  c(names(df)[startsWith(names(df), paste0(subzone, " "))], "Year")
}
```

```{r, echo=FALSE}
print(reverse, right=FALSE)
```

```{r}
#Overall diagram
meltdf <- melt(reverse,id="Year")
ggplot(meltdf,aes(x=Year,y=value,colour=variable,group=variable)) + geom_line()
```
Matilda's: Younger Age Group Analysis
```{r}

meltdf <- melt(reverse[, subzone_cols(reverse, "Matilda"), drop = FALSE],id="Year")
ggplot(meltdf,aes(x=Year,y=value,colour=variable,group=variable)) + geom_line()
```

Northshore's: Younger Age Group Analysis
```{r}
meltdf <- melt(reverse[, subzone_cols(reverse, "Northshore"), drop = FALSE],id="Year")
ggplot(meltdf,aes(x=Year,y=value,colour=variable,group=variable)) + geom_line()
```

#Punggol Field's: Younger Age Group Analysis
```{r}
meltdf <- melt(reverse[, subzone_cols(reverse, "Punggol Field"), drop = FALSE],id="Year")
ggplot(meltdf,aes(x=Year,y=value,colour=variable,group=variable)) + geom_line()
```

#Punggol Town Centre: Younger Age Group Analysis
```{r}
meltdf <- melt(reverse[, subzone_cols(reverse, "Punggol Town Centre"), drop = FALSE],id="Year")
ggplot(meltdf,aes(x=Year,y=value,colour=variable,group=variable)) + geom_line()
```

#Waterway East: Younger Age Group Analysis
```{r}
meltdf <- melt(reverse[, subzone_cols(reverse, "Waterway East"), drop = FALSE],id="Year")
ggplot(meltdf,aes(x=Year,y=value,colour=variable,group=variable)) + geom_line()
```



```{r}
# Keep Subzone (column 2), Economic Group (column 13) and the yearly counts
# (columns 4-12), then total the years per Subzone / Economic Group pair.
# This overwrites punggol_PA_aggregated, so the positions above are only valid
# the first time the chunk runs.
punggol_PA_aggregated <- summarize_all(
  group_by(punggol_PA_aggregated[, c(2, 13, 4:12)], `Subzone`, `Economic Group`),
  sum
)

```

```{r}

# Transpose the aggregated table: each column becomes one
# (Subzone, Economic Group) pair; the rows are Subzone, Economic Group and
# then one row per year.
reverse<-data.frame(t(punggol_PA_aggregated))
reverse

#Conney
# Position-based: columns 1-3 are taken to be Coney Island's three groups.
conney_grp<-reverse[1:3]
conney_grp<-conney_grp[-1,]  # drop the Subzone row
# Use the 'Economic Group' row as the column headers.
colnames(conney_grp) <- unlist(conney_grp[row.names(conney_grp)=='Economic Group',])
conney_grp$Year <- rownames(conney_grp)
conney_grp<-conney_grp[-1,]  # drop the Economic Group row now that it is the header
rownames(conney_grp) <- NULL

# Convert the counts to numeric. as.character() first makes this work for both
# factor columns (R < 4.0) and character columns (R >= 4.0, where data.frame()
# no longer creates factors). The previous levels(x)[x] idiom yields a
# zero-length vector for character columns and the assignment then fails.
conney_grp$`Aged Group`<-as.numeric(as.character(conney_grp$`Aged Group`))
conney_grp$`Economic  Active`<-as.numeric(as.character(conney_grp$`Economic  Active`))
conney_grp$`Younger Group`<-as.numeric(as.character(conney_grp$`Younger Group`))
```



```{r}
#Matilda
# Position-based: columns 4-6 of `reverse` are taken to be Matilda's three groups.
matilda_grp<-reverse[4:6]
matilda_grp<-matilda_grp[-1,]  # drop the first (Subzone) row

# Use the 'Economic Group' row as the column headers.
colnames(matilda_grp) <- unlist(matilda_grp[row.names(matilda_grp)=='Economic Group',])
matilda_grp$Year <- rownames(matilda_grp)
matilda_grp<-matilda_grp[-1,]  # drop the Economic Group row now that it is the header
rownames(matilda_grp) <- NULL

# Convert the counts to numeric. as.character() first makes this work for both
# factor columns (R < 4.0) and character columns (R >= 4.0, where data.frame()
# no longer creates factors). The previous levels(x)[x] idiom yields a
# zero-length vector for character columns and the assignment then fails.
matilda_grp$`Aged Group`<-as.numeric(as.character(matilda_grp$`Aged Group`))
matilda_grp$`Economic  Active`<-as.numeric(as.character(matilda_grp$`Economic  Active`))
matilda_grp$`Younger Group`<-as.numeric(as.character(matilda_grp$`Younger Group`))
```

```{r}
#Northshore
# Position-based: columns 7-9 of `reverse` are taken to be Northshore's three groups.
northsore_group<-reverse[7:9]
northsore_group<-northsore_group[-1,]  # drop the first (Subzone) row

# Use the 'Economic Group' row as the column headers.
colnames(northsore_group) <- unlist(northsore_group[row.names(northsore_group)=='Economic Group',])
northsore_group$Year <- rownames(northsore_group)
northsore_group<-northsore_group[-1,]  # drop the Economic Group row now that it is the header
rownames(northsore_group) <- NULL

# Convert the counts to numeric. as.character() first makes this work for both
# factor columns (R < 4.0) and character columns (R >= 4.0, where data.frame()
# no longer creates factors). The previous levels(x)[x] idiom yields a
# zero-length vector for character columns and the assignment then fails.
northsore_group$`Aged Group`<-as.numeric(as.character(northsore_group$`Aged Group`))
northsore_group$`Economic  Active`<-as.numeric(as.character(northsore_group$`Economic  Active`))
northsore_group$`Younger Group`<-as.numeric(as.character(northsore_group$`Younger Group`))
```


```{r}
#Punggol Canal
# Position-based: columns 10-12 of `reverse` are taken to be Punggol Canal's three groups.
punggol_canal_group<-reverse[10:12]
punggol_canal_group<-punggol_canal_group[-1,]  # drop the first (Subzone) row

# Use the 'Economic Group' row as the column headers.
colnames(punggol_canal_group) <- unlist(punggol_canal_group[row.names(punggol_canal_group)=='Economic Group',])
punggol_canal_group$Year <- rownames(punggol_canal_group)
punggol_canal_group<-punggol_canal_group[-1,]  # drop the Economic Group row now that it is the header
rownames(punggol_canal_group) <- NULL

# Convert the counts to numeric. as.character() first makes this work for both
# factor columns (R < 4.0) and character columns (R >= 4.0, where data.frame()
# no longer creates factors). The previous levels(x)[x] idiom yields a
# zero-length vector for character columns and the assignment then fails.
punggol_canal_group$`Aged Group`<-as.numeric(as.character(punggol_canal_group$`Aged Group`))
punggol_canal_group$`Economic  Active`<-as.numeric(as.character(punggol_canal_group$`Economic  Active`))
punggol_canal_group$`Younger Group`<-as.numeric(as.character(punggol_canal_group$`Younger Group`))
```

```{r}
# Punggol Field
# Take columns 13-15 of `reverse` for this subzone and drop the first row.
# NOTE(review): the first row is presumably a subzone label row -- confirm
# against how `reverse` is built.
punggol_field_group <- reverse[13:15]
punggol_field_group <- punggol_field_group[-1, ]

# Promote the 'Economic Group' row to column headers, keep the row names as a
# regular `Year` column, then drop the header row and reset the row names.
colnames(punggol_field_group) <- unlist(punggol_field_group[row.names(punggol_field_group) == "Economic Group", ])
punggol_field_group$Year <- rownames(punggol_field_group)
punggol_field_group <- punggol_field_group[-1, ]
rownames(punggol_field_group) <- NULL

# Convert the three population columns to numeric. Going through
# as.character() works for factor AND character columns; indexing levels()
# directly returns NULL for character columns (the default since R 4.0) and
# makes the column assignment fail.
group_cols <- c("Aged Group", "Economic  Active", "Younger Group")
punggol_field_group[group_cols] <- lapply(
  punggol_field_group[group_cols],
  function(counts) as.numeric(as.character(counts))
)

```

```{r}
# Punggol Town Centre
# Take columns 16-18 of `reverse` for this subzone and drop the first row.
# NOTE(review): the first row is presumably a subzone label row -- confirm
# against how `reverse` is built.
punggol_tc_group <- reverse[16:18]
punggol_tc_group <- punggol_tc_group[-1, ]

# Promote the 'Economic Group' row to column headers, keep the row names as a
# regular `Year` column, then drop the header row and reset the row names.
colnames(punggol_tc_group) <- unlist(punggol_tc_group[row.names(punggol_tc_group) == "Economic Group", ])
punggol_tc_group$Year <- rownames(punggol_tc_group)
punggol_tc_group <- punggol_tc_group[-1, ]
rownames(punggol_tc_group) <- NULL

# Convert the three population columns to numeric. Going through
# as.character() works for factor AND character columns; indexing levels()
# directly returns NULL for character columns (the default since R 4.0) and
# makes the column assignment fail.
group_cols <- c("Aged Group", "Economic  Active", "Younger Group")
punggol_tc_group[group_cols] <- lapply(
  punggol_tc_group[group_cols],
  function(counts) as.numeric(as.character(counts))
)


```

```{r}
# Waterway East
# Take columns 19-21 of `reverse` for this subzone and drop the first row.
# NOTE(review): the first row is presumably a subzone label row -- confirm
# against how `reverse` is built.
waterway_east_group <- reverse[19:21]
waterway_east_group <- waterway_east_group[-1, ]

# Promote the 'Economic Group' row to column headers, keep the row names as a
# regular `Year` column, then drop the header row and reset the row names.
colnames(waterway_east_group) <- unlist(waterway_east_group[row.names(waterway_east_group) == "Economic Group", ])
waterway_east_group$Year <- rownames(waterway_east_group)
waterway_east_group <- waterway_east_group[-1, ]
rownames(waterway_east_group) <- NULL

# Convert the three population columns to numeric. Going through
# as.character() works for factor AND character columns; indexing levels()
# directly returns NULL for character columns (the default since R 4.0) and
# makes the column assignment fail.
group_cols <- c("Aged Group", "Economic  Active", "Younger Group")
waterway_east_group[group_cols] <- lapply(
  waterway_east_group[group_cols],
  function(counts) as.numeric(as.character(counts))
)


```


Matilda's Current Population
```{r}
# Yearly population of Matilda, one line per population group.
# The colour aesthetic is mapped to the group label itself and the palette is
# pinned with a named vector. A colour string inside aes() is treated as a
# category level: the levels used to be sorted alphabetically
# (darkred, green, steelblue) while the labels were supplied in
# Younger/Economic/Aged order, so the legend swapped "Younger" and "Aged" and
# the requested colours were never applied.
population_group_colours <- c(
  "Younger Group (Below 25)" = "steelblue",
  "Economic Active (25-64)" = "green",
  "Aged Group (Above 64)" = "darkred"
)

# `Year` is not numeric here, so group = 1 is needed to join the points of
# each series into a single line.
ggplot(matilda_grp, aes(x = Year, group = 1)) +
  geom_line(aes(y = `Younger Group`, color = "Younger Group (Below 25)")) +
  geom_line(aes(y = `Economic  Active`, color = "Economic Active (25-64)")) +
  geom_line(aes(y = `Aged Group`, color = "Aged Group (Above 64)")) +
  scale_color_manual(
    name = "Population Group",
    values = population_group_colours,
    breaks = names(population_group_colours)
  ) +
  labs(
    title = "Matilda's Current Population",
    x = "Year",
    y = "Total Population"
  )

```


Northshore's Current Population
```{r}
# Yearly population of Northshore, one line per population group.
# The colour aesthetic is mapped to the group label itself and the palette is
# pinned with a named vector. A colour string inside aes() is treated as a
# category level: the levels used to be sorted alphabetically
# (darkred, green, steelblue) while the labels were supplied in
# Younger/Economic/Aged order, so the legend swapped "Younger" and "Aged" and
# the requested colours were never applied.
population_group_colours <- c(
  "Younger Group (Below 25)" = "steelblue",
  "Economic Active (25-64)" = "green",
  "Aged Group (Above 64)" = "darkred"
)

# `Year` is not numeric here, so group = 1 is needed to join the points of
# each series into a single line.
ggplot(northsore_group, aes(x = Year, group = 1)) +
  geom_line(aes(y = `Younger Group`, color = "Younger Group (Below 25)")) +
  geom_line(aes(y = `Economic  Active`, color = "Economic Active (25-64)")) +
  geom_line(aes(y = `Aged Group`, color = "Aged Group (Above 64)")) +
  scale_color_manual(
    name = "Population Group",
    values = population_group_colours,
    breaks = names(population_group_colours)
  ) +
  labs(
    title = "Northshore's Current Population",
    x = "Year",
    y = "Total Population"
  )
```

Punggol Canal's Current Population
```{r}
# Yearly population of Punggol Canal, one line per population group.
# The colour aesthetic is mapped to the group label itself and the palette is
# pinned with a named vector. A colour string inside aes() is treated as a
# category level: the levels used to be sorted alphabetically
# (darkred, green, steelblue) while the labels were supplied in
# Younger/Economic/Aged order, so the legend swapped "Younger" and "Aged" and
# the requested colours were never applied.
population_group_colours <- c(
  "Younger Group (Below 25)" = "steelblue",
  "Economic Active (25-64)" = "green",
  "Aged Group (Above 64)" = "darkred"
)

# `Year` is not numeric here, so group = 1 is needed to join the points of
# each series into a single line.
ggplot(punggol_canal_group, aes(x = Year, group = 1)) +
  geom_line(aes(y = `Younger Group`, color = "Younger Group (Below 25)")) +
  geom_line(aes(y = `Economic  Active`, color = "Economic Active (25-64)")) +
  geom_line(aes(y = `Aged Group`, color = "Aged Group (Above 64)")) +
  scale_color_manual(
    name = "Population Group",
    values = population_group_colours,
    breaks = names(population_group_colours)
  ) +
  labs(
    title = "Punggol Canal's Current Population",
    x = "Year",
    y = "Total Population"
  )
```

Punggol Field's Current Population
```{r}
# Yearly population of Punggol Field, one line per population group.
# The colour aesthetic is mapped to the group label itself and the palette is
# pinned with a named vector. A colour string inside aes() is treated as a
# category level: the levels used to be sorted alphabetically
# (darkred, green, steelblue) while the labels were supplied in
# Younger/Economic/Aged order, so the legend swapped "Younger" and "Aged" and
# the requested colours were never applied.
population_group_colours <- c(
  "Younger Group (Below 25)" = "steelblue",
  "Economic Active (25-64)" = "green",
  "Aged Group (Above 64)" = "darkred"
)

# `Year` is not numeric here, so group = 1 is needed to join the points of
# each series into a single line.
ggplot(punggol_field_group, aes(x = Year, group = 1)) +
  geom_line(aes(y = `Younger Group`, color = "Younger Group (Below 25)")) +
  geom_line(aes(y = `Economic  Active`, color = "Economic Active (25-64)")) +
  geom_line(aes(y = `Aged Group`, color = "Aged Group (Above 64)")) +
  scale_color_manual(
    name = "Population Group",
    values = population_group_colours,
    breaks = names(population_group_colours)
  ) +
  labs(
    title = "Punggol Field's Current Population",
    x = "Year",
    y = "Total Population"
  )
```

Punggol Town Centre's Current Population
```{r}
# Yearly population of Punggol Town Centre, one line per population group.
# The colour aesthetic is mapped to the group label itself and the palette is
# pinned with a named vector. A colour string inside aes() is treated as a
# category level: the levels used to be sorted alphabetically
# (darkred, green, steelblue) while the labels were supplied in
# Younger/Economic/Aged order, so the legend swapped "Younger" and "Aged" and
# the requested colours were never applied.
population_group_colours <- c(
  "Younger Group (Below 25)" = "steelblue",
  "Economic Active (25-64)" = "green",
  "Aged Group (Above 64)" = "darkred"
)

# `Year` is not numeric here, so group = 1 is needed to join the points of
# each series into a single line.
ggplot(punggol_tc_group, aes(x = Year, group = 1)) +
  geom_line(aes(y = `Younger Group`, color = "Younger Group (Below 25)")) +
  geom_line(aes(y = `Economic  Active`, color = "Economic Active (25-64)")) +
  geom_line(aes(y = `Aged Group`, color = "Aged Group (Above 64)")) +
  scale_color_manual(
    name = "Population Group",
    values = population_group_colours,
    breaks = names(population_group_colours)
  ) +
  labs(
    title = "Punggol Town Centre's Current Population",
    x = "Year",
    y = "Total Population"
  )

```

Waterway East's Current Population
```{r}
# Yearly population of Waterway East, one line per population group.
# The colour aesthetic is mapped to the group label itself and the palette is
# pinned with a named vector. A colour string inside aes() is treated as a
# category level: the levels used to be sorted alphabetically
# (darkred, green, steelblue) while the labels were supplied in
# Younger/Economic/Aged order, so the legend swapped "Younger" and "Aged" and
# the requested colours were never applied.
population_group_colours <- c(
  "Younger Group (Below 25)" = "steelblue",
  "Economic Active (25-64)" = "green",
  "Aged Group (Above 64)" = "darkred"
)

# `Year` is not numeric here, so group = 1 is needed to join the points of
# each series into a single line.
ggplot(waterway_east_group, aes(x = Year, group = 1)) +
  geom_line(aes(y = `Younger Group`, color = "Younger Group (Below 25)")) +
  geom_line(aes(y = `Economic  Active`, color = "Economic Active (25-64)")) +
  geom_line(aes(y = `Aged Group`, color = "Aged Group (Above 64)")) +
  scale_color_manual(
    name = "Population Group",
    values = population_group_colours,
    breaks = names(population_group_colours)
  ) +
  labs(
    title = "Waterway East's Current Population",
    x = "Year",
    y = "Total Population"
  )

```




# Why ARIMA?
https://www.researchgate.net/post/Do_ARIMA_models_have_an_theoretical_interpretation_in_population_forecasting 
https://link.springer.com/article/10.1007/BF01066529
https://www.researchgate.net/publication/228468254_Forecasting_the_population_of_Pakistan_using_ARIMA_models

# Forecasting Matilda's Population
https://stackoverflow.com/questions/47852567/arima-forecast-keep-getting-error-data-must-be-of-a-vector-type-was-null
https://stackoverflow.com/questions/53099289/error-in-array-data-must-be-of-a-vector-type-was-null-in-r

Developing an ARIMA Model for Matilda's Population Forecast

This assumes that the data is stationary, meaning that trend and seasonality have been removed owing to consistent demand.
```{r}

# Aged group: print the observed counts, wrap them in a yearly time series
# (2011-2019), let auto.arima() choose a model and forecast 5 periods ahead.
matilda_grp[["Aged Group"]]
matilda_forecast_AG <- ts(
  data = matilda_grp[["Aged Group"]],
  start = c(2011, 1),
  end = c(2019, 1),
  frequency = 1
)
plot(matilda_forecast_AG)

matilda_ARIMAfit_AG <- auto.arima(matilda_forecast_AG)
pred_AG <- forecast(matilda_ARIMAfit_AG, h = 5)
plot(pred_AG)

# Tabular copy of the forecast; read again when the predictions are appended.
pred_AG_df <- data.frame(pred_AG)

```

```{r}
# Economically active group: print the observed counts, wrap them in a yearly
# time series (2011-2019), let auto.arima() choose a model and forecast
# 5 periods ahead.
matilda_grp[["Economic  Active"]]
matilda_forecast_EA <- ts(
  data = matilda_grp[["Economic  Active"]],
  start = c(2011, 1),
  end = c(2019, 1),
  frequency = 1
)
plot(matilda_forecast_EA)

matilda_ARIMAfit_EA <- auto.arima(matilda_forecast_EA)
pred_EA <- forecast(matilda_ARIMAfit_EA, h = 5)
plot(pred_EA)

# Tabular copy of the forecast; read again when the predictions are appended.
pred_EA_df <- data.frame(pred_EA)

```

## Matilda's Forecast Data Table
```{r}
# Younger group: print the observed counts, wrap them in a yearly time series
# (2011-2019), let auto.arima() choose a model and forecast 5 periods ahead.
matilda_grp$`Younger Group`
matilda_forecast_YG <- ts(
  matilda_grp$`Younger Group`,
  start = c(2011, 1),
  end = c(2019, 1),
  frequency = 1
)
plot(matilda_forecast_YG)
matilda_ARIMAfit_YG <- auto.arima(matilda_forecast_YG)
pred_YG <- forecast(matilda_ARIMAfit_YG, h = 5)
plot(pred_YG)

pred_YG_df <- data.frame(pred_YG)

# Append the ARIMA point forecasts to the observed table.
# pred_AG_df and pred_EA_df come from the two preceding chunks. The new rows
# are built with explicit column names and rbind() matches data-frame columns
# by name, so the result does not depend on the column order of matilda_grp.
# `Year` is kept as character to match the existing column.
matilda_future <- data.frame(
  `Aged Group` = pred_AG_df$Point.Forecast,
  `Economic  Active` = pred_EA_df$Point.Forecast,
  `Younger Group` = pred_YG_df$Point.Forecast,
  Year = as.character(seq(2020, length.out = nrow(pred_YG_df))),
  check.names = FALSE,
  stringsAsFactors = FALSE
)
matilda_grp <- rbind(matilda_grp, matilda_future)

matilda_grp

```

# Forecasting Northshore's Population
An ARIMA(0,0,0) model with zero mean is white noise, which means that the errors are uncorrelated across time.
```{r}

# Each group below follows the same recipe: print the observed counts, wrap
# them in a yearly time series (2011-2019), let auto.arima() choose a model,
# forecast 5 periods ahead and keep a data-frame copy of the forecast.

# Aged group
northsore_group[["Aged Group"]]
northsore_forecast_AG <- ts(
  data = northsore_group[["Aged Group"]],
  start = c(2011, 1),
  end = c(2019, 1),
  frequency = 1
)
plot(northsore_forecast_AG)
northsore_ARIMAfit_AG <- auto.arima(northsore_forecast_AG)
pred_AG <- forecast(northsore_ARIMAfit_AG, h = 5)
plot(pred_AG)
pred_AG_df <- data.frame(pred_AG)

# Economically active group
northsore_group[["Economic  Active"]]
northsore_forecast_EA <- ts(
  data = northsore_group[["Economic  Active"]],
  start = c(2011, 1),
  end = c(2019, 1),
  frequency = 1
)
plot(northsore_forecast_EA)
northsore_ARIMAfit_EA <- auto.arima(northsore_forecast_EA)
pred_EA <- forecast(northsore_ARIMAfit_EA, h = 5)
plot(pred_EA)
pred_EA_df <- data.frame(pred_EA)

# Younger group
northsore_group[["Younger Group"]]
northsore_forecast_YG <- ts(
  data = northsore_group[["Younger Group"]],
  start = c(2011, 1),
  end = c(2019, 1),
  frequency = 1
)
plot(northsore_forecast_YG)
northsore_ARIMAfit_YG <- auto.arima(northsore_forecast_YG)
pred_YG <- forecast(northsore_ARIMAfit_YG, h = 5)
plot(pred_YG)
pred_YG_df <- data.frame(pred_YG)

```

## Northshore's Forecast Data Table
```{r}
# Append the ARIMA point forecasts (pred_*_df from the preceding chunk) to the
# observed table. The new rows are built with explicit column names and
# rbind() matches data-frame columns by name, so the result does not depend on
# the column order of northsore_group. `Year` is kept as character to match
# the existing column.
northsore_future <- data.frame(
  `Aged Group` = pred_AG_df$Point.Forecast,
  `Economic  Active` = pred_EA_df$Point.Forecast,
  `Younger Group` = pred_YG_df$Point.Forecast,
  Year = as.character(seq(2020, length.out = nrow(pred_YG_df))),
  check.names = FALSE,
  stringsAsFactors = FALSE
)
northsore_group <- rbind(northsore_group, northsore_future)

northsore_group
```

# Forecasting Punggol Field's Population
```{r}

# Each group below follows the same recipe: print the observed counts, wrap
# them in a yearly time series (2011-2019), let auto.arima() choose a model,
# forecast 5 periods ahead and keep a data-frame copy of the forecast.

# Aged group
punggol_field_group[["Aged Group"]]
pf_forecast_AG <- ts(
  data = punggol_field_group[["Aged Group"]],
  start = c(2011, 1),
  end = c(2019, 1),
  frequency = 1
)
plot(pf_forecast_AG)
pf_ARIMAfit_AG <- auto.arima(pf_forecast_AG)
pred_AG <- forecast(pf_ARIMAfit_AG, h = 5)
plot(pred_AG)
pred_AG_df <- data.frame(pred_AG)

# Economically active group
punggol_field_group[["Economic  Active"]]
pf_forecast_EA <- ts(
  data = punggol_field_group[["Economic  Active"]],
  start = c(2011, 1),
  end = c(2019, 1),
  frequency = 1
)
plot(pf_forecast_EA)
pf_ARIMAfit_EA <- auto.arima(pf_forecast_EA)
pred_EA <- forecast(pf_ARIMAfit_EA, h = 5)
plot(pred_EA)
pred_EA_df <- data.frame(pred_EA)

# Younger group
punggol_field_group[["Younger Group"]]
pf_forecast_YG <- ts(
  data = punggol_field_group[["Younger Group"]],
  start = c(2011, 1),
  end = c(2019, 1),
  frequency = 1
)
plot(pf_forecast_YG)
pf_ARIMAfit_YG <- auto.arima(pf_forecast_YG)
pred_YG <- forecast(pf_ARIMAfit_YG, h = 5)
plot(pred_YG)
pred_YG_df <- data.frame(pred_YG)
```

## Punggol Field's Forecast Data Table
```{r}
# Append the ARIMA point forecasts (pred_*_df from the preceding chunk) to the
# observed table. The new rows are built with explicit column names and
# rbind() matches data-frame columns by name, so the result does not depend on
# the column order of punggol_field_group. `Year` is kept as character to
# match the existing column.
punggol_field_future <- data.frame(
  `Aged Group` = pred_AG_df$Point.Forecast,
  `Economic  Active` = pred_EA_df$Point.Forecast,
  `Younger Group` = pred_YG_df$Point.Forecast,
  Year = as.character(seq(2020, length.out = nrow(pred_YG_df))),
  check.names = FALSE,
  stringsAsFactors = FALSE
)
punggol_field_group <- rbind(punggol_field_group, punggol_field_future)

punggol_field_group
```

# Forecasting Punggol Town Centre's Population
```{r}

# Each group below follows the same recipe: print the observed counts, wrap
# them in a yearly time series (2011-2019), let auto.arima() choose a model,
# forecast 5 periods ahead and keep a data-frame copy of the forecast.

# Aged group
punggol_tc_group[["Aged Group"]]
tc_forecast_AG <- ts(
  data = punggol_tc_group[["Aged Group"]],
  start = c(2011, 1),
  end = c(2019, 1),
  frequency = 1
)
plot(tc_forecast_AG)
tc_ARIMAfit_AG <- auto.arima(tc_forecast_AG)
pred_AG <- forecast(tc_ARIMAfit_AG, h = 5)
plot(pred_AG)
pred_AG_df <- data.frame(pred_AG)

# Economically active group
punggol_tc_group[["Economic  Active"]]
tc_forecast_EA <- ts(
  data = punggol_tc_group[["Economic  Active"]],
  start = c(2011, 1),
  end = c(2019, 1),
  frequency = 1
)
plot(tc_forecast_EA)
tc_ARIMAfit_EA <- auto.arima(tc_forecast_EA)
pred_EA <- forecast(tc_ARIMAfit_EA, h = 5)
plot(pred_EA)
pred_EA_df <- data.frame(pred_EA)

# Younger group
punggol_tc_group[["Younger Group"]]
tc_forecast_YG <- ts(
  data = punggol_tc_group[["Younger Group"]],
  start = c(2011, 1),
  end = c(2019, 1),
  frequency = 1
)
plot(tc_forecast_YG)
tc_ARIMAfit_YG <- auto.arima(tc_forecast_YG)
pred_YG <- forecast(tc_ARIMAfit_YG, h = 5)
plot(pred_YG)
pred_YG_df <- data.frame(pred_YG)

```

## Punggol Town Centre's Forecast Data Table
```{r}
# Append the ARIMA point forecasts (pred_*_df from the preceding chunk) to the
# observed table. The new rows are built with explicit column names and
# rbind() matches data-frame columns by name, so the result does not depend on
# the column order of punggol_tc_group. `Year` is kept as character to match
# the existing column.
punggol_tc_future <- data.frame(
  `Aged Group` = pred_AG_df$Point.Forecast,
  `Economic  Active` = pred_EA_df$Point.Forecast,
  `Younger Group` = pred_YG_df$Point.Forecast,
  Year = as.character(seq(2020, length.out = nrow(pred_YG_df))),
  check.names = FALSE,
  stringsAsFactors = FALSE
)
punggol_tc_group <- rbind(punggol_tc_group, punggol_tc_future)
punggol_tc_group
```

# Forecasting Waterway East's Population

```{r}


# For each group: wrap the observed counts in a yearly time series
# (2011-2019), let auto.arima() choose a model, forecast 5 periods ahead,
# plot the forecast and keep a data-frame copy of it.

# Aged group
we_forecast_AG <- ts(
  data = waterway_east_group[["Aged Group"]],
  start = c(2011, 1),
  end = c(2019, 1),
  frequency = 1
)
we_ARIMAfit_AG <- auto.arima(we_forecast_AG)
pred_AG <- forecast(we_ARIMAfit_AG, h = 5)
plot(pred_AG)
pred_AG_df <- data.frame(pred_AG)

# Economically active group (observed counts are printed first)
waterway_east_group[["Economic  Active"]]
we_forecast_EA <- ts(
  data = waterway_east_group[["Economic  Active"]],
  start = c(2011, 1),
  end = c(2019, 1),
  frequency = 1
)
we_ARIMAfit_EA <- auto.arima(we_forecast_EA)
pred_EA <- forecast(we_ARIMAfit_EA, h = 5)
plot(pred_EA)
pred_EA_df <- data.frame(pred_EA)

# Younger group (observed counts are printed first)
waterway_east_group[["Younger Group"]]
we_forecast_YG <- ts(
  data = waterway_east_group[["Younger Group"]],
  start = c(2011, 1),
  end = c(2019, 1),
  frequency = 1
)
we_ARIMAfit_YG <- auto.arima(we_forecast_YG)
pred_YG <- forecast(we_ARIMAfit_YG, h = 5)
plot(pred_YG)
pred_YG_df <- data.frame(pred_YG)
```

```{r}
# Append the ARIMA point forecasts (pred_*_df from the preceding chunk) to the
# observed table. The new rows are built with explicit column names and
# rbind() matches data-frame columns by name, so the result does not depend on
# the column order of waterway_east_group. `Year` is kept as character to
# match the existing column.
waterway_east_future <- data.frame(
  `Aged Group` = pred_AG_df$Point.Forecast,
  `Economic  Active` = pred_EA_df$Point.Forecast,
  `Younger Group` = pred_YG_df$Point.Forecast,
  Year = as.character(seq(2020, length.out = nrow(pred_YG_df))),
  check.names = FALSE,
  stringsAsFactors = FALSE
)
waterway_east_group <- rbind(waterway_east_group, waterway_east_future)

# Show the extended table once (it used to be printed twice in a row).
waterway_east_group
```

## Assumption: Areas like Coney Island and Punggol Canal will be treated as empty regions.
```{r}
# Add one all-zero row per year 2020-2024 to the two tables that are treated
# as empty regions. Each row is zero for the three group columns followed by
# the year.
for (yr in 2020:2024) {
  punggol_canal_group[nrow(punggol_canal_group) + 1, ] <- c(0, 0, 0, yr)
}

for (yr in 2020:2024) {
  conney_grp[nrow(conney_grp) + 1, ] <- c(0, 0, 0, yr)
}
```


# Merging data frames and adding back subzone values so that data aggregation can be done in QGIS
```{r}
# Label every per-subzone table with its subzone name (listed here in the
# order in which the tables are stacked below).
punggol_tc_group$Subzone <- "Punggol Town Centre"
waterway_east_group$Subzone <- "Waterway East"
punggol_field_group$Subzone <- "Punggol Field"
matilda_grp$Subzone <- "Matilda"
punggol_canal_group$Subzone <- "Punggol Canal"
conney_grp$Subzone <- "Coney Island"
northsore_group$Subzone <- "Northshore"

# Stack all subzone tables into a single long table and display it.
subzone_tables <- list(
  punggol_tc_group,
  waterway_east_group,
  punggol_field_group,
  matilda_grp,
  punggol_canal_group,
  conney_grp,
  northsore_group
)
combined_forecast <- do.call(rbind, subzone_tables)
combined_forecast
```

# Write data to CSV
```{r}
# Save the merged table in the working directory.
write.csv(x = combined_forecast, file = 'punggol_pop_predicted.csv')
```



