#library(scales)
#library(zoo)
library(forecast)
library(tseries)
library(fpp)
library(ggplot2)
library(dplyr)
# Load the raw country-year table. Per the preview below it holds an index
# column X followed by Name, Year, hdi, me_normalized and me.
d <- read.csv(file = "d:/UPWORK-IRL/df.csv")
head(d, n = 3)
## X Name Year hdi me_normalized me
## 1 1 Algeria 1990 0.572 20.62263761 904269155
## 2 2 Argentina 1990 0.718 21.44154828 2050907629
## 3 3 Australia 1990 0.871 22.62600208 6704213698
# Drop the first column (X in the preview above).
d1 <- d[, -1]
# Keep only the columns used downstream, in this order.
d01 <- d1 %>% dplyr::select(Year, hdi, me_normalized, Name)
# Discard every row that has a missing value in any kept column.
d02 <- stats::na.omit(d01)
dim(d02)
## [1] 3852 4
# Encode each country name as a numeric ID (1-95) so that the Name column can
# be included in the numeric matrix used for time-series forecasting.
# Replace each country name with its 1-based position in the lookup vector
# below, stored as a character string ("1" ... "95"). A name that does not
# appear in the lookup (or is NA) becomes NA.
dn <- d02 %>%
  dplyr::mutate(
    Name = as.character(match(Name, c(
      # 1-12
      "Algeria", "Argentina", "Australia", "Austria", "Bahrain",
      "Bangladesh", "Belgium", "Belize", "Benin", "Bolivia", "Botswana",
      "Brazil",
      # 13-24
      "Brunei Darussalam", "Bulgaria", "Burundi", "Cambodia", "Cameroon",
      "Canada", "Chile", "China", "Colombia",
      "Congo (Democratic Republic of the)", "Cote d'Ivoire", "Cyprus",
      # 25-36
      "Denmark", "Djibouti", "Ecuador", "Egypt", "El Salvador", "Eswatini",
      "Ethiopia", "Finland", "France", "Gambia", "Ghana", "Greece",
      # 37-48
      "Haiti", "Honduras", "Hungary", "Iceland", "India", "Indonesia",
      "Iran", "Ireland", "Israel", "Italy", "Jamaica", "Japan",
      # 49-60
      "Jordan", "Kenya", "Lebanon", "Luxembourg", "Madagascar", "Malaysia",
      "Malta", "Mauritania", "Mexico", "Mozambique", "Myanmar", "Nepal",
      # 61-72
      "Netherlands", "New Zealand", "Nicaragua", "Nigeria", "Oman",
      "Papua New Guinea", "Paraguay", "Peru", "Philippines", "Poland",
      "Portugal", "Qatar",
      # 73-84
      "Romania", "Rwanda", "Saudi Arabia", "Senegal", "Seychelles",
      "Singapore", "South Africa", "South Korea", "Sri Lanka", "Sudan",
      "Switzerland", "Syrian Arab Republic",
      # 85-95
      "Tanzania", "Thailand", "Tunisia", "Turkey", "United Kingdom",
      "United States", "Uruguay", "Vietnam", "Yemen", "Zambia", "Zimbabwe"
    )))
  ) #ok
#write.csv(dn,"d:/UPWORK-IRL/dn-clean.csv")
#---
# Inspect the encoded frame. Note in the output below that me_normalized was
# read as character, not numeric.
str(dn)
## 'data.frame': 3852 obs. of 4 variables:
## $ Year : int 1990 1990 1990 1990 1990 1990 1990 1990 1990 1990 ...
## $ hdi : num 0.572 0.718 0.871 0.803 0.749 0.394 0.813 0.61 0.364 0.551 ...
## $ me_normalized: chr "20.62263761" "21.44154828" "22.62600208" "21.40266985" ...
## $ Name : chr "1" "2" "3" "4" ...
## - attr(*, "na.action")= 'omit' Named int [1:17] 3853 3854 3855 3856 3857 3858 3859 3860 3861 3862 ...
## ..- attr(*, "names")= chr [1:17] "3853" "3854" "3855" "3856" ...
# Turn the country ID strings into numbers; names without an ID are NA.
dn$Name <- as.numeric(dn$Name)
summary(dn$Name)
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 1.00 25.00 48.00 48.09 72.00 95.00 1447
# me_normalized must be parsed explicitly. as.ts() converts a data frame with
# data.matrix(), which replaces a character column by factor level codes, so
# without this step the series would hold arbitrary integer codes instead of
# the expenditure values. as.character() guards against a factor column.
me_parsed <- suppressWarnings(as.numeric(as.character(dn$me_normalized)))
me_unparsed <- sum(is.na(me_parsed) & !is.na(dn$me_normalized))
if (me_unparsed > 0) {
  # Report entries that are not numbers; na.omit() below removes those rows.
  warning(
    me_unparsed,
    " me_normalized value(s) could not be parsed as numeric and are dropped",
    call. = FALSE
  )
}
dn$me_normalized <- me_parsed
# Drop rows with any NA (unmatched country names, unparsable expenditure).
dn_clean <- na.omit(dn)
# Treat the stacked rows as one multivariate series indexed by row number.
dn_clean_ts <- as.ts(dn_clean)
#write.csv(dn_clean_ts,"d:/UPWORK-IRL/dn_clean_ts.csv")
write.csv(dn_clean_ts, "dn_clean_ts.csv")
# Keep the first 2315 rows as the training series; row 2315 is the first
# observation with Year = 2018.
# The training series runs from the first row through the first observation
# with Year = 2018 (row 2315 in the run recorded below). The cut-off is looked
# up from the data rather than hard-coded, so it stays correct if the number
# of cleaned rows changes.
# NOTE(review): assumes rows are ordered by Year, as in the recorded output.
first_2018_row <- match(2018, as.numeric(dn_clean_ts[, "Year"]))
if (is.na(first_2018_row)) {
  stop("no row with Year == 2018 in dn_clean_ts", call. = FALSE)
}
df2315 <- head(dn_clean_ts, first_2018_row)
tail(df2315)
## Time Series:
## Start = 2310
## End = 2315
## Frequency = 1
## Year hdi me_normalized Name
## 2310 2017 0.443 2368 90
## 2311 2017 0.933 1200 91
## 2312 2017 0.912 1827 92
## 2313 2017 0.903 785 94
## 2314 2017 0.775 786 95
## 2315 2018 0.942 1988 1
#View(df2315)
# Using the series that ends at row 2315 (the first Year = 2018 row), forecast
# the years 2019-2023. The horizon is h = 475: up to 95 countries per year
# times 5 years (95 * 5 = 475).
# Forecast 475 steps ahead (95 * 5) from the training series, save the result
# as CSV and plot it.
fcastdf2315 <- forecast::forecast(df2315, h = 95 * 5)
#head(fcastdf2315)
write.csv(x = fcastdf2315, file = "fcastdf2315.csv")
plot(fcastdf2315) #ok
# HDI versus normalized military expenditure (me_normalized) by country,
# 1990-2018, for observations with hdi below 0.8.
# Read the cleaned series back from disk and drop its first column (the row
# names that write.csv stores by default).
dc <- read.csv(file = "dn_clean_ts.csv")
dc <- dc[, -1]
# Restrict to observations with hdi below 0.8.
dc1 <- dplyr::filter(dc, hdi < 0.8)
head(dc1)
## Year hdi me_normalized Name
## 1 1990 0.572 1114 1
## 2 1990 0.718 1406 2
## 3 1990 0.749 653 5
## 4 1990 0.394 843 6
## 5 1990 0.610 28 8
## 6 1990 0.364 140 9
# Scatter plot of me_normalized against hdi, with points coloured by Year.
g1 <- ggplot(
  data = dc1,
  mapping = aes(x = hdi, y = me_normalized, color = Year)
) +
  geom_point(size = 0.9, alpha = 0.8) +
  theme_light() +
  ggtitle("hdi and military expenditure of a nation from 1990-2018")
g1
# Read the saved forecasts back and drop the first column (the row names that
# write.csv stores by default). The remaining columns are Time, Series,
# Point.Forecast and the 80% / 95% bounds, as shown in the output below.
dexp <- read.csv(file = "fcastdf2315.csv")
dexp1 <- dexp[, -1]
# Keep the forecast rows whose point forecast is at least 0.8.
# NOTE(review): the threshold is applied to every series (Year, Name, ...),
# not only to hdi - confirm this is intended.
dfrcst <- dplyr::filter(dexp1, Point.Forecast >= 0.8)
head(dfrcst)
## Time Series Point.Forecast Lo.80 Hi.80 Lo.95 Hi.95
## 1 2316 Year 2018.003 2017.863 2018.143 2017.789 2018.218
## 2 2317 Year 2018.015 2017.818 2018.213 2017.713 2018.317
## 3 2318 Year 2018.027 2017.786 2018.269 2017.658 2018.397
## 4 2319 Year 2018.039 2017.761 2018.318 2017.613 2018.466
## 5 2320 Year 2018.051 2017.740 2018.363 2017.575 2018.528
## 6 2321 Year 2018.063 2017.722 2018.405 2017.542 2018.585
# From those rows, keep only the forecasts of the Name series.
d_name <- dplyr::filter(dfrcst, Series == "Name")
head(d_name)
## Time Series Point.Forecast Lo.80 Hi.80 Lo.95 Hi.95
## 1 2316 Name 2.32851 1.871732 2.785288 1.6299278 3.027092
## 2 2317 Name 2.32851 1.683384 2.973636 1.3418750 3.315145
## 3 2318 Name 2.32851 1.535759 3.121262 1.1161012 3.540919
## 4 2319 Name 2.32851 1.408969 3.248051 0.9221929 3.734827
## 5 2320 Name 2.32851 1.295262 3.361758 0.7482935 3.908727
## 6 2321 Name 2.32851 1.190660 3.466361 0.5883180 4.068702