## Install required packages
# Only packages that are not already installed are fetched, so re-running the
# script does not re-download everything each time. ggalt is included because
# the script loads it with library(ggalt) further down but never installed it.
required_packages <- c(
  "readr", "tidyr", "dplyr", "ggplot2", "rworldmap", "DT",
  "ggcorrplot", "viridis", "plotly", "RCurl", "ggalt"
)
missing_packages <- setdiff(required_packages, rownames(installed.packages()))
if (length(missing_packages) > 0) {
  install.packages(missing_packages)
}
library(readr)
library(tidyr)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(ggplot2)
library(rworldmap)
## Loading required package: sp
## ### Welcome to rworldmap ###
## For a short introduction type : vignette('rworldmap')
library(DT)
library(ggcorrplot)
library(viridis)
## Loading required package: viridisLite
library(plotly)
##
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
##
## last_plot
## The following object is masked from 'package:stats':
##
## filter
## The following object is masked from 'package:graphics':
##
## layout
library(RCurl)
##
## Attaching package: 'RCurl'
## The following object is masked from 'package:tidyr':
##
## complete
getwd()
## [1] "/cloud/project/data"
# Single definition of the data directory, so the location only has to be
# changed in one place.
data_dir <- "/cloud/project/data"
# NOTE(review): setwd() is kept so that any relative paths used elsewhere in
# the script keep resolving as before; the reads below do not depend on it.
setwd(data_dir)
# One CSV file per year, 2015-2019.
h_2015 <- read.csv(file.path(data_dir, "2015.csv"))
h_2016 <- read.csv(file.path(data_dir, "2016.csv"))
h_2017 <- read.csv(file.path(data_dir, "2017.csv"))
h_2018 <- read.csv(file.path(data_dir, "2018.csv"))
h_2019 <- read.csv(file.path(data_dir, "2019.csv"))
# Keep only the columns of interest from the 2015 data set
names(h_2015)
## [1] "Country" "Region"
## [3] "Happiness.Rank" "Happiness.Score"
## [5] "Standard.Error" "Economy..GDP.per.Capita."
## [7] "Family" "Health..Life.Expectancy."
## [9] "Freedom" "Trust..Government.Corruption."
## [11] "Generosity" "Dystopia.Residual"
pre15 <- select(
  h_2015,
  Country, Region, Happiness.Score,
  Economy..GDP.per.Capita., Health..Life.Expectancy.
)
names(pre15)
## [1] "Country" "Region"
## [3] "Happiness.Score" "Economy..GDP.per.Capita."
## [5] "Health..Life.Expectancy."
# Keep only the columns of interest from the 2016 data set
names(h_2016)
## [1] "Country" "Region"
## [3] "Happiness.Rank" "Happiness.Score"
## [5] "Lower.Confidence.Interval" "Upper.Confidence.Interval"
## [7] "Economy..GDP.per.Capita." "Family"
## [9] "Health..Life.Expectancy." "Freedom"
## [11] "Trust..Government.Corruption." "Generosity"
## [13] "Dystopia.Residual"
pre16 <- select(
  h_2016,
  Country, Region, Happiness.Score,
  Economy..GDP.per.Capita., Health..Life.Expectancy.
)
names(pre16)
## [1] "Country" "Region"
## [3] "Happiness.Score" "Economy..GDP.per.Capita."
## [5] "Health..Life.Expectancy."
# Selecting variables from data set for year 2017
names(h_2017)
## [1] "Country" "Happiness.Rank"
## [3] "Happiness.Score" "Whisker.high"
## [5] "Whisker.low" "Economy..GDP.per.Capita."
## [7] "Family" "Health..Life.Expectancy."
## [9] "Freedom" "Generosity"
## [11] "Trust..Government.Corruption." "Dystopia.Residual"
pre17 <- h_2017 %>%
  select(
    Country, Happiness.Score,
    Economy..GDP.per.Capita., Health..Life.Expectancy.
  )
names(pre17)
## [1] "Country" "Happiness.Score"
## [3] "Economy..GDP.per.Capita." "Health..Life.Expectancy."
# The 2017 data has no Region column, so Region is looked up from the
# 2015 data by country name.
ref <- select(h_2015, Country, Region)
# The join key is converted to character on both sides first: joining factors
# with different levels is coerced to character by dplyr anyway, with a
# warning. Columns are then ordered by name rather than by position.
pre17 <- pre17 %>%
  mutate(Country = as.character(Country)) %>%
  left_join(mutate(ref, Country = as.character(Country)), by = "Country") %>%
  select(
    Country, Region, Happiness.Score,
    Economy..GDP.per.Capita., Health..Life.Expectancy.
  )
str(pre17)
## 'data.frame': 155 obs. of 5 variables:
## $ Country : chr "Norway" "Denmark" "Iceland" "Switzerland" ...
## $ Region : Factor w/ 10 levels "Australia and New Zealand",..: 10 10 10 10 10 10 6 1 10 1 ...
## $ Happiness.Score : num 7.54 7.52 7.5 7.49 7.47 ...
## $ Economy..GDP.per.Capita.: num 1.62 1.48 1.48 1.56 1.44 ...
## $ Health..Life.Expectancy.: num 0.797 0.793 0.834 0.858 0.809 ...
colSums(is.na(pre17))
## Country Region Happiness.Score
## 0 6 0
## Economy..GDP.per.Capita. Health..Life.Expectancy.
## 0 0
# Rows whose country had no match in the 2015 lookup have Region == NA and
# are dropped here.
pre17 <- na.omit(pre17)
head(pre17)
## Country Region Happiness.Score Economy..GDP.per.Capita.
## 1 Norway Western Europe 7.537 1.616463
## 2 Denmark Western Europe 7.522 1.482383
## 3 Iceland Western Europe 7.504 1.480633
## 4 Switzerland Western Europe 7.494 1.564980
## 5 Finland Western Europe 7.469 1.443572
## 6 Netherlands Western Europe 7.377 1.503945
## Health..Life.Expectancy.
## 1 0.7966665
## 2 0.7925655
## 3 0.8335521
## 4 0.8581313
## 5 0.8091577
## 6 0.8106961
# Selecting variables from data set for year 2018
names(h_2018)
## [1] "Overall.rank" "Country.or.region"
## [3] "Score" "GDP.per.capita"
## [5] "Social.support" "Healthy.life.expectancy"
## [7] "Freedom.to.make.life.choices" "Generosity"
## [9] "Perceptions.of.corruption"
pre_18 <- h_2018 %>%
  select(Country.or.region, Score, GDP.per.capita, Healthy.life.expectancy)
names(pre_18)
## [1] "Country.or.region" "Score"
## [3] "GDP.per.capita" "Healthy.life.expectancy"
# Use the same key column name as the other years.
pre18 <- pre_18 %>% rename(Country = Country.or.region)
head(pre18)
## Country Score GDP.per.capita Healthy.life.expectancy
## 1 Finland 7.632 1.305 0.874
## 2 Norway 7.594 1.456 0.861
## 3 Denmark 7.555 1.351 0.868
## 4 Iceland 7.495 1.343 0.914
## 5 Switzerland 7.487 1.420 0.927
## 6 Netherlands 7.441 1.361 0.878
# The 2018 data has no Region column, so Region is looked up from the
# 2015 data by country name.
ref <- select(h_2015, Country, Region)
# The join key is converted to character on both sides first: joining factors
# with different levels is coerced to character by dplyr anyway, with a
# warning. Columns are then ordered by name rather than by position.
pre18 <- pre18 %>%
  mutate(Country = as.character(Country)) %>%
  left_join(mutate(ref, Country = as.character(Country)), by = "Country") %>%
  select(Country, Region, Score, GDP.per.capita, Healthy.life.expectancy)
str(pre18)
## 'data.frame': 156 obs. of 5 variables:
## $ Country : chr "Finland" "Norway" "Denmark" "Iceland" ...
## $ Region : Factor w/ 10 levels "Australia and New Zealand",..: 10 10 10 10 10 10 6 1 10 1 ...
## $ Score : num 7.63 7.59 7.55 7.5 7.49 ...
## $ GDP.per.capita : num 1.3 1.46 1.35 1.34 1.42 ...
## $ Healthy.life.expectancy: num 0.874 0.861 0.868 0.914 0.927 0.878 0.896 0.876 0.913 0.91 ...
colSums(is.na(pre18))
## Country Region Score
## 0 6 0
## GDP.per.capita Healthy.life.expectancy
## 0 0
# Rows whose country had no match in the 2015 lookup have Region == NA and
# are dropped here.
pre18 <- na.omit(pre18)
# Selecting variables from data set for year 2019
names(h_2019)
## [1] "Overall.rank" "Country.or.region"
## [3] "Score" "GDP.per.capita"
## [5] "Social.support" "Healthy.life.expectancy"
## [7] "Freedom.to.make.life.choices" "Generosity"
## [9] "Perceptions.of.corruption"
pre_19 <- h_2019 %>%
  select(Country.or.region, Score, GDP.per.capita, Healthy.life.expectancy)
names(pre_19)
## [1] "Country.or.region" "Score"
## [3] "GDP.per.capita" "Healthy.life.expectancy"
# Use the same key column name as the other years.
pre19 <- pre_19 %>% rename(Country = Country.or.region)
# Adding the region column: lookup table containing Country and Region,
# taken from the 2015 data.
ref <- select(h_2015, Country, Region)
# The join key is converted to character on both sides first: joining factors
# with different levels is coerced to character by dplyr anyway, with a
# warning. Columns are then ordered by name rather than by position.
pre19 <- pre19 %>%
  mutate(Country = as.character(Country)) %>%
  left_join(mutate(ref, Country = as.character(Country)), by = "Country") %>%
  select(Country, Region, Score, GDP.per.capita, Healthy.life.expectancy)
str(pre19)
## 'data.frame': 156 obs. of 5 variables:
## $ Country : chr "Finland" "Denmark" "Norway" "Iceland" ...
## $ Region : Factor w/ 10 levels "Australia and New Zealand",..: 10 10 10 10 10 10 10 1 6 10 ...
## $ Score : num 7.77 7.6 7.55 7.49 7.49 ...
## $ GDP.per.capita : num 1.34 1.38 1.49 1.38 1.4 ...
## $ Healthy.life.expectancy: num 0.986 0.996 1.028 1.026 0.999 ...
colSums(is.na(pre19))
## Country Region Score
## 0 7 0
## GDP.per.capita Healthy.life.expectancy
## 0 0
# Indices of the rows that have exactly one missing value.
# NOTE(review): the result is stored in a variable named `c`, which shadows
# base::c for non-call uses. It is not read anywhere in this section; the
# assignment is kept in case later code depends on it - consider renaming.
c <- which(rowSums(is.na(pre19)) == 1)
# Rows whose country had no match in the 2015 lookup have Region == NA and
# are dropped here.
pre19 <- na.omit(pre19)
# Give every yearly data set the same column names: Score, GDP and Life
fin15 <- rename(
  pre15,
  "Score" = "Happiness.Score",
  "GDP" = "Economy..GDP.per.Capita.",
  "Life" = "Health..Life.Expectancy."
)
fin16 <- rename(
  pre16,
  "Score" = "Happiness.Score",
  "GDP" = "Economy..GDP.per.Capita.",
  "Life" = "Health..Life.Expectancy."
)
fin17 <- rename(
  pre17,
  "Score" = "Happiness.Score",
  "GDP" = "Economy..GDP.per.Capita.",
  "Life" = "Health..Life.Expectancy."
)
# The 2018 and 2019 data already call the score column "Score".
fin18 <- rename(
  pre18,
  "Score" = "Score",
  "GDP" = "GDP.per.capita",
  "Life" = "Healthy.life.expectancy"
)
fin19 <- rename(
  pre19,
  "Score" = "Score",
  "GDP" = "GDP.per.capita",
  "Life" = "Healthy.life.expectancy"
)
head(pre19)
## Country Region Score GDP.per.capita Healthy.life.expectancy
## 1 Finland Western Europe 7.769 1.340 0.986
## 2 Denmark Western Europe 7.600 1.383 0.996
## 3 Norway Western Europe 7.554 1.488 1.028
## 4 Iceland Western Europe 7.494 1.380 1.026
## 5 Netherlands Western Europe 7.488 1.396 0.999
## 6 Switzerland Western Europe 7.480 1.452 1.052
# Scores for 2015-2017 side by side, one row per country. Rows left without
# a match after each left join are removed by na.omit(). After the second
# join the unsuffixed Region and Score columns are the ones from fin17.
pre_happiness <- fin15 %>%
  left_join(fin16, by = "Country") %>%
  na.omit() %>%
  left_join(fin17, by = "Country") %>%
  na.omit() %>%
  select(
    Country, Region,
    Score_2015 = Score.x, Score_2016 = Score.y, Score_2017 = Score
  )
## Warning: Column `Country` joining factors with different levels, coercing to
## character vector
pre_happiness[["Score_2017"]] <- round(pre_happiness[["Score_2017"]], 2)
# Checking for missing values in the common data set
sum(is.na(pre_happiness))
## [1] 0
head(pre_happiness)
## Country Region Score_2015 Score_2016 Score_2017
## 1 Switzerland Western Europe 7.587 7.509 7.49
## 2 Iceland Western Europe 7.561 7.501 7.50
## 3 Denmark Western Europe 7.527 7.526 7.52
## 4 Norway Western Europe 7.522 7.498 7.54
## 5 Canada North America 7.427 7.404 7.32
## 6 Finland Western Europe 7.406 7.413 7.47
# Extend the 2015-2017 table with the 2018 and 2019 scores. After both joins
# Score.x is the fin18 score, Score.y the fin19 score, and the unsuffixed
# Region column is the one from fin19.
happiness <- pre_happiness %>%
  left_join(fin18, by = "Country") %>%
  na.omit() %>%
  left_join(fin19, by = "Country") %>%
  na.omit() %>%
  select(
    Country, Region, Score_2015, Score_2016, Score_2017,
    Score_2018 = Score.x, Score_2019 = Score.y
  )
happiness[["Score_2019"]] <- round(happiness[["Score_2019"]], 2)
# Checking for missing values in the common data set
sum(is.na(happiness))
## [1] 0
head(happiness)
## Country Region Score_2015 Score_2016 Score_2017 Score_2018
## 1 Switzerland Western Europe 7.587 7.509 7.49 7.487
## 2 Iceland Western Europe 7.561 7.501 7.50 7.495
## 3 Denmark Western Europe 7.527 7.526 7.52 7.555
## 4 Norway Western Europe 7.522 7.498 7.54 7.594
## 5 Canada North America 7.427 7.404 7.32 7.328
## 6 Finland Western Europe 7.406 7.413 7.47 7.632
## Score_2019
## 1 7.48
## 2 7.49
## 3 7.60
## 4 7.55
## 5 7.28
## 6 7.77
# Wide table holding Score, GDP and Life for every year, restricted to the
# Country/Region pairs present in all five yearly data sets.
master1 <- fin17 %>%
  inner_join(fin16, by = c("Country", "Region")) %>%
  inner_join(fin15, by = c("Country", "Region"))
## Warning: Column `Country` joining character vector and factor, coercing into
## character vector
## Warning: Column `Country` joining character vector and factor, coercing into
## character vector
master <- fin19 %>%
  inner_join(fin18, by = c("Country", "Region")) %>%
  inner_join(master1, by = c("Country", "Region"))
names(master)
## [1] "Country" "Region" "Score.x.x" "GDP.x.x" "Life.x.x" "Score.y.x"
## [7] "GDP.y.x" "Life.y.x" "Score.x.y" "GDP.x.y" "Life.x.y" "Score.y.y"
## [13] "GDP.y.y" "Life.y.y" "Score" "GDP" "Life"
# Translate the join suffixes into year labels: .x.x comes from fin19, .y.x
# from fin18, .x.y from fin17, .y.y from fin16, unsuffixed from fin15.
finmaster <- rename(
  master,
  "Score_2019" = "Score.x.x",
  "GDP_2019" = "GDP.x.x",
  "Life Expectancy_2019" = "Life.x.x",
  "Score_2018" = "Score.y.x",
  "GDP_2018" = "GDP.y.x",
  "Life Expectancy_2018" = "Life.y.x",
  "Score_2017" = "Score.x.y",
  "GDP_2017" = "GDP.x.y",
  "Life Expectancy_2017" = "Life.x.y",
  "Score_2016" = "Score.y.y",
  "GDP_2016" = "GDP.y.y",
  "Life Expectancy_2016" = "Life.y.y",
  "Score_2015" = "Score",
  "GDP_2015" = "GDP",
  "Life Expectancy_2015" = "Life"
)
# Keep Country and Region first, then reverse the remaining columns so the
# years run from 2015 to 2019.
finmaster <- finmaster[, c(1:2, 17:3)]
head(finmaster)
## Country Region Life Expectancy_2015 GDP_2015 Score_2015
## 1 Finland Western Europe 0.88911 1.29025 7.406
## 2 Denmark Western Europe 0.87464 1.32548 7.527
## 3 Norway Western Europe 0.88521 1.45900 7.522
## 4 Iceland Western Europe 0.94784 1.30232 7.561
## 5 Netherlands Western Europe 0.89284 1.32944 7.378
## 6 Switzerland Western Europe 0.94143 1.39651 7.587
## Life Expectancy_2016 GDP_2016 Score_2016 Life Expectancy_2017 GDP_2017
## 1 0.81091 1.40598 7.413 0.8091577 1.443572
## 2 0.79504 1.44178 7.526 0.7925655 1.482383
## 3 0.79579 1.57744 7.498 0.7966665 1.616463
## 4 0.86733 1.42666 7.501 0.8335521 1.480633
## 5 0.81231 1.46468 7.339 0.8106961 1.503945
## 6 0.86303 1.52733 7.509 0.8581313 1.564980
## Score_2017 Life Expectancy_2018 GDP_2018 Score_2018 Life Expectancy_2019
## 1 7.469 0.874 1.305 7.632 0.986
## 2 7.522 0.868 1.351 7.555 0.996
## 3 7.537 0.861 1.456 7.594 1.028
## 4 7.504 0.914 1.343 7.495 1.026
## 5 7.377 0.878 1.361 7.441 0.999
## 6 7.494 0.927 1.420 7.487 1.052
## GDP_2019 Score_2019
## 1 1.340 7.769
## 2 1.383 7.600
## 3 1.488 7.554
## 4 1.380 7.494
## 5 1.396 7.488
## 6 1.452 7.480
# Reshape each indicator to long format (one row per Country/Region/Year)
# and join the three indicators into a single table.
score.df <- happiness %>%
  select(
    Country, Region,
    "2015" = "Score_2015", "2016" = "Score_2016", "2017" = "Score_2017",
    "2018" = "Score_2018", "2019" = "Score_2019"
  )
score.comb <- score.df %>% gather(Year, HappinessScore, 3:7)
GDP.df <- finmaster %>%
  select(
    Country, Region,
    "2015" = "GDP_2015", "2016" = "GDP_2016", "2017" = "GDP_2017",
    "2018" = "GDP_2018", "2019" = "GDP_2019"
  )
GDP.comb <- GDP.df %>% gather(Year, GDP, 3:7)
life.df <- finmaster %>%
  select(
    Country, Region,
    "2015" = "Life Expectancy_2015", "2016" = "Life Expectancy_2016",
    "2017" = "Life Expectancy_2017", "2018" = "Life Expectancy_2018",
    "2019" = "Life Expectancy_2019"
  )
life.comb <- life.df %>% gather(Year, LifeExpectancy, 3:7)
combined <- score.comb %>%
  inner_join(GDP.comb, by = c("Country", "Region", "Year")) %>%
  inner_join(life.comb, by = c("Country", "Region", "Year"))
head(combined)
## Country Region Year HappinessScore GDP LifeExpectancy
## 1 Switzerland Western Europe 2015 7.587 1.39651 0.94143
## 2 Iceland Western Europe 2015 7.561 1.30232 0.94784
## 3 Denmark Western Europe 2015 7.527 1.32548 0.87464
## 4 Norway Western Europe 2015 7.522 1.45900 0.88521
## 5 Canada North America 2015 7.427 1.32629 0.90563
## 6 Finland Western Europe 2015 7.406 1.29025 0.88911
# Aggregating data by regional affiliation
#
# region_means() returns the mean Score, GDP and Life per region of one
# yearly data set, with the region in column Group.1. Only the numeric
# columns are passed to aggregate(): averaging the Country and Region columns
# as well produced two all-NA columns and an "argument is not numeric or
# logical" warning for every region.
region_means <- function(df) {
  aggregate(
    x = df[, c("Score", "GDP", "Life"), drop = FALSE],
    by = list(as.factor(df$Region)),
    FUN = mean
  )
}
line15 <- region_means(fin15)
line15new <- rename(line15, RegionName = Group.1)
line16 <- region_means(fin16)
line16new <- rename(line16, RegionName = Group.1)
line17 <- region_means(fin17)
line17new <- rename(line17, RegionName = Group.1)
line18 <- region_means(fin18)
line18new <- rename(line18, RegionName = Group.1)
line19 <- region_means(fin19)
line19new <- rename(line19, RegionName = Group.1)
# Mean happiness score per region for 2015: one point per region with a
# segment drawn from zero up to the score.
theme_set(theme_bw())
ggplot(line15new, aes(x = RegionName, y = Score)) +
  geom_point(aes(color = RegionName), size = 3) +
  geom_segment(
    aes(x = RegionName, xend = RegionName, y = 0, yend = Score)
  ) +
  theme(axis.text.x = element_text(angle = 90)) +
  ggtitle("Happiness Score by Region for 2015")
# Checking change in happiness score
# After the two joins Score.x is the 2015 mean, Score.y the 2016 mean and the
# unsuffixed Score the 2017 mean.
overallhap <- line15new %>%
  inner_join(line16new, by = "RegionName") %>%
  inner_join(line17new, by = "RegionName") %>%
  select(RegionName, "2015" = Score.x, "2016" = Score.y, "2017" = Score) %>%
  gather(Year, AverageHappinessScore, 2:4)
theme_set(theme_bw())
ggplot(
  overallhap,
  aes(x = Year, y = AverageHappinessScore, color = RegionName)
) +
  geom_line(aes(group = RegionName)) +
  geom_text(aes(label = RegionName), size = 0) +
  geom_point() +
  theme(
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank()
  )
head(overallhap)
## RegionName Year AverageHappinessScore
## 1 Australia and New Zealand 2015 7.285000
## 2 Central and Eastern Europe 2015 5.332931
## 3 Eastern Asia 2015 5.626167
## 4 Latin America and Caribbean 2015 6.144682
## 5 Middle East and Northern Africa 2015 5.406900
## 6 North America 2015 7.273000
# Regional means for 2018 and 2019.
# After joining line18new with line19new, Score.x is the 2018 mean and
# Score.y the 2019 mean; the unsuffixed Score added by the line17new join is
# the 2017 mean. These were previously labelled 2017/2018/2019 in that order,
# so dropping "2017" discarded the real 2018 values and left the 2019 means
# labelled 2018 and the 2017 means labelled 2019.
# The join with line17new is kept only so the set of regions stays the same
# as before; its Score column is not selected (2017 is already in overallhap).
overallhap1 <- inner_join(line18new, line19new, by = "RegionName") %>%
  inner_join(line17new, by = "RegionName") %>%
  select(RegionName, "2018" = Score.x, "2019" = Score.y) %>%
  gather(Year, AverageHappinessScore, 2:3)
head(overallhap1)
## (printed output omitted: the values previously shown here were the 2019
## means labelled as 2018; re-run to regenerate)
totalhapreg <- rbind(overallhap, overallhap1)
## Average happiness score per region over the five years, one line per region
theme_set(theme_bw())
ggplot(
  totalhapreg,
  aes(x = Year, y = AverageHappinessScore, color = RegionName)
) +
  geom_line(aes(group = RegionName)) +
  geom_text(aes(label = RegionName), size = 0) +
  geom_point() +
  theme(
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank()
  )
## Interactive plotly chart to explore the processed data further:
## GDP against life expectancy, coloured by region, with the marker size
## taken from the happiness score.
colors <- c(
  "red", "green", "blue", "purple", "navyblue",
  "yellow", "darkgrey", "orange", "violet", "maroon"
)
fig <- plot_ly(
  combined,
  x = ~GDP,
  y = ~LifeExpectancy,
  text = ~Country,
  type = 'scatter',
  mode = 'markers',
  color = ~Region,
  colors = colors,
  # Range of the bubbles' sizes
  sizes = c(1, 50),
  marker = list(size = ~HappinessScore, opacity = 0.75, sizemode = 'diameter')
) %>%
  layout(
    title = 'xxx',
    xaxis = list(showgrid = TRUE),
    yaxis = list(showgrid = TRUE)
  )
fig
# plotly bubble chart with an explicit size reference
colors <- c("red", "green", "blue", "purple", "navyblue", "yellow", "darkgrey", "orange", "violet", "maroon")
desired_maximum_marker_size <- 1000
# One-column data frame of the size values; used only to find the maximum.
your_list_of_size_values <- combined['HappinessScore']
# NOTE(review): with desired_maximum_marker_size = 1000 this evaluates to
# max(HappinessScore), so size / sizeref is at most 1 - confirm the intended
# marker scale.
sizeref <- 1000.0 * max(your_list_of_size_values) / (desired_maximum_marker_size**1)
# The marker size is mapped per point with ~HappinessScore (as in the first
# bubble chart). Passing the one-column data frame itself does not give
# plotly a per-point size vector.
fig <- plot_ly(combined, x = ~GDP, y = ~LifeExpectancy, text = ~Country, type = 'scatter', mode = 'markers', color = ~Region, colors = colors,
  # Choosing the range of the bubbles' sizes:
  sizes = c(50, 1000),
  marker = list(size = ~HappinessScore, opacity = 1, sizemode = 'diameter', sizeref = sizeref))
# NOTE(review): this figure is assigned but not printed in this section.
fig <- fig %>% layout(title = '5 year average GDP vs Life Expectancy',
  xaxis = list(showgrid = TRUE),
  yaxis = list(showgrid = TRUE))
# Tuning charts
# Quick scatter of GDP against happiness score, coloured by region. The trace
# type and mode are stated explicitly; when they are left out plotly picks
# 'scatter' / 'markers' itself and prints a notice on every run.
fig <- plot_ly(
  data = combined,
  x = ~GDP,
  y = ~HappinessScore,
  color = ~Region,
  type = "scatter",
  mode = "markers"
)
fig
## Warning in RColorBrewer::brewer.pal(N, "Set2"): n too large, allowed maximum for palette Set2 is 8
## Returning the palette you asked for with that many colors
## Warning in RColorBrewer::brewer.pal(N, "Set2"): n too large, allowed maximum for palette Set2 is 8
## Returning the palette you asked for with that many colors
##Plotting chart to see which country is getting happier and which country is getting unhappier.
library(ggplot2)
library(ggalt)
## Registered S3 methods overwritten by 'ggalt':
## method from
## grid.draw.absoluteGrob ggplot2
## grobHeight.absoluteGrob ggplot2
## grobWidth.absoluteGrob ggplot2
## grobX.absoluteGrob ggplot2
## grobY.absoluteGrob ggplot2
# 2015 and 2019 scores side by side, one row per country in fin15
# (Score.x comes from fin15, Score.y from fin19).
happiness.index <- left_join(fin15, fin19, by = "Country") %>%
  select(Country, Score.x, Score.y)
## Warning: Column `Country` joining factor and character vector, coercing into
## character vector
# Fix the factor levels to the current row order so the y axis keeps it.
happiness.index$Country <- factor(
  happiness.index$Country,
  levels = as.character(happiness.index$Country)
)
colnames(happiness.index) <- c("Country", "Score_2015", "Score_2019")
# Countries whose score rose between 2015 and 2019. The second layer draws
# guide lines for the ten largest increases; top_n() is given the ranking
# column explicitly instead of relying on "last column" selection.
# The title now matches the filter (Score_2019 - Score_2015 > 0); it
# previously carried the title of the other chart.
ggplot() +
  geom_dumbbell(
    data = happiness.index %>% filter(Score_2019 - Score_2015 > 0),
    aes(x = Score_2019, xend = Score_2015, y = Country, group = Country),
    color = "coral1", colour_xend = "coral4", size = 1.5
  ) +
  geom_dumbbell(
    data = happiness.index %>%
      mutate(a = Score_2019 - Score_2015) %>%
      arrange(desc(a)) %>%
      top_n(10, a),
    aes(x = Score_2019, xend = Score_2015, y = Country, group = Country),
    color = NA, dot_guide = TRUE, dot_guide_colour = 'chocolate1'
  ) +
  labs(x = NULL, y = NULL, title = 'Happier Countries: 2015 vs 2019') +
  theme(
    plot.title = element_text(face = "bold"),
    plot.background = element_rect(fill = "cornsilk1"),
    panel.background = element_rect(fill = "cornsilk1"),
    axis.text.x = element_text(size = 9),
    axis.text.y = element_text(size = 6),
    panel.grid.major.y = element_blank(),
    panel.border = element_blank()
  )
# Countries whose score fell between 2015 and 2019, with guide lines for the
# ten largest decreases.
ggplot() +
  geom_dumbbell(
    data = happiness.index %>% filter(Score_2019 - Score_2015 < 0),
    aes(x = Score_2019, xend = Score_2015, y = Country, group = Country),
    color = "coral1", colour_xend = "coral4", size = 1.5
  ) +
  geom_dumbbell(
    data = happiness.index %>%
      mutate(a = Score_2015 - Score_2019) %>%
      arrange(desc(a)) %>%
      top_n(10, a),
    aes(x = Score_2019, xend = Score_2015, y = Country, group = Country),
    color = NA, dot_guide = TRUE, dot_guide_colour = 'chocolate1'
  ) +
  labs(x = NULL, y = NULL, title = 'Less Happier Countries: 2015 vs 2019') +
  theme(
    plot.title = element_text(face = "bold"),
    plot.background = element_rect(fill = "cornsilk1"),
    panel.background = element_rect(fill = "cornsilk1"),
    axis.text.x = element_text(size = 9),
    axis.text.y = element_text(size = 6),
    panel.grid.major.y = element_blank(),
    panel.border = element_blank()
  )
##Plotting data on map
#Worldmap2015 happiness scores
world15 <- joinCountryData2Map(fin15, joinCode = "NAME", nameJoinColumn = "Country")
## 155 codes from your data successfully matched countries in the map
## 3 codes from your data failed to match with a country code in the map
## 88 codes from the map weren't represented in your data
mapCountryData(world15,nameColumnToPlot = "Score",mapTitle = "Happiness Scores across the Globe - 2015")
# World map of the 2016 happiness scores: join fin16 to the map by country
# name, then shade each country by its Score.
world16 <- joinCountryData2Map(
  fin16,
  joinCode = "NAME",
  nameJoinColumn = "Country"
)
## 154 codes from your data successfully matched countries in the map
## 3 codes from your data failed to match with a country code in the map
## 89 codes from the map weren't represented in your data
mapCountryData(
  world16,
  nameColumnToPlot = "Score",
  mapTitle = "Happiness Scores across the Globe - 2016"
)
# World map of the 2017 happiness scores: join fin17 to the map by country
# name, then shade each country by its Score.
world17 <- joinCountryData2Map(
  fin17,
  joinCode = "NAME",
  nameJoinColumn = "Country"
)
## 147 codes from your data successfully matched countries in the map
## 2 codes from your data failed to match with a country code in the map
## 96 codes from the map weren't represented in your data
mapCountryData(
  world17,
  nameColumnToPlot = "Score",
  mapTitle = "Happiness Scores across the Globe - 2017"
)
# World map of the 2018 happiness scores: join fin18 to the map by country
# name, then shade each country by its Score.
world18 <- joinCountryData2Map(
  fin18,
  joinCode = "NAME",
  nameJoinColumn = "Country"
)
## 149 codes from your data successfully matched countries in the map
## 1 codes from your data failed to match with a country code in the map
## 94 codes from the map weren't represented in your data
mapCountryData(
  world18,
  nameColumnToPlot = "Score",
  mapTitle = "Happiness Scores across the Globe - 2018"
)
# World map of the 2019 happiness scores: join fin19 to the map by country
# name, then shade each country by its Score.
world19 <- joinCountryData2Map(
  fin19,
  joinCode = "NAME",
  nameJoinColumn = "Country"
)
## 148 codes from your data successfully matched countries in the map
## 1 codes from your data failed to match with a country code in the map
## 95 codes from the map weren't represented in your data
mapCountryData(
  world19,
  nameColumnToPlot = "Score",
  mapTitle = "Happiness Scores across the Globe - 2019"
)
##Plotting chart to check changes across 3 variables
# Happiness score over the five years, one line per country, restricted to
# the countries whose 2019 Score is above 7.25.
c1 <- select(filter(fin19, Score > 7.25), Country)
c2 <- as.list(c1)
# View(c2)
viz1 <- filter(combined, Country %in% c2$Country)
ggplot(viz1, aes(x = Year, y = HappinessScore, color = Country)) +
  geom_line(aes(group = Country)) +
  # Label layer kept with size = 0, exactly as before.
  geom_text(aes(label = Country), size = 0) +
  geom_point() +
  theme(
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank()
  )
# GDP per capita over the five years, one line per country, restricted to
# the countries whose 2019 GDP value is above 1.4.
c1 <- select(filter(fin19, GDP > 1.4), Country)
c2 <- as.list(c1)
# View(c2)
viz1 <- filter(combined, Country %in% c2$Country)
ggplot(viz1, aes(x = Year, y = GDP, color = Country)) +
  geom_line(aes(group = Country)) +
  # Label layer kept with size = 0, exactly as before.
  geom_text(aes(label = Country), size = 0) +
  geom_point() +
  theme(
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank()
  )
# Life expectancy over the five years, one line per country, restricted to
# the countries whose 2019 Life value is above 1.028.
c1 <- select(filter(fin19, Life > 1.028), Country)
c2 <- as.list(c1)
# View(c2)
viz1 <- filter(combined, Country %in% c2$Country)
ggplot(viz1, aes(x = Year, y = LifeExpectancy, color = Country)) +
  geom_line(aes(group = Country)) +
  # Label layer kept with size = 0, exactly as before.
  geom_text(aes(label = Country), size = 0) +
  geom_point() +
  theme(
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank()
  )
# Scatter plots of the three variables against each other, coloured by year.
# Year is converted to a factor so every year is its own discrete colour
# group (and gets its own smoothed curve).
combined$Year <- factor(combined$Year)

# Plotting chart for Happiness score vs GDP per capita.
# The viridis palette is applied to the colour aesthetic with discrete = TRUE:
# the previous scale_fill_viridis(discrete = F) had no effect because nothing
# is mapped to fill and Year is a discrete factor.
ggplot(data = combined, aes(x = GDP, y = HappinessScore, color = Year)) +
  geom_point(alpha = 0.5) +
  scale_color_viridis(discrete = TRUE) +
  geom_smooth(se = FALSE)
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
# Plotting chart for Happiness score vs life expectancy.
ggplot(
  data = combined,
  aes(x = LifeExpectancy, y = HappinessScore, color = Year)
) +
  geom_point(alpha = 0.5) +
  scale_color_viridis(discrete = TRUE) +
  geom_smooth(se = FALSE)
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
# Plotting chart for GDP per capita vs life expectancy.
ggplot(data = combined, aes(x = GDP, y = LifeExpectancy, color = Year)) +
  geom_point(alpha = 0.5) +
  scale_color_viridis(discrete = TRUE) +
  geom_smooth(se = FALSE)
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
##Line diagrams for top and bottom performers based on 3 variables
#Top and bottom countries based on happiness score
# Top performers: happiness score over the five years for the countries whose
# 2019 Score is above 7.25.
c1 <- select(filter(fin19, Score > 7.25), Country)
c2 <- as.list(c1)
# View(c2)
viz1 <- filter(combined, Country %in% c2$Country)
ggplot(viz1, aes(x = Year, y = HappinessScore, color = Country)) +
  geom_line(aes(group = Country)) +
  # Label layer kept with size = 0, exactly as before.
  geom_text(aes(label = Country), size = 0) +
  geom_point() +
  theme(
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank()
  )
# Bottom performers: happiness score over the five years for the countries
# whose 2019 Score is below 3.9.
c1 <- select(filter(fin19, Score < 3.9), Country)
c2 <- as.list(c1)
# View(c2)
viz1 <- filter(combined, Country %in% c2$Country)
ggplot(viz1, aes(x = Year, y = HappinessScore, color = Country)) +
  geom_line(aes(group = Country)) +
  # Label layer kept with size = 0, exactly as before.
  geom_text(aes(label = Country), size = 0) +
  geom_point() +
  theme(
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank()
  )
#Top countries based on life expectancy and bottom countries based on GDP
# Top performers: life expectancy over the five years for the countries whose
# 2019 Life value is above 1.03.
c1 <- select(filter(fin19, Life > 1.03), Country)
c2 <- as.list(c1)
# View(c2)
viz1 <- filter(combined, Country %in% c2$Country)
ggplot(viz1, aes(x = Year, y = LifeExpectancy, color = Country)) +
  geom_line(aes(group = Country)) +
  # Label layer kept with size = 0, exactly as before.
  geom_text(aes(label = Country), size = 0) +
  geom_point() +
  theme(
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank()
  )
# Bottom performers: GDP per capita over the five years for the countries
# whose 2019 GDP value is below 0.325.
c1 <- select(filter(fin19, GDP < 0.325), Country)
c2 <- as.list(c1)
# View(c2)
viz1 <- filter(combined, Country %in% c2$Country)
ggplot(viz1, aes(x = Year, y = GDP, color = Country)) +
  geom_line(aes(group = Country)) +
  # Label layer kept with size = 0, exactly as before.
  geom_text(aes(label = Country), size = 0) +
  geom_point() +
  theme(
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank()
  )
# Interactive box plot of the fin15 happiness Score for each Region.
# Every observation is drawn next to its box (boxpoints = "all"); the x tick
# labels are hidden and the bottom margin is set to 100.
plot_ly(
  fin15,
  x = ~Region,
  y = ~Score,
  color = ~Region,
  type = "box",
  boxpoints = "all",
  pointpos = -1.8
) %>%
  layout(
    xaxis = list(showticklabels = FALSE),
    margin = list(b = 100)
  )
## Warning in RColorBrewer::brewer.pal(N, "Set2"): n too large, allowed maximum for palette Set2 is 8
## Returning the palette you asked for with that many colors
## Warning in RColorBrewer::brewer.pal(N, "Set2"): n too large, allowed maximum for palette Set2 is 8
## Returning the palette you asked for with that many colors
#Box plot tuning
library(scales)
##
## Attaching package: 'scales'
## The following object is masked from 'package:viridis':
##
## viridis_pal
## The following object is masked from 'package:readr':
##
## col_factor
# Horizontal box plot of the fin19 Score per Region with one jittered point
# per row coloured by Country; the legend is hidden. The ggplot object is then
# converted to an interactive plotly widget.
box <- ggplot(fin19, aes(x = Region, y = Score, color = Region)) +
  geom_boxplot() +
  geom_jitter(aes(color = Country), size = 0.5) +
  labs(title = "Happiness Score for Regions and Countries") +
  coord_flip() +
  theme(legend.position = "none")
ggplotly(box)
##Plotting data for insights in data
# Happiness score vs life expectancy for every row of `combined`, coloured by
# Region and sized by happiness score. Only the custom text is shown on hover.
plot1 <- plot_ly(
  combined,
  x = ~HappinessScore,
  y = ~LifeExpectancy,
  size = ~HappinessScore,
  color = ~Region,
  colors = c(
    "red", "orange", "yellow", "green", "cyan",
    "purple", "darkgreen", "grey", "gold", "darkblue"
  ),
  hoverinfo = "text",
  text = ~paste(
    "Happiness Score:", HappinessScore,
    "</br>Health Life Expectancy:", LifeExpectancy,
    "</br>Country:", Country,
    "</br>Region:", Region
  )
) %>%
  layout(
    xaxis = list(title = "Happiness Score"),
    yaxis = list(title = "Health Life Expectancy")
  )
# Happiness score vs GDP per capita for every row of `combined`, coloured by
# Region and sized by happiness score. Only the custom text is shown on hover.
plot2 <- plot_ly(
  combined,
  x = ~HappinessScore,
  y = ~GDP,
  size = ~HappinessScore,
  color = ~Region,
  colors = c(
    "red", "orange", "yellow", "green", "cyan",
    "purple", "darkgreen", "grey", "gold", "darkblue"
  ),
  hoverinfo = "text",
  text = ~paste(
    "Happiness Score:", HappinessScore,
    "</br>GDP per capita:", GDP,
    "</br>Country:", Country,
    "</br>Region:", Region
  )
) %>%
  layout(
    xaxis = list(title = "Happiness Score"),
    yaxis = list(title = "GDP per capita")
  )
# Display plot1 (happiness score vs life expectancy); the `##` lines that
# follow are the console messages recorded when it was rendered.
plot1
## No trace type specified:
## Based on info supplied, a 'scatter' trace seems appropriate.
## Read more about this trace type -> https://plot.ly/r/reference/#scatter
## No scatter mode specifed:
## Setting the mode to markers
## Read more about this attribute -> https://plot.ly/r/reference/#scatter-mode
## Warning: `line.width` does not currently support multiple values.
## Warning: `line.width` does not currently support multiple values.
## Warning: `line.width` does not currently support multiple values.
## Warning: `line.width` does not currently support multiple values.
## Warning: `line.width` does not currently support multiple values.
## Warning: `line.width` does not currently support multiple values.
## Warning: `line.width` does not currently support multiple values.
## Warning: `line.width` does not currently support multiple values.
## Warning: `line.width` does not currently support multiple values.
## Warning: `line.width` does not currently support multiple values.
# Display plot2 (happiness score vs GDP per capita); the `##` lines that
# follow are the console messages recorded when it was rendered.
plot2
## No trace type specified:
## Based on info supplied, a 'scatter' trace seems appropriate.
## Read more about this trace type -> https://plot.ly/r/reference/#scatter
## No scatter mode specifed:
## Setting the mode to markers
## Read more about this attribute -> https://plot.ly/r/reference/#scatter-mode
## Warning: `line.width` does not currently support multiple values.
## Warning: `line.width` does not currently support multiple values.
## Warning: `line.width` does not currently support multiple values.
## Warning: `line.width` does not currently support multiple values.
## Warning: `line.width` does not currently support multiple values.
## Warning: `line.width` does not currently support multiple values.
## Warning: `line.width` does not currently support multiple values.
## Warning: `line.width` does not currently support multiple values.
## Warning: `line.width` does not currently support multiple values.
## Warning: `line.width` does not currently support multiple values.