# getwd()
# setwd("C:/Users/taetaetae/Documents/R/NEW_Kaggle/7th_happy")
library(patchwork)
## Warning: package 'patchwork' was built under R version 3.6.3
library(plotly)
## Loading required package: ggplot2
## Warning: package 'ggplot2' was built under R version 3.6.1
##
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
##
## last_plot
## The following object is masked from 'package:stats':
##
## filter
## The following object is masked from 'package:graphics':
##
## layout
library(ggplot2)
library(tidyverse)
## Warning: package 'tidyverse' was built under R version 3.6.1
## -- Attaching packages ------------------------------------------------------------------------------- tidyverse 1.2.1 --
## √ tibble 2.1.1 √ purrr 0.3.2
## √ tidyr 0.8.3 √ dplyr 0.8.3
## √ readr 1.3.1 √ stringr 1.4.0
## √ tibble 2.1.1 √ forcats 0.4.0
## Warning: package 'tidyr' was built under R version 3.6.1
## Warning: package 'dplyr' was built under R version 3.6.1
## -- Conflicts ---------------------------------------------------------------------------------- tidyverse_conflicts() --
## x dplyr::filter() masks plotly::filter(), stats::filter()
## x dplyr::lag() masks stats::lag()
library(readr)
library(reshape)
## Warning: package 'reshape' was built under R version 3.6.1
##
## Attaching package: 'reshape'
## The following object is masked from 'package:dplyr':
##
## rename
## The following objects are masked from 'package:tidyr':
##
## expand, smiths
## The following object is masked from 'package:plotly':
##
## rename
# Read 2019.csv from the current working directory into a data frame and
# print its structure (column names, types and first values).
raw_hap <- read.csv(file = "2019.csv")
str(object = raw_hap)
## 'data.frame': 156 obs. of 9 variables:
## $ Overall.rank : int 1 2 3 4 5 6 7 8 9 10 ...
## $ Country.or.region : Factor w/ 156 levels "Afghanistan",..: 44 37 106 58 99 134 133 100 24 7 ...
## $ Score : num 7.77 7.6 7.55 7.49 7.49 ...
## $ GDP.per.capita : num 1.34 1.38 1.49 1.38 1.4 ...
## $ Social.support : num 1.59 1.57 1.58 1.62 1.52 ...
## $ Healthy.life.expectancy : num 0.986 0.996 1.028 1.026 0.999 ...
## $ Freedom.to.make.life.choices: num 0.596 0.592 0.603 0.591 0.557 0.572 0.574 0.585 0.584 0.532 ...
## $ Generosity : num 0.153 0.252 0.271 0.354 0.322 0.263 0.267 0.33 0.285 0.244 ...
## $ Perceptions.of.corruption : num 0.393 0.41 0.341 0.118 0.298 0.343 0.373 0.38 0.308 0.226 ...
# Per-column summary statistics of the raw data.
summary(object = raw_hap)
## Overall.rank Country.or.region Score GDP.per.capita
## Min. : 1.00 Afghanistan: 1 Min. :2.853 Min. :0.0000
## 1st Qu.: 39.75 Albania : 1 1st Qu.:4.545 1st Qu.:0.6028
## Median : 78.50 Algeria : 1 Median :5.380 Median :0.9600
## Mean : 78.50 Argentina : 1 Mean :5.407 Mean :0.9051
## 3rd Qu.:117.25 Armenia : 1 3rd Qu.:6.184 3rd Qu.:1.2325
## Max. :156.00 Australia : 1 Max. :7.769 Max. :1.6840
## (Other) :150
## Social.support Healthy.life.expectancy Freedom.to.make.life.choices
## Min. :0.000 Min. :0.0000 Min. :0.0000
## 1st Qu.:1.056 1st Qu.:0.5477 1st Qu.:0.3080
## Median :1.272 Median :0.7890 Median :0.4170
## Mean :1.209 Mean :0.7252 Mean :0.3926
## 3rd Qu.:1.452 3rd Qu.:0.8818 3rd Qu.:0.5072
## Max. :1.624 Max. :1.1410 Max. :0.6310
##
## Generosity Perceptions.of.corruption
## Min. :0.0000 Min. :0.0000
## 1st Qu.:0.1087 1st Qu.:0.0470
## Median :0.1775 Median :0.0855
## Mean :0.1848 Mean :0.1106
## 3rd Qu.:0.2482 3rd Qu.:0.1412
## Max. :0.5660 Max. :0.4530
##
# Compact preview of the raw data: one line per column.
glimpse(x = raw_hap)
## Observations: 156
## Variables: 9
## $ Overall.rank <int> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11...
## $ Country.or.region <fct> Finland, Denmark, Norway, Iceland...
## $ Score <dbl> 7.769, 7.600, 7.554, 7.494, 7.488...
## $ GDP.per.capita <dbl> 1.340, 1.383, 1.488, 1.380, 1.396...
## $ Social.support <dbl> 1.587, 1.573, 1.582, 1.624, 1.522...
## $ Healthy.life.expectancy <dbl> 0.986, 0.996, 1.028, 1.026, 0.999...
## $ Freedom.to.make.life.choices <dbl> 0.596, 0.592, 0.603, 0.591, 0.557...
## $ Generosity <dbl> 0.153, 0.252, 0.271, 0.354, 0.322...
## $ Perceptions.of.corruption <dbl> 0.393, 0.410, 0.341, 0.118, 0.298...
# First six rows of the raw data.
head(x = raw_hap, n = 6L)
## Overall.rank Country.or.region Score GDP.per.capita Social.support
## 1 1 Finland 7.769 1.340 1.587
## 2 2 Denmark 7.600 1.383 1.573
## 3 3 Norway 7.554 1.488 1.582
## 4 4 Iceland 7.494 1.380 1.624
## 5 5 Netherlands 7.488 1.396 1.522
## 6 6 Switzerland 7.480 1.452 1.526
## Healthy.life.expectancy Freedom.to.make.life.choices Generosity
## 1 0.986 0.596 0.153
## 2 0.996 0.592 0.252
## 3 1.028 0.603 0.271
## 4 1.026 0.591 0.354
## 5 0.999 0.557 0.322
## 6 1.052 0.572 0.263
## Perceptions.of.corruption
## 1 0.393
## 2 0.410
## 3 0.341
## 4 0.118
## 5 0.298
## 6 0.343
# Lower-case the header first so the lookup below does not depend on the
# capitalisation used in the CSV.
names(raw_hap) <- tolower(names(raw_hap))

# The original column names are long, so map each one to a short alias.
# Renaming by name (rather than by position) keeps every alias attached to
# the right column even if the column order of the file changes.
short_names <- c(
  "overall.rank" = "rank",
  "country.or.region" = "country",
  "score" = "sc",
  "gdp.per.capita" = "gdp",
  "social.support" = "social_sup",
  "healthy.life.expectancy" = "health",
  "freedom.to.make.life.choices" = "free",
  "generosity" = "gen",
  "perceptions.of.corruption" = "per_corp"
)

# Fail loudly on a column we have no alias for instead of mislabelling it.
unknown_cols <- setdiff(names(raw_hap), names(short_names))
if (length(unknown_cols) > 0) {
  stop(
    "Unexpected column(s) in the input data: ",
    paste(unknown_cols, collapse = ", "),
    call. = FALSE
  )
}
names(raw_hap) <- unname(short_names[names(raw_hap)])
# Split the score into categories: the integer part of the score becomes a
# factor that can be used as a discrete grouping variable.
raw_hap$scoref <- as.factor(floor(raw_hap$sc))

# Label for each integer score bucket, listed from the highest bucket to the
# lowest. This listing order is also the level order of the ordered factor.
lvl_labels <- c(
  "7" = "heaven C",
  "6" = "pretty happy C",
  "5" = "happy C",
  "4" = "soso C",
  "3" = "unhappy C",
  "2" = "pretty unhappy C",
  "1" = "miserable C"
)

# Look the label up by bucket. A bucket without a label (score below 1, 8 or
# above, or missing) yields NA.
raw_hap$lvl_c <- ordered(
  unname(lvl_labels[as.character(raw_hap$scoref)]),
  levels = unname(lvl_labels)
)
# Topics still to study: data / geom / aes / coordinate / scales / stats /
# facet.

# One variable, discrete: number of countries in each happiness level.
# geom_bar() counts the rows per category by default; geom_histogram() is
# meant for binning continuous data and only warns about ignored bin
# parameters when forced to count a discrete variable.
ggplot(raw_hap, aes(lvl_c)) + geom_bar()

# One variable, continuous: distribution of the raw score. A kernel density
# needs a continuous variable, so it is drawn on `sc` rather than on the
# ordinal `lvl_c`.
ggplot(raw_hap, aes(sc)) + geom_density(kernel = "gaussian")

# Two variables, discrete x / continuous y: happiness level against gdp.
lvl_gdp <- ggplot(data = raw_hap, mapping = aes(x = lvl_c, y = gdp))

# Bars whose heights come straight from the gdp values (stacked per level).
lvl_gdp + geom_col()

# Distribution of gdp within each level as a box plot.
lvl_gdp + geom_boxplot()

# Same distribution as a violin plot; all violins share the same area.
lvl_gdp + geom_violin(scale = "area")

# Two variables, continuous / continuous: score (x) against gdp (y).
sc_gdp <- ggplot(data = raw_hap, mapping = aes(x = sc, y = gdp))

# Plain scatter plot.
sc_gdp + geom_point()

# Semi-transparent points make overlapping observations visible.
sc_gdp + geom_point(alpha = 0.5)

# Fixed (non-mapped) styling: red, triangle shape, size 2.
sc_gdp + geom_point(colour = "red", shape = 2, size = 2, alpha = 0.5)

# Styling mapped to the data: shape and colour both follow the level.
sc_gdp + geom_point(mapping = aes(shape = lvl_c, colour = lvl_c), size = 3)
## Warning: Using shapes for an ordinal variable is not advised

# Scatter plot with a fitted straight line and its confidence band.
# The smoothing function is selected with `method`; `model` is not a
# geom_smooth() parameter, so it was ignored and a loess curve was drawn
# instead of the intended linear fit.
sc_gdp + geom_point() + geom_smooth(method = "lm", formula = y ~ x)

# Reference lines: a horizontal line at the mean gdp and a vertical line at
# the median score, both dashed red.
gdp_mean <- mean(raw_hap$gdp)
sc_median <- median(raw_hap$sc)
sc_gdp +
  geom_point() +
  geom_hline(
    yintercept = gdp_mean,
    colour = "red",
    linetype = "dashed",
    size = 1
  ) +
  geom_vline(
    xintercept = sc_median,
    colour = "red",
    linetype = "dashed",
    size = 1
  )

# geom_abline() and geom_segment() are available as well.

# 2-D density contours of score against gdp (interpretation still unclear).
sc_gdp + geom_density_2d()

# Area under the score / gdp line.
sc_gdp + geom_area()

# Coordinate systems.

# Swap the axes so the bars run horizontally.
lvl_gdp + geom_col() + coord_flip()

# Cartesian coordinates with padding around the data.
sc_gdp +
  geom_point() +
  coord_cartesian(expand = TRUE)

# Explicit viewport: x from 0 to 8, y from the smallest gdp up to two units
# above the largest gdp.
gdp_range <- range(raw_hap$gdp)
sc_gdp +
  geom_point() +
  coord_cartesian(
    xlim = c(0, 8),
    ylim = c(gdp_range[1], gdp_range[2] + 2)
  )

# Facets: one panel per happiness level.

# Panels laid out side by side (levels as columns).
sc_gdp +
  geom_point() +
  facet_grid(cols = vars(lvl_c))

# Panels stacked on top of each other (levels as rows).
sc_gdp +
  geom_point() +
  facet_grid(rows = vars(lvl_c))

# Panels wrapped into a grid that is three columns wide.
sc_gdp +
  geom_point() +
  facet_wrap(vars(lvl_c), ncol = 3)

# Scales: restrict the x axis to the range 0-8 through the scale itself.
sc_gdp +
  geom_point() +
  xlim(0, 8)

# Note: limits set on a scale act on the data (values outside the limits are
# discarded), whereas coord_cartesian() keeps all the data and only changes
# the visible region of the plot.
# Themes: the black-and-white theme applied to a scatter plot with a linear
# fit. `method = "lm"` is what selects the linear model; the previous
# `model = lm` was an unknown parameter and silently fell back to loess.
sc_gdp +
  geom_point() +
  geom_smooth(method = "lm", formula = y ~ x) +
  theme_bw()

# label / stat / text / theme will be tried out on the floating-population
# dataset.

# patchwork and plotly: build three plots, compose them, and make one
# interactive.
p1 <- sc_gdp + geom_point()
p2 <- sc_gdp + geom_point(aes(shape = lvl_c, color = lvl_c), size = 3)
# Linear fit: the fitting function is chosen with `method`. The previous
# `model = lm` was ignored by geom_smooth(), which then drew a loess curve.
p3 <- sc_gdp + geom_point() + geom_smooth(method = "lm", formula = y ~ x)

# patchwork: `+` places plots next to each other.
p1 + p2
## Warning: Using shapes for an ordinal variable is not advised

# patchwork: `/` stacks plots vertically; parentheses group a row.
(p1 + p2) / p3
## Warning: Using shapes for an ordinal variable is not advised

p3 / (p1 + p2)
## Warning: Using shapes for an ordinal variable is not advised

# plotly: convert the ggplot object into an interactive widget.
ggplotly(p3)