# getwd()
# setwd("C:/Users/taetaetae/Documents/R/NEW_Kaggle/7th_happy")
library(patchwork)
## Warning: package 'patchwork' was built under R version 3.6.3
library(plotly)
## Loading required package: ggplot2
## Warning: package 'ggplot2' was built under R version 3.6.1
## 
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
## 
##     last_plot
## The following object is masked from 'package:stats':
## 
##     filter
## The following object is masked from 'package:graphics':
## 
##     layout
library(ggplot2)
library(tidyverse)
## Warning: package 'tidyverse' was built under R version 3.6.1
## -- Attaching packages ------------------------------------------------------------------------------- tidyverse 1.2.1 --
## √ tibble  2.1.1     √ purrr   0.3.2
## √ tidyr   0.8.3     √ dplyr   0.8.3
## √ readr   1.3.1     √ stringr 1.4.0
## √ tibble  2.1.1     √ forcats 0.4.0
## Warning: package 'tidyr' was built under R version 3.6.1
## Warning: package 'dplyr' was built under R version 3.6.1
## -- Conflicts ---------------------------------------------------------------------------------- tidyverse_conflicts() --
## x dplyr::filter() masks plotly::filter(), stats::filter()
## x dplyr::lag()    masks stats::lag()
library(readr)
library(reshape)
## Warning: package 'reshape' was built under R version 3.6.1
## 
## Attaching package: 'reshape'
## The following object is masked from 'package:dplyr':
## 
##     rename
## The following objects are masked from 'package:tidyr':
## 
##     expand, smiths
## The following object is masked from 'package:plotly':
## 
##     rename
# Read the 2019 happiness table from the working directory, then print its
# column types and leading values.
raw_hap <- read.csv(file = "2019.csv")
str(object = raw_hap)
## 'data.frame':    156 obs. of  9 variables:
##  $ Overall.rank                : int  1 2 3 4 5 6 7 8 9 10 ...
##  $ Country.or.region           : Factor w/ 156 levels "Afghanistan",..: 44 37 106 58 99 134 133 100 24 7 ...
##  $ Score                       : num  7.77 7.6 7.55 7.49 7.49 ...
##  $ GDP.per.capita              : num  1.34 1.38 1.49 1.38 1.4 ...
##  $ Social.support              : num  1.59 1.57 1.58 1.62 1.52 ...
##  $ Healthy.life.expectancy     : num  0.986 0.996 1.028 1.026 0.999 ...
##  $ Freedom.to.make.life.choices: num  0.596 0.592 0.603 0.591 0.557 0.572 0.574 0.585 0.584 0.532 ...
##  $ Generosity                  : num  0.153 0.252 0.271 0.354 0.322 0.263 0.267 0.33 0.285 0.244 ...
##  $ Perceptions.of.corruption   : num  0.393 0.41 0.341 0.118 0.298 0.343 0.373 0.38 0.308 0.226 ...
# Per-column summary statistics of the raw table.
summary(object = raw_hap)
##   Overall.rank      Country.or.region     Score       GDP.per.capita  
##  Min.   :  1.00   Afghanistan:  1     Min.   :2.853   Min.   :0.0000  
##  1st Qu.: 39.75   Albania    :  1     1st Qu.:4.545   1st Qu.:0.6028  
##  Median : 78.50   Algeria    :  1     Median :5.380   Median :0.9600  
##  Mean   : 78.50   Argentina  :  1     Mean   :5.407   Mean   :0.9051  
##  3rd Qu.:117.25   Armenia    :  1     3rd Qu.:6.184   3rd Qu.:1.2325  
##  Max.   :156.00   Australia  :  1     Max.   :7.769   Max.   :1.6840  
##                   (Other)    :150                                     
##  Social.support  Healthy.life.expectancy Freedom.to.make.life.choices
##  Min.   :0.000   Min.   :0.0000          Min.   :0.0000              
##  1st Qu.:1.056   1st Qu.:0.5477          1st Qu.:0.3080              
##  Median :1.272   Median :0.7890          Median :0.4170              
##  Mean   :1.209   Mean   :0.7252          Mean   :0.3926              
##  3rd Qu.:1.452   3rd Qu.:0.8818          3rd Qu.:0.5072              
##  Max.   :1.624   Max.   :1.1410          Max.   :0.6310              
##                                                                      
##    Generosity     Perceptions.of.corruption
##  Min.   :0.0000   Min.   :0.0000           
##  1st Qu.:0.1087   1st Qu.:0.0470           
##  Median :0.1775   Median :0.0855           
##  Mean   :0.1848   Mean   :0.1106           
##  3rd Qu.:0.2482   3rd Qu.:0.1412           
##  Max.   :0.5660   Max.   :0.4530           
## 
# Compact one-row-per-column preview of the raw table.
raw_hap %>% glimpse()
## Observations: 156
## Variables: 9
## $ Overall.rank                 <int> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11...
## $ Country.or.region            <fct> Finland, Denmark, Norway, Iceland...
## $ Score                        <dbl> 7.769, 7.600, 7.554, 7.494, 7.488...
## $ GDP.per.capita               <dbl> 1.340, 1.383, 1.488, 1.380, 1.396...
## $ Social.support               <dbl> 1.587, 1.573, 1.582, 1.624, 1.522...
## $ Healthy.life.expectancy      <dbl> 0.986, 0.996, 1.028, 1.026, 0.999...
## $ Freedom.to.make.life.choices <dbl> 0.596, 0.592, 0.603, 0.591, 0.557...
## $ Generosity                   <dbl> 0.153, 0.252, 0.271, 0.354, 0.322...
## $ Perceptions.of.corruption    <dbl> 0.393, 0.410, 0.341, 0.118, 0.298...
# Show the first six rows of the raw table.
head(raw_hap, n = 6L)
##   Overall.rank Country.or.region Score GDP.per.capita Social.support
## 1            1           Finland 7.769          1.340          1.587
## 2            2           Denmark 7.600          1.383          1.573
## 3            3            Norway 7.554          1.488          1.582
## 4            4           Iceland 7.494          1.380          1.624
## 5            5       Netherlands 7.488          1.396          1.522
## 6            6       Switzerland 7.480          1.452          1.526
##   Healthy.life.expectancy Freedom.to.make.life.choices Generosity
## 1                   0.986                        0.596      0.153
## 2                   0.996                        0.592      0.252
## 3                   1.028                        0.603      0.271
## 4                   1.026                        0.591      0.354
## 5                   0.999                        0.557      0.322
## 6                   1.052                        0.572      0.263
##   Perceptions.of.corruption
## 1                     0.393
## 2                     0.410
## 3                     0.341
## 4                     0.118
## 5                     0.298
## 6                     0.343
# Normalise the column names: lower-case the CSV headers (mixed case is a
# nuisance), then replace each long header with a short alias.
#
# The aliases are looked up by header name instead of being assigned by
# position, so a CSV whose columns arrive in a different order cannot be
# silently mislabelled.
short_names <- c(
  "overall.rank"                 = "rank",
  "country.or.region"            = "country",
  "score"                        = "sc",
  "gdp.per.capita"               = "gdp",
  "social.support"               = "social_sup",
  "healthy.life.expectancy"      = "health",
  "freedom.to.make.life.choices" = "free",
  "generosity"                   = "gen",
  "perceptions.of.corruption"    = "per_corp"
)

names(raw_hap) <- tolower(names(raw_hap))

# Fail loudly if the file does not carry exactly the expected headers.
stopifnot(setequal(names(raw_hap), names(short_names)))

names(raw_hap) <- unname(short_names[names(raw_hap)])

# Bucket the score into categories by its integer part.
# scoref: the integer part of the score, stored as a factor.
raw_hap$scoref <- as.factor(floor(raw_hap$sc))

# lvl_c: the same bucket as an ordered factor with descriptive labels.
# Level order runs from integer part 7 ("heaven C") down to 1 ("miserable C").
# A score whose integer part is outside 1..7 becomes NA.
lvl_labels <- c(
  "heaven C",
  "pretty happy C",
  "happy C",
  "soso C",
  "unhappy C",
  "pretty unhappy C",
  "miserable C"
)

raw_hap$lvl_c <- factor(
  floor(raw_hap$sc),
  levels = 7:1,
  labels = lvl_labels,
  ordered = TRUE
)
# TODO: read up on data / geom / aes / coordinate / scales / stats / facet.

# One variable, discrete: number of countries in each happiness level.
# geom_bar() counts rows per level by default.
ggplot(raw_hap, aes(lvl_c)) + geom_bar()

# One variable, continuous: distribution of the raw score.
# A density estimate needs a continuous variable, so the score `sc` is used
# here rather than the ordinal level `lvl_c`.
ggplot(raw_hap, aes(sc)) + geom_density(kernel = "gaussian")

# Two variables, discrete x / continuous y: happiness level vs. GDP.
# `lvl_gdp` is the shared base plot; each statement below adds one geom to it.
lvl_gdp <- ggplot(data = raw_hap, mapping = aes(x = lvl_c, y = gdp))

# Bars built from the gdp values themselves (no counting).
lvl_gdp + geom_col()

# Box-and-whisker summary of gdp within each level.
lvl_gdp + geom_boxplot()

# Violin plot of gdp within each level.
lvl_gdp + geom_violin(scale = "area")

# Two variables, continuous / continuous: score (x) vs. GDP (y).
# `sc_gdp` is the shared base plot reused by the sections that follow.
sc_gdp <- raw_hap %>%
  ggplot(aes(sc, gdp))

# Plain scatter plot.
sc_gdp + geom_point()

# Semi-transparent points to reveal overplotting.
sc_gdp + geom_point(alpha = 0.5)

# Fixed (non-mapped) appearance: red, shape 2, size 2.
sc_gdp + geom_point(alpha = 0.5, color = "red", shape = 2, size = 2)

# Shape and colour mapped to the happiness level.
sc_gdp + geom_point(aes(shape = lvl_c, color = lvl_c), size = 3)
## Warning: Using shapes for an ordinal variable is not advised

# Scatter plot with a linear-regression fit. The argument that selects the
# smoother is `method`; an unrecognised `model` argument is ignored and a
# loess curve is drawn instead.
sc_gdp + geom_point() + geom_smooth(method = "lm", formula = y ~ x)

# Reference lines: dashed red lines at the mean GDP (horizontal) and at the
# median score (vertical), drawn on top of the scatter plot.
sc_gdp +
  geom_point() +
  geom_hline(
    yintercept = with(raw_hap, mean(gdp)),
    linetype = "dashed",
    colour = "red",
    size = 1
  ) +
  geom_vline(
    xintercept = with(raw_hap, median(sc)),
    linetype = "dashed",
    colour = "red",
    size = 1
  )

# geom_abline() and geom_segment() are also available.

# 2-D density contours of score vs. GDP (still to be explored).
sc_gdp + geom_density_2d()

# Area chart of GDP over score.
sc_gdp + geom_area()

# Coordinate systems
# Horizontal bars: same bar chart with the x and y axes swapped.
lvl_gdp + geom_col() + coord_flip()

# Cartesian coordinates with the expansion around the data
# switched on explicitly.
sc_gdp +
  geom_point() +
  coord_cartesian(expand = TRUE)

# Explicit viewport: x from 0 to 8, y from the smallest GDP value up to
# two units above the largest one.
sc_gdp +
  geom_point() +
  coord_cartesian(
    xlim = c(0, 8),
    ylim = range(raw_hap$gdp) + c(0, 2)
  )

# Facets: one scatter panel per happiness level.
# Panels laid out side by side (levels as columns).
sc_gdp +
  geom_point() +
  facet_grid(cols = vars(lvl_c))

# Panels stacked vertically (levels as rows).
sc_gdp +
  geom_point() +
  facet_grid(rows = vars(lvl_c))

# Panels wrapped into a grid that is three columns wide.
sc_gdp +
  geom_point() +
  facet_wrap(vars(lvl_c), ncol = 3)

# Scales: restrict the x scale to the range 0..8.
sc_gdp +
  geom_point() +
  xlim(0, 8)

# Note: limits set on a scale act on the data itself, whereas
# coord_cartesian() limits only change the visible region of the plot.

# Theme: scatter plot with a linear fit, drawn with the black-and-white theme.
# The smoother is selected with `method`; the previous `model = lm` argument
# was ignored, which left the default loess curve in place.
sc_gdp +
  geom_point() +
  geom_smooth(method = "lm", formula = y ~ x) +
  theme_bw()

# label / stat / text / theme will be tried out on the floating-population
# dataset.


# patchwork (composing several plots) and plotly (interactive rendering).
# p1: plain scatter; p2: shape/colour mapped to level; p3: scatter + fit.
p1 <- sc_gdp + geom_point()
p2 <- sc_gdp + geom_point(aes(shape = lvl_c, color = lvl_c),
                          size = 3)
# `method = "lm"` requests the linear fit; the previous `model = lm` argument
# was ignored and produced a loess curve.
p3 <- sc_gdp + geom_point() + geom_smooth(method = "lm", formula = y ~ x)

# p1 and p2 side by side.
p1 + p2
## Warning: Using shapes for an ordinal variable is not advised

# p1 and p2 on the top row, p3 underneath.
(p1 + p2) / p3
## Warning: Using shapes for an ordinal variable is not advised

# p3 on top, p1 and p2 underneath.
p3 / (p1 + p2)
## Warning: Using shapes for an ordinal variable is not advised

# Convert the ggplot object into an interactive plotly figure.
ggplotly(p3)