Tìm hiểu về ggplot2

Bộ dữ liệu gapminder

# Attach every package this script calls: ggplot2 (plotting), gapminder (the
# data set), ggthemes (theme_economist) and gridExtra (grid.arrange). Without
# these the calls further down fail with "could not find function".
library(ggplot2)
library(gapminder)
library(ggthemes)
library(gridExtra)

# Load the gapminder data set and list its column names.
data("gapminder")
names(gapminder)
## [1] "country"   "continent" "year"      "lifeExp"   "pop"       "gdpPercap"

# Base plot object: GDP per capita on x, life expectancy on y. No geom layer
# has been added yet; later plots build on this object.
p2 <- ggplot(data = gapminder, mapping = aes(x = gdpPercap, y = lifeExp))
p2

# Scatter plot with points coloured by continent.
p3 <- p2 + geom_point(mapping = aes(color = continent))
p3

# Same scatter plot with the x axis on a log10 scale.
p4 <- p3 + scale_x_log10()
p4

# LOESS smoother added on top of the log-scaled scatter plot.
p5 <- p4 + geom_smooth(method = "loess")
p5
## `geom_smooth()` using formula 'y ~ x'

# LOESS smoother on the scatter plot without the log10 x scale.
p6 <- p3 + geom_smooth(method = "loess")
p6
## `geom_smooth()` using formula 'y ~ x'

# Axis labels and a title for the log-scaled scatter plot (no smoother).
p7 <- p4 +
  labs(x = "log GDP per Capita", y = "Life Expectancy") +
  ggtitle("Association between GDP per Capita and Life Expectancy")
p7

# Make the title bold and centre it horizontally.
p8 <- p7 +
  theme(
    plot.title = element_text(lineheight = 0.6, face = "bold", hjust = 0.5)
  )
p8

# Same labels and title applied to the plot that includes the LOESS smoother.
p9 <- p5 +
  labs(x = "log GDP per Capita", y = "Life Expectancy") +
  ggtitle("Association between GDP per Capita and Life Expectancy")
p9
## `geom_smooth()` using formula 'y ~ x'

# Restyle the labelled plot with theme_economist().
p10 <- p9 + theme_economist()
p10
## `geom_smooth()` using formula 'y ~ x'

# Keep only the observations recorded for 2007.
Y2007 <- subset(gapminder, subset = year == 2007)

# Dimensions of the subset (rows, columns).
dim(Y2007)
## [1] 142   6

# First rows of the subset.
head(Y2007)
## # A tibble: 6 x 6
##   country     continent  year lifeExp      pop gdpPercap
##   <fct>       <fct>     <int>   <dbl>    <int>     <dbl>
## 1 Afghanistan Asia       2007    43.8 31889923      975.
## 2 Albania     Europe     2007    76.4  3600523     5937.
## 3 Algeria     Africa     2007    72.3 33333216     6223.
## 4 Angola      Africa     2007    42.7 12420476     4797.
## 5 Argentina   Americas   2007    75.3 40301927    12779.
## 6 Australia   Oceania    2007    81.2 20434176    34435.

# Last three rows of the subset.
tail(Y2007, n = 3)
## # A tibble: 3 x 6
##   country     continent  year lifeExp      pop gdpPercap
##   <fct>       <fct>     <int>   <dbl>    <int>     <dbl>
## 1 Yemen, Rep. Asia       2007    62.7 22211743     2281.
## 2 Zambia      Africa     2007    42.4 11746035     1271.
## 3 Zimbabwe    Africa     2007    43.5 12311143      470.

# Column types and a preview of the values.
str(Y2007)
## tibble [142 x 6] (S3: tbl_df/tbl/data.frame)
##  $ country  : Factor w/ 142 levels "Afghanistan",..: 1 2 3 4 5 6 7 8 9 10 ...
##  $ continent: Factor w/ 5 levels "Africa","Americas",..: 3 4 1 1 2 5 4 3 3 4 ...
##  $ year     : int [1:142] 2007 2007 2007 2007 2007 2007 2007 2007 2007 2007 ...
##  $ lifeExp  : num [1:142] 43.8 76.4 72.3 42.7 75.3 ...
##  $ pop      : int [1:142] 31889923 3600523 33333216 12420476 40301927 20434176 8199783 708573 150448339 10392226 ...
##  $ gdpPercap: num [1:142] 975 5937 6223 4797 12779 ...

# Per-column summary statistics.
summary(Y2007)
##         country       continent       year         lifeExp     
##  Afghanistan:  1   Africa  :52   Min.   :2007   Min.   :39.61  
##  Albania    :  1   Americas:25   1st Qu.:2007   1st Qu.:57.16  
##  Algeria    :  1   Asia    :33   Median :2007   Median :71.94  
##  Angola     :  1   Europe  :30   Mean   :2007   Mean   :67.01  
##  Argentina  :  1   Oceania : 2   3rd Qu.:2007   3rd Qu.:76.41  
##  Australia  :  1                 Max.   :2007   Max.   :82.60  
##  (Other)    :136                                               
##       pop              gdpPercap      
##  Min.   :1.996e+05   Min.   :  277.6  
##  1st Qu.:4.508e+06   1st Qu.: 1624.8  
##  Median :1.052e+07   Median : 6124.4  
##  Mean   :4.402e+07   Mean   :11680.1  
##  3rd Qu.:3.121e+07   3rd Qu.:18008.8  
##  Max.   :1.319e+09   Max.   :49357.2  
## 
# Histograms of 2007 GDP per capita, built on a shared base plot.
y1 <- ggplot(data = Y2007, mapping = aes(x = gdpPercap))

# Count histogram. bins = 30 is the stat_bin() default written out explicitly:
# the plot is unchanged, but the "Pick better value with `binwidth`" message
# is no longer emitted.
y2 <- y1 +
  geom_histogram(bins = 30, fill = "lightgreen", color = "red") +
  labs(title = "ABA")
y2

# Density-scaled histogram. after_stat(density) replaces the deprecated
# ..density.. notation for referring to a computed statistic.
y3 <- y1 +
  geom_histogram(
    mapping = aes(y = after_stat(density)),
    bins = 30,
    fill = "lightblue",
    color = "red"
  )
y3

# Semi-transparent density curves of GDP per capita, filled by continent.
y4 <- ggplot(data = Y2007, mapping = aes(x = gdpPercap, fill = continent)) +
  geom_density(alpha = 0.4)
y4

# Number of countries per continent as a bar chart.
y5 <- ggplot(data = Y2007, mapping = aes(x = continent, fill = continent)) +
  geom_bar()
y5

# Same bar chart with the legend hidden.
y6 <- y5 + theme(legend.position = "none")
y6

# Bin life expectancy into three bands in one vectorised step. Creating the
# whole column at once avoids the tibble warning "Unknown or uninitialised
# column: `level_age`" that partial assignment into a missing column raises.
# right = FALSE gives the intervals [-Inf, 60), [60, 80) and [80, Inf), i.e.
# the same cut-offs as the original three assignments. The result is a factor
# whose levels follow the band order, so bars and legend entries are no
# longer sorted alphabetically.
Y2007$level_age <- cut(
  Y2007$lifeExp,
  breaks = c(-Inf, 60, 80, Inf),
  labels = c("< 60 age", "60 - 80 age", "> 80 age"),
  right = FALSE
)

# Number of countries in each life-expectancy band.
y7 <- ggplot(data = Y2007, mapping = aes(x = level_age, fill = level_age)) +
  geom_bar()
y7

# Countries per continent, stacked by life-expectancy band.
y8 <- ggplot(data = Y2007, mapping = aes(x = continent, fill = level_age)) +
  geom_bar()
y8

# Asian countries in 2007.
CA <- subset(gapminder, subset = year == 2007 & continent == "Asia")

# Life expectancy per country as bars (stat = "identity" plots the values
# as given rather than counting rows).
y9 <- ggplot(
  data = CA,
  mapping = aes(x = country, y = lifeExp, fill = country)
) +
  geom_bar(stat = "identity", width = 0.9)

# Flip the coordinates so the bars run horizontally.
y10 <- y9 + coord_flip()
y10

# Hide the legend.
y11 <- y10 + theme(legend.position = "none")
y11

# Life expectancy per Asian country, with countries ordered by life expectancy.
y12 <- ggplot(
  data = CA,
  mapping = aes(x = reorder(country, lifeExp), y = lifeExp, fill = country)
) +
  geom_bar(stat = "identity", width = 0.9)

# Horizontal bars, legend hidden.
y13 <- y12 +
  coord_flip() +
  theme(legend.position = "none")
y13

# GDP per capita per Asian country, with countries ordered by GDP per capita.
y14 <- ggplot(
  data = CA,
  mapping = aes(x = reorder(country, gdpPercap), y = gdpPercap, fill = country)
) +
  geom_bar(stat = "identity", width = 0.9)

# Horizontal bars, legend hidden.
y15 <- y14 +
  coord_flip() +
  theme(legend.position = "none")
y15

# Show the two ordered charts side by side.
grid.arrange(y15, y13, ncol = 2)

# Side-by-side comparison of the vertical (unflipped) bar charts. Note that
# y9 and y14 are reassigned here and replace the earlier objects of the same
# name.

# Life expectancy per Asian country, legend hidden.
y9 <- ggplot(
  data = CA,
  mapping = aes(x = country, y = lifeExp, fill = country)
) +
  geom_bar(stat = "identity", width = 0.9) +
  theme(legend.position = "none")

# GDP per capita per Asian country, ordered by GDP per capita. The legend is
# hidden here as well so both panels are treated consistently; otherwise the
# one-entry-per-country legend takes up most of this half-width panel.
y14 <- ggplot(
  data = CA,
  mapping = aes(x = reorder(country, gdpPercap), y = gdpPercap, fill = country)
) +
  geom_bar(stat = "identity", width = 0.9) +
  theme(legend.position = "none")

grid.arrange(y9, y14, ncol = 2)

# Base plot: 2007 GDP per capita by continent, filled by continent.
g1 <- ggplot(
  data = Y2007,
  mapping = aes(x = continent, y = gdpPercap, fill = continent)
)
g1

# Box plot per continent.
g2 <- g1 + geom_boxplot(alpha = 0.9)
g2

# Overlay the individual observations as semi-transparent jittered points.
g3 <- g2 + geom_jitter(alpha = 0.3)
g3