library(ggplot2)
library(dplyr) # để sử dụng %>% gọi là pipe operator
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(dslabs)
# Load both example datasets from dslabs in a single call.
data(murders, heights)

# Inspect the murders data: structure first, then just the column names.
str(murders)
## 'data.frame':    51 obs. of  5 variables:
##  $ state     : chr  "Alabama" "Alaska" "Arizona" "Arkansas" ...
##  $ abb       : chr  "AL" "AK" "AZ" "AR" ...
##  $ region    : Factor w/ 4 levels "Northeast","South",..: 2 4 4 2 4 4 1 2 2 2 ...
##  $ population: num  4779736 710231 6392017 2915918 37253956 ...
##  $ total     : num  135 19 232 93 1257 ...
names(murders)
## [1] "state"      "abb"        "region"     "population" "total"

# Inspect the heights data.
str(heights)
## 'data.frame':    1050 obs. of  2 variables:
##  $ sex   : Factor w/ 2 levels "Female","Male": 2 2 2 2 2 1 1 1 1 2 ...
##  $ height: num  75 70 68 74 61 65 66 62 66 67 ...
# Scatter plot of total murders against log10(population), starting from two
# plain vectors.
x <- log10(murders$population)
y <- murders$total

# Version 1: wrap the vectors in a data frame and pipe it into ggplot().
data.frame(x = x, y = y) %>%
  ggplot(aes(x, y)) +
  geom_point()

# Version 2: the same scatter drawn straight from the vectors, with no data
# frame. This replaces qplot(x, y), which is deprecated since ggplot2 3.4.0.
ggplot() +
  geom_point(aes(x, y))

library(gridExtra)
## 
## Attaching package: 'gridExtra'
## The following object is masked from 'package:dplyr':
## 
##     combine
# Side-by-side comparison: histogram of x (left) and x-y scatter (right).
# Built with ggplot() instead of qplot(), which is deprecated since
# ggplot2 3.4.0. `bins = 30` spells out the value stat_bin() previously chose
# implicitly, so the "Pick better value with `binwidth`" message is no longer
# emitted.
p1 <- ggplot(data.frame(x = x), aes(x)) +
  geom_histogram(bins = 30)
p2 <- ggplot(data.frame(x = x, y = y), aes(x, y)) +
  geom_point()
grid.arrange(p1, p2, ncol = 2)

# Bare plot object: murders is attached as data, no aesthetics or layers yet.
p <- ggplot(murders)

# Murders vs. population (in millions). Every point also gets a label with the
# state abbreviation, nudged 1.5 x-units to the right. The label layer inherits
# x and y from the plot-level mapping, so only `label` is mapped there.
ggplot(murders, aes(x = population / 10^6, y = total)) +
  geom_point() +
  geom_label(aes(label = abb), nudge_x = 1.5)

# Quick alternative for a similar scatter (raw population, no labels):
# qplot(murders$population, murders$total)


# Same labelled scatter, with the labels coloured by region. `color` has to be
# mapped inside aes() so that each region gets its own colour.
ggplot(murders, aes(x = population / 10^6, y = total)) +
  geom_point() +
  geom_label(aes(label = abb, color = region), nudge_x = 1.5)

# Open the reference page for geom_label().
help("geom_label")

# Progress note: currently at section 7.15 of https://rafalab.github.io/dsbook/ggplot2.html

library(ggthemes)
library(ggrepel)

# Label-only plot: state abbreviations placed at (population, total) and
# coloured by region, with the x-axis on a log10 scale.
p <- ggplot(
  murders,
  aes(x = population, y = total, label = abb, color = region)
) +
  geom_label() +
  scale_x_log10()

p # x-axis transformed via scale_x_log10()

# Same label-only plot, but this time only the y-axis is on a log10 scale.
p1 <- ggplot(
  murders,
  aes(x = population, y = total, label = abb, color = region)
) +
  geom_label() +
  scale_y_log10()

p1 # y-axis transformed via scale_y_log10()

# Final plot: points plus repelled text labels, both axes on a log10 scale,
# with a title and the Economist theme from ggthemes.
p2 <- ggplot(
  murders,
  aes(x = population, y = total, label = abb, color = region)
) +
  # Colour comes from the plot-level mapping; only the size is set here.
  geom_point(size = 3) +
  # ggrepel labels; per the original note, these are drawn with small lines
  # pointing at each point.
  geom_text_repel() +
  # geom_label(nudge_x = 0.05) +  # dropped: it duplicated the repelled labels
  scale_x_log10() +
  scale_y_log10() +
  ggtitle("Gun murder data") +
  # ggthemes theme; per the original note, it also restyles the region legend.
  theme_economist()

p2 # both x and y are log10-transformed