# Packages used by this script: ggplot2 for plotting, dplyr for the pipe and
# the summarize()/pull() calls further down, dslabs for the murders data.
library(ggplot2)
library(dplyr) # provides %>%, known as the pipe operator
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(dslabs)
# Load the murders data frame into the workspace.
data(murders)

# Inspect the structure of the data; the captured console output follows each
# call as ## lines.
str(murders)
## 'data.frame':    51 obs. of  5 variables:
##  $ state     : chr  "Alabama" "Alaska" "Arizona" "Arkansas" ...
##  $ abb       : chr  "AL" "AK" "AZ" "AR" ...
##  $ region    : Factor w/ 4 levels "Northeast","South",..: 2 4 4 2 4 4 1 2 2 2 ...
##  $ population: num  4779736 710231 6392017 2915918 37253956 ...
##  $ total     : num  135 19 232 93 1257 ...
# Column names only.
names(murders)
## [1] "state"      "abb"        "region"     "population" "total"

In ggplot2 we create graphs by adding layers. Layers can define geometries, compute summary statistics, define what scales to use, or even change styles. To add layers, we use the symbol +. In general, a line of code will look like this:

DATA %>% ggplot() + LAYER 1 + LAYER 2 + … + LAYER N

# Passing only a data set to ggplot() creates the plot object. No geometry
# layer has been added yet, so rendering it shows an empty canvas.
murders %>% ggplot()

# The same object could be built with ggplot(data = murders); store it so
# that layers can be added to it later.
p <- murders %>% ggplot()
class(p)
## [1] "gg"     "ggplot"
print(p)

Aesthetic mappings describe how properties of the data connect with features of the graph, such as distance along an axis, size, or color. The aes function connects data with what we see on the graph by defining aesthetic mappings and will be one of the functions you use most often when plotting. The outcome of the aes function is often used as the argument of a geometry function. This example produces a scatterplot of total murders versus population in millions:

# Scatterplot of total murders against population in millions. The aesthetic
# mapping is handed to the geometry layer itself.
ggplot(data = murders) +
  geom_point(mapping = aes(x = population / 10^6, y = total))

A second layer in the plot we wish to make involves adding a label to each point to identify the state. The geom_label and geom_text functions permit us to add text to the plot with and without a rectangle behind the text, respectively.

Because each point (each state in this case) has a label, we need an aesthetic mapping to make the connection between points and labels. By reading the help file, we learn that we supply the mapping between point and label through the label argument of aes. So the code looks like this:

# Points plus one text label per state. Each layer receives its own mapping;
# the label aesthetic is taken from the abb column.
p +
  geom_point(aes(x = population / 10^6, y = total)) +
  geom_text(aes(x = population / 10^6, y = total, label = abb))

# abb is a column of the murders data frame, so it has to be referenced
# inside aes().
p_test <- p +
  geom_text(aes(x = population / 10^6, y = total, label = abb))

# By contrast,
#   p_test <- p + geom_text(aes(population/10^6, total), label = abb)
# gives an error: outside aes() the name abb is not found, because it is a
# column name and not a global variable, so geom_text() does not know where
# to look for it.

# size is a fixed setting rather than a mapping, so it is passed outside
# aes().
p +
  geom_point(aes(x = population / 10^6, y = total), size = 3) +
  geom_text(aes(x = population / 10^6, y = total, label = abb))

Now because the points are larger it is hard to see the labels. If we read the help file for geom_text, we see the nudge_x argument, which moves the text slightly to the right or to the left:

# nudge_x shifts every label horizontally so the text no longer sits on top
# of the enlarged points.
p +
  geom_point(aes(x = population / 10^6, y = total), size = 3) +
  geom_text(
    aes(x = population / 10^6, y = total, label = abb),
    nudge_x = 1.5
  )

Reading note: currently working through https://rafalab.github.io/dsbook/ggplot2.html to pick up the key ideas; continue next with https://www.kaggle.com/iamhungundji/r-project/notebook

# ggplot() itself accepts a mapping argument, as its signature shows.
args(ggplot)
## function (data = NULL, mapping = aes(), ..., environment = parent.frame()) 
## NULL

# Define the mapping once, globally, so the layers added to p can reuse it.
# This assignment draws nothing by itself; a plot only appears once the
# layers below are added.
p <- ggplot(murders, aes(x = population / 10^6, y = total, label = abb))

p +
  geom_point(size = 3) +
  geom_text(nudge_x = 1.5)

# A mapping given inside a layer overrides the global one for that layer:
# here the text is placed at the coordinates x = 10, y = 800.
p +
  geom_point(size = 3) + # size sets how large the points are
  geom_text(aes(x = 10, y = 800, label = "Hello there!"))

# Log10 axes through the general continuous scales and their trans argument.
p +
  geom_point(size = 3) +
  geom_text(nudge_x = 0.08) + # nudge shifts the labels sideways
  scale_x_continuous(trans = "log10") +
  scale_y_continuous(trans = "log10")

# The same transformation using the dedicated log10 scale functions.
p +
  geom_point(size = 3) +
  geom_text(nudge_x = 0.05) +
  scale_x_log10() +
  scale_y_log10()

# Blue points, plus axis labels and a title.
p +
  geom_point(size = 3, colour = "blue") +
  geom_text(nudge_x = 0.05) +
  scale_x_log10() +
  scale_y_log10() +
  labs(
    x = "Populations in millions (log scale)",
    y = "Total number of murders (log scale)",
    title = "US Gun Murders in 2010"
  )

# Give the colour legend the capitalised title "Region" instead of the
# column name "region".
p <- p + scale_color_discrete(name = "Region")

# Overall murder rate per million people, pooled across all rows of murders.
r <- murders %>%
  summarize(rate = sum(total) / sum(population) * 10^6) %>%
  pull(rate)

# Reference line for the overall rate: total = r * (population / 10^6).
# On log10 axes this becomes log10(total) = log10(population / 10^6) + log10(r),
# i.e. a line with slope 1 (the geom_abline() default when only the intercept
# is given) and intercept log10(r). The log10 scales are therefore required;
# on linear axes this intercept would draw a line unrelated to the rate.
p +
  geom_point(aes(col = region), size = 3) +
  scale_x_log10() +
  scale_y_log10() +
  geom_abline(intercept = log10(r))

# Full plot: coloured points, state labels, log10 axes, axis labels, title,
# and the reference line drawn dashed in dark grey.
p +
  geom_point(aes(col = region), size = 3) +
  geom_text(nudge_x = 0.05) +
  scale_x_log10() +
  scale_y_log10() +
  xlab("Populations in millions (log scale)") +
  ylab("Total number of murders (log scale)") +
  ggtitle("US Gun Murders in 2010") +
  geom_abline(intercept = log10(r), lty = 2, color = "darkgrey")

library(ggthemes)
library(ggrepel)

# Preview three ggthemes themes. At this point p carries only the data, the
# global mapping and the colour scale, with no geometry layer, so a point
# layer is added to each preview; without it every theme would be shown on
# an empty panel.
p +
  geom_point(aes(col = region), size = 3) +
  theme_economist()

p +
  geom_point(aes(col = region), size = 3) +
  theme_fivethirtyeight()

p +
  geom_point(aes(col = region), size = 3) +
  theme_map()

# Overall murder rate per million people, pooled across all rows of murders.
r <- with(murders, sum(total) / sum(population) * 10^6)

# Complete figure assembled in one expression: dashed dark grey reference
# line for the overall rate, points coloured by region, non-overlapping state
# labels from ggrepel, log10 axes, labels, legend title and the Economist
# theme.
ggplot(murders, aes(x = population / 10^6, y = total, label = abb)) +
  geom_abline(intercept = log10(r), linetype = 2, colour = "darkgrey") +
  geom_point(aes(colour = region), size = 3) +
  geom_text_repel() +
  scale_x_log10() +
  scale_y_log10() +
  labs(
    x = "Populations in millions (log scale)",
    y = "Total number of murders (log scale)",
    title = "US Gun Murders in 2010"
  ) +
  scale_colour_discrete(name = "Region") +
  theme_economist()