library(ggplot2)
library(dplyr) # để sử dụng %>% gọi là pipe operator
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(dslabs)
# Load both example datasets in a single data() call.
data(murders, heights)

# Structure of the murders data; the recorded output is kept below.
str(object = murders)
## 'data.frame': 51 obs. of 5 variables:
## $ state : chr "Alabama" "Alaska" "Arizona" "Arkansas" ...
## $ abb : chr "AL" "AK" "AZ" "AR" ...
## $ region : Factor w/ 4 levels "Northeast","South",..: 2 4 4 2 4 4 1 2 2 2 ...
## $ population: num 4779736 710231 6392017 2915918 37253956 ...
## $ total : num 135 19 232 93 1257 ...

# Column names only.
colnames(murders)
## [1] "state" "abb" "region" "population" "total"

# Structure of the heights data.
str(object = heights)
## 'data.frame': 1050 obs. of 2 variables:
## $ sex : Factor w/ 2 levels "Female","Male": 2 2 2 2 2 1 1 1 1 2 ...
## $ height: num 75 70 68 74 61 65 66 62 66 67 ...
# Scatter plot of total murders against log10(population), built from two
# plain vectors rather than from the murders data frame directly.
x <- log10(murders$population)
y <- murders$total

# Pipe form: wrap the vectors in a data frame and hand it to ggplot().
data.frame(x = x, y = y) %>%
  ggplot(aes(x, y)) +
  geom_point()

# Same figure again, originally drawn with the quick-plot helper
# `qplot(x, y)`. qplot() is deprecated since ggplot2 3.4.0 and warns on
# current releases, so the equivalent explicit ggplot() call is used.
ggplot(data.frame(x = x, y = y), aes(x, y)) +
  geom_point()
library(gridExtra)
##
## Attaching package: 'gridExtra'
## The following object is masked from 'package:dplyr':
##
## combine
# Two panels side by side: a histogram of x and the x-y scatter plot.
# These were originally `qplot(x)` and `qplot(x, y)`; qplot() is deprecated
# since ggplot2 3.4.0, so the geoms it picked automatically (histogram for a
# lone numeric x, points for x and y) are spelled out with ggplot().
p1 <- ggplot(data.frame(x = x), aes(x)) +
  geom_histogram() # default binning, hence the stat_bin() message below
p2 <- ggplot(data.frame(x = x, y = y), aes(x, y)) +
  geom_point()
grid.arrange(p1, p2, ncol = 2)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
# Bare ggplot object bound to the murders data (no aesthetics, no layers).
p <- ggplot(murders)

# Total murders against population in millions. Each point is tagged with
# its state abbreviation; nudge_x offsets the labels horizontally.
# geom_label() inherits x and y from the plot, so only `label` is mapped.
ggplot(murders, aes(population / 10^6, total)) +
  geom_point() +
  geom_label(aes(label = abb), nudge_x = 1.5)
# qplot(murders$population, murders$total) gives a similar scatter plot

# Same figure with the labels coloured by region. `color` has to be mapped
# inside aes() so that it varies with the region column.
ggplot(murders, aes(population / 10^6, total)) +
  geom_point() +
  geom_label(aes(label = abb, color = region), nudge_x = 1.5)

# Open the help page for geom_label.
?geom_label()
# Progress note: currently at section 7.15 of https://rafalab.github.io/dsbook/ggplot2.html
library(ggthemes)
library(ggrepel)
# State-abbreviation labels at (population, total), coloured by region,
# with the x axis on a log10 scale.
p <- ggplot(
  murders,
  aes(x = population, y = total, label = abb, color = region)
) +
  geom_label() +
  scale_x_log10()
p # x axis transformed via scale_x_log10()
# Same labelled plot, but with the y axis (instead of x) on a log10 scale.
p1 <- ggplot(
  murders,
  aes(x = population, y = total, label = abb, color = region)
) +
  geom_label() +
  scale_y_log10()
p1 # y axis transformed via scale_y_log10()
# Final figure: size-3 points plus repelled text labels, both axes on a
# log10 scale, with a title and the Economist theme.
p2 <- ggplot(
  murders,
  aes(x = population, y = total, label = abb, color = region)
) +
  # color = region is inherited from the plot-level aes(), so only the
  # fixed point size is set here
  geom_point(size = 3) +
  # repelled labels, each with a small segment pointing at its point
  geom_text_repel() +
  # geom_label(nudge_x = 0.05) + # dropped: it duplicated the repelled labels
  scale_x_log10() +
  scale_y_log10() +
  ggtitle("Gun murder data") +
  # from the ggthemes package; author's note: it also restyles and
  # repositions the region legend
  theme_economist()
p2 # both x and y are log-scaled