데이터의 크기를 막대의 길이로 표현한 그래프.
집단간 차이를 표현할 때 주로 사용됨.
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
# Attach ggplot2 before first use: it supplies the `mpg` dataset summarised
# below as well as the ggplot()/geom_*() functions used by the plots that
# follow. Only dplyr is attached above.
library(ggplot2)

# Mean of hwy within each drv group.
df_mpg <- mpg %>%
  group_by(drv) %>%
  summarise(mean_hwy = mean(hwy))
df_mpg
## # A tibble: 3 x 2
## drv mean_hwy
## <chr> <dbl>
## 1 4 19.17476
## 2 f 28.16038
## 3 r 21.00000
x 축에 범주를 나타내는 변수를 지정
# Bar chart of the summarised data: one bar per drv, bar height = mean_hwy.
ggplot(data = df_mpg, mapping = aes(x = drv, y = mean_hwy)) +
  geom_col()

# Same chart with the bars sorted from highest to lowest mean_hwy
# (reorder() on the negated value gives descending order).
ggplot(
  data = df_mpg,
  mapping = aes(x = reorder(drv, -mean_hwy), y = mean_hwy)
) +
  geom_col()
빈도 막대그래프는 값의 개수(빈도)로 막대의 길이를 표현한 그래프.
빈도 막대 그래프를 만들려면 y축 없이 x축만 지정하고, geom_col() 대신 geom_bar() 를 사용하면 됨.
# Frequency bar chart: only x is mapped, and geom_bar() counts rows per drv.
ggplot(data = mpg, mapping = aes(x = drv)) +
  geom_bar()
x축에 연속변수를 지정하면 값의 분포를 파악할 수 있음.
# Same frequency chart with hwy on the x axis: one bar per distinct hwy value.
ggplot(data = mpg, mapping = aes(x = hwy)) +
  geom_bar()
어떤 회사에서 생산한 “suv” 차종의 도시 연비가 높은지 알아보려고 합니다. “suv” 차종을 대상으로 평균 cty(도시 연비)가 가장 높은 회사 다섯 곳을 막대 그래프로 표현해보세요. 막대는 연비가 높은 순으로 정렬하세요.
# Exercise: the five manufacturers whose "suv" models have the highest mean
# cty, drawn as bars from highest to lowest.
mpg <- as.data.frame(ggplot2::mpg)

df <- mpg %>%
  filter(class == "suv") %>%
  group_by(manufacturer) %>%
  # Average cty, as the exercise asks (the previous version averaged hwy).
  summarise(mean.cty = mean(cty)) %>%
  arrange(desc(mean.cty)) %>%
  head(5)
df
## # A tibble: 5 x 2
## manufacturer mean.cty
## <chr> <dbl>
## 1 subaru 18.83333
## 2 toyota 14.37500
## 3 nissan 13.75000
## 4 jeep 13.50000
## 5 mercury 13.25000

# reorder() on the negated mean puts the highest mean first on the x axis.
ggplot(df, aes(x = reorder(manufacturer, -mean.cty), y = mean.cty)) +
  geom_col()
자동차 중에서 어떤 class (자동차종류)가 가장 많은지 알아보려고 합니다. 자동차 종류별 빈도를 표현한 막대그래프를 만들어보세요.
# Number of rows in each class.
ggplot(mpg, aes(x = class)) +
  geom_bar()

# Same chart with the counting stat spelled out. geom_bar(stat = "count")
# replaces geom_histogram(stat = "count"), which warned
# "Ignoring unknown parameters: binwidth, bins, pad" because the histogram's
# binning arguments are not accepted by the count stat.
ggplot(mpg, aes(x = class)) +
  geom_bar(stat = "count")
# gcookbook is attached for the pg_mean and cabbage_exp data used below
# (presumably provided by that package).
library(gcookbook)

# Bars whose heights are taken directly from the data, written with geom_col()
ggplot(data = pg_mean, mapping = aes(x = group, y = weight)) +
  geom_col()

# ... and the same request written as geom_bar(stat = "identity").
ggplot(data = pg_mean, mapping = aes(x = group, y = weight)) +
  geom_bar(stat = "identity")

# Fill and outline colours set to fixed values (outside aes()).
ggplot(data = pg_mean, mapping = aes(x = group, y = weight)) +
  geom_col(fill = "lightblue", colour = "black")

# Grouped bars: fill is mapped to Cultivar and the bars are dodged.
ggplot(
  data = cabbage_exp,
  mapping = aes(x = Date, y = Weight, fill = Cultivar)
) +
  geom_col(position = "dodge")

# Grouped bars again, with black outlines and the Pastel1 brewer palette.
ggplot(
  data = cabbage_exp,
  mapping = aes(x = Date, y = Weight, fill = Cultivar)
) +
  geom_col(position = "dodge", colour = "black") +
  scale_fill_brewer(palette = "Pastel1")
library(gcookbook)

# Keep the rows of uspopchange whose Change ranks above 40.
upc <- subset(uspopchange, subset = rank(Change) > 40)
upc
## State Abb Region Change
## 3 Arizona AZ West 24.6
## 6 Colorado CO West 16.9
## 10 Florida FL South 17.6
## 11 Georgia GA South 18.3
## 13 Idaho ID West 21.1
## 29 Nevada NV West 35.1
## 34 North Carolina NC South 18.5
## 41 South Carolina SC South 15.3
## 44 Texas TX South 20.6
## 45 Utah UT West 23.8

# One bar per state abbreviation, bar colour mapped to Region.
ggplot(data = upc, mapping = aes(x = Abb, y = Change, fill = Region)) +
  geom_col()
변수의 대입(mapping)은 aes() 안에서 일어나는 데 비해, 설정(setting)은 aes() 바깥에서 이루어짐.
# Bars sorted by ascending Change, outlined in black, with hand-picked fill
# colours for the Region levels and a relabelled x axis.
ggplot(
  data = upc,
  mapping = aes(x = reorder(Abb, Change), y = Change, fill = Region)
) +
  geom_col(colour = "black") +
  scale_fill_manual(values = c("#669933", "#FFCC66")) +
  xlab("State")
library(gcookbook)

# Rows of climate with Source "Berkeley" and Year from 1900 onward.
csub <- subset(climate, subset = Source == "Berkeley" & Year >= 1900)
# Logical flag: TRUE where Anomaly10y is zero or positive. Used as the fill
# variable so the two cases are coloured differently.
csub[["pos"]] <- csub[["Anomaly10y"]] >= 0

ggplot(data = csub, mapping = aes(x = Year, y = Anomaly10y, fill = pos)) +
  geom_col()
scale_fill_manual()을 사용해 색깔을 바꾸고, guide = FALSE(ggplot2 3.3.4 이상에서는 guide = "none")로 범례를 지움
# Same anomaly chart with thin black outlines, custom colours for the two
# values of pos, and the legend removed.
ggplot(csub, aes(x = Year, y = Anomaly10y, fill = pos)) +
  geom_col(colour = "black", size = .25) +
  # guide = "none" is the supported way to drop a legend; guide = FALSE is
  # deprecated in current ggplot2 and emits a warning.
  scale_fill_manual(values = c("#CCEEFF", "#FFDDDD"), guide = "none")
library(gcookbook)

# Narrower bars: width set to 0.5.
ggplot(data = pg_mean, mapping = aes(x = group, y = weight)) +
  geom_col(width = 0.5)
막대 너비(width)의 최댓값은 1
library(gcookbook)

# Widest bars: width set to 1.
ggplot(data = pg_mean, mapping = aes(x = group, y = weight)) +
  geom_col(width = 1)
# Stacked bars: fill mapped to Cultivar, default stacking position.
ggplot(cabbage_exp, aes(x = Date, y = Weight, fill = Cultivar)) +
  geom_col()

# Stacked bars with the stacking order reversed, and the legend reversed to
# match. The reversal is requested through position_stack(reverse = TRUE):
# the `order` aesthetic used previously is no longer honoured by ggplot2, so
# it left the stacks unchanged and only the legend was flipped.
ggplot(cabbage_exp, aes(x = Date, y = Weight, fill = Cultivar)) +
  geom_col(position = position_stack(reverse = TRUE)) +
  guides(fill = guide_legend(reverse = TRUE))
library(dplyr)

# Within each Date, express every Weight as a percentage of that Date's total.
ce <- mutate(
  group_by(cabbage_exp, Date),
  percent_weight = Weight / sum(Weight) * 100
)
ce
## # A tibble: 6 x 7
## # Groups: Date [3]
## Cultivar Date Weight sd n se percent_weight
## <fctr> <fctr> <dbl> <dbl> <int> <dbl> <dbl>
## 1 c39 d16 3.18 0.9566144 10 0.30250803 58.45588
## 2 c39 d20 2.80 0.2788867 10 0.08819171 47.37733
## 3 c39 d21 2.74 0.9834181 10 0.31098410 65.08314
## 4 c52 d16 2.26 0.4452215 10 0.14079141 41.54412
## 5 c52 d20 3.11 0.7908505 10 0.25008887 52.62267
## 6 c52 d21 1.47 0.2110819 10 0.06674995 34.91686

# 100% stacked bars: the percentages within each Date sum to 100.
ggplot(
  data = ce,
  mapping = aes(x = Date, y = percent_weight, fill = Cultivar)
) +
  geom_col()
그래프에 geom_text()를 추가. vjust(vertical justification) 을 설정함으로써 텍스트를 막대의 상단 위 아래로 움직일 수 있다.
# Labels just below the top of each bar (vjust > 1 pushes the text down),
# drawn in white.
ggplot(
  data = cabbage_exp,
  mapping = aes(x = interaction(Date, Cultivar), y = Weight)
) +
  geom_col() +
  geom_text(mapping = aes(label = Weight), vjust = 1.5, color = "white")

# Labels just above the top of each bar (negative vjust lifts the text up).
ggplot(
  data = cabbage_exp,
  mapping = aes(x = interaction(Date, Cultivar), y = Weight)
) +
  geom_col() +
  geom_text(mapping = aes(label = Weight), vjust = -0.2)

# Grouped bars with labels: the text layer is dodged by 0.9 so each label
# lines up with its own bar, and uses a smaller font size.
ggplot(
  data = cabbage_exp,
  mapping = aes(x = Date, y = Weight, fill = Cultivar)
) +
  geom_col(position = "dodge") +
  geom_text(
    mapping = aes(label = Weight),
    vjust = 1.5,
    color = "white",
    position = position_dodge(.9),
    size = 3
  )
# Within each Date, express every Weight as a percentage of that Date's total.
ce <- cabbage_exp %>%
  group_by(Date) %>%
  mutate(percent_weight = Weight / sum(Weight) * 100)

# 100% stacked bars with each segment labelled by its Weight.
# The labels are stacked with position_stack(vjust = 0.5), which centres each
# one inside its own segment on the same percent scale as the bars. The
# previous hand-computed label_y was a cumulative sum of Weight, so the labels
# were placed in Weight units on a 0-100 percent axis and did not line up
# with the segments.
ggplot(ce, aes(x = Date, y = percent_weight, fill = Cultivar)) +
  geom_col() +
  geom_text(
    aes(label = Weight),
    position = position_stack(vjust = 0.5),
    color = "white"
  )
# First 25 rows of tophitters2001.
tophit <- tophitters2001[seq_len(25), ]

# Basic dot plot: avg on x, one row per name on y.
ggplot(data = tophit, mapping = aes(x = avg, y = name)) +
  geom_point()

# The three columns used by the plots in this section.
tophit[, c("name", "lg", "avg")]
## name lg avg
## 1 Larry Walker NL 0.3501
## 2 Ichiro Suzuki AL 0.3497
## 3 Jason Giambi AL 0.3423
## 4 Roberto Alomar AL 0.3357
## 5 Todd Helton NL 0.3356
## 6 Moises Alou NL 0.3314
## 7 Lance Berkman NL 0.3310
## 8 Bret Boone AL 0.3307
## 9 Frank Catalanotto AL 0.3305
## 10 Chipper Jones NL 0.3304
## 11 Albert Pujols NL 0.3288
## 12 Barry Bonds NL 0.3277
## 13 Sammy Sosa NL 0.3276
## 14 Juan Pierre NL 0.3274
## 15 Juan Gonzalez AL 0.3252
## 16 Luis Gonzalez NL 0.3251
## 17 Rich Aurilia NL 0.3239
## 18 Paul Lo Duca NL 0.3196
## 19 Jose Vidro NL 0.3189
## 20 Alex Rodriguez AL 0.3180
## 21 Cliff Floyd NL 0.3171
## 22 Shannon Stewart AL 0.3156
## 23 Jeff Cirillo NL 0.3125
## 24 Jeff Conine AL 0.3111
## 25 Derek Jeter AL 0.3111
# First 25 rows of tophitters2001.
tophit <- tophitters2001[seq_len(25), ]

# Dot plot with names ordered by avg; vertical grid lines are removed and the
# horizontal ones are drawn as dashed grey guides.
ggplot(data = tophit, mapping = aes(x = avg, y = reorder(name, avg))) +
  geom_point(size = 3) +
  theme_bw(base_family = "") +
  theme(
    panel.grid.major.x = element_blank(),
    panel.grid.minor.x = element_blank(),
    panel.grid.major.y = element_line(colour = "grey60", linetype = "dashed")
  )

# First 25 rows again, so this example does not depend on the one above.
tophit <- tophitters2001[seq_len(25), ]

# Same plot with the axes swapped: names on x (labels rotated 60 degrees and
# right-aligned), avg on y. The theme() grid settings are the same as above.
ggplot(data = tophit, mapping = aes(x = reorder(name, avg), y = avg)) +
  geom_point(size = 3) +
  theme_bw(base_family = "") +
  theme(
    axis.text.x = element_text(angle = 60, hjust = 1),
    panel.grid.major.x = element_blank(),
    panel.grid.minor.x = element_blank(),
    panel.grid.major.y = element_line(colour = "grey60", linetype = "dashed")
  )
# Names sorted first by lg, then by avg; used as the factor level order so
# the y axis follows that ordering.
nameorder <- with(tophit, name[order(lg, avg)])
tophit$name <- factor(tophit$name, levels = nameorder)

# Dot plot with a grey segment drawn from x = 0 to each point and the points
# coloured by lg.
ggplot(data = tophit, mapping = aes(x = avg, y = name)) +
  geom_segment(mapping = aes(yend = name), xend = 0, colour = "grey50") +
  geom_point(mapping = aes(colour = lg), size = 3) +
  scale_colour_brewer(palette = "Set1", limits = c("NL", "AL")) +
  theme_bw(base_family = "") +
  theme(
    # Remove the horizontal grid lines.
    panel.grid.major.y = element_blank(),
    # Move the legend inside the plotting area.
    legend.position = c(1, 0.55),
    legend.justification = c(1, 0.5)
  )
# Same segment-and-point plot, split into one facet row per lg instead of
# using a colour legend. Free y scales and free y spacing let each facet show
# only its own names, with panel heights that follow the number of names.
ggplot(tophit, aes(x = avg, y = name)) +
  geom_segment(aes(yend = name), xend = 0, colour = "grey50") +
  geom_point(size = 3, aes(colour = lg)) +
  # guide = "none" is the supported way to drop a legend; guide = FALSE is
  # deprecated in current ggplot2 and emits a warning.
  scale_colour_brewer(
    palette = "Set1",
    limits = c("NL", "AL"),
    guide = "none"
  ) +
  theme_bw(base_family = "") +
  # Remove the horizontal grid lines.
  theme(panel.grid.major.y = element_blank()) +
  facet_grid(lg ~ ., scales = "free_y", space = "free_y")