#3.4 Annotations
# Attach every package the script relies on before doing any work.
library(ggplot2)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
# Copy the `economics` dataset into the workspace.
data("economics")
# The former `rm(list = ls())` call was removed: it deleted every object in
# the caller's workspace, including the `economics` copy loaded just above.
# ls() lists the names of the objects in the current environment; the output
# below is what a session with an otherwise empty workspace shows.
ls()
## [1] "economics"
# Column-by-column overview of the `economics` data.
economics %>% glimpse()
## Rows: 574
## Columns: 6
## $ date <date> 1967-07-01, 1967-08-01, 1967-09-01, 1967-10-01, 1967-11-01, …
## $ pce <dbl> 506.7, 509.8, 515.6, 512.2, 517.4, 525.1, 530.9, 533.6, 544.3…
## $ pop <dbl> 198712, 198911, 199113, 199311, 199498, 199657, 199808, 19992…
## $ psavert <dbl> 12.6, 12.6, 11.9, 12.9, 12.8, 11.8, 11.7, 12.3, 11.7, 12.3, 1…
## $ uempmed <dbl> 4.5, 4.7, 4.6, 4.9, 4.7, 4.8, 5.1, 4.5, 4.1, 4.6, 4.4, 4.4, 4…
## $ unemploy <dbl> 2944, 2945, 2958, 3143, 3066, 3018, 2878, 3001, 2877, 2709, 2…
# Line chart of the `unemploy` column against `date`.
ggplot(data = economics, mapping = aes(x = date, y = unemploy)) +
  geom_line()

# Load the `presidential` dataset into the workspace and inspect it.
data(list = "presidential")
presidential %>% glimpse()
## Rows: 11
## Columns: 4
## $ name <chr> "Eisenhower", "Kennedy", "Johnson", "Nixon", "Ford", "Carter", "…
## $ start <date> 1953-01-20, 1961-01-20, 1963-11-22, 1969-01-20, 1974-08-09, 197…
## $ end <date> 1961-01-20, 1963-11-22, 1969-01-20, 1974-08-09, 1977-01-20, 198…
## $ party <chr> "Republican", "Democratic", "Democratic", "Republican", "Republi…
# First six rows of each table, to compare their date columns.
presidential %>% head()
## # A tibble: 6 × 4
## name start end party
## <chr> <date> <date> <chr>
## 1 Eisenhower 1953-01-20 1961-01-20 Republican
## 2 Kennedy 1961-01-20 1963-11-22 Democratic
## 3 Johnson 1963-11-22 1969-01-20 Democratic
## 4 Nixon 1969-01-20 1974-08-09 Republican
## 5 Ford 1974-08-09 1977-01-20 Republican
## 6 Carter 1977-01-20 1981-01-20 Democratic
economics %>% head()
## # A tibble: 6 × 6
## date pce pop psavert uempmed unemploy
## <date> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 1967-07-01 507. 198712 12.6 4.5 2944
## 2 1967-08-01 510. 198911 12.6 4.7 2945
## 3 1967-09-01 516. 199113 11.9 4.6 2958
## 4 1967-10-01 512. 199311 12.9 4.9 3143
## 5 1967-11-01 517. 199498 12.8 4.7 3066
## 6 1967-12-01 525. 199657 11.8 4.8 3018
# Date of the first `economics` observation; used below as the cut-off.
economics[["date"]][1]
## [1] "1967-07-01"
# Keep only the terms that started after the first `economics` observation.
# subset() is the base-R function that does the same job as dplyr::filter().
presidential <- subset(
  x = presidential,
  subset = start > economics$date[1]
)
presidential %>% head()
## # A tibble: 6 × 4
## name start end party
## <chr> <date> <date> <chr>
## 1 Nixon 1969-01-20 1974-08-09 Republican
## 2 Ford 1974-08-09 1977-01-20 Republican
## 3 Carter 1977-01-20 1981-01-20 Democratic
## 4 Reagan 1981-01-20 1989-01-20 Republican
## 5 Bush 1989-01-20 1993-01-20 Republican
## 6 Clinton 1993-01-20 2001-01-20 Democratic
# The same condition written with dplyr::filter(). `presidential` was already
# overwritten with the filtered rows above, so this keeps all eight of them.
presidential1 <- filter(presidential, start > economics$date[1])
presidential1
## # A tibble: 8 × 4
## name start end party
## <chr> <date> <date> <chr>
## 1 Nixon 1969-01-20 1974-08-09 Republican
## 2 Ford 1974-08-09 1977-01-20 Republican
## 3 Carter 1977-01-20 1981-01-20 Democratic
## 4 Reagan 1981-01-20 1989-01-20 Republican
## 5 Bush 1989-01-20 1993-01-20 Republican
## 6 Clinton 1993-01-20 2001-01-20 Democratic
## 7 Bush 2001-01-20 2009-01-20 Republican
## 8 Obama 2009-01-20 2017-01-20 Democratic
# Unemployment over time, annotated with the presidential terms held in
# `presidential1`: a shaded band per term, a vertical rule and a name label
# at each term start, and the unemployment line drawn last so it sits on top.
ggplot(economics) +
  # ymin/ymax of -Inf/Inf stretch each band over the full panel height.
  geom_rect(
    aes(xmin = start, xmax = end, fill = party),
    ymin = -Inf, ymax = Inf, alpha = 0.7,
    data = presidential1
  ) +
  geom_vline(
    aes(xintercept = as.numeric(start)),
    data = presidential1,
    colour = "grey50", alpha = 0.5
  ) +
  # hjust = 0 with nudge_x = 50 places each name just right of its rule.
  geom_text(
    aes(x = start, y = 2500, label = name),
    data = presidential1,
    size = 3, vjust = 0, hjust = 0, nudge_x = 50
  ) +
  geom_line(aes(date, unemploy)) +
  # Colours are matched to parties by name, so the mapping no longer depends
  # on the order in which the `party` values happen to sort.
  scale_fill_manual(values = c(Democratic = "blue", Republican = "red"))

# 2-D binned counts of log10(price) against log10(carat), with one panel per
# `cut` laid out in a single row.
ggplot(data = diamonds, mapping = aes(x = log10(carat), y = log10(price))) +
  geom_bin2d() +
  facet_wrap(facets = ~cut, nrow = 1)

# Fit log10(price) ~ log10(carat) on the whole `diamonds` data and keep only
# the two coefficients (intercept and slope).
mod_coef <- lm(log10(price) ~ log10(carat), data = diamonds) %>% coef()
mod_coef
## (Intercept) log10(carat)
## 3.669207 1.675817
# Same faceted plot as before, with the fitted line overlaid in every panel
# as a common reference.
# NOTE(review): on ggplot2 >= 3.4.0 `size` for lines is reported as deprecated
# in favour of `linewidth`; kept as `size` here pending confirmation of the
# ggplot2 version this script targets.
ggplot(data = diamonds, mapping = aes(x = log10(carat), y = log10(price))) +
  geom_bin2d() +
  geom_abline(
    intercept = mod_coef["(Intercept)"],
    slope = mod_coef["log10(carat)"],
    colour = "white",
    size = 1
  ) +
  facet_wrap(facets = ~cut, nrow = 1)

# Load the `Oxboys` dataset from the nlme package and inspect its structure.
data("Oxboys", package = "nlme")
Oxboys %>% head()
## Subject age height Occasion
## 1 1 -1.0000 140.5 1
## 2 1 -0.7479 143.4 2
## 3 1 -0.4630 144.8 3
## 4 1 -0.1643 147.1 4
## 5 1 -0.0027 147.7 5
## 6 1 0.2466 150.2 6
Oxboys %>% glimpse()
## Rows: 234
## Columns: 4
## $ Subject <ord> 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3…
## $ age <dbl> -1.0000, -0.7479, -0.4630, -0.1643, -0.0027, 0.2466, 0.5562, …
## $ height <dbl> 140.5, 143.4, 144.8, 147.1, 147.7, 150.2, 151.7, 153.3, 155.8…
## $ Occasion <ord> 1, 2, 3, 4, 5, 6, 7, 8, 9, 1, 2, 3, 4, 5, 6, 7, 8, 9, 1, 2, 3…
# Correct result: `group = Subject` makes geom_line() draw one line per
# Subject.
ggplot(data = Oxboys, mapping = aes(x = age, y = height, group = Subject)) +
  geom_point() +
  geom_line()

# Incorrect result, shown as a counter-example: without a `group` mapping
# geom_line() joins all observations into a single line, which is not the
# plot that should be produced.
ggplot(data = Oxboys, mapping = aes(x = age, y = height)) +
  geom_point() +
  geom_line()

# `group = Subject` is mapped in ggplot(), so both layers inherit it:
# geom_line() draws one line per Subject and geom_smooth() fits one linear
# model per Subject as well.
ggplot(data = Oxboys, mapping = aes(x = age, y = height, group = Subject)) +
  geom_line() +
  geom_smooth(method = "lm", se = FALSE)
## `geom_smooth()` using formula 'y ~ x'

# Here the grouping is mapped on geom_line() only, so the lines are still
# drawn per Subject while geom_smooth() fits a single linear model to all
# observations.
# NOTE(review): on ggplot2 >= 3.4.0 `size` for lines is reported as deprecated
# in favour of `linewidth`; kept as `size` pending confirmation of the target
# ggplot2 version.
ggplot(data = Oxboys, mapping = aes(x = age, y = height)) +
  geom_line(mapping = aes(group = Subject)) +
  geom_smooth(method = "lm", size = 2, se = FALSE)
## `geom_smooth()` using formula 'y ~ x'
