ggplot2

# 3.4 Annotations

library(ggplot2)
# `economics` ships with ggplot2 and is lazy-loaded once the package is
# attached, so no data() call is needed. The former rm(list = ls()) wiped
# the caller's whole workspace (including the copy data() had just made)
# and has been dropped.
ls() # lists the names of the objects in the current environment

## character(0)

library(dplyr)

## 
## 다음의 패키지를 부착합니다: 'dplyr'

## The following objects are masked from 'package:stats':
## 
##     filter, lag

## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union

# Compact, column-wise overview of the economics data set.
economics %>% glimpse()

## Rows: 574
## Columns: 6
## $ date     <date> 1967-07-01, 1967-08-01, 1967-09-01, 1967-10-01, 1967-11-01, …
## $ pce      <dbl> 506.7, 509.8, 515.6, 512.2, 517.4, 525.1, 530.9, 533.6, 544.3…
## $ pop      <dbl> 198712, 198911, 199113, 199311, 199498, 199657, 199808, 19992…
## $ psavert  <dbl> 12.6, 12.6, 11.9, 12.9, 12.8, 11.8, 11.7, 12.3, 11.7, 12.3, 1…
## $ uempmed  <dbl> 4.5, 4.7, 4.6, 4.9, 4.7, 4.8, 5.1, 4.5, 4.1, 4.6, 4.4, 4.4, 4…
## $ unemploy <dbl> 2944, 2945, 2958, 3143, 3066, 3018, 2878, 3001, 2877, 2709, 2…

# Line chart of `unemploy` against `date`.
ggplot(data = economics, mapping = aes(x = date, y = unemploy)) +
  geom_line()

# Copy the presidential data set into the workspace and inspect its columns.
data(presidential)
presidential %>% glimpse()

## Rows: 11
## Columns: 4
## $ name  <chr> "Eisenhower", "Kennedy", "Johnson", "Nixon", "Ford", "Carter", "…
## $ start <date> 1953-01-20, 1961-01-20, 1963-11-22, 1969-01-20, 1974-08-09, 197…
## $ end   <date> 1961-01-20, 1963-11-22, 1969-01-20, 1974-08-09, 1977-01-20, 198…
## $ party <chr> "Republican", "Democratic", "Democratic", "Republican", "Republi…

# First six rows of the presidential terms.
head(presidential, n = 6)

## # A tibble: 6 × 4
##   name       start      end        party     
##   <chr>      <date>     <date>     <chr>     
## 1 Eisenhower 1953-01-20 1961-01-20 Republican
## 2 Kennedy    1961-01-20 1963-11-22 Democratic
## 3 Johnson    1963-11-22 1969-01-20 Democratic
## 4 Nixon      1969-01-20 1974-08-09 Republican
## 5 Ford       1974-08-09 1977-01-20 Republican
## 6 Carter     1977-01-20 1981-01-20 Democratic

# First six rows of the economics series.
head(economics, n = 6)

## # A tibble: 6 × 6
##   date         pce    pop psavert uempmed unemploy
##   <date>     <dbl>  <dbl>   <dbl>   <dbl>    <dbl>
## 1 1967-07-01  507. 198712    12.6     4.5     2944
## 2 1967-08-01  510. 198911    12.6     4.7     2945
## 3 1967-09-01  516. 199113    11.9     4.6     2958
## 4 1967-10-01  512. 199311    12.9     4.9     3143
## 5 1967-11-01  517. 199498    12.8     4.7     3066
## 6 1967-12-01  525. 199657    11.8     4.8     3018

# Date of the first observation in `economics`.
economics[["date"]][1]

## [1] "1967-07-01"

# Keep only the terms that started after the first `economics` observation.
# Base R's subset() plays the same role here as dplyr's filter().
presidential <- subset(
  x = presidential,
  subset = start > economics$date[1]
)
head(presidential, n = 6)

## # A tibble: 6 × 4
##   name    start      end        party     
##   <chr>   <date>     <date>     <chr>     
## 1 Nixon   1969-01-20 1974-08-09 Republican
## 2 Ford    1974-08-09 1977-01-20 Republican
## 3 Carter  1977-01-20 1981-01-20 Democratic
## 4 Reagan  1981-01-20 1989-01-20 Republican
## 5 Bush    1989-01-20 1993-01-20 Republican
## 6 Clinton 1993-01-20 2001-01-20 Democratic

# Same row filter expressed with dplyr; the result feeds the annotation
# layers of the plot that follows.
presidential1 <- filter(presidential, start > economics$date[1])
print(presidential1)

## # A tibble: 8 × 4
##   name    start      end        party     
##   <chr>   <date>     <date>     <chr>     
## 1 Nixon   1969-01-20 1974-08-09 Republican
## 2 Ford    1974-08-09 1977-01-20 Republican
## 3 Carter  1977-01-20 1981-01-20 Democratic
## 4 Reagan  1981-01-20 1989-01-20 Republican
## 5 Bush    1989-01-20 1993-01-20 Republican
## 6 Clinton 1993-01-20 2001-01-20 Democratic
## 7 Bush    2001-01-20 2009-01-20 Republican
## 8 Obama   2009-01-20 2017-01-20 Democratic

# Unemployment series annotated with presidential terms:
#   - geom_rect  shades each term, filled by party
#   - geom_vline marks the start of each term
#   - geom_text  labels each term with the president's name
#   - geom_line  draws the unemployment series on top
ggplot(economics) +
  geom_rect(
    aes(xmin = start, xmax = end, fill = party),
    # Infinite y limits make each rectangle span the full panel height.
    ymin = -Inf, ymax = Inf, alpha = 0.7,
    data = presidential1
  ) +
  geom_vline(
    aes(xintercept = as.numeric(start)),
    data = presidential1,
    colour = "grey50", alpha = 0.5
  ) +
  geom_text(
    aes(x = start, y = 2500, label = name),
    data = presidential1,
    size = 3, vjust = 0, hjust = 0, nudge_x = 50
  ) +
  geom_line(aes(date, unemploy)) +
  # Name the colours so each party keeps its colour regardless of which
  # parties are present in the data or how the levels happen to be ordered
  # (unnamed values are assigned positionally).
  scale_fill_manual(values = c(Democratic = "blue", Republican = "red"))

# 2-D binned counts of log10(price) against log10(carat), one panel per cut,
# with all panels laid out in a single row.
ggplot(data = diamonds, mapping = aes(x = log10(carat), y = log10(price))) +
  geom_bin2d() +
  facet_wrap(vars(cut), nrow = 1)

# Intercept and slope of the log-log linear fit of price on carat.
mod_coef <- lm(formula = log10(price) ~ log10(carat), data = diamonds) %>%
  coef()
print(mod_coef)

##  (Intercept) log10(carat) 
##     3.669207     1.675817

# Same faceted bin plot, with the fitted regression line (from `mod_coef`)
# repeated in every panel as a common reference.
# NOTE(review): ggplot2 >= 3.4.0 deprecates `size` for line geoms in favour
# of `linewidth`; the recorded output looks like an older ggplot2, so `size`
# is kept here -- switch once the minimum ggplot2 version is confirmed.
ggplot(data = diamonds, mapping = aes(x = log10(carat), y = log10(price))) +
  geom_bin2d() +
  geom_abline(
    slope = mod_coef[2], intercept = mod_coef[1],
    size = 1, colour = "white"
  ) +
  facet_wrap(vars(cut), nrow = 1)

# Load the Oxboys data set from the nlme package and show its first six rows.
data("Oxboys", package = "nlme")
head(Oxboys, n = 6)

##   Subject     age height Occasion
## 1       1 -1.0000  140.5        1
## 2       1 -0.7479  143.4        2
## 3       1 -0.4630  144.8        3
## 4       1 -0.1643  147.1        4
## 5       1 -0.0027  147.7        5
## 6       1  0.2466  150.2        6

# Column-wise overview of Oxboys.
Oxboys %>% glimpse()

## Rows: 234
## Columns: 4
## $ Subject  <ord> 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3…
## $ age      <dbl> -1.0000, -0.7479, -0.4630, -0.1643, -0.0027, 0.2466, 0.5562, …
## $ height   <dbl> 140.5, 143.4, 144.8, 147.1, 147.7, 150.2, 151.7, 153.3, 155.8…
## $ Occasion <ord> 1, 2, 3, 4, 5, 6, 7, 8, 9, 1, 2, 3, 4, 5, 6, 7, 8, 9, 1, 2, 3…

# Intended result: `group = Subject` gives every boy his own line.
ggplot(data = Oxboys, mapping = aes(x = age, y = height, group = Subject)) +
  geom_point() +
  geom_line()

# Incorrect result, shown as a counter-example: no `group` aesthetic is set,
# so geom_line() joins all observations as a single series.
ggplot(data = Oxboys, mapping = aes(x = age, y = height)) +
  geom_point() +
  geom_line()

# Plot-level grouping: both layers inherit `group = Subject`, so
# geom_smooth() draws a separate fitted line for every boy.
ggplot(data = Oxboys, mapping = aes(x = age, y = height, group = Subject)) +
  geom_line() +
  geom_smooth(se = FALSE, method = "lm")

## `geom_smooth()` using formula 'y ~ x'

# Layer-level grouping: only geom_line() is grouped by Subject, so
# geom_smooth() fits one overall linear trend across all boys.
# NOTE(review): ggplot2 >= 3.4.0 deprecates `size` for line geoms in favour
# of `linewidth`; `size` is kept to match the ggplot2 version that produced
# the recorded output.
ggplot(data = Oxboys, mapping = aes(x = age, y = height)) +
  geom_line(mapping = aes(group = Subject)) +
  geom_smooth(se = FALSE, size = 2, method = "lm")

## `geom_smooth()` using formula 'y ~ x'

ggplot2_002

이동건

2022-10-22