# Attach the packages used by this script and load the `economics` data set.
# The workspace is deliberately not cleared here: rm(list = ls()) would delete
# the caller's objects while leaving attached packages and options untouched,
# so it does not give a clean session anyway. Restart R for a fresh session.
library(ggplot2)
data("economics")
library(dplyr)
## 
## 다음의 패키지를 부착합니다: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
# Compact overview of the columns in the economics data.
economics %>% glimpse()
## Rows: 574
## Columns: 6
## $ date     <date> 1967-07-01, 1967-08-01, 1967-09-01, 1967-10-01, 1967-11-01, …
## $ pce      <dbl> 506.7, 509.8, 515.6, 512.2, 517.4, 525.1, 530.9, 533.6, 544.3…
## $ pop      <dbl> 198712, 198911, 199113, 199311, 199498, 199657, 199808, 19992…
## $ psavert  <dbl> 12.6, 12.6, 11.9, 12.9, 12.8, 11.8, 11.7, 12.3, 11.7, 12.3, 1…
## $ uempmed  <dbl> 4.5, 4.7, 4.6, 4.9, 4.7, 4.8, 5.1, 4.5, 4.1, 4.6, 4.4, 4.4, 4…
## $ unemploy <dbl> 2944, 2945, 2958, 3143, 3066, 3018, 2878, 3001, 2877, 2709, 2…
# Line chart of `unemploy` against `date`.
ggplot(data = economics, mapping = aes(x = date, y = unemploy)) +
  geom_line()

# Load the presidential terms data set and look at its columns.
data(presidential)
presidential %>% glimpse()
## Rows: 11
## Columns: 4
## $ name  <chr> "Eisenhower", "Kennedy", "Johnson", "Nixon", "Ford", "Carter", "…
## $ start <date> 1953-01-20, 1961-01-20, 1963-11-22, 1969-01-20, 1974-08-09, 197…
## $ end   <date> 1961-01-20, 1963-11-22, 1969-01-20, 1974-08-09, 1977-01-20, 198…
## $ party <chr> "Republican", "Democratic", "Democratic", "Republican", "Republi…
# Preview the first six rows of each data set.
presidential %>% head()
## # A tibble: 6 × 4
##   name       start      end        party     
##   <chr>      <date>     <date>     <chr>     
## 1 Eisenhower 1953-01-20 1961-01-20 Republican
## 2 Kennedy    1961-01-20 1963-11-22 Democratic
## 3 Johnson    1963-11-22 1969-01-20 Democratic
## 4 Nixon      1969-01-20 1974-08-09 Republican
## 5 Ford       1974-08-09 1977-01-20 Republican
## 6 Carter     1977-01-20 1981-01-20 Democratic
economics %>% head()
## # A tibble: 6 × 6
##   date         pce    pop psavert uempmed unemploy
##   <date>     <dbl>  <dbl>   <dbl>   <dbl>    <dbl>
## 1 1967-07-01  507. 198712    12.6     4.5     2944
## 2 1967-08-01  510. 198911    12.6     4.7     2945
## 3 1967-09-01  516. 199113    11.9     4.6     2958
## 4 1967-10-01  512. 199311    12.9     4.9     3143
## 5 1967-11-01  517. 199498    12.8     4.7     3066
## 6 1967-12-01  525. 199657    11.8     4.8     3018
# Date of the first observation in the economics series.
economics[["date"]][1]
## [1] "1967-07-01"
# Keep only the terms that started after that first observation, so the
# presidential rows line up with the period covered by `economics`.
presidential <- presidential[presidential$start > economics[["date"]][1], ]
head(presidential, n = 6)
## # A tibble: 6 × 4
##   name    start      end        party     
##   <chr>   <date>     <date>     <chr>     
## 1 Nixon   1969-01-20 1974-08-09 Republican
## 2 Ford    1974-08-09 1977-01-20 Republican
## 3 Carter  1977-01-20 1981-01-20 Democratic
## 4 Reagan  1981-01-20 1989-01-20 Republican
## 5 Bush    1989-01-20 1993-01-20 Republican
## 6 Clinton 1993-01-20 2001-01-20 Democratic
# Print the filtered table in full.
presidential
## # A tibble: 8 × 4
##   name    start      end        party     
##   <chr>   <date>     <date>     <chr>     
## 1 Nixon   1969-01-20 1974-08-09 Republican
## 2 Ford    1974-08-09 1977-01-20 Republican
## 3 Carter  1977-01-20 1981-01-20 Democratic
## 4 Reagan  1981-01-20 1989-01-20 Republican
## 5 Bush    1989-01-20 1993-01-20 Republican
## 6 Clinton 1993-01-20 2001-01-20 Democratic
## 7 Bush    2001-01-20 2009-01-20 Republican
## 8 Obama   2009-01-20 2017-01-20 Democratic
# The same row filter expressed with dplyr instead of subset().
presidential1 <- filter(presidential, start > economics$date[1])
presidential1
## # A tibble: 8 × 4
##   name    start      end        party     
##   <chr>   <date>     <date>     <chr>     
## 1 Nixon   1969-01-20 1974-08-09 Republican
## 2 Ford    1974-08-09 1977-01-20 Republican
## 3 Carter  1977-01-20 1981-01-20 Democratic
## 4 Reagan  1981-01-20 1989-01-20 Republican
## 5 Bush    1989-01-20 1993-01-20 Republican
## 6 Clinton 1993-01-20 2001-01-20 Democratic
## 7 Bush    2001-01-20 2009-01-20 Republican
## 8 Obama   2009-01-20 2017-01-20 Democratic
# Unemployment over time, annotated with presidential terms:
#   * shaded full-height rectangles per term, filled by party
#   * a vertical rule at the start of each term
#   * the president's name printed just right of each rule
#   * the unemployment line drawn on top
ggplot(data = economics) +
  geom_rect(
    data = presidential,
    mapping = aes(xmin = start, xmax = end, fill = party),
    ymin = -Inf,
    ymax = Inf,
    alpha = 0.7
  ) +
  geom_vline(
    data = presidential,
    mapping = aes(xintercept = as.numeric(start)),
    colour = "grey50",
    alpha = 0.5
  ) +
  geom_text(
    data = presidential,
    mapping = aes(x = start, y = 2500, label = name),
    size = 3,
    vjust = 0,
    hjust = 0,
    nudge_x = 50
  ) +
  geom_line(mapping = aes(x = date, y = unemploy)) +
  scale_fill_manual(values = c("blue", "red"))

# https://tidyverse.github.io/ggplot2-docs/reference/geom_text.html

# Min/max of the plotted variables (y: unemploy, x: date).
yrng <- with(economics, range(unemploy))
xrng <- with(economics, range(date))

# Wrap the annotation text at 40 columns and join the wrapped pieces with
# newlines. strwrap() collapses the line break and indentation that sit inside
# the string literal.
caption <- paste(
  strwrap(
    "Unemployment rates in the US have
                           varied a lot over the years",
    width = 40
  ),
  collapse = "\n"
)



# 2-D binned counts of log10(price) against log10(carat), one panel per cut.
ggplot(data = diamonds, mapping = aes(x = log10(carat), y = log10(price))) +
  geom_bin2d() +
  facet_wrap(~ cut, nrow = 1)

# Coefficients of a single linear fit on the log-log scale (all cuts pooled).
mod_coef <- coef(
  lm(log10(price) ~ log10(carat), data = diamonds)
)
mod_coef
##  (Intercept) log10(carat) 
##     3.669207     1.675817
# Same panels, with the pooled fit drawn as a white reference line in each.
ggplot(data = diamonds, mapping = aes(x = log10(carat), y = log10(price))) +
  geom_bin2d() +
  geom_abline(
    intercept = mod_coef[1],
    slope = mod_coef[2],
    colour = "white",
    size = 1
  ) +
  facet_wrap(~ cut, nrow = 1)

# 3.5 Collective Geoms
# https://rdrr.io/cran/mlmRev/man/Oxboys.html
# Load the Oxboys data set shipped with the nlme package and preview it.
data("Oxboys", package = "nlme")
head(Oxboys, n = 6)
##   Subject     age height Occasion
## 1       1 -1.0000  140.5        1
## 2       1 -0.7479  143.4        2
## 3       1 -0.4630  144.8        3
## 4       1 -0.1643  147.1        4
## 5       1 -0.0027  147.7        5
## 6       1  0.2466  150.2        6
# Column types, then per-column summaries, of Oxboys.
Oxboys %>% glimpse()
## Rows: 234
## Columns: 4
## $ Subject  <ord> 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3…
## $ age      <dbl> -1.0000, -0.7479, -0.4630, -0.1643, -0.0027, 0.2466, 0.5562, …
## $ height   <dbl> 140.5, 143.4, 144.8, 147.1, 147.7, 150.2, 151.7, 153.3, 155.8…
## $ Occasion <ord> 1, 2, 3, 4, 5, 6, 7, 8, 9, 1, 2, 3, 4, 5, 6, 7, 8, 9, 1, 2, 3…
Oxboys %>% summary()
##     Subject         age               height         Occasion 
##  10     :  9   Min.   :-1.00000   Min.   :126.2   1      :26  
##  26     :  9   1st Qu.:-0.46300   1st Qu.:143.8   2      :26  
##  25     :  9   Median :-0.00270   Median :149.5   3      :26  
##  9      :  9   Mean   : 0.02263   Mean   :149.5   4      :26  
##  2      :  9   3rd Qu.: 0.55620   3rd Qu.:155.5   5      :26  
##  6      :  9   Max.   : 1.00550   Max.   :174.8   6      :26  
##  (Other):180                                      (Other):78
# Height against age; grouping by Subject gives one connected line per subject
# instead of a single line through all points.
ggplot(
  data = Oxboys,
  mapping = aes(x = age, y = height, group = Subject)
) +
  geom_point() +
  geom_line()

# Show the working directory, then switch to the data folder.
getwd()
## [1] "C:/data_R"
# NOTE(review): hard-coded, machine-specific path; the relative file name
# passed to read_excel() below is resolved against this directory.
setwd("c:/data_R")
library(dplyr)
library(readxl)
# Read the air-quality workbook and inspect the column types as imported.
airseoul <- read_excel(path = "period1.xlsx")
str(airseoul)
## tibble [1,535 × 8] (S3: tbl_df/tbl/data.frame)
##  $ 날짜                       : chr [1:1535] "전체" "2022-03-31" "2022-03-31" "2022-03-31" ...
##  $ 측정소명                   : chr [1:1535] "평균" "평균" "강남구" "강동구" ...
##  $ 미세먼지 PM10
## (㎍/m3)   : num [1:1535] 41 20 21 26 NA 23 19 21 23 17 ...
##  $ 초미세먼지
## PM2.5 (㎍/m3): num [1:1535] 23 11 10 13 17 9 9 10 10 9 ...
##  $ 오존
## O3 (ppm)           : chr [1:1535] "0.026" "0.032" "0.033" "0.026" ...
##  $ 이산화질소
## NO2 (ppm)    : chr [1:1535] "0.026" "0.014" "0.015" "0.014" ...
##  $ 일산화탄소
## CO (ppm)     : chr [1:1535] "0.5" "0.3" "0.3" "0.3" ...
##  $ 아황산가스
## SO2(ppm)     : num [1:1535] 0.003 0.002 0.003 0.002 0.003 0.003 0.002 0.003 0.003 0.002 ...
# Raw column names; the measurement headers contain embedded "\r\n".
names(airseoul)
## [1] "날짜"                        "측정소명"                   
## [3] "미세먼지 PM10\r\n(㎍/m3)"    "초미세먼지\r\nPM2.5 (㎍/m3)"
## [5] "오존\r\nO3 (ppm)"            "이산화질소\r\nNO2 (ppm)"    
## [7] "일산화탄소\r\nCO (ppm)"      "아황산가스\r\nSO2(ppm)"
# Keep the date, station and the two particulate columns, giving them short
# ASCII names in the same step (select() renames while selecting).
airseoul1 <- airseoul %>%
  select(
    date = "날짜",
    region = "측정소명",
    pm10 = "미세먼지 PM10\r\n(㎍/m3)",
    pm2.5 = "초미세먼지\r\nPM2.5 (㎍/m3)"
  )

# Row counts per date value.
table(airseoul1[["date"]])
## 
## 2022-02-01 2022-02-02 2022-02-03 2022-02-04 2022-02-05 2022-02-06 2022-02-07 
##         26         26         26         26         26         26         26 
## 2022-02-08 2022-02-09 2022-02-10 2022-02-11 2022-02-12 2022-02-13 2022-02-14 
##         26         26         26         26         26         26         26 
## 2022-02-15 2022-02-16 2022-02-17 2022-02-18 2022-02-19 2022-02-20 2022-02-21 
##         26         26         26         26         26         26         26 
## 2022-02-22 2022-02-23 2022-02-24 2022-02-25 2022-02-26 2022-02-27 2022-02-28 
##         26         26         26         26         26         26         26 
## 2022-03-01 2022-03-02 2022-03-03 2022-03-04 2022-03-05 2022-03-06 2022-03-07 
##         26         26         26         26         26         26         26 
## 2022-03-08 2022-03-09 2022-03-10 2022-03-11 2022-03-12 2022-03-13 2022-03-14 
##         26         26         26         26         26         26         26 
## 2022-03-15 2022-03-16 2022-03-17 2022-03-18 2022-03-19 2022-03-20 2022-03-21 
##         26         26         26         26         26         26         26 
## 2022-03-22 2022-03-23 2022-03-24 2022-03-25 2022-03-26 2022-03-27 2022-03-28 
##         26         26         26         26         26         26         26 
## 2022-03-29 2022-03-30 2022-03-31       전체 
##         26         26         26          1
# Row counts per region value.
table(airseoul1[["region"]])
## 
##   강남구   강동구   강북구   강서구   관악구   광진구   구로구   금천구 
##       59       59       59       59       59       59       59       59 
##   노원구   도봉구 동대문구   동작구   마포구 서대문구   서초구   성동구 
##       59       59       59       59       59       59       59       59 
##   성북구   송파구   양천구 영등포구   용산구   은평구   종로구     중구 
##       59       59       59       59       59       59       59       59 
##   중랑구     평균 
##       59       60
# Drop the aggregate rows: the "전체" date row and every "평균" region row.
# Comma-separated filter() conditions are combined with AND.
airseoul1 <- airseoul1 %>%
  filter(date != "전체", region != "평균")
table(airseoul1[["date"]])
## 
## 2022-02-01 2022-02-02 2022-02-03 2022-02-04 2022-02-05 2022-02-06 2022-02-07 
##         25         25         25         25         25         25         25 
## 2022-02-08 2022-02-09 2022-02-10 2022-02-11 2022-02-12 2022-02-13 2022-02-14 
##         25         25         25         25         25         25         25 
## 2022-02-15 2022-02-16 2022-02-17 2022-02-18 2022-02-19 2022-02-20 2022-02-21 
##         25         25         25         25         25         25         25 
## 2022-02-22 2022-02-23 2022-02-24 2022-02-25 2022-02-26 2022-02-27 2022-02-28 
##         25         25         25         25         25         25         25 
## 2022-03-01 2022-03-02 2022-03-03 2022-03-04 2022-03-05 2022-03-06 2022-03-07 
##         25         25         25         25         25         25         25 
## 2022-03-08 2022-03-09 2022-03-10 2022-03-11 2022-03-12 2022-03-13 2022-03-14 
##         25         25         25         25         25         25         25 
## 2022-03-15 2022-03-16 2022-03-17 2022-03-18 2022-03-19 2022-03-20 2022-03-21 
##         25         25         25         25         25         25         25 
## 2022-03-22 2022-03-23 2022-03-24 2022-03-25 2022-03-26 2022-03-27 2022-03-28 
##         25         25         25         25         25         25         25 
## 2022-03-29 2022-03-30 2022-03-31 
##         25         25         25
# Row counts per region after the aggregate rows were removed.
table(airseoul1[["region"]])
## 
##   강남구   강동구   강북구   강서구   관악구   광진구   구로구   금천구 
##       59       59       59       59       59       59       59       59 
##   노원구   도봉구 동대문구   동작구   마포구 서대문구   서초구   성동구 
##       59       59       59       59       59       59       59       59 
##   성북구   송파구   양천구 영등포구   용산구   은평구   종로구     중구 
##       59       59       59       59       59       59       59       59 
##   중랑구 
##       59
# Distribution (including NA counts) of the two particulate columns.
summary(airseoul1[["pm10"]])
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##    3.00   27.00   36.00   40.54   50.00  112.00       7
summary(airseoul1[["pm2.5"]])
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##    1.00   13.00   19.00   23.28   32.00   92.00       3
# Keep only rows where both pm10 and pm2.5 are present.
airseoul1 <- airseoul1 %>%
  filter(!is.na(pm10), !is.na(pm2.5))
# 1
# 01) Date and region of the highest pm10 reading.
airseoul1 %>%
  select(date, region, pm10) %>%
  filter(pm10 == max(pm10))
## # A tibble: 1 × 3
##   date       region  pm10
##   <chr>      <chr>  <dbl>
## 1 2022-03-05 구로구   112
# A tibble : 1 x 3
# 02) Five regions with the highest mean pm10.
airseoul1 %>%
  group_by(region) %>%
  summarise(m = mean(pm10)) %>%
  arrange(desc(m)) %>%
  slice_head(n = 5)
## # A tibble: 5 × 2
##   region     m
##   <chr>  <dbl>
## 1 양천구  44.4
## 2 강북구  44.2
## 3 강서구  43.8
## 4 노원구  43.7
## 5 강동구  43.6
# A tibble : 5 x 2