# Attach the plotting and data-manipulation packages and load the `economics`
# dataset used by the first plots.
# The former `rm(list = ls())` call was removed on purpose: it deleted every
# object in the caller's global environment, yet left attached packages and
# options untouched, so it never produced a truly clean session. Restart R
# when a clean session is needed.
library(ggplot2)
data("economics")
library(dplyr)
##
## 다음의 패키지를 부착합니다: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
# Column-by-column overview of `economics`.
economics %>% glimpse()
## Rows: 574
## Columns: 6
## $ date <date> 1967-07-01, 1967-08-01, 1967-09-01, 1967-10-01, 1967-11-01, …
## $ pce <dbl> 506.7, 509.8, 515.6, 512.2, 517.4, 525.1, 530.9, 533.6, 544.3…
## $ pop <dbl> 198712, 198911, 199113, 199311, 199498, 199657, 199808, 19992…
## $ psavert <dbl> 12.6, 12.6, 11.9, 12.9, 12.8, 11.8, 11.7, 12.3, 11.7, 12.3, 1…
## $ uempmed <dbl> 4.5, 4.7, 4.6, 4.9, 4.7, 4.8, 5.1, 4.5, 4.1, 4.6, 4.4, 4.4, 4…
## $ unemploy <dbl> 2944, 2945, 2958, 3143, 3066, 3018, 2878, 3001, 2877, 2709, 2…
# Line chart of `unemploy` against `date`.
ggplot(data = economics, mapping = aes(x = date, y = unemploy)) +
  geom_line()

# Load the `presidential` dataset, then show its structure and first rows.
data("presidential")
presidential %>% glimpse()
## Rows: 11
## Columns: 4
## $ name <chr> "Eisenhower", "Kennedy", "Johnson", "Nixon", "Ford", "Carter", "…
## $ start <date> 1953-01-20, 1961-01-20, 1963-11-22, 1969-01-20, 1974-08-09, 197…
## $ end <date> 1961-01-20, 1963-11-22, 1969-01-20, 1974-08-09, 1977-01-20, 198…
## $ party <chr> "Republican", "Democratic", "Democratic", "Republican", "Republi…
presidential %>% head()
## # A tibble: 6 × 4
## name start end party
## <chr> <date> <date> <chr>
## 1 Eisenhower 1953-01-20 1961-01-20 Republican
## 2 Kennedy 1961-01-20 1963-11-22 Democratic
## 3 Johnson 1963-11-22 1969-01-20 Democratic
## 4 Nixon 1969-01-20 1974-08-09 Republican
## 5 Ford 1974-08-09 1977-01-20 Republican
## 6 Carter 1977-01-20 1981-01-20 Democratic
# First rows of `economics`, followed by the date of its first observation,
# which is the cut-off used to trim `presidential` further down.
economics %>% head()
## # A tibble: 6 × 6
## date pce pop psavert uempmed unemploy
## <date> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 1967-07-01 507. 198712 12.6 4.5 2944
## 2 1967-08-01 510. 198911 12.6 4.7 2945
## 3 1967-09-01 516. 199113 11.9 4.6 2958
## 4 1967-10-01 512. 199311 12.9 4.9 3143
## 5 1967-11-01 517. 199498 12.8 4.7 3066
## 6 1967-12-01 525. 199657 11.8 4.8 3018
economics$date[[1]]
## [1] "1967-07-01"
# Keep only the terms that started after the first `economics` observation.
# This overwrites `presidential` with the trimmed table.
presidential <- subset(
  presidential,
  subset = start > economics$date[1]
)
presidential %>% head()
## # A tibble: 6 × 4
## name start end party
## <chr> <date> <date> <chr>
## 1 Nixon 1969-01-20 1974-08-09 Republican
## 2 Ford 1974-08-09 1977-01-20 Republican
## 3 Carter 1977-01-20 1981-01-20 Democratic
## 4 Reagan 1981-01-20 1989-01-20 Republican
## 5 Bush 1989-01-20 1993-01-20 Republican
## 6 Clinton 1993-01-20 2001-01-20 Democratic
presidential
## # A tibble: 8 × 4
## name start end party
## <chr> <date> <date> <chr>
## 1 Nixon 1969-01-20 1974-08-09 Republican
## 2 Ford 1974-08-09 1977-01-20 Republican
## 3 Carter 1977-01-20 1981-01-20 Democratic
## 4 Reagan 1981-01-20 1989-01-20 Republican
## 5 Bush 1989-01-20 1993-01-20 Republican
## 6 Clinton 1993-01-20 2001-01-20 Democratic
## 7 Bush 2001-01-20 2009-01-20 Republican
## 8 Obama 2009-01-20 2017-01-20 Democratic
# The same condition expressed with dplyr; the result is stored separately.
presidential1 <- filter(presidential, start > economics$date[1])
presidential1
## # A tibble: 8 × 4
## name start end party
## <chr> <date> <date> <chr>
## 1 Nixon 1969-01-20 1974-08-09 Republican
## 2 Ford 1974-08-09 1977-01-20 Republican
## 3 Carter 1977-01-20 1981-01-20 Democratic
## 4 Reagan 1981-01-20 1989-01-20 Republican
## 5 Bush 1989-01-20 1993-01-20 Republican
## 6 Clinton 1993-01-20 2001-01-20 Democratic
## 7 Bush 2001-01-20 2009-01-20 Republican
## 8 Obama 2009-01-20 2017-01-20 Democratic
# Unemployment line drawn on top of annotations taken from `presidential`.
# Layers are added back-to-front: rectangles, start lines, labels, series.
ggplot(data = economics) +
  # One full-height rectangle per term, filled by party.
  geom_rect(
    data = presidential,
    mapping = aes(xmin = start, xmax = end, fill = party),
    ymin = -Inf,
    ymax = Inf,
    alpha = 0.7
  ) +
  # Vertical grey line at the start of every term.
  geom_vline(
    data = presidential,
    mapping = aes(xintercept = as.numeric(start)),
    colour = "grey50",
    alpha = 0.5
  ) +
  # President's name, left-aligned at y = 2500 and nudged right of the line.
  geom_text(
    data = presidential,
    mapping = aes(x = start, y = 2500, label = name),
    size = 3,
    vjust = 0,
    hjust = 0,
    nudge_x = 50
  ) +
  geom_line(mapping = aes(x = date, y = unemploy)) +
  # Two unnamed colours, matched to the `party` values in order.
  scale_fill_manual(values = c("blue", "red"))

# Reference: https://tidyverse.github.io/ggplot2-docs/reference/geom_text.html
# Value ranges of the plotted series and a caption wrapped at 40 characters.
# NOTE(review): `yrng`, `xrng` and `caption` are not used in the visible part
# of this script - confirm they are needed further down.
yrng <- range(economics[["unemploy"]])
xrng <- range(economics[["date"]])
caption <- paste(
  strwrap("Unemployment rates in the US have
varied a lot over the years", width = 40),
  collapse = "\n"
)
# 2-D binned counts of log10(price) against log10(carat), one panel per cut,
# all panels in a single row.
ggplot(
  data = diamonds,
  mapping = aes(x = log10(carat), y = log10(price))
) +
  geom_bin2d() +
  facet_wrap(~cut, nrow = 1)

# Coefficients of the linear fit of log10(price) on log10(carat).
mod_coef <- lm(log10(price) ~ log10(carat), data = diamonds) %>%
  coef()
mod_coef
## (Intercept) log10(carat)
## 3.669207 1.675817
# Binned counts per cut with the fitted line overlaid in white.
# NOTE(review): ggplot2 >= 3.4.0 prefers `linewidth` over `size` for lines -
# confirm the installed version before switching.
ggplot(
  data = diamonds,
  mapping = aes(x = log10(carat), y = log10(price))
) +
  geom_bin2d() +
  geom_abline(
    intercept = mod_coef["(Intercept)"],
    slope = mod_coef["log10(carat)"],
    colour = "white",
    size = 1
  ) +
  facet_wrap(~cut, nrow = 1)

# 3.5 Collective Geoms
# https://rdrr.io/cran/mlmRev/man/Oxboys.html
# Load `Oxboys` from the nlme package without attaching it, then inspect it.
data("Oxboys", package = "nlme")
Oxboys %>% head()
## Subject age height Occasion
## 1 1 -1.0000 140.5 1
## 2 1 -0.7479 143.4 2
## 3 1 -0.4630 144.8 3
## 4 1 -0.1643 147.1 4
## 5 1 -0.0027 147.7 5
## 6 1 0.2466 150.2 6
Oxboys %>% glimpse()
## Rows: 234
## Columns: 4
## $ Subject <ord> 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3…
## $ age <dbl> -1.0000, -0.7479, -0.4630, -0.1643, -0.0027, 0.2466, 0.5562, …
## $ height <dbl> 140.5, 143.4, 144.8, 147.1, 147.7, 150.2, 151.7, 153.3, 155.8…
## $ Occasion <ord> 1, 2, 3, 4, 5, 6, 7, 8, 9, 1, 2, 3, 4, 5, 6, 7, 8, 9, 1, 2, 3…
Oxboys %>% summary()
## Subject age height Occasion
## 10 : 9 Min. :-1.00000 Min. :126.2 1 :26
## 26 : 9 1st Qu.:-0.46300 1st Qu.:143.8 2 :26
## 25 : 9 Median :-0.00270 Median :149.5 3 :26
## 9 : 9 Mean : 0.02263 Mean :149.5 4 :26
## 2 : 9 3rd Qu.: 0.55620 3rd Qu.:155.5 5 :26
## 6 : 9 Max. : 1.00550 Max. :174.8 6 :26
## (Other):180 (Other):78
# Height against age; `group = Subject` makes geom_line() draw one line per
# subject instead of a single line through all points.
ggplot(
  data = Oxboys,
  mapping = aes(x = age, y = height, group = Subject)
) +
  geom_point() +
  geom_line()

# Show the current working directory, then switch to the data folder so the
# workbook can be read by its bare file name.
# NOTE(review): the path is hard-coded and machine-specific, and setwd()
# changes global state for everything that runs afterwards.
getwd()
## [1] "C:/data_R"
setwd("c:/data_R")
# dplyr is attached again here, next to readxl for read_excel().
library("dplyr")
library("readxl")
airseoul <- read_excel(path = "period1.xlsx")
# Structure of the raw sheet. The measurement column names contain embedded
# line breaks ("\r\n"), which is why they are matched as exact strings below.
airseoul %>% str()
## tibble [1,535 × 8] (S3: tbl_df/tbl/data.frame)
## $ 날짜 : chr [1:1535] "전체" "2022-03-31" "2022-03-31" "2022-03-31" ...
## $ 측정소명 : chr [1:1535] "평균" "평균" "강남구" "강동구" ...
## $ 미세먼지 PM10
## (㎍/m3) : num [1:1535] 41 20 21 26 NA 23 19 21 23 17 ...
## $ 초미세먼지
## PM2.5 (㎍/m3): num [1:1535] 23 11 10 13 17 9 9 10 10 9 ...
## $ 오존
## O3 (ppm) : chr [1:1535] "0.026" "0.032" "0.033" "0.026" ...
## $ 이산화질소
## NO2 (ppm) : chr [1:1535] "0.026" "0.014" "0.015" "0.014" ...
## $ 일산화탄소
## CO (ppm) : chr [1:1535] "0.5" "0.3" "0.3" "0.3" ...
## $ 아황산가스
## SO2(ppm) : num [1:1535] 0.003 0.002 0.003 0.002 0.003 0.003 0.002 0.003 0.003 0.002 ...
colnames(airseoul)
## [1] "날짜" "측정소명"
## [3] "미세먼지 PM10\r\n(㎍/m3)" "초미세먼지\r\nPM2.5 (㎍/m3)"
## [5] "오존\r\nO3 (ppm)" "이산화질소\r\nNO2 (ppm)"
## [7] "일산화탄소\r\nCO (ppm)" "아황산가스\r\nSO2(ppm)"
# Keep the date, station and the two particulate columns under short ASCII
# names; select() renames while selecting, so no separate rename() is needed.
airseoul1 <- select(
  airseoul,
  date = "날짜",
  region = "측정소명",
  pm10 = "미세먼지 PM10\r\n(㎍/m3)",
  pm2.5 = "초미세먼지\r\nPM2.5 (㎍/m3)"
)
# Number of rows per `date` value in the unfiltered table.
table(airseoul1[["date"]])
##
## 2022-02-01 2022-02-02 2022-02-03 2022-02-04 2022-02-05 2022-02-06 2022-02-07
## 26 26 26 26 26 26 26
## 2022-02-08 2022-02-09 2022-02-10 2022-02-11 2022-02-12 2022-02-13 2022-02-14
## 26 26 26 26 26 26 26
## 2022-02-15 2022-02-16 2022-02-17 2022-02-18 2022-02-19 2022-02-20 2022-02-21
## 26 26 26 26 26 26 26
## 2022-02-22 2022-02-23 2022-02-24 2022-02-25 2022-02-26 2022-02-27 2022-02-28
## 26 26 26 26 26 26 26
## 2022-03-01 2022-03-02 2022-03-03 2022-03-04 2022-03-05 2022-03-06 2022-03-07
## 26 26 26 26 26 26 26
## 2022-03-08 2022-03-09 2022-03-10 2022-03-11 2022-03-12 2022-03-13 2022-03-14
## 26 26 26 26 26 26 26
## 2022-03-15 2022-03-16 2022-03-17 2022-03-18 2022-03-19 2022-03-20 2022-03-21
## 26 26 26 26 26 26 26
## 2022-03-22 2022-03-23 2022-03-24 2022-03-25 2022-03-26 2022-03-27 2022-03-28
## 26 26 26 26 26 26 26
## 2022-03-29 2022-03-30 2022-03-31 전체
## 26 26 26 1
# Number of rows per `region` value in the unfiltered table.
table(airseoul1[["region"]])
##
## 강남구 강동구 강북구 강서구 관악구 광진구 구로구 금천구
## 59 59 59 59 59 59 59 59
## 노원구 도봉구 동대문구 동작구 마포구 서대문구 서초구 성동구
## 59 59 59 59 59 59 59 59
## 성북구 송파구 양천구 영등포구 용산구 은평구 종로구 중구
## 59 59 59 59 59 59 59 59
## 중랑구 평균
## 59 60
# Drop the summary rows so that only per-station daily readings remain: the
# "전체" (whole period) date row and every "평균" (average) station row.
# The two conditions are joined with `&`. The previous text had the
# mis-encoded character "®" where "&r" belonged, which is not valid R.
airseoul1 <- airseoul1 %>%
  filter(date != "전체" & region != "평균")
# Number of rows per `date` value once the summary rows are gone.
table(airseoul1[["date"]])
##
## 2022-02-01 2022-02-02 2022-02-03 2022-02-04 2022-02-05 2022-02-06 2022-02-07
## 25 25 25 25 25 25 25
## 2022-02-08 2022-02-09 2022-02-10 2022-02-11 2022-02-12 2022-02-13 2022-02-14
## 25 25 25 25 25 25 25
## 2022-02-15 2022-02-16 2022-02-17 2022-02-18 2022-02-19 2022-02-20 2022-02-21
## 25 25 25 25 25 25 25
## 2022-02-22 2022-02-23 2022-02-24 2022-02-25 2022-02-26 2022-02-27 2022-02-28
## 25 25 25 25 25 25 25
## 2022-03-01 2022-03-02 2022-03-03 2022-03-04 2022-03-05 2022-03-06 2022-03-07
## 25 25 25 25 25 25 25
## 2022-03-08 2022-03-09 2022-03-10 2022-03-11 2022-03-12 2022-03-13 2022-03-14
## 25 25 25 25 25 25 25
## 2022-03-15 2022-03-16 2022-03-17 2022-03-18 2022-03-19 2022-03-20 2022-03-21
## 25 25 25 25 25 25 25
## 2022-03-22 2022-03-23 2022-03-24 2022-03-25 2022-03-26 2022-03-27 2022-03-28
## 25 25 25 25 25 25 25
## 2022-03-29 2022-03-30 2022-03-31
## 25 25 25
# Number of rows per `region` value once the summary rows are gone.
table(airseoul1[["region"]])
##
## 강남구 강동구 강북구 강서구 관악구 광진구 구로구 금천구
## 59 59 59 59 59 59 59 59
## 노원구 도봉구 동대문구 동작구 마포구 서대문구 서초구 성동구
## 59 59 59 59 59 59 59 59
## 성북구 송파구 양천구 영등포구 용산구 은평구 종로구 중구
## 59 59 59 59 59 59 59 59
## 중랑구
## 59
# Distribution summaries of both particulate columns, including NA counts.
summary(airseoul1[["pm10"]])
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 3.00 27.00 36.00 40.54 50.00 112.00 7
summary(airseoul1[["pm2.5"]])
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 1.00 13.00 19.00 23.28 32.00 92.00 3
# Keep only rows in which both measurements are present; conditions separated
# by a comma in filter() are combined with AND.
airseoul1 <- filter(airseoul1, !is.na(pm10), !is.na(pm2.5))
# 1
# 01) Row(s) holding the maximum PM10 value, with date and station.
airseoul1 %>%
  select(date, region, pm10) %>%
  filter(pm10 == max(pm10))
## # A tibble: 1 × 3
## date region pm10
## <chr> <chr> <dbl>
## 1 2022-03-05 구로구 112
# A tibble : 1 x 3
# 02) Mean PM10 per station, sorted from highest to lowest, top five rows.
airseoul1 %>%
  group_by(region) %>%
  summarise(m = mean(pm10)) %>%
  arrange(desc(m)) %>%
  head(n = 5)
## # A tibble: 5 × 2
## region m
## <chr> <dbl>
## 1 양천구 44.4
## 2 강북구 44.2
## 3 강서구 43.8
## 4 노원구 43.7
## 5 강동구 43.6
# A tibble : 5 x 2