# Show the working directory the session starts in.
getwd()
## [1] "C:/Users/samsung/Desktop"
# Clear the global environment. The earlier form `rm=list(ls())` only created
# a variable named `rm` holding the object names and removed nothing.
rm(list = ls())
# Relative file paths used later in the script are resolved against c:/data.
setwd("c:/data")

library(dplyr)
## 
## 다음의 패키지를 부착합니다: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
# Read the CSV from the working directory, decoding it as EUC-KR, and preview
# the column types and leading values.
df <- read.csv(file = "서울시.csv", fileEncoding = "euc-kr")
dplyr::glimpse(df)
## Rows: 14,390
## Columns: 8
## $ 측정월              <int> 202312, 202312, 202312, 202312, 202312, 202312, 20…
## $ 측정소명            <chr> "강남구", "강남대로", "강동구", "강변북로", "강북…
## $ 이산화질소농도.ppm. <dbl> 0.0357, 0.0394, 0.0372, 0.0378, 0.0264, 0.0403, 0.…
## $ 오존농도.ppm.       <dbl> 0.0127, 0.0088, 0.0119, 0.0104, 0.0157, 0.0136, 0.…
## $ 일산화탄소농도.ppm. <dbl> 0.53, 0.90, 0.64, 0.52, 0.59, 0.70, 0.67, 0.68, 0.…
## $ 아황산가스.ppm.     <dbl> 0.0034, 0.0033, 0.0032, 0.0037, 0.0027, 0.0037, 0.…
## $ 미세먼지.....       <int> 39, 48, 41, 43, 35, 44, 47, 46, 33, 37, 45, 40, 36…
## $ 초미세먼지.....     <int> 24, 28, 24, 28, 23, 23, 25, 26, 19, 23, 26, 26, 23…
colnames(df)  # print the variable names
## [1] "측정월"              "측정소명"            "이산화질소농도.ppm."
## [4] "오존농도.ppm."       "일산화탄소농도.ppm." "아황산가스.ppm."    
## [7] "미세먼지....."       "초미세먼지....."
# Bind `rename` to dplyr's implementation in the global environment
# (presumably so a same-named function from another package cannot mask it).
rename <- dplyr::rename

# Step 1: replace the original Korean column headers with short English names.
df <- rename(
  df,
  month = "측정월",
  region = "측정소명",
  no2 = "이산화질소농도.ppm.",
  ozone = "오존농도.ppm.",
  co = "일산화탄소농도.ppm.",
  s = "아황산가스.ppm.",
  pm10 = "미세먼지.....",
  pm2.5 = "초미세먼지....."
)

# Step 2: keep exactly these eight columns, in this order.
df <- select(df, month, region, no2, ozone, co, s, pm10, pm2.5)

# Preview the renamed data frame.
glimpse(df)
## Rows: 14,390
## Columns: 8
## $ month  <int> 202312, 202312, 202312, 202312, 202312, 202312, 202312, 202312,…
## $ region <chr> "강남구", "강남대로", "강동구", "강변북로", "강북구", "강서구",…
## $ no2    <dbl> 0.0357, 0.0394, 0.0372, 0.0378, 0.0264, 0.0403, 0.0406, 0.0385,…
## $ ozone  <dbl> 0.0127, 0.0088, 0.0119, 0.0104, 0.0157, 0.0136, 0.0100, 0.0128,…
## $ co     <dbl> 0.53, 0.90, 0.64, 0.52, 0.59, 0.70, 0.67, 0.68, 0.56, 0.76, 0.5…
## $ s      <dbl> 0.0034, 0.0033, 0.0032, 0.0037, 0.0027, 0.0037, 0.0032, 0.0031,…
## $ pm10   <int> 39, 48, 41, 43, 35, 44, 47, 46, 33, 37, 45, 40, 36, 42, 37, 49,…
## $ pm2.5  <int> 24, 28, 24, 28, 23, 23, 25, 26, 19, 23, 26, 26, 23, 28, 25, 31,…
# Check for missing values: number of NA entries in each column.
vapply(df, function(column) sum(is.na(column)), numeric(1))
##  month region    no2  ozone     co      s   pm10  pm2.5 
##      0      0    474    736    499    758   2422   8620
# Remove every row that contains at least one missing value.
# NOTE(review): a row is dropped if any single column is NA, so rows with a
# valid pm10 but a missing pm2.5 are lost too - confirm this is intended.
df <- stats::na.omit(df)

# Re-check: the NA count of every column should now be zero.
vapply(df, function(column) sum(is.na(column)), numeric(1))
##  month region    no2  ozone     co      s   pm10  pm2.5 
##      0      0      0      0      0      0      0      0
# Per-column summary of the cleaned data frame.
base::summary(object = df)
##      month           region               no2              ozone        
##  Min.   :201201   Length:5755        Min.   :0.00400   Min.   :0.00400  
##  1st Qu.:201602   Class :character   1st Qu.:0.02260   1st Qu.:0.01400  
##  Median :201902   Mode  :character   Median :0.03100   Median :0.02200  
##  Mean   :201839                      Mean   :0.03144   Mean   :0.02308  
##  3rd Qu.:202108                      3rd Qu.:0.03900   3rd Qu.:0.03045  
##  Max.   :202312                      Max.   :0.09000   Max.   :0.06500  
##        co               s                 pm10            pm2.5      
##  Min.   :0.1000   Min.   :0.001000   Min.   : 10.00   Min.   : 5.00  
##  1st Qu.:0.4000   1st Qu.:0.003000   1st Qu.: 31.00   1st Qu.:17.00  
##  Median :0.5000   Median :0.004000   Median : 41.00   Median :22.00  
##  Mean   :0.5369   Mean   :0.004186   Mean   : 42.12   Mean   :22.81  
##  3rd Qu.:0.6000   3rd Qu.:0.005000   3rd Qu.: 51.00   3rd Qu.:28.00  
##  Max.   :1.8000   Max.   :0.011000   Max.   :107.00   Max.   :55.00
# Preview the data frame again after the rows with missing values were removed.
dplyr::glimpse(df)
## Rows: 5,755
## Columns: 8
## $ month  <int> 202312, 202312, 202312, 202312, 202312, 202312, 202312, 202312,…
## $ region <chr> "강남구", "강남대로", "강동구", "강변북로", "강북구", "강서구",…
## $ no2    <dbl> 0.0357, 0.0394, 0.0372, 0.0378, 0.0264, 0.0403, 0.0406, 0.0385,…
## $ ozone  <dbl> 0.0127, 0.0088, 0.0119, 0.0104, 0.0157, 0.0136, 0.0100, 0.0128,…
## $ co     <dbl> 0.53, 0.90, 0.64, 0.52, 0.59, 0.70, 0.67, 0.68, 0.56, 0.76, 0.5…
## $ s      <dbl> 0.0034, 0.0033, 0.0032, 0.0037, 0.0027, 0.0037, 0.0032, 0.0031,…
## $ pm10   <int> 39, 48, 41, 43, 35, 44, 47, 46, 33, 37, 45, 40, 36, 42, 37, 49,…
## $ pm2.5  <int> 24, 28, 24, 28, 23, 23, 25, 26, 19, 23, 26, 26, 23, 28, 25, 31,…
# Number of remaining rows for each distinct `region` value.
table(df[["region"]])
## 
##       강남구     강남대로       강동구     강변북로       강북구       강서구 
##          125          135          125          136          125          135 
##     공항대로       관악구       관악산       광진구       구로구       금천구 
##          135          124           57          136          125          125 
##         남산       노원구       도봉구     도산대로     동대문구       동작구 
##           62          126          125          144          126          126 
##     동작대로       마포구 마포아트센터       북한산     서대문구       서울숲 
##          136          125           56           62          125           56 
##       서초구       성동구       성북구         세곡       송파구     시흥대로 
##          125          126          125           62          125           62 
##       신촌로       양천구     영등포구     영등포로   올림픽공원       용산구 
##          138          125          125          136           56          125 
##       은평구 자연사박물관       정릉로         종로       종로구         중구 
##          125           48          134          137          137          125 
##       중랑구     천호대로     청계천로     한강대로         항동         행주 
##          125          137          137          136           61           62 
##       홍릉로       홍지문       화랑로 
##          138           38          133
# Row(s) whose pm10 value equals the overall maximum pm10.
filter(df, pm10 == max(pm10))
##    month   region   no2 ozone  co     s pm10 pm2.5
## 1 201502 공항대로 0.033 0.011 0.8 0.007  107    27
# Mean pm10 per region, sorted from highest to lowest; show the first five.
df %>%
  group_by(region) %>%
  summarise(m = mean(pm10)) %>%
  arrange(desc(m)) %>%
  head(n = 5)
## # A tibble: 5 × 2
##   region       m
##   <chr>    <dbl>
## 1 공항대로  52.0
## 2 한강대로  49.1
## 3 신촌로    48.6
## 4 동작대로  48.4
## 5 강남대로  48.2
# Rows whose pm2.5 equals the overall minimum, ordered by pm10 descending.
df %>%
  filter(pm2.5 == min(pm2.5)) %>%
  arrange(desc(pm10))
##    month       region   no2 ozone  co     s pm10 pm2.5
## 1 202109       도봉구 0.009 0.029 0.4 0.002   16     5
## 2 202009     동작대로 0.032 0.017 0.3 0.003   16     5
## 3 202109     강남대로 0.024 0.020 0.5 0.003   15     5
## 4 202109       서울숲 0.012 0.031 0.4 0.003   15     5
## 5 202109       관악산 0.005 0.042 0.3 0.003   13     5
## 6 202109       서초구 0.015 0.031 0.4 0.002   12     5
## 7 202109 자연사박물관 0.009 0.043 0.4 0.003   11     5
## 8 202109   올림픽공원 0.012 0.036 0.4 0.003   10     5