# Session setup: empty the workspace, attach dplyr, and move to the folder
# that the relative CSV paths used later in this script are resolved against.
# NOTE(review): rm(list = ls()) and setwd() tie the script to one machine's
# layout; they are kept because the later read.csv() calls use bare file names.
rm(list = ls())

# Report the directory the session starts in (before setwd() below).
getwd()
## [1] "C:/R"

library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union

setwd("c:/R")
# Join examples: two small tables keyed by `id`.
mpg2 <- read.csv(file = "mpg2.csv")
mpg3 <- read.csv(file = "mpg3.csv")

mpg2
##   id manufacturer cty
## 1  1         audi  18
## 2  2         audi  21
## 3  3         audi  20

mpg3
##   id hwy
## 1  1  29
## 2  4  26
## 3  5  26

# Left join: keep every row of mpg2; ids missing from mpg3 get NA for hwy.
mpg2 %>%
  left_join(mpg3, by = "id")
##   id manufacturer cty hwy
## 1  1         audi  18  29
## 2  2         audi  21  NA
## 3  3         audi  20  NA

# Inner join: keep only ids present in both tables.
mpg2 %>%
  inner_join(mpg3, by = "id")
##   id manufacturer cty hwy
## 1  1         audi  18  29

# Full join: keep ids from either table, padding the missing side with NA.
mpg2 %>%
  full_join(mpg3, by = "id")
##   id manufacturer cty hwy
## 1  1         audi  18  29
## 2  2         audi  21  NA
## 3  3         audi  20  NA
## 4  4         <NA>  NA  26
## 5  5         <NA>  NA  26
# Row-binding example: the two tables share `manufacturer` but otherwise have
# different columns (cty vs hwy).
mpg5 <- read.csv(file = "mpg5.csv")
mpg6 <- read.csv(file = "mpg6.csv")

mpg5
##   manufacturer cty
## 1         audi  18
## 2         audi  21

mpg6
##   manufacturer hwy
## 1       toyota  20
## 2       toyota  20

# Stack the rows; a column absent from one input is filled with NA.
mpg5 %>%
  bind_rows(mpg6)
##   manufacturer cty hwy
## 1         audi  18  NA
## 2         audi  21  NA
## 3       toyota  NA  20
## 4       toyota  NA  20
# Work on a local copy of the economics data set shipped with ggplot2.
economics <- ggplot2::economics
head(economics)
## # A tibble: 6 x 6
##   date         pce    pop psavert uempmed unemploy
##   <date>     <dbl>  <dbl>   <dbl>   <dbl>    <dbl>
## 1 1967-07-01  507. 198712    12.6     4.5     2944
## 2 1967-08-01  510. 198911    12.6     4.7     2945
## 3 1967-09-01  516. 199113    11.9     4.6     2958
## 4 1967-10-01  512. 199311    12.9     4.9     3143
## 5 1967-11-01  517. 199498    12.8     4.7     3066
## 6 1967-12-01  525. 199657    11.8     4.8     3018

# Derive a character `year` column from the first four characters of `date`.
economics <- economics %>%
  mutate(year = substr(date, 1, 4))

# Number of rows per year.
table(economics$year)
## 
## 1967 1968 1969 1970 1971 1972 1973 1974 1975 1976 1977 1978 1979 1980 1981 1982 
##    6   12   12   12   12   12   12   12   12   12   12   12   12   12   12   12 
## 1983 1984 1985 1986 1987 1988 1989 1990 1991 1992 1993 1994 1995 1996 1997 1998 
##   12   12   12   12   12   12   12   12   12   12   12   12   12   12   12   12 
## 1999 2000 2001 2002 2003 2004 2005 2006 2007 2008 2009 2010 2011 2012 2013 2014 
##   12   12   12   12   12   12   12   12   12   12   12   12   12   12   12   12 
## 2015 
##    4

# Mean of psavert per year, showing the five years with the highest mean.
economics %>%
  group_by(year) %>%
  summarise(m = mean(psavert)) %>%
  arrange(desc(m)) %>%
  head(n = 5)
## # A tibble: 5 x 2
##   year      m
##   <chr> <dbl>
## 1 1971   13.5
## 2 1973   13.4
## 3 1975   13.4
## 4 1974   13.3
## 5 1970   12.8
# Missing-value handling on the exam score tables.

# First file: in the output below every is.na() cell is FALSE.
exam_na <- read.csv("exam.csv")
is.na(exam_na)
##         no   sex korean english  math
## [1,] FALSE FALSE  FALSE   FALSE FALSE
## [2,] FALSE FALSE  FALSE   FALSE FALSE
## [3,] FALSE FALSE  FALSE   FALSE FALSE
## [4,] FALSE FALSE  FALSE   FALSE FALSE
## [5,] FALSE FALSE  FALSE   FALSE FALSE

# Second file overwrites exam_na; it has NAs in `korean` and `english`.
exam_na <- read.csv("exam_na.csv")
is.na(exam_na)
##         id   sex korean english  math
## [1,] FALSE FALSE  FALSE    TRUE FALSE
## [2,] FALSE FALSE  FALSE   FALSE FALSE
## [3,] FALSE FALSE  FALSE   FALSE FALSE
## [4,] FALSE FALSE   TRUE   FALSE FALSE
## [5,] FALSE FALSE  FALSE    TRUE FALSE

# Per-column and overall counts of missing cells.
summary(is.na(exam_na))
##      id             sex            korean         english       
##  Mode :logical   Mode :logical   Mode :logical   Mode :logical  
##  FALSE:5         FALSE:5         FALSE:4         FALSE:3        
##                                  TRUE :1         TRUE :2        
##     math        
##  Mode :logical  
##  FALSE:5        
## 
table(is.na(exam_na))
## 
## FALSE  TRUE 
##    22     3

# Without na.rm the single NA propagates into the mean.
mean(exam_na$korean)
## [1] NA

# Keep the NA-free mean in a variable so the imputation below reuses the
# computed value instead of a hand-copied literal.
korean_mean <- mean(exam_na$korean, na.rm = TRUE)
korean_mean
## [1] 90.25

# Drop every row that has an NA in any column.
na.omit(exam_na)
##   id sex korean english math
## 2  2   F     92      95   93
## 3  3   F     95      92   90

# Drop rows only where `korean` is missing.
exam_na %>%
  filter(!is.na(korean))
##   id sex korean english math
## 1  1   M     87      NA   82
## 2  2   F     92      95   93
## 3  3   F     95      92   90
## 4  5   F     87      NA   88

# Drop rows where either `korean` or `english` is missing.
exam_na %>%
  filter(!is.na(korean) & !is.na(english))
##   id sex korean english math
## 1  2   F     92      95   93
## 2  3   F     95      92   90

# Mean imputation: replace the missing `korean` score with the column mean.
exam_na$korean <- ifelse(is.na(exam_na$korean), korean_mean, exam_na$korean)
exam_na$korean
## [1] 87.00 92.00 95.00 90.25 87.00
# Mean imputation of the missing Ozone readings in the built-in airquality
# data set.
data("airquality")
str(airquality)
## 'data.frame':    153 obs. of  6 variables:
##  $ Ozone  : int  41 36 12 18 NA 28 23 19 8 NA ...
##  $ Solar.R: int  190 118 149 313 NA NA 299 99 19 194 ...
##  $ Wind   : num  7.4 8 12.6 11.5 14.3 14.9 8.6 13.8 20.1 8.6 ...
##  $ Temp   : int  67 72 74 62 56 66 65 59 61 69 ...
##  $ Month  : int  5 5 5 5 5 5 5 5 5 5 ...
##  $ Day    : int  1 2 3 4 5 6 7 8 9 10 ...

# Missing-cell counts per column, then for Ozone alone.
summary(is.na(airquality))
##    Ozone          Solar.R           Wind            Temp        
##  Mode :logical   Mode :logical   Mode :logical   Mode :logical  
##  FALSE:116       FALSE:146       FALSE:153       FALSE:153      
##  TRUE :37        TRUE :7                                        
##    Month            Day         
##  Mode :logical   Mode :logical  
##  FALSE:153       FALSE:153      
## 
table(is.na(airquality$Ozone))
## 
## FALSE  TRUE 
##   116    37

# Store the NA-free mean so the fill value is the exact computed mean rather
# than a hand-rounded literal (the earlier 42.129 dropped digits).
ozone_mean <- mean(airquality$Ozone, na.rm = TRUE)
ozone_mean
## [1] 42.12931

# Replace each missing Ozone reading with the column mean.
airquality$Ozone <- ifelse(
  is.na(airquality$Ozone),
  ozone_mean,
  airquality$Ozone
)

# Expected print-out with the unrounded fill value (re-knit to refresh).
airquality$Ozone
##   [1]  41.00000  36.00000  12.00000  18.00000  42.12931  28.00000  23.00000
##   [8]  19.00000   8.00000  42.12931   7.00000  16.00000  11.00000  14.00000
##  [15]  18.00000  14.00000  34.00000   6.00000  30.00000  11.00000   1.00000
##  [22]  11.00000   4.00000  32.00000  42.12931  42.12931  42.12931  23.00000
##  [29]  45.00000 115.00000  37.00000  42.12931  42.12931  42.12931  42.12931
##  [36]  42.12931  42.12931  29.00000  42.12931  71.00000  39.00000  42.12931
##  [43]  42.12931  23.00000  42.12931  42.12931  21.00000  37.00000  20.00000
##  [50]  12.00000  13.00000  42.12931  42.12931  42.12931  42.12931  42.12931
##  [57]  42.12931  42.12931  42.12931  42.12931  42.12931 135.00000  49.00000
##  [64]  32.00000  42.12931  64.00000  40.00000  77.00000  97.00000  97.00000
##  [71]  85.00000  42.12931  10.00000  27.00000  42.12931   7.00000  48.00000
##  [78]  35.00000  61.00000  79.00000  63.00000  16.00000  42.12931  42.12931
##  [85]  80.00000 108.00000  20.00000  52.00000  82.00000  50.00000  64.00000
##  [92]  59.00000  39.00000   9.00000  16.00000  78.00000  35.00000  66.00000
##  [99] 122.00000  89.00000 110.00000  42.12931  42.12931  44.00000  28.00000
## [106]  65.00000  42.12931  22.00000  59.00000  23.00000  31.00000  44.00000
## [113]  21.00000   9.00000  42.12931  45.00000 168.00000  73.00000  42.12931
## [120]  76.00000 118.00000  84.00000  85.00000  96.00000  78.00000  73.00000
## [127]  91.00000  47.00000  32.00000  20.00000  23.00000  21.00000  24.00000
## [134]  44.00000  21.00000  28.00000   9.00000  13.00000  46.00000  18.00000
## [141]  13.00000  24.00000  16.00000  13.00000  23.00000  36.00000   7.00000
## [148]  14.00000  30.00000  42.12931  14.00000  18.00000  20.00000