# Session setup ----
# NOTE(review): clearing the global environment and hard-coding the working
# directory are kept so that the script behaves exactly as before; both tie
# the script to one machine and would be better replaced by a project-relative
# workflow.
rm(list = ls())
getwd()
## [1] "C:/R"
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
# The relative CSV paths used by the read.csv() calls below resolve against
# this directory.
setwd("c:/R")
# Joining two tables on a shared key ----
# Both files carry an `id` column, which is used as the join key below.
mpg2 <- read.csv("mpg2.csv")
mpg3 <- read.csv("mpg3.csv")
mpg2
## id manufacturer cty
## 1 1 audi 18
## 2 2 audi 21
## 3 3 audi 20
mpg3
## id hwy
## 1 1 29
## 2 4 26
## 3 5 26
# Left join: every row of mpg2 is kept; hwy is NA where mpg3 has no match.
mpg2 %>% left_join(mpg3, by = "id")
## id manufacturer cty hwy
## 1 1 audi 18 29
## 2 2 audi 21 NA
## 3 3 audi 20 NA
# Inner join: only ids present in both tables are returned.
mpg2 %>% inner_join(mpg3, by = "id")
## id manufacturer cty hwy
## 1 1 audi 18 29
# Full join: ids from either table are returned, with NA for the missing side.
mpg2 %>% full_join(mpg3, by = "id")
## id manufacturer cty hwy
## 1 1 audi 18 29
## 2 2 audi 21 NA
## 3 3 audi 20 NA
## 4 4 <NA> NA 26
## 5 5 <NA> NA 26
# Stacking tables by row ----
# The two files share `manufacturer` but differ in their second column.
mpg5 <- read.csv("mpg5.csv")
mpg6 <- read.csv("mpg6.csv")
mpg5
## manufacturer cty
## 1 audi 18
## 2 audi 21
mpg6
## manufacturer hwy
## 1 toyota 20
## 2 toyota 20
# Rows are appended; a column absent from one input is filled with NA.
mpg5 %>% bind_rows(mpg6)
## manufacturer cty hwy
## 1 audi 18 NA
## 2 audi 21 NA
## 3 toyota NA 20
## 4 toyota NA 20
# Yearly mean of the personal savings rate ----
# Work on a local copy of the dataset shipped with ggplot2.
economics <- ggplot2::economics
head(economics)
## # A tibble: 6 x 6
## date pce pop psavert uempmed unemploy
## <date> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 1967-07-01 507. 198712 12.6 4.5 2944
## 2 1967-08-01 510. 198911 12.6 4.7 2945
## 3 1967-09-01 516. 199113 11.9 4.6 2958
## 4 1967-10-01 512. 199311 12.9 4.9 3143
## 5 1967-11-01 517. 199498 12.8 4.7 3066
## 6 1967-12-01 525. 199657 11.8 4.8 3018
# Four-digit year of each observation, stored as a character column.
economics$year <- format(economics$date, "%Y")
table(economics$year)
##
## 1967 1968 1969 1970 1971 1972 1973 1974 1975 1976 1977 1978 1979 1980 1981 1982
## 6 12 12 12 12 12 12 12 12 12 12 12 12 12 12 12
## 1983 1984 1985 1986 1987 1988 1989 1990 1991 1992 1993 1994 1995 1996 1997 1998
## 12 12 12 12 12 12 12 12 12 12 12 12 12 12 12 12
## 1999 2000 2001 2002 2003 2004 2005 2006 2007 2008 2009 2010 2011 2012 2013 2014
## 12 12 12 12 12 12 12 12 12 12 12 12 12 12 12 12
## 2015
## 4
# Average psavert per year, then keep the five years with the highest average.
economics %>%
  group_by(year) %>%
  summarise(m = mean(psavert)) %>%
  arrange(desc(m)) %>%
  head(5)
## # A tibble: 5 x 2
## year m
## <chr> <dbl>
## 1 1971 13.5
## 2 1973 13.4
## 3 1975 13.4
## 4 1974 13.3
## 5 1970 12.8
# Locating missing values ----
# Baseline: for exam.csv the NA mask printed below is FALSE in every cell.
exam_na <- read.csv("exam.csv")
is.na(exam_na)
## no sex korean english math
## [1,] FALSE FALSE FALSE FALSE FALSE
## [2,] FALSE FALSE FALSE FALSE FALSE
## [3,] FALSE FALSE FALSE FALSE FALSE
## [4,] FALSE FALSE FALSE FALSE FALSE
## [5,] FALSE FALSE FALSE FALSE FALSE
# exam_na.csv replaces the object above; its mask shows TRUE where a score
# is missing.
exam_na <- read.csv("exam_na.csv")
is.na(exam_na)
## id sex korean english math
## [1,] FALSE FALSE FALSE TRUE FALSE
## [2,] FALSE FALSE FALSE FALSE FALSE
## [3,] FALSE FALSE FALSE FALSE FALSE
## [4,] FALSE FALSE TRUE FALSE FALSE
## [5,] FALSE FALSE FALSE TRUE FALSE
# Column-wise tally of missing (TRUE) versus observed (FALSE) cells.
summary(is.na(exam_na))
## id sex korean english
## Mode :logical Mode :logical Mode :logical Mode :logical
## FALSE:5 FALSE:5 FALSE:4 FALSE:3
## TRUE :1 TRUE :2
## math
## Mode :logical
## FALSE:5
##
# Tally over the whole table.
table(is.na(exam_na))
##
## FALSE TRUE
## 22 3
# Handling missing values in exam_na ----
# A single NA in the column makes the plain mean NA.
mean(exam_na$korean)
## [1] NA
# na.rm = TRUE skips the NA for this calculation only; the data is untouched.
# (TRUE is spelled out because T is an ordinary variable that can be
# reassigned.)
mean(exam_na$korean, na.rm = TRUE)
## [1] 90.25
# Drop every row that has an NA in any column.
na.omit(exam_na)
## id sex korean english math
## 2 2 F 92 95 93
## 3 3 F 95 92 90
# Drop only the rows where korean is missing.
exam_na %>% filter(!is.na(korean))
## id sex korean english math
## 1 1 M 87 NA 82
## 2 2 F 92 95 93
## 3 3 F 95 92 90
## 4 5 F 87 NA 88
# Drop the rows where korean or english is missing.
exam_na %>% filter(!is.na(korean) & !is.na(english))
## id sex korean english math
## 1 2 F 92 95 93
## 2 3 F 95 92 90
# Mean imputation: the replacement value is computed from the observed scores
# instead of being typed in by hand, so it stays correct if the data changes.
# The right-hand side is evaluated before the assignment, i.e. on the column
# that still contains the NA.
exam_na$korean[is.na(exam_na$korean)] <- mean(exam_na$korean, na.rm = TRUE)
exam_na$korean
## [1] 87.00 92.00 95.00 90.25 87.00
# airquality: locating missing values ----
# Load the built-in dataset into the workspace and inspect its structure.
data(airquality)
str(airquality)
## 'data.frame': 153 obs. of 6 variables:
## $ Ozone : int 41 36 12 18 NA 28 23 19 8 NA ...
## $ Solar.R: int 190 118 149 313 NA NA 299 99 19 194 ...
## $ Wind : num 7.4 8 12.6 11.5 14.3 14.9 8.6 13.8 20.1 8.6 ...
## $ Temp : int 67 72 74 62 56 66 65 59 61 69 ...
## $ Month : int 5 5 5 5 5 5 5 5 5 5 ...
## $ Day : int 1 2 3 4 5 6 7 8 9 10 ...
# Column-wise tally of missing cells: only Ozone and Solar.R contain NA.
summary(is.na(airquality))
## Ozone Solar.R Wind Temp
## Mode :logical Mode :logical Mode :logical Mode :logical
## FALSE:116 FALSE:146 FALSE:153 FALSE:153
## TRUE :37 TRUE :7
## Month Day
## Mode :logical Mode :logical
## FALSE:153 FALSE:153
##
# Missing versus observed count for Ozone alone.
table(is.na(airquality[["Ozone"]]))
##
## FALSE TRUE
## 116 37
# airquality: mean imputation of Ozone ----
# Mean of the observed Ozone readings (NA values skipped).
mean(airquality$Ozone, na.rm = TRUE)
## [1] 42.12931
# Fill the missing readings with the mean computed from the data rather than
# a hand-typed, truncated literal: a rounded constant would shift the column
# mean after imputation. The right-hand side is evaluated before the
# assignment, i.e. on the column that still contains the NAs.
airquality$Ozone[is.na(airquality$Ozone)] <-
  mean(airquality$Ozone, na.rm = TRUE)
airquality$Ozone
## [1] 41.00000 36.00000 12.00000 18.00000 42.12931 28.00000 23.00000
## [8] 19.00000 8.00000 42.12931 7.00000 16.00000 11.00000 14.00000
## [15] 18.00000 14.00000 34.00000 6.00000 30.00000 11.00000 1.00000
## [22] 11.00000 4.00000 32.00000 42.12931 42.12931 42.12931 23.00000
## [29] 45.00000 115.00000 37.00000 42.12931 42.12931 42.12931 42.12931
## [36] 42.12931 42.12931 29.00000 42.12931 71.00000 39.00000 42.12931
## [43] 42.12931 23.00000 42.12931 42.12931 21.00000 37.00000 20.00000
## [50] 12.00000 13.00000 42.12931 42.12931 42.12931 42.12931 42.12931
## [57] 42.12931 42.12931 42.12931 42.12931 42.12931 135.00000 49.00000
## [64] 32.00000 42.12931 64.00000 40.00000 77.00000 97.00000 97.00000
## [71] 85.00000 42.12931 10.00000 27.00000 42.12931 7.00000 48.00000
## [78] 35.00000 61.00000 79.00000 63.00000 16.00000 42.12931 42.12931
## [85] 80.00000 108.00000 20.00000 52.00000 82.00000 50.00000 64.00000
## [92] 59.00000 39.00000 9.00000 16.00000 78.00000 35.00000 66.00000
## [99] 122.00000 89.00000 110.00000 42.12931 42.12931 44.00000 28.00000
## [106] 65.00000 42.12931 22.00000 59.00000 23.00000 31.00000 44.00000
## [113] 21.00000 9.00000 42.12931 45.00000 168.00000 73.00000 42.12931
## [120] 76.00000 118.00000 84.00000 85.00000 96.00000 78.00000 73.00000
## [127] 91.00000 47.00000 32.00000 20.00000 23.00000 21.00000 24.00000
## [134] 44.00000 21.00000 28.00000 9.00000 13.00000 46.00000 18.00000
## [141] 13.00000 24.00000 16.00000 13.00000 23.00000 36.00000 7.00000
## [148] 14.00000 30.00000 42.12931 14.00000 18.00000 20.00000