library("tidyr")
library("dplyr")
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
# Example data with gaps in both columns, used by the fill / replace_na
# demonstrations. tibble() replaces the deprecated data_frame() constructor.
df <- dplyr::tibble(
  year = c(2015, NA, NA, NA, 2017),
  count = c(1, NA, 2, NA, NA)
)
df
## # A tibble: 5 x 2
## year count
## <dbl> <dbl>
## 1 2015 1
## 2 NA NA
## 3 NA 2
## 4 NA NA
## 5 2017 NA
# Fill and replace_na
# Carry the last observed value downward into the NA cells of both columns.
df %>%
  fill(year, count, .direction = "down")
## # A tibble: 5 x 2
## year count
## <dbl> <dbl>
## 1 2015 1
## 2 2015 1
## 3 2015 2
## 4 2015 2
## 5 2017 2
# Substitute a fixed per-column default wherever a value is missing.
df %>%
  replace_na(replace = list(year = 2016, count = 10))
## # A tibble: 5 x 2
## year count
## <dbl> <dbl>
## 1 2015 1
## 2 2016 10
## 3 2016 2
## 4 2016 10
## 5 2017 10
# Fill all NA cells
# Label every missing cell as "Missing". Both columns are numeric, so each one
# is converted to character explicitly before the NAs are replaced; assigning
# a string directly into a double column (what base replace() does) is
# rejected as an incompatible type by current tibble versions.
df %>%
  mutate(across(everything(), ~ replace_na(as.character(.x), "Missing")))
## # A tibble: 5 x 2
## year count
## <chr> <chr>
## 1 2015 1
## 2 Missing Missing
## 3 Missing 2
## 4 Missing Missing
## 5 2017 Missing
# Expand missing combinations
# Example data with missing year/month keys, used to demonstrate complete().
# tibble() replaces the deprecated data_frame() constructor.
df <- dplyr::tibble(
  year = c(2015, NA, 2016, NA, 2017),
  month = c(1, NA, 2, NA, 3),
  count = c(1, 2, 3, 4, 5)
)
# Expand to every observed year x month combination. Rows whose keys are NA
# are dropped first so that NA never becomes a key level, and only the count
# of the newly created combinations is set to 0 -- the key columns themselves
# are never overwritten.
df %>%
  drop_na(year, month) %>%
  complete(year, month, fill = list(count = 0))
## # A tibble: 9 x 3
## year month count
## <dbl> <dbl> <dbl>
## 1 2015 1 1
## 2 2015 2 0
## 3 2015 3 0
## 4 2016 1 0
## 5 2016 2 3
## 6 2016 3 0
## 7 2017 1 0
## 8 2017 2 0
## 9 2017 3 5
# Fill empty strings
# Example data where missing values are encoded as empty strings. Every value
# is written as a string so both columns are visibly character, rather than
# relying on c() silently coercing the numbers. tibble() replaces the
# deprecated data_frame() constructor.
df <- dplyr::tibble(
  year = c("2015", "", "", "", "2017"),
  count = c("1", "", "2", "", "")
)
# Swap each empty string for a column-specific placeholder; non-empty values
# pass through unchanged.
df %>%
  mutate(
    year = if_else(year == "", "Unknow", year),
    count = if_else(count == "", "0", count)
  )
## # A tibble: 5 x 2
## year count
## <chr> <chr>
## 1 2015 1
## 2 Unknow 0
## 3 Unknow 2
## 4 Unknow 0
## 5 2017 0
# Split a cell into rows
# Example data whose y column packs several comma-separated values per cell.
# tibble() replaces the deprecated data_frame() constructor.
df <- dplyr::tibble(x = 1:2, y = c("1,2", "3,4,5,6,7"))
# Split each comma-separated cell into a list of pieces, then give every piece
# its own row. The column to unnest is named explicitly: calling unnest()
# without `cols` is deprecated and emits a warning in current tidyr.
df %>%
  mutate(y = strsplit(y, ",")) %>%
  unnest(cols = y)
## # A tibble: 7 x 2
## x y
## <int> <chr>
## 1 1 1
## 2 1 2
## 3 2 3
## 4 2 4
## 5 2 5
## 6 2 6
## 7 2 7