tidyr package — Case Study: Tidy data
This small exercise is designed to help you practice tidying untidy datasets. The dataset used in this tutorial is called who and is loaded below.
library(tidyverse)
## Warning: package 'tidyverse' was built under R version 3.4.3
## -- Attaching packages ------------------------------------------------------------- tidyverse 1.2.1 --
## v ggplot2 2.2.1 v purrr 0.2.4
## v tibble 1.3.4 v dplyr 0.7.4
## v tidyr 0.7.2 v stringr 1.2.0
## v readr 1.1.1 v forcats 0.2.0
## Warning: package 'tidyr' was built under R version 3.4.3
## Warning: package 'purrr' was built under R version 3.4.3
## Warning: package 'dplyr' was built under R version 3.4.3
## -- Conflicts ---------------------------------------------------------------- tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
# Inspect the column names of the untidy `who` dataset
colnames(who)
## [1] "country" "iso2" "iso3" "year"
## [5] "new_sp_m014" "new_sp_m1524" "new_sp_m2534" "new_sp_m3544"
## [9] "new_sp_m4554" "new_sp_m5564" "new_sp_m65" "new_sp_f014"
## [13] "new_sp_f1524" "new_sp_f2534" "new_sp_f3544" "new_sp_f4554"
## [17] "new_sp_f5564" "new_sp_f65" "new_sn_m014" "new_sn_m1524"
## [21] "new_sn_m2534" "new_sn_m3544" "new_sn_m4554" "new_sn_m5564"
## [25] "new_sn_m65" "new_sn_f014" "new_sn_f1524" "new_sn_f2534"
## [29] "new_sn_f3544" "new_sn_f4554" "new_sn_f5564" "new_sn_f65"
## [33] "new_ep_m014" "new_ep_m1524" "new_ep_m2534" "new_ep_m3544"
## [37] "new_ep_m4554" "new_ep_m5564" "new_ep_m65" "new_ep_f014"
## [41] "new_ep_f1524" "new_ep_f2534" "new_ep_f3544" "new_ep_f4554"
## [45] "new_ep_f5564" "new_ep_f65" "newrel_m014" "newrel_m1524"
## [49] "newrel_m2534" "newrel_m3544" "newrel_m4554" "newrel_m5564"
## [53] "newrel_m65" "newrel_f014" "newrel_f1524" "newrel_f2534"
## [57] "newrel_f3544" "newrel_f4554" "newrel_f5564" "newrel_f65"
# Preview the first six rows of `who`
head(who, n = 6L)
Converting the wide dataset to a long dataset
# Reshape `who` from wide to long: the columns new_sp_m014 through newrel_f65
# are stacked into a key column `Types` (the former column name) and a value
# column `Cases` (the former cell value). Rows whose value is NA are dropped.
# `TRUE` is spelled out because `T` is an ordinary variable that can be
# reassigned, whereas `TRUE` is a reserved word.
m1 <- who %>%
  gather(
    key = "Types",
    value = "Cases",
    new_sp_m014:newrel_f65,
    na.rm = TRUE
  )
head(m1)
Counting the number of rows for each distinct value of Cases
# Tally how many rows of `m1` share each distinct value of `Cases`
mycount <- count(m1, Cases)
head(mycount)
Replacing newrel with new_rel so that every value of Types uses the same underscore-separated format
library(stringr)
# Insert the missing underscore so "newrel..." becomes "new_rel...", giving
# these values the same "new_<x>_<y>" layout as the other Types values
df <- mutate(
  m1,
  Types = stringr::str_replace(
    Types,
    pattern = "newrel",
    replacement = "new_rel"
  )
)
head(df)
Separate Types into three different columns
# Split `Types` at each underscore into three columns
df1 <- separate(
  df,
  col = Types,
  into = c("Col1", "Col2", "SexAge"),
  sep = "_"
)
head(df1)
Separate sex and age into two columns
# Split `SexAge` after its first character: an integer `sep` is a position
# counted from the left, so sep = 1 puts the first character in `Sex` and the
# remainder in `Age` (sep = 2 would split after the second character instead)
df2 <- separate(df1, col = SexAge, into = c("Sex", "Age"), sep = 1)
head(df2)