Practicing with the tidyr package

Case Study: Tidy data

This short exercise is designed to help you practice tidying untidy datasets. The dataset used in this tutorial is called who, and its columns are shown below.

library(tidyverse)
## Warning: package 'tidyverse' was built under R version 3.4.3
## -- Attaching packages ------------------------------------------------------------- tidyverse 1.2.1 --
## v ggplot2 2.2.1     v purrr   0.2.4
## v tibble  1.3.4     v dplyr   0.7.4
## v tidyr   0.7.2     v stringr 1.2.0
## v readr   1.1.1     v forcats 0.2.0
## Warning: package 'tidyr' was built under R version 3.4.3
## Warning: package 'purrr' was built under R version 3.4.3
## Warning: package 'dplyr' was built under R version 3.4.3
## -- Conflicts ---------------------------------------------------------------- tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
# Inspect the column names of the untidy dataset called who
colnames(who)
##  [1] "country"      "iso2"         "iso3"         "year"        
##  [5] "new_sp_m014"  "new_sp_m1524" "new_sp_m2534" "new_sp_m3544"
##  [9] "new_sp_m4554" "new_sp_m5564" "new_sp_m65"   "new_sp_f014" 
## [13] "new_sp_f1524" "new_sp_f2534" "new_sp_f3544" "new_sp_f4554"
## [17] "new_sp_f5564" "new_sp_f65"   "new_sn_m014"  "new_sn_m1524"
## [21] "new_sn_m2534" "new_sn_m3544" "new_sn_m4554" "new_sn_m5564"
## [25] "new_sn_m65"   "new_sn_f014"  "new_sn_f1524" "new_sn_f2534"
## [29] "new_sn_f3544" "new_sn_f4554" "new_sn_f5564" "new_sn_f65"  
## [33] "new_ep_m014"  "new_ep_m1524" "new_ep_m2534" "new_ep_m3544"
## [37] "new_ep_m4554" "new_ep_m5564" "new_ep_m65"   "new_ep_f014" 
## [41] "new_ep_f1524" "new_ep_f2534" "new_ep_f3544" "new_ep_f4554"
## [45] "new_ep_f5564" "new_ep_f65"   "newrel_m014"  "newrel_m1524"
## [49] "newrel_m2534" "newrel_m3544" "newrel_m4554" "newrel_m5564"
## [53] "newrel_m65"   "newrel_f014"  "newrel_f1524" "newrel_f2534"
## [57] "newrel_f3544" "newrel_f4554" "newrel_f5564" "newrel_f65"
# Preview the first six rows of who
head(who, n = 6L)

Converting the wide dataset to a long dataset

# Reshape from wide to long: stack every case-count column
# (new_sp_m014 through newrel_f65) into a key column, Types, and a value
# column, Cases, dropping the rows whose count is missing.
# gather() is kept because the attached tidyr is 0.7.2; on tidyr >= 1.0.0
# the recommended replacement is pivot_longer().
# na.rm is spelled TRUE because T is an ordinary variable that can be
# reassigned, whereas TRUE is a reserved word.
m1 <- who %>%
  gather(
    key = "Types",
    value = "Cases",
    new_sp_m014:newrel_f65,
    na.rm = TRUE
  )

head(m1)

Counting how many rows share each value of Cases

# Tally how many rows carry each distinct value of Cases
mycount <- count(m1, Cases)

head(mycount)

Replacing newrel with new_rel

library(stringr)

# Rewrite the "newrel" prefix as "new_rel" so every Types value uses the
# same underscore-delimited layout as the new_sp / new_sn / new_ep codes
df <- mutate(
  m1,
  Types = stringr::str_replace(Types, "newrel", "new_rel")
)

head(df)

Separate Types into three different columns

# Split Types at each underscore into three columns: Col1, Col2 and SexAge
df1 <- separate(
  df,
  col = Types,
  into = c("Col1", "Col2", "SexAge"),
  sep = "_"
)

head(df1)

Separate sex and age into two columns

# Split SexAge after its first character: a numeric sep is a character
# position counted from the left, so sep = 1 yields a one-letter Sex and
# the remaining digits as Age (sep = 2 would split after the second
# character instead)
df2 <- separate(
  df1,
  col = SexAge,
  into = c("Sex", "Age"),
  sep = 1
)

head(df2)