R Markdown

# import library

library(tidyverse)
## Warning: package 'tidyverse' was built under R version 3.2.5
## Loading tidyverse: ggplot2
## Loading tidyverse: tibble
## Loading tidyverse: tidyr
## Loading tidyverse: readr
## Loading tidyverse: purrr
## Loading tidyverse: dplyr
## Warning: package 'ggplot2' was built under R version 3.2.5
## Warning: package 'tibble' was built under R version 3.2.5
## Warning: package 'tidyr' was built under R version 3.2.5
## Warning: package 'readr' was built under R version 3.2.5
## Warning: package 'purrr' was built under R version 3.2.5
## Warning: package 'dplyr' was built under R version 3.2.5
## Conflicts with tidy packages ----------------------------------------------
## filter(): dplyr, stats
## lag():    dplyr, stats

This demonstration will be working with the iris data set (which ships with R in the built-in datasets package), as well as other datasets along the way.

Using gather to transform wide data to long data

# Work on a copy of the built-in iris data so the original stays untouched.
dataset <- datasets::iris

# Preview the first six rows of the dataset

head(dataset, n = 6L)
##   Sepal.Length Sepal.Width Petal.Length Petal.Width Species
## 1          5.1         3.5          1.4         0.2  setosa
## 2          4.9         3.0          1.4         0.2  setosa
## 3          4.7         3.2          1.3         0.2  setosa
## 4          4.6         3.1          1.5         0.2  setosa
## 5          5.0         3.6          1.4         0.2  setosa
## 6          5.4         3.9          1.7         0.4  setosa
# The 'gather' function tidies wide data into long data

# Create a new ID column so that we can use it for the spread in the following step

# Tag every row with a positional ID, then gather the four measurement columns
# into key/value pairs (one row per flower per measurement).
# row_number() with no argument numbers rows by position. row_number(Species)
# would instead rank rows by Species, which only matches the row position
# while the data is sorted by Species and gives NA for a missing Species.
# The measurement columns are selected by name rather than by position so the
# call does not depend on the column order of the input.
dataset_gather <- dataset %>%
  dplyr::mutate(ID = row_number()) %>%
  tidyr::gather(key = Mesuare_Type, value = Values, Sepal.Length:Petal.Width)

head(dataset_gather)
##   Species ID Mesuare_Type Values
## 1  setosa  1 Sepal.Length    5.1
## 2  setosa  2 Sepal.Length    4.9
## 3  setosa  3 Sepal.Length    4.7
## 4  setosa  4 Sepal.Length    4.6
## 5  setosa  5 Sepal.Length    5.0
## 6  setosa  6 Sepal.Length    5.4
# Reverse the gather: spread the key/value pairs back into one column per
# measurement, using ID to keep each original row distinct, then drop the
# helper ID column once the wide layout is restored.
dataset_spread <- dataset_gather %>%
  dplyr::group_by(ID) %>%
  tidyr::spread(key = Mesuare_Type, value = Values) %>%
  dplyr::ungroup() %>%
  dplyr::select(-ID)

# Preview the first 6 rows of dataset_spread

head(dataset_spread, n = 6L)
## # A tibble: 6 x 5
##   Species Petal.Length Petal.Width Sepal.Length Sepal.Width
##    <fctr>        <dbl>       <dbl>        <dbl>       <dbl>
## 1  setosa          1.4         0.2          5.1         3.5
## 2  setosa          1.4         0.2          4.9         3.0
## 3  setosa          1.3         0.2          4.7         3.2
## 4  setosa          1.5         0.2          4.6         3.1
## 5  setosa          1.4         0.2          5.0         3.6
## 6  setosa          1.7         0.4          5.4         3.9
# Using 'unite' to collapse two variables into a single variable

# Paste Species and Petal.Length together into one "_"-separated column named
# Species_Length; the two source columns are replaced by the combined one.
dataset_unite <- dataset %>%
  tidyr::unite(
    col = Species_Length,
    Species, Petal.Length,
    sep = "_"
  )

# Inspect the result

head(dataset_unite, n = 6L)
##   Sepal.Length Sepal.Width Species_Length Petal.Width
## 1          5.1         3.5     setosa_1.4         0.2
## 2          4.9         3.0     setosa_1.4         0.2
## 3          4.7         3.2     setosa_1.3         0.2
## 4          4.6         3.1     setosa_1.5         0.2
## 5          5.0         3.6     setosa_1.4         0.2
## 6          5.4         3.9     setosa_1.7         0.4
# Now, in some cases one may want to split it out into two columns

# Split Species_Length back into its two source columns at the "_" separator.
# convert = TRUE runs type conversion on the new columns, so Petal.Length comes
# back as numeric rather than character (Species remains a character column).
dataset_seperate <- dataset_unite %>%
  tidyr::separate(
    Species_Length,
    into = c("Species", "Petal.Length"),
    sep = "_",
    convert = TRUE
  ) %>%
  dplyr::select(Sepal.Length, Sepal.Width, Petal.Length, Petal.Width, Species)

# The select() call only restores the original column order; there is nothing special about it.

# Check the data

head(dataset_seperate)
##   Sepal.Length Sepal.Width Petal.Length Petal.Width Species
## 1          5.1         3.5          1.4         0.2  setosa
## 2          4.9         3.0          1.4         0.2  setosa
## 3          4.7         3.2          1.3         0.2  setosa
## 4          4.6         3.1          1.5         0.2  setosa
## 5          5.0         3.6          1.4         0.2  setosa
## 6          5.4         3.9          1.7         0.4  setosa