# Fetch the clinical data set into the working directory and load it.
download.file(
  url = "https://raw.githubusercontent.com/ucdavis-bioinformatics-training/2022_February_Introduction_to_R_for_Bioinformatics/main/birthweight.csv",
  destfile = "birthweight.csv"
)

birthweight <- read.csv(file = "birthweight.csv")

# Fetch the miRNA table. Its first CSV column is used as the row names, so the
# loaded data frame has one row per miRNA and one column per sample.
download.file(
  url = "https://raw.githubusercontent.com/ucdavis-bioinformatics-training/2022_February_Introduction_to_R_for_Bioinformatics/main/miRNA.csv",
  destfile = "miRNA.csv"
)
mir <- read.csv(file = "miRNA.csv", row.names = 1)

mir

##          sample.27 sample.1522 sample.569 sample.365 sample.1369 sample.1023
## miR-16          46          56         47         54          56          59
## miR-21          52          43         40         35          59          47
## miR-146a        98          97         87         96          84          96
## miR-182         53          45         63         41          46          50
##          sample.1272 sample.1262 sample.575 sample.792 sample.752 sample.619
## miR-16            49          55         62         63         46         52
## miR-21            42          45         55         45         42         43
## miR-146a          88          97         96        104        103         92
## miR-182           49          50         62         51         64         58
##          sample.1764 sample.516 sample.272 sample.1388 sample.1363 sample.300
## miR-16            46         61         49          46          61         60
## miR-21            40         51         43          44          47         48
## miR-146a          98         97         91         105          77         89
## miR-182           57         59         55          60          60         65
##          sample.57 sample.431 sample.532 sample.223 sample.1107 sample.697
## miR-16          46         70         60         60          57         68
## miR-21          39         51         44         46          49         47
## miR-146a       105         84         94         87         116         98
## miR-182         40         48         49         52          48         57
##          sample.1683 sample.808 sample.822 sample.1016 sample.820 sample.1081
## miR-16            49         59         54          69         58          55
## miR-21            48         56         52          41         55          52
## miR-146a          98        101         86          98        102          93
## miR-182           55         74         49          51         53          52
##          sample.321 sample.1636 sample.1360 sample.1058 sample.755 sample.462
## miR-16           68          63          70          77         56         65
## miR-21           46          39          57          55         46         58
## miR-146a        125         104         111         124        101        101
## miR-182          60          43          46          56         50         60
##          sample.1088 sample.553 sample.1191 sample.1313 sample.1600 sample.1187
## miR-16            42         63          66          64          50          57
## miR-21            54         54          48          47          44          46
## miR-146a         107        106         102         104         111          86
## miR-182           63         60          50          42          67          43

# Reshape the miRNA table so it can be joined to the birthweight data:
# transposing turns each sample into a row and each miRNA into a column.
mir <- as.data.frame(t(mir))

# Derive the join key from the row names by stripping the literal "sample."
# prefix. The pattern is anchored and the dot is escaped: an unescaped "." is a
# regex wildcard, so "sample." would match "sample" followed by any character,
# anywhere in the name.
mir$ID <- sub("^sample\\.", "", rownames(mir))

# Join on ID explicitly instead of letting merge() pick whichever column names
# the two tables happen to share.
experiment <- merge(birthweight, mir, by = "ID")

experiment

##      ID birth.date    location length birthweight head.circumference
## 1    27   3/9/1967 Silver Hill     53        3.55                 37
## 2    57  8/12/1968    Memorial     51        3.32                 38
## 3   223 12/11/1968     General     50        3.87                 33
## 4   272  1/10/1968    Memorial     52        3.86                 36
## 5   300  7/18/1968 Silver Hill     46        2.05                 32
## 6   321  1/21/1968 Silver Hill     48        3.11                 33
## 7   365  4/23/1967    Memorial     52        3.53                 37
## 8   431  9/16/1968 Silver Hill     48        1.92                 30
## 9   462  6/19/1968 Silver Hill     58        4.10                 39
## 10  516   1/8/1968 Silver Hill     47        2.66                 33
## 11  532 10/25/1968     General     53        3.59                 34
## 12  553  8/17/1968 Silver Hill     54        3.94                 37
## 13  569  3/23/1967    Memorial     50        2.51                 35
## 14  575  7/12/1967    Memorial     50        2.78                 30
## 15  619  11/1/1967    Memorial     52        3.41                 33
## 16  697   2/6/1967 Silver Hill     48        3.03                 35
## 17  752 10/19/1967     General     49        3.32                 36
## 18  755  4/25/1968    Memorial     53        3.20                 33
## 19  792   9/7/1967    Memorial     53        3.64                 38
## 20  808   5/5/1967 Silver Hill     48        2.92                 33
## 21  820  10/7/1967     General     52        3.77                 34
## 22  822  6/14/1967    Memorial     50        3.42                 35
## 23 1016  7/13/1967 Silver Hill     53        4.32                 36
## 24 1023   6/7/1967    Memorial     52        3.00                 35
## 25 1058  4/24/1968 Silver Hill     53        3.15                 34
## 26 1081 12/14/1967 Silver Hill     54        3.63                 38
## 27 1088  7/24/1968     General     51        3.27                 36
## 28 1107  1/25/1967     General     52        3.23                 36
## 29 1187 12/19/1968 Silver Hill     53        4.07                 38
## 30 1191   9/7/1968     General     53        3.65                 33
## 31 1262  6/25/1967 Silver Hill     53        3.19                 34
## 32 1272  6/20/1967    Memorial     53        2.75                 32
## 33 1313  9/27/1968 Silver Hill     43        2.65                 32
## 34 1360  2/16/1968     General     56        4.55                 34
## 35 1363   4/2/1968     General     48        2.37                 30
## 36 1369   6/4/1967 Silver Hill     49        3.18                 34
## 37 1388  2/22/1968    Memorial     51        3.14                 33
## 38 1522  3/13/1967    Memorial     50        2.74                 33
## 39 1600  10/9/1968     General     53        2.90                 34
## 40 1636   2/2/1968 Silver Hill     51        3.93                 38
## 41 1683  2/14/1967 Silver Hill     53        3.35                 33
## 42 1764  12/7/1967 Silver Hill     58        4.57                 39
##    weeks.gestation smoker maternal.age maternal.cigarettes maternal.height
## 1               41    yes           37                  25             161
## 2               39    yes           23                  17             157
## 3               45    yes           28                  25             163
## 4               39    yes           30                  25             170
## 5               35    yes           41                   7             166
## 6               37     no           28                   0             158
## 7               40    yes           26                  25             170
## 8               33    yes           20                   7             161
## 9               41     no           35                   0             172
## 10              35    yes           20                  35             170
## 11              40    yes           31                  12             163
## 12              42     no           24                   0             175
## 13              39    yes           22                   7             159
## 14              37    yes           19                   7             165
## 15              39    yes           23                  25             181
## 16              39     no           27                   0             162
## 17              40    yes           27                  12             152
## 18              41     no           21                   0             155
## 19              40    yes           20                   2             170
## 20              34     no           26                   0             167
## 21              40     no           24                   0             157
## 22              38     no           20                   0             157
## 23              40     no           19                   0             171
## 24              38    yes           30                  12             165
## 25              40     no           29                   0             167
## 26              38     no           18                   0             172
## 27              40     no           24                   0             168
## 28              38     no           31                   0             164
## 29              44     no           20                   0             174
## 30              42     no           21                   0             165
## 31              41    yes           27                  35             163
## 32              40    yes           37                  50             168
## 33              33     no           24                   0             149
## 34              44     no           20                   0             162
## 35              37    yes           20                   7             163
## 36              38    yes           31                  25             162
## 37              41    yes           22                   7             160
## 38              39    yes           21                  17             156
## 39              39     no           19                   0             165
## 40              38     no           29                   0             165
## 41              41     no           27                   0             164
## 42              41    yes           32                  12             173
##    maternal.prepregnant.weight paternal.age paternal.education
## 1                           66           46                 NA
## 2                           48           NA                 NA
## 3                           54           30                 16
## 4                           78           40                 16
## 5                           57           37                 14
## 6                           54           39                 10
## 7                           62           30                 10
## 8                           50           20                 10
## 9                           58           31                 16
## 10                          57           23                 12
## 11                          49           41                 12
## 12                          66           30                 12
## 13                          52           23                 14
## 14                          60           20                 14
## 15                          69           23                 16
## 16                          62           27                 14
## 17                          48           37                 12
## 18                          55           25                 14
## 19                          59           24                 12
## 20                          64           25                 12
## 21                          50           31                 16
## 22                          48           22                 14
## 23                          62           19                 12
## 24                          64           38                 14
## 25                          60           30                 16
## 26                          50           20                 12
## 27                          53           29                 16
## 28                          57           NA                 NA
## 29                          68           26                 14
## 30                          61           21                 10
## 31                          51           31                 16
## 32                          61           31                 16
## 33                          45           26                 16
## 34                          57           23                 10
## 35                          47           20                 10
## 36                          57           32                 16
## 37                          53           24                 16
## 38                          53           24                 12
## 39                          57           NA                 NA
## 40                          61           NA                 NA
## 41                          62           37                 14
## 42                          70           38                 14
##    paternal.cigarettes paternal.height low.birthweight geriatric.pregnancy
## 1                    0             175               0                TRUE
## 2                   NA              NA               0               FALSE
## 3                    0             183               0               FALSE
## 4                   50             178               0               FALSE
## 5                   25             173               1                TRUE
## 6                    0             171               0               FALSE
## 7                   25             181               0               FALSE
## 8                   35             180               1               FALSE
## 9                   25             185               0                TRUE
## 10                  50             186               1               FALSE
## 11                  50             191               0               FALSE
## 12                   0             184               0               FALSE
## 13                  25              NA               1               FALSE
## 14                   0             183               0               FALSE
## 15                   2             181               0               FALSE
## 16                   0             178               0               FALSE
## 17                  25             170               0               FALSE
## 18                  25             183               0               FALSE
## 19                  12             185               0               FALSE
## 20                  25             175               0               FALSE
## 21                   0             173               0               FALSE
## 22                   0             179               0               FALSE
## 23                   0             183               0               FALSE
## 24                  50             180               0               FALSE
## 25                  NA             182               0               FALSE
## 26                   7             172               0               FALSE
## 27                   0             181               0               FALSE
## 28                  NA              NA               0               FALSE
## 29                  25             189               0               FALSE
## 30                  25             185               0               FALSE
## 31                  25             185               0               FALSE
## 32                   0             173               0                TRUE
## 33                   0             169               1               FALSE
## 34                  35             179               0               FALSE
## 35                  35             185               1               FALSE
## 36                  50             194               0               FALSE
## 37                  12             176               0               FALSE
## 38                   7             179               0               FALSE
## 39                  NA              NA               0               FALSE
## 40                  NA              NA               0               FALSE
## 41                   0             170               0               FALSE
## 42                  25             180               0               FALSE
##    miR-16 miR-21 miR-146a miR-182
## 1      46     52       98      53
## 2      46     39      105      40
## 3      60     46       87      52
## 4      49     43       91      55
## 5      60     48       89      65
## 6      68     46      125      60
## 7      54     35       96      41
## 8      70     51       84      48
## 9      65     58      101      60
## 10     61     51       97      59
## 11     60     44       94      49
## 12     63     54      106      60
## 13     47     40       87      63
## 14     62     55       96      62
## 15     52     43       92      58
## 16     68     47       98      57
## 17     46     42      103      64
## 18     56     46      101      50
## 19     63     45      104      51
## 20     59     56      101      74
## 21     58     55      102      53
## 22     54     52       86      49
## 23     69     41       98      51
## 24     59     47       96      50
## 25     77     55      124      56
## 26     55     52       93      52
## 27     42     54      107      63
## 28     57     49      116      48
## 29     57     46       86      43
## 30     66     48      102      50
## 31     55     45       97      50
## 32     49     42       88      49
## 33     64     47      104      42
## 34     70     57      111      46
## 35     61     47       77      60
## 36     56     59       84      46
## 37     46     44      105      60
## 38     56     43       97      45
## 39     50     44      111      67
## 40     63     39      104      43
## 41     49     48       98      55
## 42     46     40       98      57

# Install any packages this tutorial needs that are not yet available.
#
# The check uses system.file() rather than installed.packages(): the latter
# scans every installed package on each call and its documentation advises
# against using it to test for a single named package.
# Everything runs inside local() so the helper and temporaries do not leak
# into the global environment.
local({
  is_installed <- function(pkg) {
    # system.file() returns "" when the package cannot be found.
    nzchar(system.file(package = pkg))
  }

  # BiocManager itself comes from CRAN and must exist before it can be used.
  if (!is_installed("BiocManager")) {
    install.packages("BiocManager")
  }

  required <- c("ggplot2", "tidyr", "dplyr", "magrittr", "viridis")
  absent <- required[!vapply(required, is_installed, logical(1))]

  # One install call for all missing packages; skipped when nothing is missing.
  if (length(absent) > 0) {
    BiocManager::install(absent)
  }
})

7 Introduction to the tidyverse

The tidyverse is a collection of packages by the creators of RStudio that share an approach to data science.

The authors model data science like this:

Data science life cycle

The tidyverse packages replace some of the base R functions with alternatives that are intended to be more user friendly for data scientists who are following this life cycle.

We will only be covering a few of the packages from the tidyverse.

library(tidyr)
library(dplyr)

## 
## Attaching package: 'dplyr'

## The following objects are masked from 'package:stats':
## 
##     filter, lag

## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union

library(magrittr)

## 
## Attaching package: 'magrittr'

## The following object is masked from 'package:tidyr':
## 
##     extract

## 7.1 Defining tidy data

In “tidy” data, every column is a variable, every row is an observation, and every cell contains a single value. Is the birthweight data frame tidy? Why or why not?

## 7.2 Pipes: combining tidyverse functions

The tidyverse employs piping to send the output of one function to another function, rather than the nesting used in base R. The “pipe” is written with a greater-than symbol sandwiched between two percent signs, like this: %>%.

# Chain two dplyr verbs with the pipe: keep the rows whose low.birthweight
# flag is set (the column holds 0/1), then keep four columns of interest.
experiment %>%
  filter(low.birthweight == 1) %>%
  select(birth.date, length, birthweight, smoker)

##   birth.date length birthweight smoker
## 1  7/18/1968     46        2.05    yes
## 2  9/16/1968     48        1.92    yes
## 3   1/8/1968     47        2.66    yes
## 4  3/23/1967     50        2.51    yes
## 5  9/27/1968     43        2.65     no
## 6   4/2/1968     48        2.37    yes

## 7.3 Transforming data

The separate() function makes the conversion of the “birth.date” column into “month,” “day,” and “year” trivial.

# Same low-birthweight subset as before, with birth.date split at each "/"
# into three new columns that take its place.
experiment %>%
  filter(low.birthweight == 1) %>%
  select(birth.date, length, birthweight, smoker) %>%
  separate(
    col = birth.date,
    into = c("month", "day", "year"),
    sep = "[/]"
  )

##   month day year length birthweight smoker
## 1     7  18 1968     46        2.05    yes
## 2     9  16 1968     48        1.92    yes
## 3     1   8 1968     47        2.66    yes
## 4     3  23 1967     50        2.51    yes
## 5     9  27 1968     43        2.65     no
## 6     4   2 1968     48        2.37    yes

The mutate() function adds a new column based on data contained in the existing columns.

# Same low-birthweight subset, with a derived column d appended:
# each row's birthweight divided by its length.
experiment %>%
  filter(low.birthweight == 1) %>%
  select(birth.date, length, birthweight, smoker) %>%
  mutate(d = birthweight / length)

##   birth.date length birthweight smoker          d
## 1  7/18/1968     46        2.05    yes 0.04456522
## 2  9/16/1968     48        1.92    yes 0.04000000
## 3   1/8/1968     47        2.66    yes 0.05659574
## 4  3/23/1967     50        2.51    yes 0.05020000
## 5  9/27/1968     43        2.65     no 0.06162791
## 6   4/2/1968     48        2.37    yes 0.04937500

## 7.4 Summarizing data

The group_by() and summarize() functions apply a function to each group defined by one or more categorical variables.

# Mean birthweight for each smoking status: group_by() splits the rows by the
# smoker column, and summarize() collapses each group to a single row.
experiment %>%
  group_by(smoker) %>%
  summarize(mean.birthweight = mean(birthweight))

## # A tibble: 2 × 2
##   smoker mean.birthweight
##   <chr>             <dbl>
## 1 no                 3.51
## 2 yes                3.13

# Same summary grouped by two columns, giving one row per combination of
# smoker and low.birthweight that occurs in the data.
# summarize() removes only the last grouping level here, so the result is
# still grouped by smoker; dplyr reports this in a message when the code runs.
experiment %>%
  group_by(smoker, low.birthweight) %>%
  summarize(mean.birthweight = mean(birthweight))

## `summarise()` has grouped output by 'smoker'. You can override using the
## `.groups` argument.

## # A tibble: 4 × 3
## # Groups:   smoker [2]
##   smoker low.birthweight mean.birthweight
##   <chr>            <int>            <dbl>
## 1 no                   0             3.55
## 2 no                   1             2.65
## 3 yes                  0             3.38
## 4 yes                  1             2.30

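To change the order of rows, use arrange(). To return one or more specified rows, use slice() or one of its helpers; the example below uses slice_max() to return the five highest birthweights within each smoker group.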

# Five heaviest babies within each smoking status.
# The table is narrowed to the columns of interest first and grouped second;
# slice_max() then ranks by birthweight separately inside each group.
experiment %>%
  select(smoker, birthweight, length, head.circumference, weeks.gestation) %>%
  group_by(smoker) %>%
  slice_max(birthweight, n = 5)

## # A tibble: 10 × 5
## # Groups:   smoker [2]
##    smoker birthweight length head.circumference weeks.gestation
##    <chr>        <dbl>  <int>              <int>           <int>
##  1 no            4.55     56                 34              44
##  2 no            4.32     53                 36              40
##  3 no            4.1      58                 39              41
##  4 no            4.07     53                 38              44
##  5 no            3.94     54                 37              42
##  6 yes           4.57     58                 39              41
##  7 yes           3.87     50                 33              45
##  8 yes           3.86     52                 36              39
##  9 yes           3.64     53                 38              40
## 10 yes           3.59     53                 34              40

The pivot_longer() and pivot_wider() functions rearrange data, decreasing or increasing the number of columns, respectively. Their usefulness will become more evident during visualization.

# Reshape the miRNA measurements of the low-birthweight babies to long form:
# one row per (baby, miRNA) pair, with the miRNA name in "gene" and its value
# in "expression". The backticks are needed because the column names contain
# a hyphen. After select(), every column except smoker is an miRNA column, so
# pivot_longer() can simply pivot everything but smoker.
experiment %>%
  filter(low.birthweight == 1) %>%
  select(smoker, `miR-16`, `miR-21`, `miR-146a`, `miR-182`) %>%
  pivot_longer(
    cols = -smoker,
    names_to = "gene",
    values_to = "expression"
  )

## # A tibble: 24 × 3
##    smoker gene     expression
##    <chr>  <chr>         <int>
##  1 yes    miR-16           60
##  2 yes    miR-21           48
##  3 yes    miR-146a         89
##  4 yes    miR-182          65
##  5 yes    miR-16           70
##  6 yes    miR-21           51
##  7 yes    miR-146a         84
##  8 yes    miR-182          48
##  9 yes    miR-16           61
## 10 yes    miR-21           51
## # ℹ 14 more rows

# Reference:

https://ucdavis-bioinformatics-training.github.io/2022_February_Introduction_to_R_for_Bioinformatics/introduction-to-the-tidyverse.html

Introduction to the tidyverse

Syukur Halim, NIM:220605220014, Dosen Pembimbing Prof. Dr. Suhartono, M.Kom., Magister Informatika UIN Maulana Malik Ibrahim Malang, Indonesia

2023-10-31

7 Introduction to the tidyverse