In the last section, you were asked to convert the “smoker” column to logical values. The solution is fairly simple:

# Read the birth weight table from birthweight.csv.
birthweight <- read.csv(file = "birthweight.csv")
# Compare every entry of the smoker column to "yes"; the result is printed.
birthweight[["smoker"]] == "yes"
##  [1] FALSE FALSE FALSE  TRUE  TRUE  TRUE  TRUE FALSE  TRUE  TRUE FALSE  TRUE
## [13]  TRUE  TRUE FALSE  TRUE FALSE  TRUE  TRUE  TRUE FALSE  TRUE  TRUE FALSE
## [25] FALSE FALSE  TRUE  TRUE FALSE FALSE FALSE  TRUE FALSE  TRUE FALSE FALSE
## [37]  TRUE FALSE FALSE  TRUE  TRUE FALSE
# Replace the smoker column with the result of comparing it to "yes",
# then print the updated table.
birthweight[["smoker"]] <- birthweight[["smoker"]] == "yes"
print(birthweight)
##      ID birth.date    location length birthweight head.circumference
## 1  1107  1/25/1967     General     52        3.23                 36
## 2   697   2/6/1967 Silver Hill     48        3.03                 35
## 3  1683  2/14/1967 Silver Hill     53        3.35                 33
## 4    27   3/9/1967 Silver Hill     53        3.55                 37
## 5  1522  3/13/1967    Memorial     50        2.74                 33
## 6   569  3/23/1967    Memorial     50        2.51                 35
## 7   365  4/23/1967    Memorial     52        3.53                 37
## 8   808   5/5/1967 Silver Hill     48        2.92                 33
## 9  1369   6/4/1967 Silver Hill     49        3.18                 34
## 10 1023   6/7/1967    Memorial     52        3.00                 35
## 11  822  6/14/1967    Memorial     50        3.42                 35
## 12 1272  6/20/1967    Memorial     53        2.75                 32
## 13 1262  6/25/1967 Silver Hill     53        3.19                 34
## 14  575  7/12/1967    Memorial     50        2.78                 30
## 15 1016  7/13/1967 Silver Hill     53        4.32                 36
## 16  792   9/7/1967    Memorial     53        3.64                 38
## 17  820  10/7/1967     General     52        3.77                 34
## 18  752 10/19/1967     General     49        3.32                 36
## 19  619  11/1/1967    Memorial     52        3.41                 33
## 20 1764  12/7/1967 Silver Hill     58        4.57                 39
## 21 1081 12/14/1967 Silver Hill     54        3.63                 38
## 22  516   1/8/1968 Silver Hill     47        2.66                 33
## 23  272  1/10/1968    Memorial     52        3.86                 36
## 24  321  1/21/1968 Silver Hill     48        3.11                 33
## 25 1636   2/2/1968 Silver Hill     51        3.93                 38
## 26 1360  2/16/1968     General     56        4.55                 34
## 27 1388  2/22/1968    Memorial     51        3.14                 33
## 28 1363   4/2/1968     General     48        2.37                 30
## 29 1058  4/24/1968 Silver Hill     53        3.15                 34
## 30  755  4/25/1968    Memorial     53        3.20                 33
## 31  462  6/19/1968 Silver Hill     58        4.10                 39
## 32  300  7/18/1968 Silver Hill     46        2.05                 32
## 33 1088  7/24/1968     General     51        3.27                 36
## 34   57  8/12/1968    Memorial     51        3.32                 38
## 35  553  8/17/1968 Silver Hill     54        3.94                 37
## 36 1191   9/7/1968     General     53        3.65                 33
## 37  431  9/16/1968 Silver Hill     48        1.92                 30
## 38 1313  9/27/1968 Silver Hill     43        2.65                 32
## 39 1600  10/9/1968     General     53        2.90                 34
## 40  532 10/25/1968     General     53        3.59                 34
## 41  223 12/11/1968     General     50        3.87                 33
## 42 1187 12/19/1968 Silver Hill     53        4.07                 38
##    weeks.gestation smoker maternal.age maternal.cigarettes maternal.height
## 1               38  FALSE           31                   0             164
## 2               39  FALSE           27                   0             162
## 3               41  FALSE           27                   0             164
## 4               41   TRUE           37                  25             161
## 5               39   TRUE           21                  17             156
## 6               39   TRUE           22                   7             159
## 7               40   TRUE           26                  25             170
## 8               34  FALSE           26                   0             167
## 9               38   TRUE           31                  25             162
## 10              38   TRUE           30                  12             165
## 11              38  FALSE           20                   0             157
## 12              40   TRUE           37                  50             168
## 13              41   TRUE           27                  35             163
## 14              37   TRUE           19                   7             165
## 15              40  FALSE           19                   0             171
## 16              40   TRUE           20                   2             170
## 17              40  FALSE           24                   0             157
## 18              40   TRUE           27                  12             152
## 19              39   TRUE           23                  25             181
## 20              41   TRUE           32                  12             173
## 21              38  FALSE           18                   0             172
## 22              35   TRUE           20                  35             170
## 23              39   TRUE           30                  25             170
## 24              37  FALSE           28                   0             158
## 25              38  FALSE           29                   0             165
## 26              44  FALSE           20                   0             162
## 27              41   TRUE           22                   7             160
## 28              37   TRUE           20                   7             163
## 29              40  FALSE           29                   0             167
## 30              41  FALSE           21                   0             155
## 31              41  FALSE           35                   0             172
## 32              35   TRUE           41                   7             166
## 33              40  FALSE           24                   0             168
## 34              39   TRUE           23                  17             157
## 35              42  FALSE           24                   0             175
## 36              42  FALSE           21                   0             165
## 37              33   TRUE           20                   7             161
## 38              33  FALSE           24                   0             149
## 39              39  FALSE           19                   0             165
## 40              40   TRUE           31                  12             163
## 41              45   TRUE           28                  25             163
## 42              44  FALSE           20                   0             174
##    maternal.prepregnant.weight paternal.age paternal.education
## 1                           57           NA                 NA
## 2                           62           27                 14
## 3                           62           37                 14
## 4                           66           46                 NA
## 5                           53           24                 12
## 6                           52           23                 14
## 7                           62           30                 10
## 8                           64           25                 12
## 9                           57           32                 16
## 10                          64           38                 14
## 11                          48           22                 14
## 12                          61           31                 16
## 13                          51           31                 16
## 14                          60           20                 14
## 15                          62           19                 12
## 16                          59           24                 12
## 17                          50           31                 16
## 18                          48           37                 12
## 19                          69           23                 16
## 20                          70           38                 14
## 21                          50           20                 12
## 22                          57           23                 12
## 23                          78           40                 16
## 24                          54           39                 10
## 25                          61           NA                 NA
## 26                          57           23                 10
## 27                          53           24                 16
## 28                          47           20                 10
## 29                          60           30                 16
## 30                          55           25                 14
## 31                          58           31                 16
## 32                          57           37                 14
## 33                          53           29                 16
## 34                          48           NA                 NA
## 35                          66           30                 12
## 36                          61           21                 10
## 37                          50           20                 10
## 38                          45           26                 16
## 39                          57           NA                 NA
## 40                          49           41                 12
## 41                          54           30                 16
## 42                          68           26                 14
##    paternal.cigarettes paternal.height low.birthweight geriatric.pregnancy
## 1                   NA              NA               0               FALSE
## 2                    0             178               0               FALSE
## 3                    0             170               0               FALSE
## 4                    0             175               0                TRUE
## 5                    7             179               0               FALSE
## 6                   25              NA               1               FALSE
## 7                   25             181               0               FALSE
## 8                   25             175               0               FALSE
## 9                   50             194               0               FALSE
## 10                  50             180               0               FALSE
## 11                   0             179               0               FALSE
## 12                   0             173               0                TRUE
## 13                  25             185               0               FALSE
## 14                   0             183               0               FALSE
## 15                   0             183               0               FALSE
## 16                  12             185               0               FALSE
## 17                   0             173               0               FALSE
## 18                  25             170               0               FALSE
## 19                   2             181               0               FALSE
## 20                  25             180               0               FALSE
## 21                   7             172               0               FALSE
## 22                  50             186               1               FALSE
## 23                  50             178               0               FALSE
## 24                   0             171               0               FALSE
## 25                  NA              NA               0               FALSE
## 26                  35             179               0               FALSE
## 27                  12             176               0               FALSE
## 28                  35             185               1               FALSE
## 29                  NA             182               0               FALSE
## 30                  25             183               0               FALSE
## 31                  25             185               0                TRUE
## 32                  25             173               1                TRUE
## 33                   0             181               0               FALSE
## 34                  NA              NA               0               FALSE
## 35                   0             184               0               FALSE
## 36                  25             185               0               FALSE
## 37                  35             180               1               FALSE
## 38                   0             169               1               FALSE
## 39                  NA              NA               0               FALSE
## 40                  50             191               0               FALSE
## 41                   0             183               0               FALSE
## 42                  25             189               0               FALSE

Converting the dates (currently stored as characters) to a more usable format is not as simple, and will require a function.

R is filled with functions. We have already used a few: read.csv(), class(), dim(), mean(), colnames(), as.logical(), and as.numeric(). To find the documentation on a function, type the ? character followed by the function name (for example, ?read.csv). This opens a page containing details on the function in the help pane.

Custom functions

Each birth date contains three fields: month, day, and year. What if we want to extract all three fields, but we don’t want to write the code three times?

Take a few minutes to dissect the following function, working from the inside out. What does each piece do?

# Custom function: takes a vector of dates written as "MM/DD/YYYY" and returns
# a data frame with one row per date and integer columns month, day, and year
# (in that order).
#
# date_vector: character vector (or factor) of "/"-separated dates.
# Missing dates, or dates with fewer than three fields, yield NA in the
# affected columns.
split_MMDDYYYY <- function(date_vector) {
  # as.character() lets factor input through, since strsplit() only accepts
  # character vectors; fixed = TRUE treats "/" as a literal separator.
  date_parts <- strsplit(as.character(date_vector), split = "/", fixed = TRUE)
  field_names <- c("month", "day", "year")
  date_list <- lapply(seq_along(field_names), function(i) {
    # p[i] is NA when the field is absent, so one bad date cannot abort the run.
    vapply(date_parts, function(p) as.integer(p[i]), integer(1),
           USE.NAMES = FALSE)
  })
  names(date_list) <- field_names
  as.data.frame(date_list)
}
# Split the birth.date column into its parts and print the result.
split_MMDDYYYY(date_vector = birthweight[["birth.date"]])
##    month day year
## 1      1  25 1967
## 2      2   6 1967
## 3      2  14 1967
## 4      3   9 1967
## 5      3  13 1967
## 6      3  23 1967
## 7      4  23 1967
## 8      5   5 1967
## 9      6   4 1967
## 10     6   7 1967
## 11     6  14 1967
## 12     6  20 1967
## 13     6  25 1967
## 14     7  12 1967
## 15     7  13 1967
## 16     9   7 1967
## 17    10   7 1967
## 18    10  19 1967
## 19    11   1 1967
## 20    12   7 1967
## 21    12  14 1967
## 22     1   8 1968
## 23     1  10 1968
## 24     1  21 1968
## 25     2   2 1968
## 26     2  16 1968
## 27     2  22 1968
## 28     4   2 1968
## 29     4  24 1968
## 30     4  25 1968
## 31     6  19 1968
## 32     7  18 1968
## 33     7  24 1968
## 34     8  12 1968
## 35     8  17 1968
## 36     9   7 1968
## 37     9  16 1968
## 38     9  27 1968
## 39    10   9 1968
## 40    10  25 1968
## 41    12  11 1968
## 42    12  19 1968
# Append the columns returned by split_MMDDYYYY() to the birthweight table.
birthweight <- cbind(
  birthweight,
  split_MMDDYYYY(birthweight[["birth.date"]])
)

The cbind() function combines objects by column. Now that we have the birth month in its own column, we can get the mean of birth weight by month.

# Mean of the birthweight column within each value of the month column.
tapply(
  X = birthweight[["birthweight"]],
  INDEX = birthweight[["month"]],
  FUN = mean
)
##        1        2        3        4        5        6        7        8 
## 3.215000 3.600000 2.933333 3.062500 2.920000 3.273333 3.105000 3.630000 
##        9       10       11       12 
## 2.965000 3.395000 3.410000 4.035000

Merging data frames

For this example experiment, we also have placental miRNA expression data. These values are not from real samples, but have been manipulated to roughly reproduce the effect observed by Maccani et al. 2010.

# Fetch the example miRNA expression table and save it as miRNA.csv.
download.file(
  url = "https://raw.githubusercontent.com/ucdavis-bioinformatics-training/2022_February_Introduction_to_R_for_Bioinformatics/main/miRNA.csv",
  destfile = "miRNA.csv"
)
# Read the file, using its first column as the row names, then print it.
mir <- read.csv(file = "miRNA.csv", row.names = 1)
print(mir)
##          sample.27 sample.1522 sample.569 sample.365 sample.1369 sample.1023
## miR-16          46          56         47         54          56          59
## miR-21          52          43         40         35          59          47
## miR-146a        98          97         87         96          84          96
## miR-182         53          45         63         41          46          50
##          sample.1272 sample.1262 sample.575 sample.792 sample.752 sample.619
## miR-16            49          55         62         63         46         52
## miR-21            42          45         55         45         42         43
## miR-146a          88          97         96        104        103         92
## miR-182           49          50         62         51         64         58
##          sample.1764 sample.516 sample.272 sample.1388 sample.1363 sample.300
## miR-16            46         61         49          46          61         60
## miR-21            40         51         43          44          47         48
## miR-146a          98         97         91         105          77         89
## miR-182           57         59         55          60          60         65
##          sample.57 sample.431 sample.532 sample.223 sample.1107 sample.697
## miR-16          46         70         60         60          57         68
## miR-21          39         51         44         46          49         47
## miR-146a       105         84         94         87         116         98
## miR-182         40         48         49         52          48         57
##          sample.1683 sample.808 sample.822 sample.1016 sample.820 sample.1081
## miR-16            49         59         54          69         58          55
## miR-21            48         56         52          41         55          52
## miR-146a          98        101         86          98        102          93
## miR-182           55         74         49          51         53          52
##          sample.321 sample.1636 sample.1360 sample.1058 sample.755 sample.462
## miR-16           68          63          70          77         56         65
## miR-21           46          39          57          55         46         58
## miR-146a        125         104         111         124        101        101
## miR-182          60          43          46          56         50         60
##          sample.1088 sample.553 sample.1191 sample.1313 sample.1600 sample.1187
## miR-16            42         63          66          64          50          57
## miR-21            54         54          48          47          44          46
## miR-146a         107        106         102         104         111          86
## miR-182           63         60          50          42          67          43

In this object, the rows are the gene identifiers, and the columns are the samples.

# Transpose so that each sample is a row and each miRNA is a column.
mir <- as.data.frame(t(mir))
# Recover the sample ID by removing the literal "sample." prefix from the row
# names. The pattern is anchored and the dot escaped so that only the leading
# prefix is removed (an unescaped "." would match any character).
mir$ID <- sub("^sample\\.", "", rownames(mir))
# Join the two tables on the ID column, named explicitly rather than relying
# on whichever column names the two tables happen to share.
experiment <- merge(birthweight, mir, by = "ID")

What did these lines of code do? Explore each of them, looking up the function help using ? as necessary.

# Print the merged table.
print(experiment)
##      ID birth.date    location length birthweight head.circumference
## 1    27   3/9/1967 Silver Hill     53        3.55                 37
## 2    57  8/12/1968    Memorial     51        3.32                 38
## 3   223 12/11/1968     General     50        3.87                 33
## 4   272  1/10/1968    Memorial     52        3.86                 36
## 5   300  7/18/1968 Silver Hill     46        2.05                 32
## 6   321  1/21/1968 Silver Hill     48        3.11                 33
## 7   365  4/23/1967    Memorial     52        3.53                 37
## 8   431  9/16/1968 Silver Hill     48        1.92                 30
## 9   462  6/19/1968 Silver Hill     58        4.10                 39
## 10  516   1/8/1968 Silver Hill     47        2.66                 33
## 11  532 10/25/1968     General     53        3.59                 34
## 12  553  8/17/1968 Silver Hill     54        3.94                 37
## 13  569  3/23/1967    Memorial     50        2.51                 35
## 14  575  7/12/1967    Memorial     50        2.78                 30
## 15  619  11/1/1967    Memorial     52        3.41                 33
## 16  697   2/6/1967 Silver Hill     48        3.03                 35
## 17  752 10/19/1967     General     49        3.32                 36
## 18  755  4/25/1968    Memorial     53        3.20                 33
## 19  792   9/7/1967    Memorial     53        3.64                 38
## 20  808   5/5/1967 Silver Hill     48        2.92                 33
## 21  820  10/7/1967     General     52        3.77                 34
## 22  822  6/14/1967    Memorial     50        3.42                 35
## 23 1016  7/13/1967 Silver Hill     53        4.32                 36
## 24 1023   6/7/1967    Memorial     52        3.00                 35
## 25 1058  4/24/1968 Silver Hill     53        3.15                 34
## 26 1081 12/14/1967 Silver Hill     54        3.63                 38
## 27 1088  7/24/1968     General     51        3.27                 36
## 28 1107  1/25/1967     General     52        3.23                 36
## 29 1187 12/19/1968 Silver Hill     53        4.07                 38
## 30 1191   9/7/1968     General     53        3.65                 33
## 31 1262  6/25/1967 Silver Hill     53        3.19                 34
## 32 1272  6/20/1967    Memorial     53        2.75                 32
## 33 1313  9/27/1968 Silver Hill     43        2.65                 32
## 34 1360  2/16/1968     General     56        4.55                 34
## 35 1363   4/2/1968     General     48        2.37                 30
## 36 1369   6/4/1967 Silver Hill     49        3.18                 34
## 37 1388  2/22/1968    Memorial     51        3.14                 33
## 38 1522  3/13/1967    Memorial     50        2.74                 33
## 39 1600  10/9/1968     General     53        2.90                 34
## 40 1636   2/2/1968 Silver Hill     51        3.93                 38
## 41 1683  2/14/1967 Silver Hill     53        3.35                 33
## 42 1764  12/7/1967 Silver Hill     58        4.57                 39
##    weeks.gestation smoker maternal.age maternal.cigarettes maternal.height
## 1               41   TRUE           37                  25             161
## 2               39   TRUE           23                  17             157
## 3               45   TRUE           28                  25             163
## 4               39   TRUE           30                  25             170
## 5               35   TRUE           41                   7             166
## 6               37  FALSE           28                   0             158
## 7               40   TRUE           26                  25             170
## 8               33   TRUE           20                   7             161
## 9               41  FALSE           35                   0             172
## 10              35   TRUE           20                  35             170
## 11              40   TRUE           31                  12             163
## 12              42  FALSE           24                   0             175
## 13              39   TRUE           22                   7             159
## 14              37   TRUE           19                   7             165
## 15              39   TRUE           23                  25             181
## 16              39  FALSE           27                   0             162
## 17              40   TRUE           27                  12             152
## 18              41  FALSE           21                   0             155
## 19              40   TRUE           20                   2             170
## 20              34  FALSE           26                   0             167
## 21              40  FALSE           24                   0             157
## 22              38  FALSE           20                   0             157
## 23              40  FALSE           19                   0             171
## 24              38   TRUE           30                  12             165
## 25              40  FALSE           29                   0             167
## 26              38  FALSE           18                   0             172
## 27              40  FALSE           24                   0             168
## 28              38  FALSE           31                   0             164
## 29              44  FALSE           20                   0             174
## 30              42  FALSE           21                   0             165
## 31              41   TRUE           27                  35             163
## 32              40   TRUE           37                  50             168
## 33              33  FALSE           24                   0             149
## 34              44  FALSE           20                   0             162
## 35              37   TRUE           20                   7             163
## 36              38   TRUE           31                  25             162
## 37              41   TRUE           22                   7             160
## 38              39   TRUE           21                  17             156
## 39              39  FALSE           19                   0             165
## 40              38  FALSE           29                   0             165
## 41              41  FALSE           27                   0             164
## 42              41   TRUE           32                  12             173
##    maternal.prepregnant.weight paternal.age paternal.education
## 1                           66           46                 NA
## 2                           48           NA                 NA
## 3                           54           30                 16
## 4                           78           40                 16
## 5                           57           37                 14
## 6                           54           39                 10
## 7                           62           30                 10
## 8                           50           20                 10
## 9                           58           31                 16
## 10                          57           23                 12
## 11                          49           41                 12
## 12                          66           30                 12
## 13                          52           23                 14
## 14                          60           20                 14
## 15                          69           23                 16
## 16                          62           27                 14
## 17                          48           37                 12
## 18                          55           25                 14
## 19                          59           24                 12
## 20                          64           25                 12
## 21                          50           31                 16
## 22                          48           22                 14
## 23                          62           19                 12
## 24                          64           38                 14
## 25                          60           30                 16
## 26                          50           20                 12
## 27                          53           29                 16
## 28                          57           NA                 NA
## 29                          68           26                 14
## 30                          61           21                 10
## 31                          51           31                 16
## 32                          61           31                 16
## 33                          45           26                 16
## 34                          57           23                 10
## 35                          47           20                 10
## 36                          57           32                 16
## 37                          53           24                 16
## 38                          53           24                 12
## 39                          57           NA                 NA
## 40                          61           NA                 NA
## 41                          62           37                 14
## 42                          70           38                 14
##    paternal.cigarettes paternal.height low.birthweight geriatric.pregnancy
## 1                    0             175               0                TRUE
## 2                   NA              NA               0               FALSE
## 3                    0             183               0               FALSE
## 4                   50             178               0               FALSE
## 5                   25             173               1                TRUE
## 6                    0             171               0               FALSE
## 7                   25             181               0               FALSE
## 8                   35             180               1               FALSE
## 9                   25             185               0                TRUE
## 10                  50             186               1               FALSE
## 11                  50             191               0               FALSE
## 12                   0             184               0               FALSE
## 13                  25              NA               1               FALSE
## 14                   0             183               0               FALSE
## 15                   2             181               0               FALSE
## 16                   0             178               0               FALSE
## 17                  25             170               0               FALSE
## 18                  25             183               0               FALSE
## 19                  12             185               0               FALSE
## 20                  25             175               0               FALSE
## 21                   0             173               0               FALSE
## 22                   0             179               0               FALSE
## 23                   0             183               0               FALSE
## 24                  50             180               0               FALSE
## 25                  NA             182               0               FALSE
## 26                   7             172               0               FALSE
## 27                   0             181               0               FALSE
## 28                  NA              NA               0               FALSE
## 29                  25             189               0               FALSE
## 30                  25             185               0               FALSE
## 31                  25             185               0               FALSE
## 32                   0             173               0                TRUE
## 33                   0             169               1               FALSE
## 34                  35             179               0               FALSE
## 35                  35             185               1               FALSE
## 36                  50             194               0               FALSE
## 37                  12             176               0               FALSE
## 38                   7             179               0               FALSE
## 39                  NA              NA               0               FALSE
## 40                  NA              NA               0               FALSE
## 41                   0             170               0               FALSE
## 42                  25             180               0               FALSE
##    month day year miR-16 miR-21 miR-146a miR-182
## 1      3   9 1967     46     52       98      53
## 2      8  12 1968     46     39      105      40
## 3     12  11 1968     60     46       87      52
## 4      1  10 1968     49     43       91      55
## 5      7  18 1968     60     48       89      65
## 6      1  21 1968     68     46      125      60
## 7      4  23 1967     54     35       96      41
## 8      9  16 1968     70     51       84      48
## 9      6  19 1968     65     58      101      60
## 10     1   8 1968     61     51       97      59
## 11    10  25 1968     60     44       94      49
## 12     8  17 1968     63     54      106      60
## 13     3  23 1967     47     40       87      63
## 14     7  12 1967     62     55       96      62
## 15    11   1 1967     52     43       92      58
## 16     2   6 1967     68     47       98      57
## 17    10  19 1967     46     42      103      64
## 18     4  25 1968     56     46      101      50
## 19     9   7 1967     63     45      104      51
## 20     5   5 1967     59     56      101      74
## 21    10   7 1967     58     55      102      53
## 22     6  14 1967     54     52       86      49
## 23     7  13 1967     69     41       98      51
## 24     6   7 1967     59     47       96      50
## 25     4  24 1968     77     55      124      56
## 26    12  14 1967     55     52       93      52
## 27     7  24 1968     42     54      107      63
## 28     1  25 1967     57     49      116      48
## 29    12  19 1968     57     46       86      43
## 30     9   7 1968     66     48      102      50
## 31     6  25 1967     55     45       97      50
## 32     6  20 1967     49     42       88      49
## 33     9  27 1968     64     47      104      42
## 34     2  16 1968     70     57      111      46
## 35     4   2 1968     61     47       77      60
## 36     6   4 1967     56     59       84      46
## 37     2  22 1968     46     44      105      60
## 38     3  13 1967     56     43       97      45
## 39    10   9 1968     50     44      111      67
## 40     2   2 1968     63     39      104      43
## 41     2  14 1967     49     48       98      55
## 42    12   7 1967     46     40       98      57

reference : https://ucdavis-bioinformatics-training.github.io/2022_February_Introduction_to_R_for_Bioinformatics/basic-data-types.html#coercion-converting-between-classes