#Computations in R
#Sum 2 and 3 using the + operator. (5 pt)
# NOTE: this variable shares its name with base::sum(). Calls such as
# sum(1, 2) still work, because R skips non-function objects when it looks up
# a name used in call position.
sum <- 2 + 3
print(sum)
## [1] 5
#Assign the sum of 2, 3, and 4 to variable x. (5 pt)
# No parentheses are needed: the whole right-hand side is evaluated before
# the assignment happens.
x <- 2 + 3 + 4
print(x)
## [1] 9
#Find the absolute value of the expression 5 - 145 using abs(). (5 pt)
# 5 - 145 is -140; abs() drops the sign.
abs(5 - 145)
## [1] 140
#Data Structures in R
#Make a vector of 1, 2, 3, 5, and 10 using c() and assign it to the variable vec. (5 pt)
# Numeric (double) vector with five elements.
vec <- c(1, 2, 3, 5, 10)
print(vec)
## [1] 1 2 3 5 10
#Make a logical vector with TRUE, FALSE values of length 4 using c(). (5 pt)
# One TRUE followed by three FALSE values, combined with c().
logvec <- c(TRUE, rep(FALSE, 3))
print(logvec)
## [1] TRUE FALSE FALSE FALSE
#Subset vec to get the 5th element using []. (5 pt)
# R indexes from 1, so position 5 is the last element here.
vec[5]
## [1] 10
#Matrices in R
#Make a 5x3 matrix (5 rows, 3 columns) using matrix(), named mat. (5 pt)
# Column data for the matrix, one vector of length 5 per column.
x <- c(1, 5, 3, 8, 9)
y <- c(3, 7, 0, 12, 5)
z <- c(2, 5, 8, 1, 31)
# Build the 5x3 matrix with matrix(), as the task requires. matrix() fills
# column by column by default, so c(x, y, z) places x, y and z in columns
# 1, 2 and 3. The dimnames label the columns and leave the rows unnamed.
mat <- matrix(
  c(x, y, z),
  nrow = 5,
  ncol = 3,
  dimnames = list(NULL, c("x", "y", "z"))
)
mat
## x y z
## [1,] 1 3 2
## [2,] 5 7 5
## [3,] 3 0 8
## [4,] 8 12 1
## [5,] 9 5 31
#Extract the first 3 columns and first 3 rows of your matrix using []. (5 pt)
# Row index first, column index second; both ranges are 1 to 3.
mat[1:3, 1:3]
## x y z
## [1,] 1 3 2
## [2,] 5 7 5
## [3,] 3 0 8
#Data Frames in R
#Make a data frame named df with 3 columns and 5 rows: (10 pt)
#First column: Sequence of numbers 1:5.
#Second column: A character vector.
# First column: the integers 1 to 5.
num <- 1:5
# Second column: chromosome-style labels "chr1" ... "chr5".
chr <- paste0("chr", 1:5)
# Third column: one "+" or "-" symbol per row.
strand <- c("+", "-", "-", "+", "+")
# Name the columns explicitly instead of relying on the argument names.
df <- data.frame(num = num, chr = chr, strand = strand)
df
## num chr strand
## 1 1 chr1 +
## 2 2 chr2 -
## 3 3 chr3 -
## 4 4 chr4 +
## 5 5 chr5 +
#Subset the data frame df you created to extract only rows where the first column is greater than 3. Name the subset subset_df. (5 pt)
# The logical test selects rows; the empty column index keeps every column.
subset_df <- df[df[["num"]] > 3, ]
subset_df
## num chr strand
## 4 4 chr4 +
## 5 5 chr5 +
#Lists in R
#Make a list called my_list with 4 elements using list(). (5 pt)
# A list can mix element types: a numeric vector, a 2x5 integer matrix,
# a character string and a single number.
my_list <- list(
  nums = c(3, 6, 7, 2),
  matrix = matrix(1:10, nrow = 2),
  name = "Susan",
  height = 5.6
)
my_list
## $nums
## [1] 3 6 7 2
##
## $matrix
## [,1] [,2] [,3] [,4] [,5]
## [1,] 1 3 5 7 9
## [2,] 2 4 6 8 10
##
## $name
## [1] "Susan"
##
## $height
## [1] 5.6
#Select the 1st element of your list using $ notation. (5 pt)
# The first element is named "nums", so $nums extracts it.
my_list$nums
## [1] 3 6 7 2
#Reading and Writing Data
#Read CpG island data using read.table() with appropriate arguments. (5 pt)
#To get the data, download the tar.gz file here: https://github.com/compgenomr/compGenomRData/releases/tag/v1.0
#Then locate the file by navigating > compGenomRData-1.0 > inst > extdata > cpgi.hg19.chr21.bed
# Show the working directory and its contents so the relative file name used
# below can be checked against what is on disk.
current_directory <- getwd()
print(current_directory)
## [1] "/Users/jourdanh/Documents/BIN620 Biological Sequence Analysis/Week 2"
list.files(path = ".")
## [1] "~$Lecture_2_Introduction_to_R.pptx" "cpgi.hg19.chr21.bed"
## [3] "homework2.Rmd" "Lecture_2_Introduction_to_R.pptx"
## [5] "Lecture_2_Study_Guide.docx"
# header = FALSE: the first line of the file is read as data, and read.table()
# assigns the default column names V1, V2, ...
cpgi.df <- read.table(file = "cpgi.hg19.chr21.bed", header = FALSE)
# Typing the object name prints the whole data frame.
cpgi.df
## V1 V2 V3 V4
## 1 chr21 9825442 9826296 CpG:_120
## 2 chr21 9909011 9909218 CpG:_20
## 3 chr21 9968264 9968620 CpG:_30
## 4 chr21 10989913 10991413 CpG:_152
## 5 chr21 14409412 14410501 CpG:_73
## 6 chr21 15351908 15352658 CpG:_61
## 7 chr21 15588439 15588908 CpG:_38
## 8 chr21 15755371 15756086 CpG:_50
## 9 chr21 16436496 16437864 CpG:_148
## 10 chr21 17101853 17103000 CpG:_123
## 11 chr21 18884806 18886111 CpG:_138
## 12 chr21 18984535 18985697 CpG:_154
## 13 chr21 19191095 19191952 CpG:_108
## 14 chr21 19617098 19617874 CpG:_77
## 15 chr21 26934423 26934805 CpG:_51
## 16 chr21 26979577 26980252 CpG:_60
## 17 chr21 27011624 27012398 CpG:_64
## 18 chr21 27106814 27108211 CpG:_112
## 19 chr21 27541893 27543524 CpG:_168
## 20 chr21 27945010 27945646 CpG:_61
## 21 chr21 28216558 28218117 CpG:_157
## 22 chr21 28337856 28340237 CpG:_193
## 23 chr21 30257425 30257771 CpG:_40
## 24 chr21 30364965 30365342 CpG:_33
## 25 chr21 30391264 30391758 CpG:_44
## 26 chr21 30396848 30397410 CpG:_40
## 27 chr21 30445575 30446255 CpG:_59
## 28 chr21 30670744 30671786 CpG:_119
## 29 chr21 31311386 31312106 CpG:_61
## 30 chr21 32624144 32624382 CpG:_24
## 31 chr21 32929927 32932017 CpG:_249
## 32 chr21 33031734 33032657 CpG:_91
## 33 chr21 33103456 33104975 CpG:_163
## 34 chr21 33244910 33246609 CpG:_169
## 35 chr21 33650908 33651513 CpG:_63
## 36 chr21 33765013 33765300 CpG:_32
## 37 chr21 33765512 33765752 CpG:_21
## 38 chr21 33783914 33785717 CpG:_173
## 39 chr21 33984367 33985450 CpG:_93
## 40 chr21 34099716 34100796 CpG:_109
## 41 chr21 34143453 34144141 CpG:_100
## 42 chr21 34395128 34400245 CpG:_348
## 43 chr21 34442206 34444543 CpG:_202
## 44 chr21 34602241 34603265 CpG:_97
## 45 chr21 34638515 34639075 CpG:_50
## 46 chr21 34696532 34697750 CpG:_110
## 47 chr21 34775437 34776493 CpG:_99
## 48 chr21 34851229 34852702 CpG:_149
## 49 chr21 34863644 34864268 CpG:_54
## 50 chr21 34914303 34915906 CpG:_124
## 51 chr21 34960388 34961252 CpG:_93
## 52 chr21 35013876 35014257 CpG:_33
## 53 chr21 35014823 35015792 CpG:_105
## 54 chr21 35287803 35288208 CpG:_36
## 55 chr21 35445087 35446013 CpG:_96
## 56 chr21 35747326 35748122 CpG:_73
## 57 chr21 35831697 35832365 CpG:_57
## 58 chr21 35986623 35987798 CpG:_134
## 59 chr21 36041305 36043224 CpG:_204
## 60 chr21 36164144 36164934 CpG:_89
## 61 chr21 36258952 36259472 CpG:_53
## 62 chr21 36260450 36263687 CpG:_306
## 63 chr21 37432123 37433150 CpG:_90
## 64 chr21 37442140 37442778 CpG:_70
## 65 chr21 37507198 37508259 CpG:_106
## 66 chr21 37528205 37529561 CpG:_118
## 67 chr21 37617467 37617872 CpG:_30
## 68 chr21 37692208 37693059 CpG:_84
## 69 chr21 37757496 37758113 CpG:_68
## 70 chr21 38068193 38073891 CpG:_438
## 71 chr21 38079941 38081833 CpG:_153
## 72 chr21 38119793 38120742 CpG:_84
## 73 chr21 38338384 38338904 CpG:_55
## 74 chr21 38352856 38353274 CpG:_43
## 75 chr21 38362015 38362868 CpG:_103
## 76 chr21 38377937 38379437 CpG:_149
## 77 chr21 38444860 38446497 CpG:_174
## 78 chr21 38592987 38593674 CpG:_62
## 79 chr21 38639235 38640491 CpG:_121
## 80 chr21 39287946 39289267 CpG:_107
## 81 chr21 40032243 40033665 CpG:_140
## 82 chr21 40177002 40178667 CpG:_152
## 83 chr21 40194644 40194849 CpG:_21
## 84 chr21 40554764 40556117 CpG:_124
## 85 chr21 40684370 40686324 CpG:_217
## 86 chr21 40720185 40721625 CpG:_125
## 87 chr21 40752087 40752825 CpG:_64
## 88 chr21 40817302 40817973 CpG:_80
## 89 chr21 41516237 41516472 CpG:_18
## 90 chr21 42218489 42219222 CpG:_66
## 91 chr21 42539367 42540872 CpG:_149
## 92 chr21 42617817 42618020 CpG:_16
## 93 chr21 42688746 42689085 CpG:_29
## 94 chr21 42792293 42792704 CpG:_29
## 95 chr21 42798146 42798884 CpG:_67
## 96 chr21 42878751 42880674 CpG:_152
## 97 chr21 43160916 43161948 CpG:_81
## 98 chr21 43176662 43176894 CpG:_17
## 99 chr21 43186280 43187436 CpG:_120
## 100 chr21 43221474 43221842 CpG:_24
## 101 chr21 43230458 43230716 CpG:_22
## 102 chr21 43236025 43236510 CpG:_37
## 103 chr21 43240351 43240586 CpG:_16
## 104 chr21 43242237 43242468 CpG:_18
## 105 chr21 43255476 43255722 CpG:_28
## 106 chr21 43274625 43274913 CpG:_25
## 107 chr21 43327734 43328024 CpG:_25
## 108 chr21 43373135 43374062 CpG:_98
## 109 chr21 43429279 43431489 CpG:_249
## 110 chr21 43547871 43548089 CpG:_16
## 111 chr21 43638948 43640132 CpG:_113
## 112 chr21 43916278 43916900 CpG:_58
## 113 chr21 44073201 44074650 CpG:_114
## 114 chr21 44106168 44106546 CpG:_36
## 115 chr21 44299111 44299808 CpG:_78
## 116 chr21 44312913 44313860 CpG:_95
## 117 chr21 44394084 44395850 CpG:_168
## 118 chr21 44473839 44474083 CpG:_20
## 119 chr21 44480559 44480772 CpG:_19
## 120 chr21 44486140 44486465 CpG:_28
## 121 chr21 44494624 44496989 CpG:_232
## 122 chr21 44515262 44515560 CpG:_23
## 123 chr21 44527163 44528066 CpG:_95
## 124 chr21 44590612 44590826 CpG:_19
## 125 chr21 44592182 44592384 CpG:_18
## 126 chr21 44592719 44593643 CpG:_69
## 127 chr21 44836730 44837207 CpG:_40
## 128 chr21 44840119 44840450 CpG:_27
## 129 chr21 45077671 45079821 CpG:_167
## 130 chr21 45113961 45114264 CpG:_21
## 131 chr21 45114398 45114604 CpG:_17
## 132 chr21 45138293 45139831 CpG:_156
## 133 chr21 45177520 45177727 CpG:_14
## 134 chr21 45195887 45196755 CpG:_94
## 135 chr21 45209221 45209880 CpG:_83
## 136 chr21 45223462 45223724 CpG:_19
## 137 chr21 45379516 45379816 CpG:_24
## 138 chr21 45402484 45402842 CpG:_28
## 139 chr21 45431646 45432978 CpG:_151
## 140 chr21 45526956 45527813 CpG:_108
## 141 chr21 45553021 45554037 CpG:_100
## 142 chr21 45660557 45663172 CpG:_233
## 143 chr21 45678290 45678544 CpG:_24
## 144 chr21 45705428 45706044 CpG:_66
## 145 chr21 45713509 45713813 CpG:_30
## 146 chr21 45719515 45720564 CpG:_119
## 147 chr21 45744313 45744847 CpG:_37
## 148 chr21 45758975 45759525 CpG:_60
## 149 chr21 45789090 45789373 CpG:_21
## 150 chr21 45811345 45811550 CpG:_20
## 151 chr21 45844992 45845693 CpG:_49
## 152 chr21 45875391 45877326 CpG:_131
## 153 chr21 45926531 45926754 CpG:_19
## 154 chr21 46221388 46222456 CpG:_113
## 155 chr21 46236726 46238958 CpG:_217
## 156 chr21 46292709 46294744 CpG:_204
## 157 chr21 46306508 46306795 CpG:_22
## 158 chr21 46321397 46321644 CpG:_18
## 159 chr21 46359647 46360423 CpG:_90
## 160 chr21 46387051 46387302 CpG:_18
## 161 chr21 46410338 46410661 CpG:_31
## 162 chr21 46494173 46495472 CpG:_142
## 163 chr21 46685115 46685379 CpG:_24
## 164 chr21 46686907 46687121 CpG:_22
## 165 chr21 46696972 46697214 CpG:_24
## 166 chr21 46707629 46708751 CpG:_96
## 167 chr21 46714984 46715206 CpG:_16
## 168 chr21 46824531 46826234 CpG:_197
## 169 chr21 46894816 46896312 CpG:_108
## 170 chr21 46897841 46898158 CpG:_24
## 171 chr21 46912381 46912628 CpG:_21
## 172 chr21 46929225 46930211 CpG:_80
## 173 chr21 46932081 46932387 CpG:_28
## 174 chr21 46950586 46952286 CpG:_132
## 175 chr21 46957315 46957916 CpG:_46
## 176 chr21 46961668 46963078 CpG:_144
## 177 chr21 47404108 47404312 CpG:_26
## 178 chr21 47421135 47421398 CpG:_23
## 179 chr21 47421844 47423883 CpG:_160
## 180 chr21 47517651 47518999 CpG:_150
## 181 chr21 47531899 47532821 CpG:_76
## 182 chr21 47545187 47546212 CpG:_88
## 183 chr21 47548938 47549427 CpG:_40
## 184 chr21 47551837 47552629 CpG:_83
## 185 chr21 47565285 47566345 CpG:_78
## 186 chr21 47580492 47582065 CpG:_148
## 187 chr21 47602431 47602740 CpG:_28
## 188 chr21 47648191 47649622 CpG:_141
## 189 chr21 47705627 47706428 CpG:_96
## 190 chr21 47717235 47718059 CpG:_84
## 191 chr21 47742779 47743269 CpG:_37
## 192 chr21 47743685 47745351 CpG:_175
## 193 chr21 47808671 47809121 CpG:_34
## 194 chr21 47811130 47811351 CpG:_18
## 195 chr21 47831533 47831931 CpG:_35
## 196 chr21 47832879 47833257 CpG:_30
## 197 chr21 47845595 47845864 CpG:_20
## 198 chr21 47851448 47851683 CpG:_18
## 199 chr21 47855892 47856143 CpG:_20
## 200 chr21 47878070 47879576 CpG:_167
## 201 chr21 47918497 47918728 CpG:_19
## 202 chr21 48018542 48018791 CpG:_21
## 203 chr21 48055199 48056060 CpG:_88
## 204 chr21 48068517 48068808 CpG:_24
## 205 chr21 48081241 48081849 CpG:_55
#Plotting in R
#Make a scatter plot using vectors named x1 and y1. Each vector should have 10 numbers (either random, consecutive, or some other organization, provided they aren’t too far apart in value). Add a title, x-axis label, and y-axis label. (5 pt)
# Ten consecutive integers per axis; y1 is x1 shifted up by 3.
x1 <- 1:10
y1 <- 4:13
plot(
  x = x1,
  y = y1,
  main = "Scatterplot of Vectors",
  xlab = "x values",
  ylab = "y values"
)
#Modify your scatter plot from Question 14 to use the pch argument to change the point style and the col argument to change the colors. Hint: Look up the documentation for the pch argument using ?plot in RStudio. (5 pt)
# Same data as the first plot, redefined so this answer runs on its own.
x1 <- 1:10
y1 <- x1 + 3L
# pch = 2 draws open triangles; col = "red" colours the points.
plot(
  x = x1,
  y = y1,
  main = "Scatterplot of Vectors",
  xlab = "x values",
  ylab = "y values",
  pch = 2,
  col = "red"
)
#Make a histogram of the previously defined vector x1 using hist() and customize it with col, xlab, ylab, and main. (5 pt)
# Plot x1 itself, as the task asks. rnorm(x1) would instead draw length(x1)
# random normal values that have nothing to do with the contents of x1 and
# that change on every run.
hist(
  x1,
  main = "Histogram of x1",
  xlab = "x1",
  ylab = "Frequency",
  col = "blue"
)
#Functions and Control Structures
#Write a function called classify_gc to classify GC percentages as low (< 60), medium (60–75), or high (> 75). (10 pt)
# Example GC percentages used to demonstrate the classifier.
gc_values <- c(55, 87, 72, 82, 63, 48, 59)
# Classify GC percentages as low (< 60), medium (60 to 75 inclusive) or
# high (> 75).
#
# gc: numeric vector of GC percentages; missing values are rejected.
#
# Prints one "GC percentage is <class>" line per element, in input order, and
# invisibly returns the class labels ("low", "medium", "high") as a character
# vector of the same length as gc.
classify_gc <- function(gc) {
  stopifnot(is.numeric(gc), !anyNA(gc))
  # Classify the argument itself, not a global vector, so the function works
  # on whatever it is given. `<= 75` keeps the upper boundary in "medium".
  labels <- as.character(
    ifelse(gc < 60, "low", ifelse(gc <= 75, "medium", "high"))
  )
  # sprintf() is vectorised, so this emits one line per label; an empty input
  # produces no output.
  cat(sprintf("GC percentage is %s\n", labels), sep = "")
  invisible(labels)
}
classify_gc(gc_values)
## GC percentage is low
## GC percentage is high
## GC percentage is medium
## GC percentage is high
## GC percentage is medium
## GC percentage is low
## GC percentage is low
#Use lapply to apply your GC classification function to a vector of GC percentages. You will need to create the GC vector using several possible percentages (e.g., 55, 65, 85) and name the vector gc_values. (5 pt)
# Classify a single GC percentage as low (< 60), medium (60 to 75 inclusive)
# or high (> 75).
#
# gc: one non-missing numeric value.
#
# Returns a length-1 character string describing the class. The function
# works on one value at a time, which is why it is applied with lapply() below.
class_gc <- function(gc) {
  # `if` needs exactly one non-missing condition, so reject anything else
  # up front with a clear error.
  stopifnot(is.numeric(gc), length(gc) == 1L, !is.na(gc))
  if (gc < 60) {
    "GC percentage is low\n"
  } else if (gc <= 75) {
    # Reaching this branch already implies gc >= 60; `<=` keeps 75 in "medium".
    "GC percentage is medium \n"
  } else {
    "GC percentage is high \n"
  }
}
gc_values <- c(55, 87, 72, 82, 63, 48, 59)
# lapply() calls class_gc() once per element and returns a list.
classified_gc <- lapply(gc_values, class_gc)
# Label each result with the percentage it came from; the numbers are
# converted to character names.
names(classified_gc) <- gc_values
classified_gc
## $`55`
## [1] "GC percentage is low\n"
##
## $`87`
## [1] "GC percentage is high \n"
##
## $`72`
## [1] "GC percentage is medium \n"
##
## $`82`
## [1] "GC percentage is high \n"
##
## $`63`
## [1] "GC percentage is medium \n"
##
## $`48`
## [1] "GC percentage is low\n"
##
## $`59`
## [1] "GC percentage is low\n"