#Computations in R

#Sum 2 and 3 using the + operator. (5 pt)

#Store the result as `total` so the base function sum() is not masked by a variable.
total <- 2 + 3
print(total)
## [1] 5
#Assign the sum of 2, 3, and 4 to variable x. (5 pt)
x <- 2 + 3 + 4
print(x)
## [1] 9
#Find the absolute value of the expression 5 - 145 using abs(). (5 pt)
abs(5 - 145)
## [1] 140

#Data Structures in R

#Make a vector of 1, 2, 3, 5, and 10 using c() and assign it to the variable vec. (5 pt)
#c() promotes the integer run 1:3 to double when combined with 5 and 10.
vec <- c(1:3, 5, 10)
print(vec)
## [1]  1  2  3  5 10
#Make a logical vector with TRUE, FALSE values of length 4 using c(). (5 pt)
logvec <- c(TRUE, rep(FALSE, 3))
print(logvec)
## [1]  TRUE FALSE FALSE FALSE
#Subset vec to get the 5th element using []. (5 pt)
vec[5]
## [1] 10

#Matrices in R

#Make a 5x3 matrix (5 rows, 3 columns) using matrix(), named mat. (5 pt)
#matrix() fills column by column, so each row of values below becomes one column.
#dimnames labels the columns x, y, z without creating (or overwriting) global vectors.
mat <- matrix(
  c(
    1, 5, 3, 8, 9,
    3, 7, 0, 12, 5,
    2, 5, 8, 1, 31
  ),
  nrow = 5,
  ncol = 3,
  dimnames = list(NULL, c("x", "y", "z"))
)
mat
##      x  y  z
## [1,] 1  3  2
## [2,] 5  7  5
## [3,] 3  0  8
## [4,] 8 12  1
## [5,] 9  5 31
#Extract the first 3 columns and first 3 rows of your matrix using []. (5 pt)
mat[1:3, 1:3]
##      x y z
## [1,] 1 3 2
## [2,] 5 7 5
## [3,] 3 0 8

#Data Frames in R

#Make a data frame named df with 3 columns and 5 rows: (10 pt)
  #First column: Sequence of numbers 1:5.
  #Second column: A character vector.
#Column vectors: row index, chromosome label derived from the index, and strand sign.
num <- 1:5
chr <- paste0("chr", num)
strand <- c("+", "-", "-", "+", "+")
df <- data.frame(num = num, chr = chr, strand = strand)
df
##   num  chr strand
## 1   1 chr1      +
## 2   2 chr2      -
## 3   3 chr3      -
## 4   4 chr4      +
## 5   5 chr5      +
#Subset the data frame df you created to extract only rows where the first column is greater than 3. Name the subset subset_df. (5 pt)

#which() turns the logical mask into row positions; the original row names are kept.
subset_df <- df[which(df$num > 3), ]
subset_df
##   num  chr strand
## 4   4 chr4      +
## 5   5 chr5      +

#Lists in R

#Make a list called my_list with 4 elements using list(). (5 pt)
#Four named elements: a numeric vector, a 2x5 integer matrix, a string, and a number.
my_list <- list(
  nums = c(3, 6, 7, 2),
  matrix = matrix(seq_len(10), nrow = 2),
  name = "Susan",
  height = 5.6
)
my_list
## $nums
## [1] 3 6 7 2
## 
## $matrix
##      [,1] [,2] [,3] [,4] [,5]
## [1,]    1    3    5    7    9
## [2,]    2    4    6    8   10
## 
## $name
## [1] "Susan"
## 
## $height
## [1] 5.6
#Select the 1st element of your list using $ notation. (5 pt)
my_list$nums
## [1] 3 6 7 2

#Reading and Writing Data

#Read CpG island data using read.table() with appropriate arguments. (5 pt)
#To get the data, download the tar.gz file here: https://github.com/compgenomr/compGenomRData/releases/tag/v1.0 
#Then locate the file by navigating > compGenomRData-1.0 > inst > extdata > cpgi.hg19.chr21.bed
#Show the working directory and its files so the relative path used below can be checked.
current_directory <- getwd()
print(current_directory)
## [1] "/Users/jourdanh/Documents/BIN620 Biological Sequence Analysis/Week 2"
list.files()
## [1] "~$Lecture_2_Introduction_to_R.pptx" "cpgi.hg19.chr21.bed"               
## [3] "homework2.Rmd"                      "Lecture_2_Introduction_to_R.pptx"  
## [5] "Lecture_2_Study_Guide.docx"
#header = FALSE: the first line is read as data, so the columns get the default names V1..V4.
cpgi.df <- read.table(
  file = "cpgi.hg19.chr21.bed",
  header = FALSE
)
cpgi.df
##        V1       V2       V3       V4
## 1   chr21  9825442  9826296 CpG:_120
## 2   chr21  9909011  9909218  CpG:_20
## 3   chr21  9968264  9968620  CpG:_30
## 4   chr21 10989913 10991413 CpG:_152
## 5   chr21 14409412 14410501  CpG:_73
## 6   chr21 15351908 15352658  CpG:_61
## 7   chr21 15588439 15588908  CpG:_38
## 8   chr21 15755371 15756086  CpG:_50
## 9   chr21 16436496 16437864 CpG:_148
## 10  chr21 17101853 17103000 CpG:_123
## 11  chr21 18884806 18886111 CpG:_138
## 12  chr21 18984535 18985697 CpG:_154
## 13  chr21 19191095 19191952 CpG:_108
## 14  chr21 19617098 19617874  CpG:_77
## 15  chr21 26934423 26934805  CpG:_51
## 16  chr21 26979577 26980252  CpG:_60
## 17  chr21 27011624 27012398  CpG:_64
## 18  chr21 27106814 27108211 CpG:_112
## 19  chr21 27541893 27543524 CpG:_168
## 20  chr21 27945010 27945646  CpG:_61
## 21  chr21 28216558 28218117 CpG:_157
## 22  chr21 28337856 28340237 CpG:_193
## 23  chr21 30257425 30257771  CpG:_40
## 24  chr21 30364965 30365342  CpG:_33
## 25  chr21 30391264 30391758  CpG:_44
## 26  chr21 30396848 30397410  CpG:_40
## 27  chr21 30445575 30446255  CpG:_59
## 28  chr21 30670744 30671786 CpG:_119
## 29  chr21 31311386 31312106  CpG:_61
## 30  chr21 32624144 32624382  CpG:_24
## 31  chr21 32929927 32932017 CpG:_249
## 32  chr21 33031734 33032657  CpG:_91
## 33  chr21 33103456 33104975 CpG:_163
## 34  chr21 33244910 33246609 CpG:_169
## 35  chr21 33650908 33651513  CpG:_63
## 36  chr21 33765013 33765300  CpG:_32
## 37  chr21 33765512 33765752  CpG:_21
## 38  chr21 33783914 33785717 CpG:_173
## 39  chr21 33984367 33985450  CpG:_93
## 40  chr21 34099716 34100796 CpG:_109
## 41  chr21 34143453 34144141 CpG:_100
## 42  chr21 34395128 34400245 CpG:_348
## 43  chr21 34442206 34444543 CpG:_202
## 44  chr21 34602241 34603265  CpG:_97
## 45  chr21 34638515 34639075  CpG:_50
## 46  chr21 34696532 34697750 CpG:_110
## 47  chr21 34775437 34776493  CpG:_99
## 48  chr21 34851229 34852702 CpG:_149
## 49  chr21 34863644 34864268  CpG:_54
## 50  chr21 34914303 34915906 CpG:_124
## 51  chr21 34960388 34961252  CpG:_93
## 52  chr21 35013876 35014257  CpG:_33
## 53  chr21 35014823 35015792 CpG:_105
## 54  chr21 35287803 35288208  CpG:_36
## 55  chr21 35445087 35446013  CpG:_96
## 56  chr21 35747326 35748122  CpG:_73
## 57  chr21 35831697 35832365  CpG:_57
## 58  chr21 35986623 35987798 CpG:_134
## 59  chr21 36041305 36043224 CpG:_204
## 60  chr21 36164144 36164934  CpG:_89
## 61  chr21 36258952 36259472  CpG:_53
## 62  chr21 36260450 36263687 CpG:_306
## 63  chr21 37432123 37433150  CpG:_90
## 64  chr21 37442140 37442778  CpG:_70
## 65  chr21 37507198 37508259 CpG:_106
## 66  chr21 37528205 37529561 CpG:_118
## 67  chr21 37617467 37617872  CpG:_30
## 68  chr21 37692208 37693059  CpG:_84
## 69  chr21 37757496 37758113  CpG:_68
## 70  chr21 38068193 38073891 CpG:_438
## 71  chr21 38079941 38081833 CpG:_153
## 72  chr21 38119793 38120742  CpG:_84
## 73  chr21 38338384 38338904  CpG:_55
## 74  chr21 38352856 38353274  CpG:_43
## 75  chr21 38362015 38362868 CpG:_103
## 76  chr21 38377937 38379437 CpG:_149
## 77  chr21 38444860 38446497 CpG:_174
## 78  chr21 38592987 38593674  CpG:_62
## 79  chr21 38639235 38640491 CpG:_121
## 80  chr21 39287946 39289267 CpG:_107
## 81  chr21 40032243 40033665 CpG:_140
## 82  chr21 40177002 40178667 CpG:_152
## 83  chr21 40194644 40194849  CpG:_21
## 84  chr21 40554764 40556117 CpG:_124
## 85  chr21 40684370 40686324 CpG:_217
## 86  chr21 40720185 40721625 CpG:_125
## 87  chr21 40752087 40752825  CpG:_64
## 88  chr21 40817302 40817973  CpG:_80
## 89  chr21 41516237 41516472  CpG:_18
## 90  chr21 42218489 42219222  CpG:_66
## 91  chr21 42539367 42540872 CpG:_149
## 92  chr21 42617817 42618020  CpG:_16
## 93  chr21 42688746 42689085  CpG:_29
## 94  chr21 42792293 42792704  CpG:_29
## 95  chr21 42798146 42798884  CpG:_67
## 96  chr21 42878751 42880674 CpG:_152
## 97  chr21 43160916 43161948  CpG:_81
## 98  chr21 43176662 43176894  CpG:_17
## 99  chr21 43186280 43187436 CpG:_120
## 100 chr21 43221474 43221842  CpG:_24
## 101 chr21 43230458 43230716  CpG:_22
## 102 chr21 43236025 43236510  CpG:_37
## 103 chr21 43240351 43240586  CpG:_16
## 104 chr21 43242237 43242468  CpG:_18
## 105 chr21 43255476 43255722  CpG:_28
## 106 chr21 43274625 43274913  CpG:_25
## 107 chr21 43327734 43328024  CpG:_25
## 108 chr21 43373135 43374062  CpG:_98
## 109 chr21 43429279 43431489 CpG:_249
## 110 chr21 43547871 43548089  CpG:_16
## 111 chr21 43638948 43640132 CpG:_113
## 112 chr21 43916278 43916900  CpG:_58
## 113 chr21 44073201 44074650 CpG:_114
## 114 chr21 44106168 44106546  CpG:_36
## 115 chr21 44299111 44299808  CpG:_78
## 116 chr21 44312913 44313860  CpG:_95
## 117 chr21 44394084 44395850 CpG:_168
## 118 chr21 44473839 44474083  CpG:_20
## 119 chr21 44480559 44480772  CpG:_19
## 120 chr21 44486140 44486465  CpG:_28
## 121 chr21 44494624 44496989 CpG:_232
## 122 chr21 44515262 44515560  CpG:_23
## 123 chr21 44527163 44528066  CpG:_95
## 124 chr21 44590612 44590826  CpG:_19
## 125 chr21 44592182 44592384  CpG:_18
## 126 chr21 44592719 44593643  CpG:_69
## 127 chr21 44836730 44837207  CpG:_40
## 128 chr21 44840119 44840450  CpG:_27
## 129 chr21 45077671 45079821 CpG:_167
## 130 chr21 45113961 45114264  CpG:_21
## 131 chr21 45114398 45114604  CpG:_17
## 132 chr21 45138293 45139831 CpG:_156
## 133 chr21 45177520 45177727  CpG:_14
## 134 chr21 45195887 45196755  CpG:_94
## 135 chr21 45209221 45209880  CpG:_83
## 136 chr21 45223462 45223724  CpG:_19
## 137 chr21 45379516 45379816  CpG:_24
## 138 chr21 45402484 45402842  CpG:_28
## 139 chr21 45431646 45432978 CpG:_151
## 140 chr21 45526956 45527813 CpG:_108
## 141 chr21 45553021 45554037 CpG:_100
## 142 chr21 45660557 45663172 CpG:_233
## 143 chr21 45678290 45678544  CpG:_24
## 144 chr21 45705428 45706044  CpG:_66
## 145 chr21 45713509 45713813  CpG:_30
## 146 chr21 45719515 45720564 CpG:_119
## 147 chr21 45744313 45744847  CpG:_37
## 148 chr21 45758975 45759525  CpG:_60
## 149 chr21 45789090 45789373  CpG:_21
## 150 chr21 45811345 45811550  CpG:_20
## 151 chr21 45844992 45845693  CpG:_49
## 152 chr21 45875391 45877326 CpG:_131
## 153 chr21 45926531 45926754  CpG:_19
## 154 chr21 46221388 46222456 CpG:_113
## 155 chr21 46236726 46238958 CpG:_217
## 156 chr21 46292709 46294744 CpG:_204
## 157 chr21 46306508 46306795  CpG:_22
## 158 chr21 46321397 46321644  CpG:_18
## 159 chr21 46359647 46360423  CpG:_90
## 160 chr21 46387051 46387302  CpG:_18
## 161 chr21 46410338 46410661  CpG:_31
## 162 chr21 46494173 46495472 CpG:_142
## 163 chr21 46685115 46685379  CpG:_24
## 164 chr21 46686907 46687121  CpG:_22
## 165 chr21 46696972 46697214  CpG:_24
## 166 chr21 46707629 46708751  CpG:_96
## 167 chr21 46714984 46715206  CpG:_16
## 168 chr21 46824531 46826234 CpG:_197
## 169 chr21 46894816 46896312 CpG:_108
## 170 chr21 46897841 46898158  CpG:_24
## 171 chr21 46912381 46912628  CpG:_21
## 172 chr21 46929225 46930211  CpG:_80
## 173 chr21 46932081 46932387  CpG:_28
## 174 chr21 46950586 46952286 CpG:_132
## 175 chr21 46957315 46957916  CpG:_46
## 176 chr21 46961668 46963078 CpG:_144
## 177 chr21 47404108 47404312  CpG:_26
## 178 chr21 47421135 47421398  CpG:_23
## 179 chr21 47421844 47423883 CpG:_160
## 180 chr21 47517651 47518999 CpG:_150
## 181 chr21 47531899 47532821  CpG:_76
## 182 chr21 47545187 47546212  CpG:_88
## 183 chr21 47548938 47549427  CpG:_40
## 184 chr21 47551837 47552629  CpG:_83
## 185 chr21 47565285 47566345  CpG:_78
## 186 chr21 47580492 47582065 CpG:_148
## 187 chr21 47602431 47602740  CpG:_28
## 188 chr21 47648191 47649622 CpG:_141
## 189 chr21 47705627 47706428  CpG:_96
## 190 chr21 47717235 47718059  CpG:_84
## 191 chr21 47742779 47743269  CpG:_37
## 192 chr21 47743685 47745351 CpG:_175
## 193 chr21 47808671 47809121  CpG:_34
## 194 chr21 47811130 47811351  CpG:_18
## 195 chr21 47831533 47831931  CpG:_35
## 196 chr21 47832879 47833257  CpG:_30
## 197 chr21 47845595 47845864  CpG:_20
## 198 chr21 47851448 47851683  CpG:_18
## 199 chr21 47855892 47856143  CpG:_20
## 200 chr21 47878070 47879576 CpG:_167
## 201 chr21 47918497 47918728  CpG:_19
## 202 chr21 48018542 48018791  CpG:_21
## 203 chr21 48055199 48056060  CpG:_88
## 204 chr21 48068517 48068808  CpG:_24
## 205 chr21 48081241 48081849  CpG:_55

#Plotting in R

#Make a scatter plot using vectors named x1 and y1. Each vector should have 10 numbers (either random, consecutive, or some other organization, provided they aren’t too far apart in value). Add a title, x-axis label, and y-axis label. (5 pt)
#Two consecutive integer runs of length 10, offset by 3.
x1 <- 1:10
y1 <- 4:13
plot(
  x = x1,
  y = y1,
  main = "Scatterplot of Vectors",
  xlab = "x values",
  ylab = "y values"
)

#Modify your scatter plot from Question 14 to use the pch argument to change the point style and the col argument to change the colors. Hint: Look up the documentation for the pch argument using ?plot in RStudio. (5 pt)

#Same data as above; pch = 2 and col = "red" change the point symbol and colour.
x1 <- 1:10
y1 <- 4:13
plot(
  x = x1,
  y = y1,
  main = "Scatterplot of Vectors",
  xlab = "x values",
  ylab = "y values",
  pch = 2,
  col = "red"
)

#Make a histogram of the previously defined vector x1 using hist() and customize it with col, xlab, ylab, and main. (5 pt) 
#Plot x1 itself. rnorm(x1) would instead draw length(x1) random normal values
#that are unrelated to the contents of x1 and differ on every run.
hist(
  x1,
  main = "Histogram of x1",
  xlab = "x1",
  ylab = "Frequency",
  col = "blue"
)

#Functions and Control Structures

#Write a function called classify_gc to classify GC percentages as low (< 60), medium (60–75), or high (> 75). (10 pt) 
gc_values <- c(55, 87, 72, 82, 63, 48, 59)

#Print one classification line per GC percentage.
#  gc: numeric vector of GC percentages; NA values are rejected.
#  Bands: low (< 60), medium (60-75, both ends included), high (> 75).
#  Returns NULL invisibly; the labels are written to the console with cat().
classify_gc <- function(gc) {
  stopifnot(is.numeric(gc), !anyNA(gc))
  #Classify the argument itself, not the global gc_values vector.
  band <- ifelse(gc < 60, "low", ifelse(gc <= 75, "medium", "high"))
  #sep = "" because every formatted line already ends with its own newline.
  cat(sprintf("GC percentage is %s\n", band), sep = "")
  invisible(NULL)
}
classify_gc(gc_values)
## GC percentage is low
## GC percentage is high
## GC percentage is medium
## GC percentage is high
## GC percentage is medium
## GC percentage is low
## GC percentage is low
#Use lapply to apply your GC classification function to a vector of GC percentages. You will need to create the GC vector using several possible percentages (e.g., 55, 65, 85) and name the vector gc_values. (5 pt)

#Classify a single GC percentage.
#  gc: one non-missing numeric value.
#  Returns the label string for its band:
#  low (< 60), medium (60-75, both ends included), high (> 75).
class_gc <- function(gc) {
  stopifnot(is.numeric(gc), length(gc) == 1L, !is.na(gc))
  if (gc < 60) {
    "GC percentage is low\n"
  } else if (gc <= 75) {
    #Reaching this branch already implies gc >= 60.
    "GC percentage is medium \n"
  } else {
    "GC percentage is high \n"
  }
}

gc_values <- c(55, 87, 72, 82, 63, 48, 59)

#lapply() returns a list with one label per input; name each entry after its GC value.
classified_gc <- lapply(gc_values, class_gc)
names(classified_gc) <- gc_values
classified_gc
## $`55`
## [1] "GC percentage is low\n"
## 
## $`87`
## [1] "GC percentage is high \n"
## 
## $`72`
## [1] "GC percentage is medium \n"
## 
## $`82`
## [1] "GC percentage is high \n"
## 
## $`63`
## [1] "GC percentage is medium \n"
## 
## $`48`
## [1] "GC percentage is low\n"
## 
## $`59`
## [1] "GC percentage is low\n"