# Install and load the rlang package
#install.packages("rlang")
library(rlang)
#need dplyr and tidyr for the data reshaping steps below
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library (tidyr)
library(ggplot2)
#Sydney Burch
#2-4-2024
#homework #1
#Problem 1
#part A ----
#pulling the data table up
# Read the Problem 1 patient table; the first line of the CSV holds the column names.
data <- read.csv(
  file = "C:/Users/sydne/Documents/Sydney Not Synced/BSBTProgram2/Spring 2025/bioinformatics/R homework 1/Prob1.csv",
  header = TRUE
)
# Confirm what kind of object read.csv() returned.
class(data)
## [1] "data.frame"
#part B ----
# Pulmonary vascular resistance=(Mean Pulmonary Pressure-Pulmonary Capillary Wedge Pressure)/Cardiac Output
#(mPaP-PCWP)/CO
#for patient 6
# Pull the three haemodynamic measurements from row 6 of the table.
mpap <- data[["mPAP"]][6]
pcwp <- data[["PCWP"]][6]
co <- data[["CO"]][6]
# PVR = (mPAP - PCWP) / CO for that single patient.
PVR6 <- (mpap - pcwp) / co
print(PVR6)
## [1] 2.281369
#or
# Same formula applied to whole columns. Single-bracket indexing keeps each
# column as a one-column data frame, so the arithmetic is done element-wise
# and the result is again a one-column data frame.
mpap2 <- data["mPAP"]
pcwp2 <- data["PCWP"]
co2 <- data["CO"]
PVR2 <- (mpap2 - pcwp2) / co2
# Flatten the one-column data frame into a named numeric vector for printing.
pvR3 <- unlist(PVR2)
print(pvR3)
## mPAP1 mPAP2 mPAP3 mPAP4 mPAP5 mPAP6 mPAP7 mPAP8
## 2.564103 17.763158 16.173121 5.800000 6.986028 2.281369 1.257862 5.639098
## mPAP9 mPAP10 mPAP11 mPAP12 mPAP13 mPAP14 mPAP15 mPAP16
## 3.846154 9.487666 26.739927 10.879630 6.925208 8.644068 32.365145 7.631579
## mPAP17 mPAP18 mPAP19 mPAP20 mPAP21 mPAP22 mPAP23 mPAP24
## 1.781737 5.263158 3.197674 4.364570 1.520468 1.307190 2.579666 10.909091
## mPAP25 mPAP26 mPAP27 mPAP28 mPAP29 mPAP30 mPAP31 mPAP32
## 2.237762 7.419355 9.090909 6.060606 13.648294 8.450704 10.037879 27.173913
## mPAP33 mPAP34 mPAP35 mPAP36 mPAP37 mPAP38 mPAP39 mPAP40
## 8.940397 5.729167 3.654485 11.627907 12.713936 5.820106 8.641975 2.139800
## mPAP41 mPAP42 mPAP43 mPAP44 mPAP45 mPAP46 mPAP47 mPAP48
## 5.734767 38.970588 14.960630 4.291845 15.151515 16.042781 8.806818 16.190476
## mPAP49 mPAP50 mPAP51 mPAP52 mPAP53 mPAP54 mPAP55 mPAP56
## 20.769231 7.843137 3.325942 3.614458 20.765027 2.808989 5.352113 2.608696
## mPAP57 mPAP58 mPAP59 mPAP60 mPAP61 mPAP62 mPAP63 mPAP64
## 3.810976 3.302374 3.453947 4.061896 5.769231 7.741935 23.437500 19.909502
## mPAP65 mPAP66 mPAP67 mPAP68 mPAP69 mPAP70 mPAP71 mPAP72
## 14.056225 7.063197 5.974843 13.186813 6.849315 2.342606 5.259087 6.582633
## mPAP73 mPAP74 mPAP75 mPAP76 mPAP77 mPAP78 mPAP79 mPAP80
## 22.388060 7.730673 4.430380 3.676471 8.148148 7.800000 3.790614 4.245974
## mPAP81 mPAP82 mPAP83 mPAP84 mPAP85 mPAP86 mPAP87 mPAP88
## 12.903226 6.926407 7.032967 8.237986 23.214286 1.782820 3.481894 2.420242
## mPAP89 mPAP90 mPAP91 mPAP92 mPAP93 mPAP94 mPAP95 mPAP96
## 9.398496 6.635071 13.071895 9.900990 10.802469 11.642412 3.339192 4.054054
## mPAP97 mPAP98 mPAP99 mPAP100 mPAP101 mPAP102 mPAP103 mPAP104
## 5.737705 6.278027 5.321508 10.312500 12.987013 9.734513 3.754941 6.286550
## mPAP105 mPAP106 mPAP107 mPAP108 mPAP109 mPAP110 mPAP111 mPAP112
## 5.027933 22.834646 4.166667 12.974684 8.881579 3.649635 34.355828 15.546218
## mPAP113 mPAP114 mPAP115
## 7.072692 13.028169 8.000000
#part C ----
# PVR for every patient: PVR = (mPAP - PCWP) / CO.
# Column arithmetic in R is vectorised, so no explicit loop is needed. This
# also avoids the `1:nrow(data)` pitfall, which iterates over c(1, 0) when the
# table has no rows; the vectorised form simply returns an empty vector.
# `[[` is used so the column names must match exactly and the result is a
# plain, unnamed numeric vector with one entry per row of `data`.
PVR <- (data[["mPAP"]] - data[["PCWP"]]) / data[["CO"]]
print(PVR)
## [1] 2.564103 17.763158 16.173121 5.800000 6.986028 2.281369 1.257862
## [8] 5.639098 3.846154 9.487666 26.739927 10.879630 6.925208 8.644068
## [15] 32.365145 7.631579 1.781737 5.263158 3.197674 4.364570 1.520468
## [22] 1.307190 2.579666 10.909091 2.237762 7.419355 9.090909 6.060606
## [29] 13.648294 8.450704 10.037879 27.173913 8.940397 5.729167 3.654485
## [36] 11.627907 12.713936 5.820106 8.641975 2.139800 5.734767 38.970588
## [43] 14.960630 4.291845 15.151515 16.042781 8.806818 16.190476 20.769231
## [50] 7.843137 3.325942 3.614458 20.765027 2.808989 5.352113 2.608696
## [57] 3.810976 3.302374 3.453947 4.061896 5.769231 7.741935 23.437500
## [64] 19.909502 14.056225 7.063197 5.974843 13.186813 6.849315 2.342606
## [71] 5.259087 6.582633 22.388060 7.730673 4.430380 3.676471 8.148148
## [78] 7.800000 3.790614 4.245974 12.903226 6.926407 7.032967 8.237986
## [85] 23.214286 1.782820 3.481894 2.420242 9.398496 6.635071 13.071895
## [92] 9.900990 10.802469 11.642412 3.339192 4.054054 5.737705 6.278027
## [99] 5.321508 10.312500 12.987013 9.734513 3.754941 6.286550 5.027933
## [106] 22.834646 4.166667 12.974684 8.881579 3.649635 34.355828 15.546218
## [113] 7.072692 13.028169 8.000000
#Part D
# box plot-----
# Single boxplot of the sPAP column.
boxplot(
  data$sPAP,
  main = "Boxplot of SPAP",
  ylab = "Systolic Pulmonary Arterial Pressure",
  col = "lightblue"
)

# One boxplot per column for columns 2 through 14 of the table; each plot is
# titled and labelled with the column's own name.
columns1 <- names(data[2:14])
for (column_name in columns1) {
  plot_title <- paste("Boxplot of", column_name)
  boxplot(
    data[[column_name]],
    main = plot_title,
    ylab = column_name,
    col = "lightblue"
  )
}













#Part E ----
#Create scatter plots
# Lay the three scatter plots out as 3 rows x 1 column on the same device.
par(mfrow = c(3, 1))

# Plot A: PVR against NT.proBNP.
plot(
  x = PVR,
  y = data$NT.proBNP,
  xlab = "Pulmonary Vascular Resistance",
  ylab = "NT.proBNP",
  main = "PVR vs. NT.ProBNP"
)

# Plot B: PVR divided by age, against NT.proBNP.
PVR_age_ratio <- PVR / data$Age
plot(
  x = PVR_age_ratio,
  y = data$NT.proBNP,
  xlab = "PVR:AGE ratio",
  ylab = "NT.proBNP",
  main = "PVR:AGE ratio vs. NT.proBNP "
)

# Plot C: the PP column against the C.pulm column.
plot(
  x = data$PP,
  y = data$C.pulm,
  xlab = "Pulse Pressure",
  ylab = "Pulmonary Arterial Compliance",
  main = "PP vs. Pulmonary Arterial Compliance"
)

#Part F----
# List the data I want
# Keep each septal-flattening indicator as its own one-column data frame
# (drop = FALSE stops R from collapsing the column to a bare vector).
septalflat1 <- data[, "septalflat___1", drop = FALSE]
septalflat2 <- data[, "septalflat___2", drop = FALSE]
septalflat3 <- data[, "septalflat___3", drop = FALSE]
# Show the first indicator column.
print(septalflat1)
## septalflat___1
## 1 1
## 2 0
## 3 0
## 4 1
## 5 1
## 6 1
## 7 0
## 8 1
## 9 1
## 10 0
## 11 0
## 12 0
## 13 0
## 14 0
## 15 0
## 16 0
## 17 0
## 18 1
## 19 1
## 20 0
## 21 0
## 22 0
## 23 0
## 24 0
## 25 0
## 26 0
## 27 1
## 28 1
## 29 0
## 30 0
## 31 0
## 32 0
## 33 0
## 34 1
## 35 0
## 36 0
## 37 0
## 38 1
## 39 0
## 40 0
## 41 0
## 42 0
## 43 0
## 44 0
## 45 0
## 46 0
## 47 1
## 48 0
## 49 0
## 50 0
## 51 0
## 52 1
## 53 0
## 54 0
## 55 0
## 56 0
## 57 0
## 58 0
## 59 1
## 60 1
## 61 0
## 62 0
## 63 0
## 64 0
## 65 1
## 66 1
## 67 0
## 68 1
## 69 0
## 70 0
## 71 1
## 72 1
## 73 0
## 74 1
## 75 1
## 76 1
## 77 0
## 78 1
## 79 0
## 80 0
## 81 0
## 82 0
## 83 0
## 84 0
## 85 0
## 86 0
## 87 1
## 88 0
## 89 0
## 90 0
## 91 1
## 92 1
## 93 0
## 94 0
## 95 1
## 96 0
## 97 0
## 98 0
## 99 1
## 100 0
## 101 0
## 102 0
## 103 1
## 104 1
## 105 1
## 106 1
## 107 0
## 108 0
## 109 0
## 110 1
## 111 0
## 112 0
## 113 0
## 114 0
## 115 1
# Turn the three indicator columns into ONE severity grade per patient.
# Scaling column k by k makes a flagged patient carry the grade k
# (assumes each septalflat___k column is a 0/1 indicator -- TODO confirm
# against the data dictionary).
sf1 <- septalflat1 * 1
sf2 <- septalflat2 * 2
sf3 <- septalflat3 * 3
# Flatten each one-column data frame to a plain, unnamed numeric vector.
sf1u <- unlist(sf1, use.names = FALSE)
sf2u <- unlist(sf2, use.names = FALSE)
sf3u <- unlist(sf3, use.names = FALSE)
# Element-wise maximum = the highest grade flagged for each patient
# (0 when none of the three columns is flagged).
# The vectors must NOT be concatenated with c(): that yields three values per
# patient, and data.frame(PVR, sft) would then silently recycle PVR so every
# patient is counted three times, mostly in the "0" group.
sft <- factor(pmax(sf1u, sf2u, sf3u))
# NOTE: sft now has exactly one entry per patient, so any previously recorded
# printout with three named entries per patient no longer applies.
print(sft)
## septalflat___11 septalflat___12 septalflat___13 septalflat___14
## 1 0 0 1
## septalflat___15 septalflat___16 septalflat___17 septalflat___18
## 1 1 0 1
## septalflat___19 septalflat___110 septalflat___111 septalflat___112
## 1 0 0 0
## septalflat___113 septalflat___114 septalflat___115 septalflat___116
## 0 0 0 0
## septalflat___117 septalflat___118 septalflat___119 septalflat___120
## 0 1 1 0
## septalflat___121 septalflat___122 septalflat___123 septalflat___124
## 0 0 0 0
## septalflat___125 septalflat___126 septalflat___127 septalflat___128
## 0 0 1 1
## septalflat___129 septalflat___130 septalflat___131 septalflat___132
## 0 0 0 0
## septalflat___133 septalflat___134 septalflat___135 septalflat___136
## 0 1 0 0
## septalflat___137 septalflat___138 septalflat___139 septalflat___140
## 0 1 0 0
## septalflat___141 septalflat___142 septalflat___143 septalflat___144
## 0 0 0 0
## septalflat___145 septalflat___146 septalflat___147 septalflat___148
## 0 0 1 0
## septalflat___149 septalflat___150 septalflat___151 septalflat___152
## 0 0 0 1
## septalflat___153 septalflat___154 septalflat___155 septalflat___156
## 0 0 0 0
## septalflat___157 septalflat___158 septalflat___159 septalflat___160
## 0 0 1 1
## septalflat___161 septalflat___162 septalflat___163 septalflat___164
## 0 0 0 0
## septalflat___165 septalflat___166 septalflat___167 septalflat___168
## 1 1 0 1
## septalflat___169 septalflat___170 septalflat___171 septalflat___172
## 0 0 1 1
## septalflat___173 septalflat___174 septalflat___175 septalflat___176
## 0 1 1 1
## septalflat___177 septalflat___178 septalflat___179 septalflat___180
## 0 1 0 0
## septalflat___181 septalflat___182 septalflat___183 septalflat___184
## 0 0 0 0
## septalflat___185 septalflat___186 septalflat___187 septalflat___188
## 0 0 1 0
## septalflat___189 septalflat___190 septalflat___191 septalflat___192
## 0 0 1 1
## septalflat___193 septalflat___194 septalflat___195 septalflat___196
## 0 0 1 0
## septalflat___197 septalflat___198 septalflat___199 septalflat___1100
## 0 0 1 0
## septalflat___1101 septalflat___1102 septalflat___1103 septalflat___1104
## 0 0 1 1
## septalflat___1105 septalflat___1106 septalflat___1107 septalflat___1108
## 1 1 0 0
## septalflat___1109 septalflat___1110 septalflat___1111 septalflat___1112
## 0 1 0 0
## septalflat___1113 septalflat___1114 septalflat___1115 septalflat___21
## 0 0 1 0
## septalflat___22 septalflat___23 septalflat___24 septalflat___25
## 2 2 0 0
## septalflat___26 septalflat___27 septalflat___28 septalflat___29
## 0 0 0 0
## septalflat___210 septalflat___211 septalflat___212 septalflat___213
## 0 0 0 0
## septalflat___214 septalflat___215 septalflat___216 septalflat___217
## 0 2 0 0
## septalflat___218 septalflat___219 septalflat___220 septalflat___221
## 0 0 0 0
## septalflat___222 septalflat___223 septalflat___224 septalflat___225
## 0 0 0 0
## septalflat___226 septalflat___227 septalflat___228 septalflat___229
## 0 0 0 0
## septalflat___230 septalflat___231 septalflat___232 septalflat___233
## 0 0 0 0
## septalflat___234 septalflat___235 septalflat___236 septalflat___237
## 0 0 0 2
## septalflat___238 septalflat___239 septalflat___240 septalflat___241
## 0 0 0 0
## septalflat___242 septalflat___243 septalflat___244 septalflat___245
## 0 0 0 0
## septalflat___246 septalflat___247 septalflat___248 septalflat___249
## 0 0 0 0
## septalflat___250 septalflat___251 septalflat___252 septalflat___253
## 0 0 0 0
## septalflat___254 septalflat___255 septalflat___256 septalflat___257
## 0 0 0 0
## septalflat___258 septalflat___259 septalflat___260 septalflat___261
## 0 0 0 0
## septalflat___262 septalflat___263 septalflat___264 septalflat___265
## 0 0 0 0
## septalflat___266 septalflat___267 septalflat___268 septalflat___269
## 0 0 0 0
## septalflat___270 septalflat___271 septalflat___272 septalflat___273
## 0 0 0 2
## septalflat___274 septalflat___275 septalflat___276 septalflat___277
## 0 0 0 2
## septalflat___278 septalflat___279 septalflat___280 septalflat___281
## 0 2 2 0
## septalflat___282 septalflat___283 septalflat___284 septalflat___285
## 0 0 0 2
## septalflat___286 septalflat___287 septalflat___288 septalflat___289
## 0 0 0 0
## septalflat___290 septalflat___291 septalflat___292 septalflat___293
## 0 0 0 2
## septalflat___294 septalflat___295 septalflat___296 septalflat___297
## 2 0 2 0
## septalflat___298 septalflat___299 septalflat___2100 septalflat___2101
## 0 0 0 0
## septalflat___2102 septalflat___2103 septalflat___2104 septalflat___2105
## 0 0 0 0
## septalflat___2106 septalflat___2107 septalflat___2108 septalflat___2109
## 0 0 2 2
## septalflat___2110 septalflat___2111 septalflat___2112 septalflat___2113
## 0 2 2 0
## septalflat___2114 septalflat___2115 septalflat___31 septalflat___32
## 0 0 0 0
## septalflat___33 septalflat___34 septalflat___35 septalflat___36
## 0 0 0 0
## septalflat___37 septalflat___38 septalflat___39 septalflat___310
## 0 0 0 0
## septalflat___311 septalflat___312 septalflat___313 septalflat___314
## 3 0 0 0
## septalflat___315 septalflat___316 septalflat___317 septalflat___318
## 0 0 0 0
## septalflat___319 septalflat___320 septalflat___321 septalflat___322
## 0 0 0 0
## septalflat___323 septalflat___324 septalflat___325 septalflat___326
## 0 0 0 0
## septalflat___327 septalflat___328 septalflat___329 septalflat___330
## 0 0 3 3
## septalflat___331 septalflat___332 septalflat___333 septalflat___334
## 3 3 0 0
## septalflat___335 septalflat___336 septalflat___337 septalflat___338
## 0 0 0 0
## septalflat___339 septalflat___340 septalflat___341 septalflat___342
## 3 0 0 0
## septalflat___343 septalflat___344 septalflat___345 septalflat___346
## 0 0 0 0
## septalflat___347 septalflat___348 septalflat___349 septalflat___350
## 0 0 0 0
## septalflat___351 septalflat___352 septalflat___353 septalflat___354
## 0 0 0 0
## septalflat___355 septalflat___356 septalflat___357 septalflat___358
## 0 0 0 0
## septalflat___359 septalflat___360 septalflat___361 septalflat___362
## 0 0 0 0
## septalflat___363 septalflat___364 septalflat___365 septalflat___366
## 0 0 0 0
## septalflat___367 septalflat___368 septalflat___369 septalflat___370
## 0 0 0 0
## septalflat___371 septalflat___372 septalflat___373 septalflat___374
## 0 0 0 0
## septalflat___375 septalflat___376 septalflat___377 septalflat___378
## 0 0 0 0
## septalflat___379 septalflat___380 septalflat___381 septalflat___382
## 0 0 0 0
## septalflat___383 septalflat___384 septalflat___385 septalflat___386
## 0 0 0 0
## septalflat___387 septalflat___388 septalflat___389 septalflat___390
## 0 0 0 0
## septalflat___391 septalflat___392 septalflat___393 septalflat___394
## 0 0 0 0
## septalflat___395 septalflat___396 septalflat___397 septalflat___398
## 0 0 0 0
## septalflat___399 septalflat___3100 septalflat___3101 septalflat___3102
## 0 0 3 3
## septalflat___3103 septalflat___3104 septalflat___3105 septalflat___3106
## 0 0 0 0
## septalflat___3107 septalflat___3108 septalflat___3109 septalflat___3110
## 0 0 0 0
## septalflat___3111 septalflat___3112 septalflat___3113 septalflat___3114
## 0 0 0 3
## septalflat___3115
## 0
## Levels: 0 1 2 3
#make in to data frame
# Pair each PVR value with its septal-flattening factor level.
newdf <- data.frame(PVR = PVR, sft = sft)
# Back to a single plot per device page.
par(mfrow = c(1, 1))
# One box of PVR values per level of sft.
boxplot(
  PVR ~ sft,
  data = newdf,
  xlab = "sft",
  ylab = "PVR",
  main = " septal flattening vs. Pulmonary Vascular Resistance"
)

#It looks like patients with higher severity septal flattening have lower PVR in general.
#Problem 2: ----
#pulling the data table up
#Problem 2: part 1
# Read the Problem 2 table; the first line of the CSV holds the column names.
prob2d <- read.csv(
  "C:/Users/sydne/Documents/Sydney Not Synced/BSBTProgram2/Spring 2025/bioinformatics/R homework 1/Prob2-1.csv",
  header = TRUE
)
# View() opens a spreadsheet-style viewer and needs a GUI session; only call
# it interactively so running the script with Rscript or when rendering a
# report does not fail.
if (interactive()) {
  View(prob2d)
}
#The data needs to be reshaped so that each patient is on one row, with one column per target name.
# Question 2 Part 2----
# Sort the rows by target name ...
dp <- prob2d[order(prob2d$Target.Name), ]
# ... then by patient number (ascending).
dp1 <- dp[order(dp$Patient..No., decreasing = FALSE), ]
# Re-organize the data: one row per patient / biological group, and one RQ
# column per target name.
# pivot_wider() replaces the superseded spread(); names_sort = TRUE keeps the
# new target columns in sorted order, and as.data.frame() guarantees a plain
# data frame for the base-R indexing and printing that follow.
reorganized_data <- dp1 %>%
  select(Patient..No., Biological.Group.Name, Target.Name, RQ) %>%
  pivot_wider(
    names_from = Target.Name,
    values_from = RQ,
    names_sort = TRUE
  ) %>%
  as.data.frame()
# Display the re-organized dataframe
print(reorganized_data)
## Patient..No. Biological.Group.Name ath-miR159a-000338 hsa-let-7a-000377
## 1 1 mild 0.116952937 1
## 2 2 mild 0.429282718 1
## 3 3 mild 11.424031840 1
## 4 4 mild 2.363623094 1
## 5 5 mild 1.005560580 1
## 6 6 mild 0.758909626 1
## 7 7 mild 1.464085696 1
## 8 8 mild 0.660211421 1
## 9 9 severe 0.058032303 1
## 10 10 severe 2.086377187 1
## 11 11 severe 0.171347851 1
## 12 12 severe 0.271683716 1
## 13 13 severe 0.003572129 1
## 14 14 severe 0.226722582 1
## 15 15 severe 0.071743901 1
## 16 16 severe 0.050590139 1
## hsa-let-7b-002619 hsa-let-7c-000379 hsa-let-7d-002283 hsa-let-7e-002406
## 1 0.05854237 1.3221400 1.456368054 1.42664363
## 2 6.54605171 1.3221400 1.456368054 1.42664363
## 3 0.08036020 1.3221400 1.456368054 1.42664363
## 4 2.72758660 1.3221400 0.071961796 1.42664363
## 5 4.79531305 0.1415982 1.456368054 0.20584424
## 6 0.05793685 1.3221400 1.456368054 1.42664363
## 7 6.54605171 1.3221400 1.456368054 0.57619338
## 8 6.54605171 1.3221400 1.456368054 1.42664363
## 9 2.24485926 1.3221400 0.062299958 0.09976136
## 10 12.12258110 1.3221400 1.456368054 1.42664363
## 11 6.54605171 1.3221400 1.456368054 1.42664363
## 12 3.30036366 1.3221400 1.456368054 1.42664363
## 13 6.54605171 1.3221400 0.001806452 0.03161220
## 14 1.17090579 1.3221400 1.456368054 0.01543795
## 15 6.54605171 1.3221400 1.456368054 1.42664363
## 16 6.54605171 1.3221400 1.456368054 404.11581440
## hsa-let-7f-000382 hsa-let-7g-002282 hsa-miR-21-000397
## 1 3.894810e-01 1.647671e-01 1.51492881
## 2 7.355222e+02 3.033240e+05 2.12763400
## 3 3.894810e-01 1.647671e-01 0.01309582
## 4 3.894810e-01 1.647671e-01 1.74987273
## 5 3.894810e-01 1.647671e-01 7.38833868
## 6 3.894810e-01 1.647671e-01 2.86244902
## 7 3.894810e-01 1.647671e-01 0.42256641
## 8 3.894810e-01 1.647671e-01 1.51492881
## 9 1.353839e+06 1.182221e+00 3.33861561
## 10 3.894810e-01 1.647671e-01 1.21693322
## 11 3.894810e-01 1.647671e-01 1.54247809
## 12 3.894810e-01 1.647671e-01 3.71728680
## 13 3.894810e-01 1.647671e-01 6.58074081
## 14 3.894810e-01 7.251886e-02 7.88576107
## 15 3.894810e-01 1.177197e-02 1.46840507
## 16 3.894810e-01 1.647671e-01 1.16251554
#Question 2:Part 3
# Filter out columns where more than 50% of the RQ values are the same.
# A column is kept only when its single most frequent value occurs in at most
# half of the rows. Counting duplicated() entries instead would add up repeats
# of *different* values (e.g. many distinct pairs) and is off by one against
# the 50% rule when the number of rows is odd.
threshold <- 0.5 * nrow(reorganized_data)
columns_to_keep <- vapply(
  reorganized_data,
  function(col) {
    # Frequency of each distinct non-missing value in the column.
    counts <- table(col)
    # A column with no non-missing values carries no information: drop it.
    length(counts) > 0L && max(counts) <= threshold
  },
  logical(1)
)
# The identifier columns are always kept, however repetitive they are.
columns_to_keep[c("Biological.Group.Name", "Patient..No.")] <- TRUE
# drop = FALSE keeps a data frame even if only one column survives.
filtered_data <- reorganized_data[, columns_to_keep, drop = FALSE]
# Display the filtered dataframe
print(filtered_data)
## Patient..No. Biological.Group.Name ath-miR159a-000338 hsa-let-7b-002619
## 1 1 mild 0.116952937 0.05854237
## 2 2 mild 0.429282718 6.54605171
## 3 3 mild 11.424031840 0.08036020
## 4 4 mild 2.363623094 2.72758660
## 5 5 mild 1.005560580 4.79531305
## 6 6 mild 0.758909626 0.05793685
## 7 7 mild 1.464085696 6.54605171
## 8 8 mild 0.660211421 6.54605171
## 9 9 severe 0.058032303 2.24485926
## 10 10 severe 2.086377187 12.12258110
## 11 11 severe 0.171347851 6.54605171
## 12 12 severe 0.271683716 3.30036366
## 13 13 severe 0.003572129 6.54605171
## 14 14 severe 0.226722582 1.17090579
## 15 15 severe 0.071743901 6.54605171
## 16 16 severe 0.050590139 6.54605171
## hsa-miR-21-000397
## 1 1.51492881
## 2 2.12763400
## 3 0.01309582
## 4 1.74987273
## 5 7.38833868
## 6 2.86244902
## 7 0.42256641
## 8 1.51492881
## 9 3.33861561
## 10 1.21693322
## 11 1.54247809
## 12 3.71728680
## 13 6.58074081
## 14 7.88576107
## 15 1.46840507
## 16 1.16251554
#making the name easier to type.
# Shorter alias for the filtered table.
fddf <- filtered_data
#Problem 4
# Create boxplots for each remaining miRNA column.
# Reshape to long format: one row per (patient, miRNA) pair, with the column
# name stored in `miRNA` and its value in `RQ`. pivot_longer() replaces the
# superseded gather(); the two identifier columns are excluded from the pivot.
long_data <- filtered_data %>%
  pivot_longer(
    cols = -c(Patient..No., Biological.Group.Name),
    names_to = "miRNA",
    values_to = "RQ"
  )
# Create the boxplots: one panel per miRNA, comparing the biological groups.
# scales = "free_y" gives each panel its own y-axis range.
ggplot(
  long_data,
  aes(x = Biological.Group.Name, y = RQ, fill = Biological.Group.Name)
) +
  geom_boxplot() +
  facet_wrap(~ miRNA, scales = "free_y") +
  theme_minimal() +
  labs(
    title = "Comparison of miRNA between Mild and Severe Patients",
    x = "Biological Group Name",
    y = "RQ Value"
  ) +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

#It looks like hsa-miR-21-000397 is the most up-regulated miRNA in the severe patients.
#Severe patients appear to be lacking ath-miR159a-000338.