# 1a: ages of the eleven subjects, stored as a numeric vector
age <- c(18, 19, 23, 19, 24, 20, 18, 21, 22, 23, 18)
# 1b: descriptive statistics for age; the knitted output is kept under each call
mean(age)
## [1] 20.45455
median(age)
## [1] 20
min(age)
## [1] 18
max(age)
## [1] 24
range(age)
## [1] 18 24
quantile(age)
## 0% 25% 50% 75% 100%
## 18.0 18.5 20.0 22.5 24.0
sd(age)
## [1] 2.252272
var(age)
## [1] 5.072727
summary(age)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 18.00 18.50 20.00 20.45 22.50 24.00
length(age)
## [1] 11
sum(age)
## [1] 225
IQR(age)
## [1] 4
# Frequency of each distinct age
table(age)
## age
## 18 19 20 21 22 23 24
## 3 2 1 1 1 2 1
# 1c: open the help page for min() and summarise what it does
?min
"In essence, this function will return the smallest value within the data set"
## [1] "In essence, this function will return the smallest value within the data set"
# 1d: open the help page for sd(); it uses the n-1 denominator, i.e. the
# sample (not population) standard deviation
?sd
"sd function divides by n-1, this means that it calculates the sample standard deviation"
## [1] "sd function divides by n-1, this means that it calculates the sample standard deviation"
# 2a: score values, one per subject
score <- c(75, 77, 94, 75, 79, 80, 66, 82, 86, 80, 78)
# 2b: subject identifiers 1 through 11
id <- seq_len(11L)
# 2c: combine the three vectors into a data frame; the column names are taken
# from the vector names (id, age, score)
df <- data.frame(id, age, score)
# 2d: data frame indexing. In df[i, j], i selects rows and j selects columns;
# leaving one of them empty selects everything along that dimension.
df[,1]
## [1] 1 2 3 4 5 6 7 8 9 10 11
"provides all the values in the first column of the df which is the id column"
## [1] "provides all the values in the first column of the df which is the id column"
df[,2]
## [1] 18 19 23 19 24 20 18 21 22 23 18
"provides all the values in the second column of the df which is the age column"
## [1] "provides all the values in the second column of the df which is the age column"
df[1,1]
## [1] 1
"provides the value in the first row of the first column"
## [1] "provides the value in the first row of the first column"
df[2,2]
## [1] 19
"provides the value in the second row of the second column"
## [1] "provides the value in the second row of the second column"
df[1,]
## id age score
## 1 1 18 75
"provides all the values in the first row"
## [1] "provides all the values in the first row"
df[2,]
## id age score
## 2 2 19 77
"provides all the values in the second row"
## [1] "provides all the values in the second row"
# $ extracts a single column as a plain vector
df$id
## [1] 1 2 3 4 5 6 7 8 9 10 11
"calls all the values in the id column"
## [1] "calls all the values in the id column"
# Single-bracket indexing by name keeps the result as a one-column data frame
df["id"]
## id
## 1 1
## 2 2
## 3 3
## 4 4
## 5 5
## 6 6
## 7 7
## 8 8
## 9 9
## 10 10
## 11 11
"calls the id column as a dataframe"
## [1] "calls the id column as a dataframe"
df$age
## [1] 18 19 23 19 24 20 18 21 22 23 18
"calls all the values in the age column"
## [1] "calls all the values in the age column"
df$score
## [1] 75 77 94 75 79 80 66 82 86 80 78
"calls all the values in the score column"
## [1] "calls all the values in the score column"
df["score"]
## score
## 1 75
## 2 77
## 3 94
## 4 75
## 5 79
## 6 80
## 7 66
## 8 82
## 9 86
## 10 80
## 11 78
"calls the score column as a dataframe"
## [1] "calls the score column as a dataframe"
# 3a: point the session at the lab folder so relative file names resolve.
# NOTE(review): this path only exists on the author's machine; anyone else
# running the script must change it or start R in the lab folder instead.
setwd("~/Desktop/lab1")
# 3b: confirm the working directory and that the data file is really there.
getwd()
## [1] "/Users/daraaina/Desktop/lab1"
# Keep the file name in one place so the existence check and the read cannot
# drift apart (the directory listing below shows the file is "testData-1.txt").
data_file <- "testData-1.txt"
file.exists(data_file)
## [1] TRUE
list.files()
## [1] "biostatslab1.Rmd"
## [2] "Lab_1.pdf"
## [3] "Lab1-1.R"
## [4] "Screenshot 2026-04-16 at 8.20.02 AM.png"
## [5] "Screenshot 2026-04-16 at 8.36.31 AM.png"
## [6] "testData-1.txt"
# Read the comma-separated file and name the columns at read time.
# read.table() already returns a data.frame, so no as.data.frame() is needed.
df <- read.table(data_file, sep = ",", col.names = c("id", "age", "score"))
# 4a: add a squared-age column. It is computed from the data frame's own age
# column so the new column always matches the rows it sits next to, rather
# than depending on the separate global vector `age`.
df$age2 <- df$age^2
df
## id age score age2
## 1 1 18 75 324
## 2 2 19 77 361
## 3 3 23 94 529
## 4 4 19 75 361
## 5 5 24 79 576
## 6 6 20 80 400
## 7 7 18 66 324
## 8 8 21 82 441
## 9 9 22 86 484
## 10 10 23 80 529
## 11 11 18 78 324
# Arithmetic on a vector is applied element by element; the result is only
# printed here, `age` itself is left unchanged.
age + 1
## [1] 19 20 24 20 25 21 19 22 23 24 19
"adds 1 to every value in the age vector (age itself is not changed)"
## [1] "adds 1 to every value in the age vector (age itself is not changed)"
age + age
## [1] 36 38 46 38 48 40 36 42 44 46 36
"doubles the age by adding them to themselves ie 18+18=36"
## [1] "doubles the age by adding them to themselves ie 18+18=36"
# 5: set the first subject's age to 19 in both the vector and the data frame
age[1] <- 19
df$age[1] <- 19
# 6: put the first value of the vector back to 18, then summarise the vector.
# NOTE(review): df$age[1] is left at 19 here, so `df` and `age` now disagree
# (and df$age2[1] was computed before the change) - confirm this is intended.
age[1] <- 18
length(age)
## [1] 11
"count"
## [1] "count"
mean(age)
## [1] 20.45455
sd(age)
## [1] 2.252272
median(age)
## [1] 20
# First and third quartiles
quantile(age, probs = 0.25)
## 25%
## 18.5
quantile(age, probs = 0.75)
## 75%
## 22.5
min(age)
## [1] 18
max(age)
## [1] 24
range(age)
## [1] 18 24
#7 Biostatistics is integral to public health because it allows researchers to apply statistical methods to analyze health data that will eventually contribute to evidence-based policies, decisions, and therapies. Biostatistics provides tools to collect data, analyze and interpret it, and eventually apply it to real-world problems. It can be used for disease surveillance, the design of clinical trials, risk assessment, and evidence-based medicine. One key use of biostatistics is disease monitoring and prevention in epidemiological research. Another key use is in clinical trials, where biostatistics can be used to ensure that the trials are properly designed and have applicable and reliable conclusions. Another key use is in community health research, where biostatistics can help ensure equity and proper resource allocation for specific populations. In general, biostatistics helps to minimize bias and uncertainty through proven methods in an effort to understand and improve public health.