# biostatslab1

# 1a: ages of the eleven participants
age <- c(18, 19, 23, 19, 24, 20, 18, 21, 22, 23, 18)

# 1b: descriptive statistics for age

# Central tendency
mean(age)
## [1] 20.45455

median(age)
## [1] 20

# Extremes: min() and max() individually, range() as a (min, max) pair
min(age)
## [1] 18

max(age)
## [1] 24

range(age)
## [1] 18 24

# Quartiles, including the 0% and 100% points
quantile(age)
##   0%  25%  50%  75% 100% 
## 18.0 18.5 20.0 22.5 24.0

# Spread: standard deviation and variance
sd(age)
## [1] 2.252272

var(age)
## [1] 5.072727

# Six-number summary in a single call
summary(age)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   18.00   18.50   20.00   20.45   22.50   24.00

# Number of observations and their total
length(age)
## [1] 11

sum(age)
## [1] 225

# Interquartile range (75% quantile minus 25% quantile)
IQR(age)
## [1] 4

# Frequency of each distinct age
table(age)
## age
## 18 19 20 21 22 23 24 
##  3  2  1  1  1  2  1

# 1c: open the help page for min() and summarise what it does
?min
"In essence, this function will return the smallest value within the data set"

## [1] "In essence, this function will return the smallest value within the data set"

# 1d: open the help page for sd() and note which denominator it uses
?sd
"sd function divides by n-1, this means that it calculates the sample standard deviation"

## [1] "sd function divides by n-1, this means that it calculates the sample standard deviation"

# 2a: vector of eleven scores
score <- c(75, 77, 94, 75, 79, 80, 66, 82, 86, 80, 78)

# 2b: identifiers 1 through 11
id <- seq_len(11)

# 2c: combine the three vectors into a data frame; each column takes the
# name of the vector supplied for it (id, age, score)
df <- data.frame(id, age, score)

# 2d: explore data-frame indexing.
# In df[i, j] the first index selects rows and the second selects columns;
# leaving an index empty selects everything along that dimension.

df[, 1]

##  [1]  1  2  3  4  5  6  7  8  9 10 11

"provides all the values in the first column of the df, which is the id column"

## [1] "provides all the values in the first column of the df, which is the id column"

df[, 2]

##  [1] 18 19 23 19 24 20 18 21 22 23 18

"provides all the values in the second column of the df, which is the age column"

## [1] "provides all the values in the second column of the df, which is the age column"

df[1, 1]

## [1] 1

"provides the value in the first row and first column"

## [1] "provides the value in the first row and first column"

df[2, 2]

## [1] 19

"provides the value in the second row and second column"

## [1] "provides the value in the second row and second column"

df[1, ]

##   id age score
## 1  1  18    75

"provides all the values in the first row"

## [1] "provides all the values in the first row"

df[2, ]

##   id age score
## 2  2  19    77

"provides all the values in the second row"

## [1] "provides all the values in the second row"

# $ extracts a single column as a plain vector
df$id

##  [1]  1  2  3  4  5  6  7  8  9 10 11

"returns all the values in the id column as a vector"

## [1] "returns all the values in the id column as a vector"

# Single-bracket indexing by name keeps the result as a one-column data frame
df["id"]

##    id
## 1   1
## 2   2
## 3   3
## 4   4
## 5   5
## 6   6
## 7   7
## 8   8
## 9   9
## 10 10
## 11 11

"calls the id column as a dataframe"

## [1] "calls the id column as a dataframe"

df$age

##  [1] 18 19 23 19 24 20 18 21 22 23 18

"returns all the values in the age column as a vector"

## [1] "returns all the values in the age column as a vector"

df$score

##  [1] 75 77 94 75 79 80 66 82 86 80 78

"returns all the values in the score column as a vector"

## [1] "returns all the values in the score column as a vector"

df["score"]

##    score
## 1     75
## 2     77
## 3     94
## 4     75
## 5     79
## 6     80
## 7     66
## 8     82
## 9     86
## 10    80
## 11    78

"calls the score column as a dataframe"

## [1] "calls the score column as a dataframe"

# 3a: point the session at the lab folder.
# NOTE(review): this path is specific to the author's machine; presumably the
# exercise requires setting the working directory, so the call is kept.
setwd("~/Desktop/lab1")

# 3b: confirm the working directory, then locate and read the data file
getwd()

## [1] "/Users/daraaina/Desktop/lab1"

# "testData.txt" is not present under that exact name ...
file.exists("testData.txt")

## [1] FALSE

# ... the directory listing shows the file is called "testData-1.txt"
list.files()

## [1] "biostatslab1.Rmd"                       
## [2] "Lab_1.pdf"                              
## [3] "Lab1-1.R"                               
## [4] "Screenshot 2026-04-16 at 8.20.02 AM.png"
## [5] "Screenshot 2026-04-16 at 8.36.31 AM.png"
## [6] "testData-1.txt"

# Read the comma-separated file and name the columns in the same call.
# read.table() already returns a data frame, so no further coercion is needed.
df <- read.table(
  "testData-1.txt",
  sep = ",",
  col.names = c("id", "age", "score")
)

# 4a: add a squared-age column, derived from the data frame's own age column
# so the new column always matches the data that was read from file
df$age2 <- df$age^2
df

##    id age score age2
## 1   1  18    75  324
## 2   2  19    77  361
## 3   3  23    94  529
## 4   4  19    75  361
## 5   5  24    79  576
## 6   6  20    80  400
## 7   7  18    66  324
## 8   8  21    82  441
## 9   9  22    86  484
## 10 10  23    80  529
## 11 11  18    78  324

# Vectorised arithmetic on the standalone age vector; the result is printed
# but not assigned, so neither age nor df changes
age + 1

##  [1] 19 20 24 20 25 21 19 22 23 24 19

"adds 1 to each value in the age vector without changing age or the data frame"

## [1] "adds 1 to each value in the age vector without changing age or the data frame"

age + age

##  [1] 36 38 46 38 48 40 36 42 44 46 36

"doubles each age by adding the vector to itself, i.e. 18+18=36"

## [1] "doubles each age by adding the vector to itself, i.e. 18+18=36"

# Overwrite the first age in both the vector and the data frame
age[1] <- 19
df$age[1] <- 19

# Restore the first element of the vector before summarising it.
# NOTE(review): df$age[1] is left at 19 (and df$age2[1] at 324) from here on;
# confirm that leaving the data frame changed is intended.
age[1] <- 18
length(age)

## [1] 11

"count"

## [1] "count"

mean(age)

## [1] 20.45455

sd(age)

## [1] 2.252272

median(age)

## [1] 20

# First and third quartiles
quantile(age, probs = 0.25)

##  25% 
## 18.5

quantile(age, probs = 0.75)

##  75% 
## 22.5

min(age)

## [1] 18

max(age)

## [1] 24

range(age)

## [1] 18 24

# 7: Biostatistics is integral to public health because it allows researchers to apply statistical methods to analyze health data that will eventually contribute to evidence-based policies, decisions, and therapies. Biostatistics provides tools to collect data, analyze and interpret it, and eventually apply it to real-world problems. It can be used for disease surveillance, the design of clinical trials, risk assessment, and evidence-based medicine. One key use of biostatistics is disease monitoring and prevention in epidemiological research. Another key use is in clinical trials, where biostatistics can be used to ensure that the trials are properly designed and yield applicable and reliable conclusions. A third key use is in community health research, where biostatistics can help ensure equity and proper resource allocation for specific populations. In general, biostatistics helps to minimize bias and uncertainty through proven methods in an effort to understand and improve public health.