2.1.2 (a) Height and weight of 37 children. Height and weight are continuous quantitative variables, child is the observational unit, n=37 is the sample size.
2.1.3 (a) Number of leaves on each of 25 plants. Leaf count is a discrete quantitative variable, plant is the observational unit, n=25 is the sample size.
# Dendritic branch segment counts, held in a one-column tibble
dendrites <- tibble(
  branchCnt = c(
    23, 30, 54, 28, 31, 29, 34, 35, 30, 27, 21, 43, 51, 35, 51, 49, 35, 24,
    26, 29, 21, 29, 37, 27, 28, 33, 33, 23, 37, 27, 40, 48, 41, 20, 30, 57
  )
)

# Centre statistics of the counts (drawn as reference lines on the histogram)
avg <- mean(dendrites$branchCnt)
med <- median(dendrites$branchCnt)

# Dot plot: dots are stacked over each count value (bin width of 1)
G1 <- ggplot(data = dendrites, mapping = aes(x = branchCnt))
G1 +
  geom_dotplot(binwidth = 1, dotsize = 1, fill = "darkgreen") +
  # the y axis keeps its title but shows no break labels
  scale_y_continuous(name = "Frequency", minor_breaks = NULL, labels = NULL) +
  labs(title = "Dendritic branch segment counts", x = "Count") +
  theme_bw()

# Frequency of each distinct count, rendered as a formatted table
f1 <- count(dendrites, branchCnt)
kableExtra::kable_paper(
  kable(
    f1,
    align = "c",
    caption = "Dendrite branch count frequency",
    col.names = c("Branch Count", "Frequency")
  )
)
| Branch Count | Frequency |
|---|---|
| 20 | 1 |
| 21 | 2 |
| 23 | 2 |
| 24 | 1 |
| 26 | 1 |
| 27 | 3 |
| 28 | 2 |
| 29 | 3 |
| 30 | 3 |
| 31 | 1 |
| 33 | 2 |
| 34 | 1 |
| 35 | 3 |
| 37 | 2 |
| 40 | 1 |
| 41 | 1 |
| 43 | 1 |
| 48 | 1 |
| 49 | 1 |
| 51 | 2 |
| 54 | 1 |
| 57 | 1 |
# Histogram of the dendrite counts with one bar per integer count.
# Breaks are placed on half-integers so that every count gets its own bin,
# centred on its value. With integer breaks hist() uses right-closed bins and
# include.lowest = TRUE, which puts the values 20 and 21 into the same first
# bar and disagrees with the frequency table.
hist(dendrites$branchCnt,
     main = "Histogram of dendritic segment counts",
     xlab = "Segment Count",
     breaks = seq(19.5, 60.5, by = 1))
# Dashed reference lines for the mean (red) and median (blue), each labelled
# just to the right of its line near the top of the plot.
abline(v = avg, col = "red", lty = "longdash", lwd = 2)
text(33.5, 3.07, "mean", col = "red", cex = .7, pos = 4)
abline(v = med, col = "blue", lty = "longdash", lwd = 2)
text(30, 3.07, "median", col = "blue", cex = .7, pos = 4)
# Counts recorded for the ten aliquots
a1 <- c(14, 15, 13, 21, 15, 14, 26, 16, 20, 13)

# Half-integer breaks give each integer count its own bar centred on the value.
# With integer breaks (right-closed bins) the bar for a count of k is drawn
# over (k - 1, k], i.e. to the left of the value that the mean and median
# reference lines are measured against.
aliquotBreaks <- seq(min(a1) - 0.5, max(a1) + 0.5, by = 1)
hist(a1, main = "Histogram of bacteria resistant aliquots",
     xlab = "Aliquot Count", breaks = aliquotBreaks)

# Mean and median of the aliquot counts (replaces the earlier avg / med values)
avg <- mean(a1)
med <- median(a1)

# Dashed reference lines: mean in red, median in blue, each with a text label
abline(v = avg, col = "red", lty = "longdash", lwd = 2)
text(16.55, 2, "mean", col = "red", pos = 4, cex = .9)
abline(v = med, col = "blue", lty = "longdash", lwd = 2)
text(14.9, 2, "median", col = "blue", pos = 4, cex = .9)
Mean frequency of bacteria resistant E.coli aliquots: 16.7
Median frequency of bacteria resistant E.coli aliquots: 15
(a) Five Number Summary
# Eleven milk-yield observations
milk <- c(
  56.5, 89.8, 110.1, 65.6, 63.7, 82.6,
  75.1, 91.5, 102.9, 44.4, 108.1
)
summary(object = milk)  # minimum, quartiles, median, mean and maximum
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 44.40 64.65 82.60 80.94 97.20 110.10
(b) IQR
IQR(x = milk)  # interquartile range: 3rd quartile minus 1st quartile
## [1] 32.55
(c) Boxplot
# Horizontal box-and-whisker plot of the milk yields, filled in ivory
boxplot(
  milk,
  col = "ivory",
  horizontal = TRUE,
  xlab = "yield (liters)",
  main = "Sheep milk yields in liters"
)
Get the rowan tree data
# readxl provides read_excel(), used here to load the rowan tree workbook
library(readxl)
rowanData <- read_excel(
  path = "C:/Users/Owner/OneDrive/Documents/Math 217 Biostats/rowanData.xlsx"
)
head(x = rowanData)  # preview the first rows
## # A tibble: 6 x 3
## tree altitude respiration
## <dbl> <dbl> <dbl>
## 1 1 90 0.11
## 2 2 230 0.2
## 3 3 240 0.13
## 4 4 260 0.15
## 5 5 330 0.18
## 6 6 400 0.16
Respiration rates increase as altitude increases.
# Scatter plot of respiration rate against altitude with two fitted trends
ggplot(data = rowanData, mapping = aes(x = altitude, y = respiration)) +
  geom_point(size = 2) +
  # straight-line (lm) fit, drawn without its standard-error band
  geom_smooth(method = "lm", se = FALSE) +
  # geom_smooth's default smoother in dark green, also without the band
  geom_smooth(se = FALSE, color = "darkgreen") +
  labs(
    title = "Rowan tree respiration rates",
    x = "Altitude (m)",
    y = "Respiration rate (µl / hr · mg)"
  ) +
  theme_bw()
Get the student exercise data
# Load the student exercise workbook, then split the rows by gender code
student <- read_excel(
  path = "C:/Users/Owner/OneDrive/Documents/Math 217 Biostats/studentExerciseData.xlsx"
)
men <- filter(student, gender == "M")    # rows coded "M"
women <- filter(student, gender == "F")  # rows coded "F"
Five number summary for men:
summary(object = men$hrsExercise)  # min, quartiles, median, mean, max for men
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.00 2.00 4.25 4.75 5.25 17.00
Five Number summary for women:
summary(object = women$hrsExercise)  # min, quartiles, median, mean, max for women
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 1 2 3 5 6 14
# Horizontal boxplots of weekly exercise hours, one box per gender.
# Colour is mapped to gender and the palette is keyed by the gender codes
# ("F" / "M"), so each box gets its colour by name and a legend identifies
# the boxes. Passing a bare colour vector as a fixed parameter instead relies
# on its length and order happening to match the boxes that are drawn.
plot_student <- ggplot(student, aes(x = hrsExercise)) +
  geom_boxplot(aes(group = gender, colour = gender),
               outlier.size = 2.5) +                      # slightly larger outlier points
  scale_colour_manual("Gender",
                      values = c("F" = "magenta", "M" = "blue")) +
  labs(title = "Student hours exercise per week",
       x = "Hours exercise") +
  scale_y_continuous("Gender", breaks = NULL) +           # title the y axis, hide its scale
  theme_light()
plot_student                                              # render the plot
The boxplots show right-skewed data due to high outliers. The range of hours exercised per week is larger for men than for women; however, the IQR for women is larger than the IQR for men. The median for men is greater than the median for women.
2.6.5 Birch seedling ATP
# Four birch seedling ATP measurements
birchATP <- c(1.45, 1.19, 1.05, 1.07)
paste("Birch ATP mean:", mean(x = birchATP))  # sample mean, printed unrounded
## [1] "Birch ATP mean: 1.19"
# sample standard deviation, rounded to three decimal places
paste("Birch ATP SD:", round(sd(x = birchATP), digits = 3))
## [1] "Birch ATP SD: 0.184"
2.6.6 Patient blood pressure change
# Load the patient blood-pressure workbook; the Change column is summarised
patientBP <- read_excel(
  path = "C:/Users/Owner/OneDrive/Documents/Math 217 Biostats/patientBPData.xlsx"
)
paste("BP mean change:", mean(x = patientBP$Change))  # mean of the Change column
## [1] "BP mean change: -12.4"
# standard deviation of the Change column, rounded to three decimal places
paste("BP change SD:", round(sd(x = patientBP$Change), digits = 3))
## [1] "BP change SD: 17.589"
2.6.8 Lizard running distance in 2 minutes
Distance run summary:
# Distances run by the 15 lizards in the sample
lizardRun <- c(
  18.4, 22.2, 24.5, 26.4, 27.5, 28.7, 30.6, 32.9,
  32.9, 34.0, 34.8, 37.5, 42.1, 45.5, 45.5
)
summary(lizardRun)  # minimum, quartiles, median, mean and maximum
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 18.40 26.95 32.90 32.23 36.15 45.50
paste("Distance run IQR =", IQR(lizardRun), "meters")  # interquartile range
## [1] "Distance run IQR = 9.2 meters"
rangeLR <- diff(range(lizardRun))  # range = maximum - minimum
# paste() already inserts a space between its arguments, so the label carries
# no trailing blank; this keeps the spacing the same as in the IQR message.
paste("Distance run range =", rangeLR, "meters")
## [1] "Distance run range = 27.1 meters"
2.6.15 Estimate mean & SD for histogram
The mean is approximately 45. The standard deviation is approximately 15.
2.S.7 Seizures in epilepsy patients
Half of the patients had zero seizures, and one patient with 9 seizures skews the data.
# Seizure counts for the 20 patients
seizure <- c(5, 0, 9, 6, 0, 0, 5, 0, 6, 1, 5, 0, 0, 0, 0, 7, 0, 0, 4, 7)
avg <- mean(seizure)    # mean seizure count
med <- median(seizure)  # median seizure count
paste("Median seizures:", med) # show mean and median values
## [1] "Median seizures: 0.5"
paste("Mean seizures:", avg)
## [1] "Mean seizures: 2.75"

# One bar per integer count: breaks on half-integers centre each bin on its
# value. A bare `breaks = 10` produces integer breaks, and hist()'s
# right-closed bins with include.lowest = TRUE then merge the counts 0 and 1
# into a single first bar.
seizureBreaks <- seq(-0.5, max(seizure) + 0.5, by = 1)
hist(seizure, main = "Histogram of epileptic seizure counts",
     xlab = "seizure counts per patient",
     breaks = seizureBreaks,
     xlim = c(-0.5, 10),   # start at -0.5 so the whole bar for 0 is visible
     ylim = c(0, 12))      # leave room above the tallest bar for the labels

# Dashed reference lines: mean in red, median in blue, each with a text label
abline(v = avg, col = "red", lty = "longdash", lwd = 2)
text(2.7, 11.3, "mean", col = "red", pos = 4, cex = .8)
abline(v = med, col = "blue", lty = "longdash", lwd = 2)
text(.4, 11.3, "median", col = "blue", pos = 4, cex = .8)
2.S.13 Pony intestine nerve cells
Nerve Cell data summary:
# Nerve cell counts (23 observations)
nerveCells <- c(
  35, 19, 33, 34, 17, 26, 16, 40, 28, 30, 23, 12,
  27, 33, 22, 31, 28, 28, 35, 23, 23, 19, 29
)
summary(object = nerveCells)  # minimum, quartiles, median, mean and maximum
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 12.00 22.50 28.00 26.57 32.00 40.00
paste("IQR nerve cell counts:", IQR(x = nerveCells))  # interquartile range
## [1] "IQR nerve cell counts: 9.5"
# Horizontal boxplot of the counts, filled in tan
boxplot(
  nerveCells,
  col = "tan",
  horizontal = TRUE,
  xlab = "Count",
  main = "Nerve cell counts"
)
# Mark the sample mean with a dashed red vertical line and label it
abline(v = mean(nerveCells), lwd = 2, lty = "longdash", col = "red")
text(x = 25, y = .6, labels = "mean", pos = 1, col = "red")
2.S.14 Is the nerve cell data reasonably symmetric? Yes. Although the data are slightly left-skewed, the distribution is reasonably symmetric: there are no outliers and the mean is close to the median.
2.S.22 Match Boxplot to Histogram: The boxplot matches histogram (a); both plots show right-skewed data with high outliers.