Data Acquisition

library(gcookbook)
# Load the heightweight data set that ships with gcookbook and keep a
# working copy under the name used by the rest of the analysis.
data("heightweight", package = "gcookbook")
mydata <- heightweight

1.1 Calculate summary stats

# Population summary statistics for weight in pounds.
# `mydata` is an ordinary data frame already in memory, so it is used
# directly; data() only loads packaged data sets and calling it on `mydata`
# just raised a "data set 'mydata' not found" warning.
count <- length(mydata$weightLb)      # number of observations
mean_weight <- mean(mydata$weightLb)  # population mean
sd_weight <- sd(mydata$weightLb)      # standard deviation (n - 1 denominator)

count 
## [1] 236
mean_weight
## [1] 101.0106
sd_weight
## [1] 18.93395

Count: 236; the mean weight is 101.01 lb and the standard deviation is 18.93 lb.

1.2 Histogram of the Population

# Density-scaled histogram of the population weights, with the population
# mean marked by a dashed vertical line and a single text label.
# - The column is referenced by bare name inside aes(); `mydata$weightLb`
#   bypasses the plot's data and leaks into the axis title.
# - after_stat(density) and linewidth replace stat(density) and size, which
#   are deprecated as of ggplot2 3.4.0.
# - The mean line and label are constants, so they are drawn once
#   (xintercept outside aes(), annotate()) rather than once per data row.
ggplot(data = mydata, aes(x = weightLb)) +
  geom_histogram(
    aes(y = after_stat(density)),
    color = "black", fill = "white", bins = 30
  ) +
  geom_vline(
    xintercept = mean_weight,
    linewidth = 1, color = "black", linetype = "dashed"
  ) +
  annotate(
    "label",
    x = mean_weight, y = 0.05,
    label = paste0("Mean:", round(mean_weight))
  )

### Histogram and mean of sample

#### 2.1 Create a random sample of 15 from the population, and calculate the mean, SD, and 90% and 99% CIs of the mean estimated from the sample weights

# Draw a reproducible random sample of 15 weights from the population and
# estimate the population mean from it (point estimate, SD, 90% and 99% CIs).
# `mydata` is already in memory, so no data() call is needed.

# Fixed seed keeps the sample the same on every run
set.seed(123)
# Sample size of 15
sample_size <- 15
# The result keeps the name `sample` so any later code that reads it still
# works; calls to sample() continue to resolve to the base function.
sample <- sample(mydata$weightLb, size = sample_size)
# Sample mean
sample_mean <- mean(sample)
# Sample standard deviation
sample_sd <- sd(sample)
# 90% confidence interval of the mean. conf.level must be given explicitly:
# t.test() defaults to 0.95, which would silently produce a 95% interval.
ci_90 <- t.test(sample, conf.level = 0.90)$conf.int[1:2]
# 99% confidence interval of the mean
ci_99 <- t.test(sample, conf.level = 0.99)$conf.int[1:2]

# Print the sample estimates; lines starting with `##` are recorded output.
sample_mean 
## [1] 106.2667
sample_sd 
## [1] 17.45491
# NOTE(review): the ci_90 values recorded below have a half-width of about
# 9.67 around the sample mean, which corresponds to a 95% t-interval for
# n = 15 (t.test()'s default conf.level), not a 90% one. Confirm the
# conf.level used for ci_90 and re-knit to refresh this output.
ci_90
## [1]  96.60046 115.93287
ci_99
## [1]  92.85052 119.68281

2.2 Add to the plot from 1.2 (if you can’t do 1.2, then create this as a new plot), by graphing properties of the sample

# Population density histogram overlaid with the sample's properties:
#   black dashed line  - population mean
#   red dotted line    - sample mean
#   green solid lines  - bounds of ci_90
#   blue solid lines   - bounds of ci_99
# plus a translucent red kernel density curve of the population.
# after_stat(density) and linewidth replace stat(density) and size, which are
# deprecated as of ggplot2 3.4.0. Constant reference lines are passed outside
# aes() so each is drawn once instead of once per data row, and each pair of
# CI bounds is drawn by one layer via a length-2 xintercept.
plot <- ggplot(data = mydata, aes(x = weightLb)) +
  geom_histogram(
    aes(y = after_stat(density)),
    color = "black", fill = "white", bins = 30
  ) +
  geom_vline(
    xintercept = mean_weight,
    linewidth = 1, color = "black", linetype = "dashed"
  ) +
  geom_vline(xintercept = sample_mean, color = "red", linetype = "dotted") +
  geom_vline(xintercept = ci_90, color = "green", linetype = "solid") +
  geom_vline(xintercept = ci_99, color = "blue", linetype = "solid") +
  theme(legend.position = "none") +
  annotate(
    "label",
    x = mean_weight, y = 0.05,
    label = paste0("Population Mean: ", round(mean_weight)),
    size = 3
  ) +
  annotate(
    "label",
    x = sample_mean, y = 0.045,
    label = paste0("Sample Mean: ", round(sample_mean)),
    size = 3
  ) +
  annotate(
    "label",
    x = ci_90, y = 0.03,
    label = paste0("90% CI: ", round(ci_90, 2)),
    size = 3
  ) +
  annotate(
    "label",
    x = ci_99, y = 0.03,
    label = paste0("99% CI: ", round(ci_99, 2)),
    size = 3
  ) +
  ggtitle("Density Histogram of Weight(lb)") +
  xlab("Weight LBs") +
  ylab("Density") +
  geom_density(color = "#FF000025", fill = "#FF000025")

plot  # Display the plot