1.1) Calculate the count, mean and sd of the population weightLb and communicate the values in the document.
# Population summary for weightLb: mean, standard deviation and number of
# observations.
SumMean <- mean(PopSet$weightLb)
SumSD <- sd(PopSet$weightLb)

# count() is presumably dplyr::count -- with no grouping columns it returns a
# one-row table; its first row is converted to a plain number.
SumCountDF <- count(PopSet)
SumCount <- as.numeric(SumCountDF[1, ])
There are 236 observations in this population, and the mean weight in pounds is 101 with a standard deviation of 18.93.
1.2) Plot a density histogram of the population weightLb, and draw a heavy black dashed vertical line at the location of the mean and use white bars with black edges for the histogram.
# Density histogram of the population weights (white bars, black edges) with
# the population mean marked by a heavy dashed black vertical line and a label.
ggplot(data = PopSet, aes(x = weightLb)) +
  # after_stat(density) rescales bar heights so the total bar area is 1.
  geom_histogram(
    aes(y = after_stat(density)),
    color = "black", fill = "white", bins = 20
  ) +
  # The mean is a constant, so it is passed outside aes(): one line is drawn
  # instead of one overplotted line per row of PopSet.
  geom_vline(xintercept = SumMean, size = 2, color = "black", linetype = "dashed") +
  # annotate() draws a single label rather than one copy per data row.
  annotate("label", x = SumMean, y = .05, label = paste0("Mean:", round(SumMean)))
The difference between a histogram and a density histogram is that a histogram shows the count of observations in each bin, whereas a density histogram rescales the bar heights so that the total area of the bars adds up to 1 (i.e., each bar's area is the proportion of observations falling in that bin).
2.1) Now create a random sample of 15 from the population, and calculate the mean, sd, and .90 and .99 CI of the mean predicted from the sample weights. NOTE: If you use set.seed() you will always get the same sample
2.2) Add to the plot from 1.2 (if you can’t do 1.2, then create this as a new plot) , by graphing properties of the sample density histogram in transparent red, sample mean as a red dotted vertical line, .90 and .99 CI bounds as green and blue solid vertical lines respectively.
# Draw a reproducible random sample of 15 rows from the population.
set.seed(123)
SamplePop <- PopSet[sample(seq_len(nrow(PopSet)), 15), ]

# Sample statistics that feed both confidence intervals.
SampMean <- mean(SamplePop$weightLb)
SampSD <- sd(SamplePop$weightLb)
SampN <- length(SamplePop$weightLb) # number of sampled weights (15)
DegFree <- SampN - 1 # degrees of freedom for the t distribution
SampSE <- SampSD / sqrt(SampN) # standard error of the mean

# Significance levels: confidence level = 1 - alpha.
alpha90 <- 0.10
alpha99 <- 0.01

# 90% CI: mean -/+ (upper-tail t quantile at alpha / 2) * standard error.
TScore90 <- qt(p = alpha90 / 2, df = DegFree, lower.tail = FALSE)
MError90 <- TScore90 * SampSE
CI90 <- SampMean + c(-1, 1) * MError90
LBound90 <- CI90[1]
UBound90 <- CI90[2]

# 99% CI: same construction with the smaller alpha.
TScore99 <- qt(p = alpha99 / 2, df = DegFree, lower.tail = FALSE)
MError99 <- TScore99 * SampSE
CI99 <- SampMean + c(-1, 1) * MError99
LBound99 <- CI99[1]
UBound99 <- CI99[2]
# Secondary way to double check: an intercept-only linear model estimates the
# sample mean, and confint() on it gives t-based intervals for that mean, so
# the bounds should agree with the ones computed by hand.
Mod90 <- lm(weightLb ~ 1, data = SamplePop)
mod90C <- confint(Mod90, level = .90)
mod90C
## 5 % 95 %
## (Intercept) 98.32873 114.2046
# Mod99 is the same fit as Mod90; it is kept as its own object so that each
# interval is computed from its own named model.
Mod99 <- lm(weightLb ~ 1, data = SamplePop)
mod99C <- confint(Mod99, level = .99)
mod99C
## 0.5 % 99.5 %
## (Intercept) 92.85052 119.6828
For this new sample, the mean is `r round(SampMean, 2)` and the standard deviation is `r round(SampSD, 2)`. The 90% CI is between `r round(LBound90, 2)` and `r round(UBound90, 2)`. The 99% CI is between `r round(LBound99, 2)` and `r round(UBound99, 2)`.
# Population density histogram overlaid with the sample: population mean
# (black dashed), sample mean (red dotted), 90% CI bounds (green solid),
# 99% CI bounds (blue solid) and the sample density in transparent red.
ggplot(data = PopSet, aes(x = weightLb)) +
  # after_stat(density) rescales bar heights so the total bar area is 1.
  geom_histogram(
    aes(y = after_stat(density)),
    color = "black", fill = "white", bins = 20
  ) +
  # Reference lines are constants, so they are passed outside aes(): each is
  # drawn once instead of once per row of PopSet.
  geom_vline(xintercept = SumMean, size = 1, color = "black", linetype = "dashed") +
  geom_vline(xintercept = SampMean, size = 1, color = "red", linetype = "dotted") +
  geom_vline(
    xintercept = c(LBound90, UBound90),
    size = 1, color = "green", linetype = "solid"
  ) +
  geom_vline(
    xintercept = c(LBound99, UBound99),
    size = 1, color = "blue", linetype = "solid"
  ) +
  theme(legend.position = "none") +
  # annotate() draws each label once; the x offsets move the labels off the
  # lines they describe.
  annotate(
    "label", x = SumMean - 15, y = .05, color = "black",
    label = paste0("Population Mean:", round(SumMean))
  ) +
  annotate(
    "label", x = SampMean + 15, y = .045, color = "red",
    label = paste0("Sample Mean:", round(SampMean))
  ) +
  annotate(
    "label", x = LBound90 - 10, y = .03, color = "green",
    label = paste0("90 CI: ", round(LBound90), "-", round(UBound90))
  ) +
  annotate(
    "label", x = UBound99 + 10, y = .03, color = "blue",
    label = paste0("99 CI: ", round(LBound99), "-", round(UBound99))
  ) +
  ggtitle("Plot of Population and Sample Weights") +
  xlab("Weight LBs") +
  ylab("Density") +
  # Sample distribution; the trailing "25" in the hex colour is the alpha
  # channel, which makes the overlay transparent.
  geom_density(
    data = SamplePop, aes(x = weightLb),
    color = "#FF000025", fill = "#FF000025"
  )