# Show the current working directory; the relative-path CSV files used later
# ("ames.csv", "samp1.csv") are written to and read from this location.
getwd()
## [1] "C:/Users/Jerome/Documents/0000_Work_Files/0000_Coursera/Statistics_with_R_Specialization/Course_2_Inferential_Stats"
library(statsr)
## Warning: package 'statsr' was built under R version 4.0.3
library(dplyr)
## Warning: package 'dplyr' was built under R version 4.0.2
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(shiny)
## Warning: package 'shiny' was built under R version 4.0.2
library(ggplot2)
# Load the `ames` data set into the workspace (presumably the copy shipped
# with statsr, which is attached above).
data("ames")
# ames <- read.csv()

# Histogram of the `area` variable for the full data set, in bins 250 wide.
ggplot(ames, aes(area)) +
  geom_histogram(binwidth = 250)

# Save the data to ames.csv in the working directory and read it straight
# back, so that `ames` is the plain data frame produced by read.csv().
# (The former `ames <- ames` self-assignment was a no-op and has been removed.)
# NOTE(review): presumably the file is kept so later sessions can load the
# data directly from CSV — confirm.
write.csv(ames, file = "ames.csv", row.names = FALSE)
ames <- read.csv("ames.csv", header = TRUE)
The following summary statistics for the full data set give the values needed to answer Question 1.
# One-row summary of `area` over the whole of `ames`: centre, spread,
# extremes and the two quartiles.
summarise(
  ames,
  mu      = mean(area),
  pop_med = median(area),
  sigma   = sd(area),
  pop_iqr = IQR(area),
  pop_min = min(area),
  pop_max = max(area),
  pop_q1  = quantile(area, probs = 0.25), # 25th percentile (lower quartile)
  pop_q3  = quantile(area, probs = 0.75)  # 75th percentile (upper quartile)
)
## mu pop_med sigma pop_iqr pop_min pop_max pop_q1 pop_q3
## 1 1499.69 1442 505.5089 616.75 334 5642 1126 1742.75
Draw a random sample of 50 observations and compute the same summary statistics for it.
# Draw a random sample of 50 rows from `ames`.
# NOTE(review): no set.seed() call is visible before this point, so each run
# presumably yields a different sample — confirm whether that is intended.
samp1 <- ames %>%
  sample_n(size = 50)

# Save the sample to samp1.csv in the working directory and read it straight
# back, so `samp1` matches what is stored on disk.
# (The former `samp1 <- samp1` self-assignment was a no-op and has been removed.)
write.csv(samp1, file = "samp1.csv", row.names = FALSE)
samp1 <- read.csv("samp1.csv", header = TRUE)
Compute the same summary statistics for the sample.
# One-row summary of `area` within `samp1`, mirroring the statistics
# computed for the full data set.
summarise(
  samp1,
  samp_mu      = mean(area),
  samp_med     = median(area),
  samp_sigma   = sd(area),
  samp_pop_iqr = IQR(area),
  samp_min     = min(area),
  samp_max     = max(area),
  samp_q1      = quantile(area, probs = 0.25), # 25th percentile (lower quartile)
  samp_q3      = quantile(area, probs = 0.75)  # 75th percentile (upper quartile)
)
## samp_mu samp_med samp_sigma samp_pop_iqr samp_min samp_max samp_q1 samp_q3
## 1 1618.86 1612.5 462.6722 528.75 816 2872 1328 1856.75
# Histogram of `area` in the 50-row sample, using the same 250-wide bins as
# the histogram of the full data set.
ggplot(samp1, aes(area)) +
  geom_histogram(binwidth = 250)

Try larger samples of 100 and 1,000 observations and compare their means.
# Two further random samples from `ames`, drawn in this order:
# first 100 rows, then 1000 rows.
samp100 <- sample_n(ames, size = 100)
samp1000 <- sample_n(ames, size = 1000)
# Mean of `area` in the 100-row sample.
summarise(samp100, mean100 = mean(area))
## mean100
## 1 1474.38
# Mean of `area` in the 1000-row sample.
summarise(samp1000, mean1000 = mean(area))
## mean1000
## 1 1489.676
Draw another sample of 50 observations and compute its mean.
# Draw a fresh 50-row sample (not stored) and report the mean of its `area`.
summarise(
  sample_n(ames, size = 50),
  mean_2nd50 = mean(area)
)
## mean_2nd50
## 1 1573.66
Draw 15,000 samples of 50 observations each and plot the distribution of their means.
# Take 15000 repeated samples of 50 rows each (sampling with replacement)
# and reduce each one to the mean of `area`, stored in column `x_bar`.
sample_means50 <- summarise(
  rep_sample_n(ames, size = 50, reps = 15000, replace = TRUE),
  x_bar = mean(area)
)
## `summarise()` ungrouping output (override with `.groups` argument)
# Histogram of the 15000 sample means, in bins 20 wide.
ggplot(sample_means50, aes(x_bar)) +
  geom_histogram(binwidth = 20)

Now do the same, but with only 25 samples of 10 observations each.
# Take 25 repeated samples of 10 rows each (sampling with replacement) and
# reduce each one to the mean of `area`, stored in column `x_bar`.
sample_means_small <- summarise(
  rep_sample_n(ames, size = 10, reps = 25, replace = TRUE),
  x_bar = mean(area)
)
## `summarise()` ungrouping output (override with `.groups` argument)
# Histogram of the 25 small-sample means, in bins 10 wide.
ggplot(sample_means_small, aes(x_bar)) +
  geom_histogram(binwidth = 10)

Question 5
## PhantomJS not found. You can install it with webshot::install_phantomjs(). If it is installed, please make sure the phantomjs executable can be found via the PATH variable.
Shiny applications not supported in static R Markdown documents