# Report the current working directory; the relative file names "ames.csv" and
# "samp1.csv" used further down are resolved against it.
getwd()
## [1] "C:/Users/Jerome/Documents/0000_Work_Files/0000_Coursera/Statistics_with_R_Specialization/Course_2_Inferential_Stats"
library(statsr)
## Warning: package 'statsr' was built under R version 4.0.3
library(dplyr)
## Warning: package 'dplyr' was built under R version 4.0.2
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(shiny)
## Warning: package 'shiny' was built under R version 4.0.2
library(ggplot2)
# Load the `ames` data set from the attached packages
# (presumably shipped with statsr -- confirm).
data(ames)

# Histogram of `area` over the full data set, in bins 250 units wide.
ggplot(data = ames, aes(x = area)) +
  geom_histogram(binwidth = 250)

# Save a CSV copy in the working directory and continue with the re-read copy,
# so every later step operates on what read.csv() returns.
write.csv(ames, file = "ames.csv", row.names = FALSE)
ames <- read.csv("ames.csv", header = TRUE)

The summary statistics below answer Question 1.

# One-row summary of `area` over all of `ames`: mean, median, standard
# deviation, interquartile range, extremes and the two quartiles.
summarise(
  ames,
  mu = mean(area),
  pop_med = median(area),
  sigma = sd(area),
  pop_iqr = IQR(area),
  pop_min = min(area),
  pop_max = max(area),
  pop_q1 = quantile(area, probs = 0.25), # first quartile, 25th percentile
  pop_q3 = quantile(area, probs = 0.75)  # third quartile, 75th percentile
)
##        mu pop_med    sigma pop_iqr pop_min pop_max pop_q1  pop_q3
## 1 1499.69    1442 505.5089  616.75     334    5642   1126 1742.75

Draw a sample of 50 observations and run the same stats.

# Draw one random sample of 50 rows from `ames`.
# NOTE(review): no set.seed() call is visible before this point, so each run
# draws a different sample and overwrites samp1.csv -- confirm this is intended.
samp1 <- ames %>%
  sample_n(size = 50)

# Save the sample in the working directory and continue with the re-read copy.
write.csv(samp1, file = "samp1.csv", row.names = FALSE)
samp1 <- read.csv("samp1.csv", header = TRUE)

Compute the same summary statistics for the sample.

# One-row summary of `area` within the 50-row sample `samp1`. Every column
# carries the `samp_` prefix; the IQR column is named `samp_iqr` (it was
# `samp_pop_iqr`, which wrongly labelled a sample statistic as a population one).
samp1 %>%
  summarise(
    samp_mu = mean(area),
    samp_med = median(area),
    samp_sigma = sd(area),
    samp_iqr = IQR(area),
    samp_min = min(area),
    samp_max = max(area),
    samp_q1 = quantile(area, 0.25), # first quartile, 25th percentile
    samp_q3 = quantile(area, 0.75)  # third quartile, 75th percentile
  )
##   samp_mu samp_med samp_sigma samp_iqr samp_min samp_max samp_q1 samp_q3
## 1 1618.86   1612.5   462.6722   528.75      816     2872    1328 1856.75

# Histogram of `area` within the sample, using the same 250-unit bins as the
# histogram of the full data set.
ggplot(data = samp1, aes(x = area)) +
  geom_histogram(binwidth = 250)

Try samples of 100 and 1000

# Two larger random samples of `ames`, drawn in this order: 100 rows, then 1000.
samp100 <- sample_n(ames, size = 100)
samp1000 <- sample_n(ames, size = 1000)

# Mean `area` of the 100-row sample.
summarise(samp100, mean100 = mean(area))
##   mean100
## 1 1474.38

# Mean `area` of the 1000-row sample.
summarise(samp1000, mean1000 = mean(area))
##   mean1000
## 1 1489.676

Another sample of 50

# Mean `area` of a fresh 50-row sample; the sample itself is not stored.
summarise(sample_n(ames, size = 50), mean_2nd50 = mean(area))
##   mean_2nd50
## 1    1573.66

Draw 15,000 samples of size 50 and plot the distribution of their means.

# Draw 15,000 samples of 50 rows each (replace = TRUE) and reduce them to
# `x_bar`, the mean of `area`, per group of the rep_sample_n() result.
# `.groups = "drop"` states the ungrouped result explicitly instead of relying
# on summarise()'s default, which announced the ungrouping with a message.
sample_means50 <- ames %>%
  rep_sample_n(size = 50, reps = 15000, replace = TRUE) %>%
  summarise(x_bar = mean(area), .groups = "drop")

# Histogram of the sample means, in bins 20 units wide.
ggplot(data = sample_means50, aes(x = x_bar)) +
  geom_histogram(binwidth = 20)

Now do the same, but 25 samples of 10 each

# Draw 25 samples of 10 rows each (replace = TRUE) and reduce them to `x_bar`,
# the mean of `area`, per group of the rep_sample_n() result.
# `.groups = "drop"` states the ungrouped result explicitly instead of relying
# on summarise()'s default, which announced the ungrouping with a message.
sample_means_small <- ames %>%
  rep_sample_n(size = 10, reps = 25, replace = TRUE) %>%
  summarise(x_bar = mean(area), .groups = "drop")

# Histogram of the sample means, in bins 10 units wide.
ggplot(data = sample_means_small, aes(x = x_bar)) +
  geom_histogram(binwidth = 10)

Question 5

## PhantomJS not found. You can install it with webshot::install_phantomjs(). If it is installed, please make sure the phantomjs executable can be found via the PATH variable.
Shiny applications not supported in static R Markdown documents