Duke_Inferential_Stats_Week1

# Report the current working directory; the relative CSV paths used further down resolve against it.
getwd()

## [1] "C:/Users/Jerome/Documents/0000_Work_Files/0000_Coursera/Statistics_with_R_Specialization/Course_2_Inferential_Stats"

library(statsr)

## Warning: package 'statsr' was built under R version 4.0.3

library(dplyr)

## Warning: package 'dplyr' was built under R version 4.0.2

## 
## Attaching package: 'dplyr'

## The following objects are masked from 'package:stats':
## 
##     filter, lag

## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union

library(shiny)

## Warning: package 'shiny' was built under R version 4.0.2

library(ggplot2)
# Load the `ames` data set into the session (presumably bundled with statsr — confirm).
data(ames)

# Disabled alternative: read the data from a CSV file instead (the path was never filled in).
#ames <- read.csv()

# Distribution of `area` over every row of `ames`, drawn with bins 250 units wide.
ames %>%
  ggplot(mapping = aes(x = area)) +
  geom_histogram(binwidth = 250)

# Snapshot the data to "ames.csv" in the working directory, then reload it so
# that everything below works from the on-disk copy as parsed by read.csv().
write.csv(ames, file = "ames.csv", row.names = FALSE)

ames <- read.csv("ames.csv", header = TRUE)

These summary statistics for `area` across the full data set give the values needed to answer Question 1.

# Summary of `area` over all of `ames`: centre, spread, extremes and quartiles.
summarise(
  ames,
  mu = mean(area),
  pop_med = median(area),
  sigma = sd(area),
  pop_iqr = IQR(area),
  pop_min = min(area),
  pop_max = max(area),
  pop_q1 = quantile(area, 0.25), # first quartile, 25th percentile
  pop_q3 = quantile(area, 0.75)  # third quartile, 75th percentile
)

##        mu pop_med    sigma pop_iqr pop_min pop_max pop_q1  pop_q3
## 1 1499.69    1442 505.5089  616.75     334    5642   1126 1742.75

Draw a sample of 50 observations and run the same stats.

# Draw one random sample of 50 rows from `ames`.
samp1 <- sample_n(ames, size = 50)

# Save the sample to "samp1.csv" in the working directory and read it straight
# back, so `samp1` is the copy parsed from disk by read.csv().
write.csv(samp1, file = "samp1.csv", row.names = FALSE)
samp1 <- read.csv("samp1.csv", header = TRUE)

Run the stats for the sample

# Same set of statistics as for the full data, computed on the 50-row sample.
summarise(
  samp1,
  samp_mu = mean(area),
  samp_med = median(area),
  samp_sigma = sd(area),
  samp_pop_iqr = IQR(area),
  samp_min = min(area),
  samp_max = max(area),
  samp_q1 = quantile(area, 0.25), # first quartile, 25th percentile
  samp_q3 = quantile(area, 0.75)  # third quartile, 75th percentile
)

##   samp_mu samp_med samp_sigma samp_pop_iqr samp_min samp_max samp_q1 samp_q3
## 1 1618.86   1612.5   462.6722       528.75      816     2872    1328 1856.75

# Distribution of `area` within the sample, using the same 250-unit bins.
samp1 %>%
  ggplot(mapping = aes(x = area)) +
  geom_histogram(binwidth = 250)

Try samples of 100 and 1000

# Two larger random samples from `ames`: 100 rows, then 1000 rows.
samp100 <- sample_n(ames, size = 100)
samp1000 <- sample_n(ames, size = 1000)

# Mean `area` in the 100-row sample.
summarise(samp100, mean100 = mean(area))

##   mean100
## 1 1474.38

# Mean `area` in the 1000-row sample.
summarise(samp1000, mean1000 = mean(area))

##   mean1000
## 1 1489.676

Another sample of 50

# Draw a fresh 50-row sample and report only its mean `area`;
# the sample itself is not stored.
ames %>%
  sample_n(size = 50) %>%
  summarise(mean_2nd50 = mean(area))

##   mean_2nd50
## 1    1573.66

Take 15,000 samples of size 50 and record the mean of each.

# Build 15,000 resamples of 50 rows each (replace = TRUE) and reduce them to
# their mean `area`, stored in column `x_bar`.
sample_means50 <- ames %>%
  rep_sample_n(
    size = 50,
    reps = 15000,
    replace = TRUE
  ) %>%
  summarise(x_bar = mean(area))

## `summarise()` ungrouping output (override with `.groups` argument)

# Histogram of the `x_bar` values from `sample_means50`, in bins 20 units wide.
sample_means50 %>%
  ggplot(mapping = aes(x = x_bar)) +
  geom_histogram(binwidth = 20)

Now do the same, but with 25 samples of size 10 each.

# Same procedure on a much smaller scale: 25 resamples of 10 rows each
# (replace = TRUE), reduced to their mean `area` in column `x_bar`.
sample_means_small <- ames %>%
  rep_sample_n(
    size = 10,
    reps = 25,
    replace = TRUE
  ) %>%
  summarise(x_bar = mean(area))

## `summarise()` ungrouping output (override with `.groups` argument)

# Histogram of the `x_bar` values from `sample_means_small`, in bins 10 units wide.
sample_means_small %>%
  ggplot(mapping = aes(x = x_bar)) +
  geom_histogram(binwidth = 10)

Question 5

## PhantomJS not found. You can install it with webshot::install_phantomjs(). If it is installed, please make sure the phantomjs executable can be found via the PATH variable.

Shiny applications not supported in static R Markdown documents

Duke_Inferential_Stats_Week1_RLAB

Jerome

9/4/2021

This gives the stats to answer Q. 1.

Draw a sample of 50 observations and run the same stats.

Run the stats for the sample

Try samples of 100 and 1000

Another sample of 50

15,000 samples of 50

Now do the same, but 25 samples of 10 each

Question 5