Normal Distribution

?rnorm()
## starting httpd help server ... done
rnorm(100, mean = 0, sd = 1)
##   [1]  0.213197260 -0.946108377 -0.032133801  0.722782905  0.381404975
##   [6] -2.519191628 -1.111409720  1.455713255  0.162745476 -1.942185455
##  [11] -1.513174438  1.019906490 -0.005654242  0.367237927  0.470619466
##  [16]  0.332851175 -0.745356491 -0.975314081 -0.339830233  1.155817394
##  [21] -1.462386294  2.242478350  0.431883842 -0.618532987 -0.885694814
##  [26] -1.050886036 -0.376589668  0.336525967 -1.401716799  0.364662446
##  [31] -0.848841787  1.221889401 -0.240583815  0.674600334 -1.387383525
##  [36] -1.108229251 -0.714976291  0.153054000  1.376335983 -1.293615906
##  [41] -0.904619654  0.877271410  1.574654625  0.275771329  0.825699056
##  [46] -0.139870842 -0.078631767 -1.280820702 -0.331295559  0.026099570
##  [51]  0.335708854 -0.342467371  0.845364338 -0.691286591 -0.433963124
##  [56]  0.622734553  0.592378120  1.861478201 -0.453190640 -0.364706636
##  [61] -1.626814147  1.004376352 -0.884022191 -1.302239673 -1.365518546
##  [66] -1.008864985 -0.099234075 -0.164381362 -1.426353328 -0.552789459
##  [71] -0.630338274 -0.226421658 -0.585671684  0.336165788 -1.391497128
##  [76] -1.718842011  0.319832386  0.133656941 -1.416532084 -0.902670330
##  [81] -1.067722022  0.401877369 -0.858543950  0.339550709 -1.325981385
##  [86]  0.062448630 -1.517878192 -1.466480140  0.135854554  0.363545782
##  [91] -0.695062553 -0.533318082 -0.391628192  0.346701035 -1.476327768
##  [96]  1.454486242  0.626987321  0.915074854 -1.802122796  0.002510583
hist(rnorm(100, mean = 0, sd = 1))

hist(rnorm(10000, mean = 0, sd = 1))

hist(rnorm(10000, mean = 170, sd = 10))

dnorm(0, mean = 0, sd = 1)
## [1] 0.3989423
dnorm(170, mean = 170, sd = 10)
## [1] 0.03989423
curve(dnorm, -3, 3)

curve(pnorm, -3, 3)

pnorm(1)
## [1] 0.8413447
pnorm(180, mean = 170, sd = 10)
## [1] 0.8413447
pnorm(160, mean = 170, sd = 10)
## [1] 0.1586553
pnorm(180, mean = 170, sd = 10) - pnorm(160, mean = 170, sd = 10)
## [1] 0.6826895
pnorm(190, mean = 170, sd = 10) - pnorm(150, mean = 170, sd = 10)
## [1] 0.9544997
pnorm(200, mean = 170, sd = 10) - pnorm(140, mean = 170, sd = 10)
## [1] 0.9973002
barplot(table(sample(1:6, size = 100000, replace=TRUE)))

hist(runif(1000,0,5))

hist(rpois(5000, lambda = 3))

curl::curl_download('https://raw.githubusercontent.com/ywchiu/fda_course/main/cdc.Rdata', 'cdc.Rdata')

load("cdc.Rdata")
hist(cdc$weight)

shapiro.test(rnorm(5000) )
## 
##  Shapiro-Wilk normality test
## 
## data:  rnorm(5000)
## W = 0.99971, p-value = 0.7409
shapiro.test(sample(cdc$weight,5000) )
## 
##  Shapiro-Wilk normality test
## 
## data:  sample(cdc$weight, 5000)
## W = 0.96329, p-value < 2.2e-16

Sample Distribution

hist(cdc$height)

sample_means <- rep(NA, 3)
sample_means
## [1] NA NA NA
sample_means10  <- rep(NA, 5000)
sample_means50  <- rep(NA, 5000)
sample_means100 <- rep(NA, 5000)

for (i in 1:5000){
  sample10 <- sample(cdc$height, 10)
  sample_means10[i]<-mean(sample10)
  
  sample50 <- sample(cdc$height, 50)
  sample_means50[i]<-mean(sample50)
  
  sample100 <- sample(cdc$height, 100)
  sample_means100[i]<-mean(sample100)
}

par(mfrow =c(3,1))
xlimits <- range(sample_means10)
hist(sample_means10, breaks = 20, xlim = xlimits)
hist(sample_means50, breaks = 20, xlim = xlimits)
hist(sample_means100, breaks = 20, xlim = xlimits)

mean(cdc$height)
## [1] 67.1829
population <- cdc$height

set.seed(123)
sample(1:42, 6)
## [1] 31 15 14  3 37 40
set.seed(123)
samp <- sample(population, 50)
mean(samp)
## [1] 67.4
hist(samp)

pnorm(1) - pnorm(-1)
## [1] 0.6826895
pnorm(2) - pnorm(-2)
## [1] 0.9544997
pnorm(1.96) - pnorm(-1.96)
## [1] 0.9500042
mean(samp)
## [1] 67.4
sde <- sd(samp) / sqrt(50)
sde * 1.96
## [1] 1.12
upper <- mean(samp) + 1.96 * sde
lower <- mean(samp) - 1.96 * sde


print(mean(cdc$height))
## [1] 67.1829
print(lower)
## [1] 66.28
print(upper)
## [1] 68.52
print(upper - lower)
## [1] 2.24
# sample 100
set.seed(123)
samp <- sample(population, 100)
sde <- sd(samp) / sqrt(100)
upper <- mean(samp) + 1.96 * sde
lower <- mean(samp) - 1.96 * sde
print(mean(cdc$height))
## [1] 67.1829
print(lower)
## [1] 66.66719
print(upper)
## [1] 68.27281
print(upper - lower)
## [1] 1.605615
# sample 200
set.seed(123)
samp <- sample(population, 200)
sde <- sd(samp) / sqrt(200)
upper <- mean(samp) + 1.96 * sde
lower <- mean(samp) - 1.96 * sde
print(mean(cdc$height))
## [1] 67.1829
print(lower)
## [1] 66.83475
print(upper)
## [1] 67.98525
print(upper - lower)
## [1] 1.150506
qnorm(0.995)
## [1] 2.575829
pnorm(2.58) - pnorm(-2.58)
## [1] 0.99012
# 95% 信心水準
set.seed(123)
samp <- sample(population, 100)
sde <- sd(samp) / sqrt(100)
upper <- mean(samp) + 1.96 * sde
lower <- mean(samp) - 1.96 * sde
print(mean(cdc$height))
## [1] 67.1829
print(lower)
## [1] 66.66719
print(upper)
## [1] 68.27281
print(upper - lower)
## [1] 1.605615
# 99% 信心水準
set.seed(123)
samp <- sample(population, 100)
sde <- sd(samp) / sqrt(100)
upper <- mean(samp) + 2.58 * sde
lower <- mean(samp) - 2.58 * sde
print(mean(cdc$height))
## [1] 67.1829
print(lower)
## [1] 66.41324
print(upper)
## [1] 68.52676
print(upper - lower)
## [1] 2.113514