An Introduction

How to generate using R?

Simulating Bernoulli(p) Distribution

p = 0.2;
n = 50;
U = runif(n, min = 0, max = 1);
X = sum(U < p);
X
## [1] 11
p1=0.35
p2=0.15
p3=0.4
p4=0.1
P = c(p1,p1+p2, p1+p2+p3, p1+p2+p3+p4)

X = c(1, 2, 3, 4)
counter = 1
set.seed(2021)
r = runif(1, min = 0, max = 1);
while(r > P[counter])
counter = counter + 1
end
## function (x, ...) 
## UseMethod("end")
## <bytecode: 0x000002b277e4b1b8>
## <environment: namespace:stats>
X[counter]
## [1] 2

U = runif(1, min = 0, max = 1);
lambda = 1
X = -(1/lambda)*log(U)

What about the normal distribution?

dnorm(0)
## [1] 0.3989423
dnorm(1)
## [1] 0.2419707
dnorm(0, mean=4, sd=3)
## [1] 0.05467002
pnorm(0)
## [1] 0.5
pnorm(1)
## [1] 0.8413447
x = seq(-3,3, by=0.1); y = pnorm(x) ;plot(x,y, type="l")

pnorm(0, lower.tail=FALSE)
## [1] 0.5
pnorm(1, lower.tail=FALSE)
## [1] 0.1586553
x = seq(-3,3, by=0.1); y = pnorm(x, lower.tail=FALSE)
plot(x,y, type="l")

qnorm(0.68); qnorm(0.95);qnorm(0.997)
## [1] 0.4676988
## [1] 1.644854
## [1] 2.747781
x = seq(0,1, by=0.05); y = qnorm(x);plot(x,y, type="l")

x=rnorm(1000)
hist(x)

pnorm(1) - pnorm(-1) # within one standard deviation
## [1] 0.6826895
pnorm(2) - pnorm(-2) # within two standard deviation
## [1] 0.9544997
pnorm(3) - pnorm(-3) # within three standard deviation
## [1] 0.9973002

Expectation

Interpretation:

Sample versions of expectation and variance?

Kurtosis and Skewness

Are the sum of rolls normal?

DiceR = read.csv("Dice.csv", header=T)
names(DiceR)= "Sum"
DiceR$Sum = as.numeric(DiceR$Sum)
## Warning: NAs introduced by coercion
Dice = na.omit(DiceR$Sum)
hist(Dice)

summary(Dice)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    7.00   15.00   17.50   17.23   20.00   27.00
sd(Dice)
## [1] 3.670899
cs = (Dice-mean(Dice))/(sd(Dice))
mean(cs)
## [1] 2.261974e-16
sd(cs)
## [1] 1
onesdcs = cs[cs >-1& cs <1] # within one sd
twosdcs = cs[cs >-2& cs <2] # within two sd
threesdcs = cs[cs >-3& cs <3]# within three sd
length(onesdcs)/length(cs)
## [1] 0.6653846
length(twosdcs)/length(cs)
## [1] 0.9576923
length(threesdcs)/length(cs)
## [1] 1
library(moments)
Kurtosis = kurtosis(Dice)
Kurtosis
## [1] 3.002322
Skewness = skewness(Dice)
Skewness
## [1] -0.2548971
normaldata = replicate(1000, rnorm(97,0,1), simplify=FALSE)

library(moments)
mean(sapply(normaldata, kurtosis))
## [1] 2.947374
mean(sapply(normaldata, skewness))
## [1] 0.0006454242
normaldata = replicate(1000, rnorm(97,0,1),
                       simplify=FALSE)
library(moments)
hist(sapply(normaldata, kurtosis))

Population versions of skewness and kurtosis?

Quantiles

x = c(1.3, 2.2, 2.7, 3.1, 3.3, 3.7)
quantile(x)
##    0%   25%   50%   75%  100% 
## 1.300 2.325 2.900 3.250 3.700
quantile(x, seq(0, 1, by=.1))
##   0%  10%  20%  30%  40%  50%  60%  70%  80%  90% 100% 
## 1.30 1.75 2.20 2.45 2.70 2.90 3.10 3.20 3.30 3.50 3.70
rain.nyc = c(43.6, 37.8, 49.2, 40.3, 45.5, 44.2, 38.6, 40.6, 38.7, 46.0, 37.1, 34.7, 35.0, 43.0, 34.4, 49.7, 33.5, 38.3, 41.7, 51.0, 54.4, 43.7, 37.6, 34.1, 46.6, 39.3, 33.7, 40.1, 42.4, 46.2, 36.8, 39.4, 47.0, 50.3, 55.5, 39.5, 35.5, 39.4, 43.8, 39.4, 39.9, 32.7, 46.5, 44.2, 56.1, 38.5, 43.1, 36.7, 39.6, 36.9, 50.8, 53.2, 37.8, 44.7, 40.6, 41.7, 41.4, 47.8, 56.1, 45.6, 40.4, 39.0, 36.1, 43.9, 53.5, 49.8, 33.8, 49.8, 53.0, 48.5, 38.6, 45.1, 39.0, 48.5, 36.7, 45.0, 45.0, 38.4, 40.8, 46.9, 36.2, 36.9, 44.4, 41.5, 45.2, 35.6, 39.9, 36.2, 36.5)
     
boxplot(rain.nyc, main = "New York City Rainfall", ylab = "Inches")

x=rnorm(1000)
qqnorm(x)
qqline(x)

qqnorm(Dice)
qqline(Dice)

#boxplot(yonkers, stamford, names = c("Yonkers", "Stamford"), main = "New York Ozone Levels", ylab = "Ozone Levels (ppb)")
#qqplot(yonkers, stamford, xlim = ylim, ylim = ylim, xlab = "Yonkers", ylab = "Stamford", main = "Ozone Levels (ppb)")
#abline(a=0, b=1, lty="dotted")

#https://www.stat.auckland.ac.nz/~ihaka/787/lectures-quantiles.pdf
#https://www.stat.auckland.ac.nz/~ihaka/787/data.txt