################################################################
#
# Chapter 2: Elementary Probability and Statistics
#
################################################################
#Plot Fig. 2.1
# Go to your working directory
setwd("/rCode")
nycDat=read.csv("data/NYCDailyDatasummer2019.csv", header=T)
dim(nycDat)
## [1] 7555 13
#[1] 7555 13
dayNum=92
nycP=nycDat[1:dayNum, c(3,6)]
dw=c()
for (i in 1:dayNum){if (nycP[i,2] >= 0.2){dw[i] = 1}
else {dw[i]=0} }
dw
## [1] 0 0 0 0 0 0 0 0 0 0 1 1 0 1 0 1 1 1 1 1 1 1 0 0 1 1 0 0 0 1 0 0 1 1 0 0 1 1
## [39] 1 0 0 1 0 0 0 0 0 1 1 0 0 1 1 1 0 0 0 0 1 0 0 1 0 0 1 1 0 1 1 1 1 0 0 0 1 0
## [77] 0 0 1 1 0 0 1 1 1 1 0 0 0 1 0 0
n0=which(dw==0)
n1=which(dw==1)
m=dayNum - 1
par(mfrow=c(1,1))
par(mar=c(2.5,4.5,2.5,4))
plot(1:dayNum, nycP[,2], type="s",
main="Daily Precipitation of New York City:
1 Jun 2019- 31 Aug 2019",
ylab="Precipitation [mm/day]",
xaxt="n", xlab="",
cex.lab=1.4, cex.axis=1.4)
axis(1, at=c(1, 30, 61, 92),
labels=c("1 June", "1 July", "1 Aug", "31 Aug"),
cex.axis=1.4)
par(new=TRUE)
plot(n0+1.0, dw[n0]-0.2, cex.lab=1.4,
ylim=c(0,15), pch=15, cex=0.4, col="red",
axes=FALSE, xlab="", ylab="")
points(n1+1, dw[n1], pch=15, col="blue", cex=0.4)
axis(4, col="blue", col.axis="blue",
at=c(0,1), labels =c("D", "W"),
las=2, cex.axis=1)
mtext("Dry or Wet Days",
col="blue", side=4, line=1.5, cex=1.4)

#
#Plot Fig. 2.2
p=0.6471
n=1:12
pdfn= (1-p)*p^n
par(mar=c(4.5,4.5,3,0.5))
plot(n, pdfn,
ylim=c(0,0.25), type="h", lwd=15,
cex.lab=1.5, cex.axis=1.5,
xlab="Length of Dry Spell: n", ylab="Probability",
main="Probability Distribution Function of Dry Spell")
text(1.5,0.25, "(a)",cex=1.5)

p=0.6471
N=1:12
cdfN= 1-p^N
par(mar=c(4.5,4.5,3,0.5))
plot(N, cdfN,
ylim=c(0,1), type="s", lwd=3,
cex.lab=1.5, cex.axis=1.5,
xlab="N: Dry Spell Not Longer Than N Days",
ylab="Cumulative Probability",
main="Cumulative Distribution Function of Dry Spell")
text(1.5,0.95, "(b)",cex=1.5)

#Plot Fig. 2.3
n=20
x=0:n
pdfdx=dbinom(x, size=n, prob=0.3)
par(mar=c(4.5,4.5,2,4.5))
plot(x, pdfdx, type="h", lty=1, lwd=3,
xlab="Number of successes: k", ylab="Probability",
main="Binomial Distribution: n=20, p=0.3",
cex.lab=1.4, cex.axis=1.4)
text(11, 0.05, "PDF", cex=2)
par(new=TRUE)
cdfx=pbinom(x, size=n, prob=0.3)
plot(x, cdfx, col="blue",
type="o", lty=2, axes=FALSE,
xlab="", ylab="", cex.axis=1.4)
axis(4, col="blue", col.axis = "blue", cex.axis=1.4)
mtext("Cumulative Probability", col="blue",
side=4, line=3, cex=1.4)
text(11, 0.9, "CDF", cex=2, col="blue")

#
#Plot Fig. 2.4
x=seq(-5,5, len=101)
y=dnorm(x)
plot(x, y,
type="l", lty=1,
xlab="x", ylab="Probability Density f(x)",
main="Standadrd Normal Distribution",
cex.lab=1.4, cex.axis=1.4)
lines(x,rep(0, 101))
xx=c(x[60], x[60], x[70], x[70])
yy=c(0, y[60], y[70], 0)
polygon(xx, yy, col="brown")
text(0.7, 0.01, "a", col="brown")
text(2.1, 0.01, "b", col="brown")
text(1.7,0.2, expression(f(x)), cex=1.4)
par(new=TRUE)
plot(x, pnorm(x), type="l", lty=2, col="blue",
axes=FALSE, ylab="", xlab="", cex.axis=1.4)
axis(4, col="blue", col.axis = "blue", cex.axis=1.4)
text(3,0.9, expression(F(x)), cex=1.4, col="blue")
mtext("Cumulative Probability F(x)", col="blue",
side=4, line=3, cex=1.4)
text(3.3,0.3, cex=1.3, col="brown",
expression(P[ab]==integral(f(x)*dx, a,b)))

#
#Plot Fig. 2.5
par(mar=c(4.5, 4.5, 2.5, 0.5))
x<- rnorm(200, mean=5, sd=3)
h<-hist(x, breaks=15, col="gray",
xlim=c(-5,15), ylim=c(0,30),
xlab=expression("Random numbers x of N(5," * 3^2 *")"),
ylab="Frenquency/Density",
main="Histogram and Its Normal Curve Fit",
cex.lab=1.4, cex.axis=1.4)
xfit<-seq(min(x),max(x),length=40)
yfit<-dnorm(xfit,mean=mean(x),sd=sd(x))
yfit <- yfit*diff(h$mids[1:2])*length(x)
#diff(h$mids[1:2])*length(x) =200
#is the total histogram area
points(x, rep(0, 200), cex=0.8, pch=3, col="blue")
lines(xfit, yfit, lwd=2)

#
#Plot Fig. 2.6
#install.packages("ggExtra")
library(ggplot2)
df <- data.frame(x = rnorm(1000, 5, 3), y = runif(1000, 0,5))
p <- ggplot(df, aes(x, y)) + geom_point() + theme_classic()
ggExtra::ggMarginal(p, type = "histogram")

#
#Plot Fig. 2.7: R code for Poisson distribution
k=0:199
y= dpois(k, lambda=5)
par(mar=c(4.5,4.5,2,4.5))
x
## [1] 7.50441406 4.48768771 1.87656608 5.49416330 11.17272978 8.27452198
## [7] -0.77513155 2.04173371 5.22191454 8.36571040 7.88263285 3.48436500
## [13] 5.79937055 3.14571357 4.52333957 6.86892558 4.53441848 6.85757823
## [19] 4.11268161 5.93092251 5.33617720 6.50854652 3.09765631 5.33393888
## [25] 3.51767850 4.82702747 3.21961696 5.40056279 4.09289277 6.25620301
## [31] 4.93687750 5.27143305 2.19482119 7.78695550 0.73805248 0.47522478
## [37] 6.37441819 4.05036849 2.57386504 6.39343559 3.09313186 9.04355456
## [43] 2.89568546 8.22482417 3.59175604 7.35757345 4.80497071 6.13423847
## [49] 4.01791636 1.02937453 6.26957222 6.67126728 3.23448309 -0.09005929
## [55] 6.26525599 5.73968220 2.56345050 7.52340559 7.31838339 1.74471455
## [61] 10.02439306 3.46838991 6.66666709 1.24051006 7.46171053 7.81840962
## [67] 8.91631575 4.02728455 5.73145761 3.42620015 4.39803775 -2.71305837
## [73] 4.17625089 5.33875054 4.11059847 2.33337528 6.23081581 1.26944475
## [79] 0.94520168 3.71720459 8.81098498 5.62966928 5.81314319 3.49488725
## [85] -3.14858039 -5.48706638 2.44740031 4.34356261 2.86227112 3.99655948
## [91] 4.62585256 4.33545136 5.69670134 7.43201110 2.82198184 5.59991515
## [97] 5.96462149 7.02292795 4.47281780 1.03861945 0.44638581 4.23463593
## [103] 0.52918311 6.14451164 4.17450729 1.88112222 0.77928873 2.27216480
## [109] 6.25359654 9.81877882 8.14008264 6.31769926 7.20016567 5.92732307
## [115] 2.12476459 8.09919802 3.72317342 11.94048314 10.34082861 7.93634860
## [121] 10.71116700 4.71051341 6.75596537 -1.23048835 5.41732087 6.27166134
## [127] 3.14609395 3.33483082 1.35036165 -0.95227340 3.02142526 10.69545099
## [133] 5.92232737 3.76053460 2.11626652 5.06168707 3.43251300 3.23291632
## [139] 3.84461856 10.31536717 6.35762523 7.03072232 7.96848031 2.99484821
## [145] 10.24695307 1.01884015 1.16842106 6.53672574 4.69801054 3.46351328
## [151] 4.15847960 3.16350321 4.59862691 3.42733685 -0.40702286 7.83418583
## [157] 1.79717329 2.62707732 5.80731494 4.01137384 7.09633884 3.52848356
## [163] 6.38003289 12.14991392 5.77092663 8.06850629 3.75193910 3.31427972
## [169] 7.41381435 13.37675892 0.62722863 7.60363644 9.57672863 1.00060781
## [175] 5.14915354 3.39911818 3.05186614 6.26507543 5.95966116 3.68801897
## [181] 5.39601996 9.18396009 5.84544794 2.29930154 -1.38912345 7.19931211
## [187] 6.17246210 1.77443943 2.56500337 10.15263285 -0.46727032 -1.17113232
## [193] 7.26129391 4.87596388 10.45160097 5.67278682 3.52352054 7.51443284
## [199] 0.94389727 7.00378428
y
## [1] 6.737947e-03 3.368973e-02 8.422434e-02 1.403739e-01 1.754674e-01
## [6] 1.754674e-01 1.462228e-01 1.044449e-01 6.527804e-02 3.626558e-02
## [11] 1.813279e-02 8.242177e-03 3.434240e-03 1.320862e-03 4.717363e-04
## [16] 1.572454e-04 4.913920e-05 1.445271e-05 4.014640e-06 1.056484e-06
## [21] 2.641211e-07 6.288597e-08 1.429227e-08 3.107014e-09 6.472947e-10
## [26] 1.294589e-10 2.489595e-11 4.610361e-12 8.232787e-13 1.419446e-13
## [31] 2.365743e-14 3.815715e-15 5.962055e-16 9.033417e-17 1.328444e-17
## [36] 1.897777e-18 2.635801e-19 3.561893e-20 4.686701e-21 6.008592e-22
## [41] 7.510739e-23 9.159438e-24 1.090409e-24 1.267918e-25 1.440816e-26
## [46] 1.600906e-27 1.740116e-28 1.851187e-29 1.928320e-30 1.967673e-31
## [51] 1.967673e-32 1.929091e-33 1.854895e-34 1.749901e-35 1.620279e-36
## [56] 1.472981e-37 1.315162e-38 1.153650e-39 9.945263e-41 8.428189e-42
## [61] 7.023491e-43 5.756959e-44 4.642709e-45 3.684690e-46 2.878664e-47
## [66] 2.214357e-48 1.677543e-49 1.251898e-50 9.205131e-52 6.670385e-53
## [71] 4.764561e-54 3.355324e-55 2.330086e-56 1.595950e-57 1.078344e-58
## [76] 7.188962e-60 4.729580e-61 3.071156e-62 1.968690e-63 1.246006e-64
## [81] 7.787539e-66 4.807123e-67 2.931172e-68 1.765766e-69 1.051051e-70
## [86] 6.182656e-72 3.594567e-73 2.065843e-74 1.173775e-75 6.594239e-77
## [91] 3.663466e-78 2.012894e-79 1.093964e-80 5.881526e-82 3.128471e-83
## [96] 1.646564e-84 8.575854e-86 4.420543e-87 2.255379e-88 1.139080e-89
## [101] 5.695402e-91 2.819506e-92 1.382111e-93 6.709275e-95 3.225613e-96
## [106] 1.536006e-97 7.245312e-99 3.385660e-100 1.567435e-101 7.190070e-103
## [111] 3.268214e-104 1.472168e-105 6.572180e-107 2.908044e-108 1.275458e-109
## [116] 5.545469e-111 2.390289e-112 1.021491e-113 4.328351e-115 1.818635e-116
## [121] 7.577645e-118 3.131258e-119 1.283303e-120 5.216677e-122 2.103499e-123
## [126] 8.413995e-125 3.338887e-126 1.314522e-127 5.134853e-129 1.990253e-130
## [131] 7.654820e-132 2.921687e-133 1.106700e-134 4.160525e-136 1.552435e-137
## [136] 5.749758e-139 2.113882e-140 7.714897e-142 2.795252e-143 1.005486e-144
## [141] 3.591023e-146 1.273412e-147 4.483847e-149 1.567779e-150 5.443676e-152
## [146] 1.877130e-153 6.428526e-155 2.186573e-156 7.387072e-158 2.478883e-159
## [151] 8.262944e-161 2.736074e-162 9.000244e-164 2.941256e-165 9.549534e-167
## [156] 3.080495e-168 9.873380e-170 3.144389e-171 9.950597e-173 3.129119e-174
## [161] 9.778496e-176 3.036800e-177 9.372839e-179 2.875104e-180 8.765561e-182
## [166] 2.656231e-183 8.000695e-185 2.395418e-186 7.129219e-188 2.109236e-189
## [171] 6.203636e-191 1.813929e-192 5.273049e-194 1.524003e-195 4.379318e-197
## [176] 1.251234e-198 3.554641e-200 1.004136e-201 2.820606e-203 7.878789e-205
## [181] 2.188552e-206 6.045725e-208 1.660913e-209 4.538015e-211 1.233156e-212
## [186] 3.332855e-214 8.959286e-216 2.395531e-217 6.371093e-219 1.685474e-220
## [191] 4.435459e-222 1.161115e-223 3.023737e-225 7.833515e-227 2.018947e-228
## [196] 5.176788e-230 1.320609e-231 3.351800e-233 8.464141e-235 2.126669e-236
plot(x, y,
type="p", lty=1, xlab="k", ylab="",
main="Poisson Distribution: Mean Rate = 5",
cex.lab=1.4, cex.axis=1.4)
mtext(side=2, line=3, cex=1.4, 'Probability f(k)')
par(new=TRUE)
plot(x, ppois(x, lambda=5), type="p", pch=16,
lty=2, col="blue",
axes=FALSE, ylab="", xlab="", cex.axis=1.4)
axis(4, col="blue", col.axis = "blue", cex.axis=1.4)
mtext("Cumulative Probability F(k)", col="blue",
side=4, line=3, cex=1.4)

##
dpois(10, lambda=5)
## [1] 0.01813279
##
#Plot Fig. 2.8: R code
x=seq(0,20, len=201)
ycauchy=dcauchy(x, location = 10, scale=1)
ygauss=dnorm(x, mean=10, sd=sqrt(pi/2))
plot(x, ycauchy, type="l", lwd=2,
ylab="Density",
main="Comparison between Cauchy and Gaussian Distributions",
cex.lab=1.4, cex.axis=1.4)
legend(-1,0.35, legend="Cauchy distribution",
cex=1.2, bty="n", lty=1, lwd=2)
lines(x, ygauss, lty=2)
legend(-1,0.32, legend="Normal distribution",
cex=1.2, bty="n", lty=2)

##
x= seq(1,10, by=0.2)
plot(x,dchisq(x, df = 4))

##
#
#Plot Fig. 2.9: R code
setwd("/rCode")
da1 =read.csv("data/EdmontonT.csv", header=TRUE)
x=da1[,3]
m1 = mean(x)
s1 = sd(x)
xa = (x- m1)/s1
y = matrix(xa[1:1632], ncol=6, byrow=TRUE)
w = rowSums(y^2)
hist(w, breaks= seq(0,40,by=1),
xlim=c(0,40), ylim=c(0,50),
main="Histogram and its Chi-square Fit
for Edmonton temperature data",
xlab=expression("Chi-square data ["~degree~C~"]"^2),
cex.lab=1.4, cex.axis=1.4)
par(new=TRUE)
density = function(x) dchisq(x, df=6)
x=seq(0,40, by=0.1)
plot(x, density(x), type="l", lwd=3,
col="blue", ylim=c(0,0.15),
axes=FALSE, bty="n", xlab="", ylab="")
axis(4, cex.axis=1.4, col='blue', col.axis='blue')
mtext("Chi-square Density", cex=1.4,
side = 4, line = 3, col="blue")
text(20, 0.1, cex=1.4, col="blue",
"Chi-square distribution: df=6")

#Lognormal distribution of the June Madison precipitation
Madison = read.csv("data/MadisonP.csv", header=TRUE)
Madison[1:2,]#Take a look at the first two lines of the data
## STATION NAME LATITUDE LONGITUDE
## 1 USW00014837 MADISON DANE CO REGIONAL AIRPORT, WI US 43.1405 -89.3452
## 2 USW00014837 MADISON DANE CO REGIONAL AIRPORT, WI US 43.1405 -89.3452
## ELEVATION DATE PRCP
## 1 264 1941-01 69.4
## 2 264 1941-02 20.1
daP = matrix(Madison[,7], ncol=12, byrow=TRUE)
x = daP[,6] #The June precipitation data
n=length(x)
m1=mean(x)
A = log(m1)- sum(log(x))/n
c = (1/(4*A))*(1 + sqrt(1 + 4*A/3))
beta = c/m1
mu=mean(log(x))
sig = sd(log(x))
round(c(mu, sig), digits = 2)
## [1] 4.52 0.65
#[1] 4.52 0.65
modelmean = exp(mu + 0.5*sig^2)
modelsd = sqrt((exp(sig^2)-1)*exp(2*mu + sig^2))
datamean = mean(x)
datasd = sd(x)
round(c(modelmean, datamean, modelsd, datasd), digits =2)
## [1] 112.81 109.79 81.26 63.94
#[1] 112.81 109.79 81.26 63.94