################################################################
#
# Chapter 2: Elementary Probability and Statistics  
#
################################################################
#Plot Fig. 2.1
# Go to your working directory
setwd("/rCode")
nycDat=read.csv("data/NYCDailyDatasummer2019.csv", header=T)
dim(nycDat)
## [1] 7555   13
#[1] 7555   13
dayNum=92
nycP=nycDat[1:dayNum, c(3,6)]

dw=c()
for (i in 1:dayNum){if (nycP[i,2] >= 0.2){dw[i] = 1} 
  else {dw[i]=0} }
dw
##  [1] 0 0 0 0 0 0 0 0 0 0 1 1 0 1 0 1 1 1 1 1 1 1 0 0 1 1 0 0 0 1 0 0 1 1 0 0 1 1
## [39] 1 0 0 1 0 0 0 0 0 1 1 0 0 1 1 1 0 0 0 0 1 0 0 1 0 0 1 1 0 1 1 1 1 0 0 0 1 0
## [77] 0 0 1 1 0 0 1 1 1 1 0 0 0 1 0 0
n0=which(dw==0)
n1=which(dw==1)
m=dayNum - 1
par(mfrow=c(1,1))
par(mar=c(2.5,4.5,2.5,4))
plot(1:dayNum, nycP[,2], type="s",
     main="Daily Precipitation of New York City: 
      1 Jun 2019- 31 Aug 2019",
     ylab="Precipitation [mm/day]", 
     xaxt="n", xlab="",
     cex.lab=1.4, cex.axis=1.4)
axis(1, at=c(1, 30, 61, 92), 
     labels=c("1 June", "1 July", "1 Aug", "31 Aug"),
     cex.axis=1.4)
par(new=TRUE)
plot(n0+1.0, dw[n0]-0.2, cex.lab=1.4,
     ylim=c(0,15), pch=15, cex=0.4, col="red",
     axes=FALSE, xlab="", ylab="")
points(n1+1, dw[n1], pch=15, col="blue", cex=0.4)
axis(4, col="blue", col.axis="blue", 
     at=c(0,1), labels =c("D", "W"), 
     las=2, cex.axis=1)
mtext("Dry or Wet Days", 
      col="blue", side=4, line=1.5, cex=1.4)

#
#Plot Fig. 2.2
p=0.6471
n=1:12
pdfn= (1-p)*p^n
par(mar=c(4.5,4.5,3,0.5))
plot(n, pdfn, 
     ylim=c(0,0.25), type="h", lwd=15,
     cex.lab=1.5, cex.axis=1.5,
     xlab="Length of Dry Spell: n", ylab="Probability",
     main="Probability Distribution Function of Dry Spell")
text(1.5,0.25, "(a)",cex=1.5)

p=0.6471
N=1:12
cdfN= 1-p^N
par(mar=c(4.5,4.5,3,0.5))
plot(N, cdfN, 
     ylim=c(0,1), type="s", lwd=3,
     cex.lab=1.5, cex.axis=1.5,
     xlab="N: Dry Spell Not Longer Than N Days", 
     ylab="Cumulative Probability",
     main="Cumulative Distribution Function of Dry Spell")
text(1.5,0.95, "(b)",cex=1.5)

#Plot Fig. 2.3
n=20
x=0:n
pdfdx=dbinom(x, size=n, prob=0.3)
par(mar=c(4.5,4.5,2,4.5))
plot(x, pdfdx, type="h", lty=1, lwd=3,
     xlab="Number of successes: k", ylab="Probability", 
     main="Binomial Distribution: n=20, p=0.3",
     cex.lab=1.4, cex.axis=1.4)
text(11, 0.05, "PDF", cex=2)
par(new=TRUE)
cdfx=pbinom(x, size=n, prob=0.3)
plot(x, cdfx, col="blue",
     type="o", lty=2, axes=FALSE,
     xlab="", ylab="", cex.axis=1.4)
axis(4, col="blue", col.axis = "blue", cex.axis=1.4)
mtext("Cumulative Probability", col="blue",
      side=4, line=3, cex=1.4)
text(11, 0.9, "CDF", cex=2, col="blue")

#
#Plot Fig. 2.4
x=seq(-5,5, len=101)
y=dnorm(x)
plot(x, y, 
     type="l", lty=1, 
     xlab="x", ylab="Probability Density f(x)", 
     main="Standadrd Normal Distribution",
     cex.lab=1.4, cex.axis=1.4)
lines(x,rep(0, 101))
xx=c(x[60], x[60], x[70], x[70])
yy=c(0, y[60], y[70], 0)
polygon(xx, yy, col="brown") 
text(0.7, 0.01, "a", col="brown")
text(2.1, 0.01, "b", col="brown")
text(1.7,0.2, expression(f(x)), cex=1.4)
par(new=TRUE)
plot(x, pnorm(x), type="l", lty=2, col="blue",
     axes=FALSE, ylab="", xlab="", cex.axis=1.4)
axis(4, col="blue", col.axis = "blue", cex.axis=1.4)
text(3,0.9, expression(F(x)), cex=1.4, col="blue")
mtext("Cumulative Probability F(x)", col="blue",
      side=4, line=3, cex=1.4)
text(3.3,0.3, cex=1.3, col="brown",
     expression(P[ab]==integral(f(x)*dx, a,b)))

#
#Plot Fig. 2.5
par(mar=c(4.5, 4.5, 2.5, 0.5))
x<- rnorm(200, mean=5, sd=3)
h<-hist(x, breaks=15, col="gray", 
        xlim=c(-5,15), ylim=c(0,30),
        xlab=expression("Random numbers x of N(5," * 3^2 *")"),
        ylab="Frenquency/Density",
        main="Histogram and Its Normal Curve Fit", 
        cex.lab=1.4, cex.axis=1.4)
xfit<-seq(min(x),max(x),length=40)
yfit<-dnorm(xfit,mean=mean(x),sd=sd(x))
yfit <- yfit*diff(h$mids[1:2])*length(x)
#diff(h$mids[1:2])*length(x) =200 
#is the total histogram area
points(x, rep(0, 200), cex=0.8, pch=3, col="blue")
lines(xfit, yfit,  lwd=2)

#
#Plot Fig. 2.6
#install.packages("ggExtra") 
library(ggplot2)
df <- data.frame(x = rnorm(1000, 5, 3), y = runif(1000, 0,5))
p <- ggplot(df, aes(x, y)) + geom_point() + theme_classic()
ggExtra::ggMarginal(p, type = "histogram")

#
#Plot Fig. 2.7: R code for Poisson distribution
k=0:199
y= dpois(k, lambda=5)
par(mar=c(4.5,4.5,2,4.5))
x
##   [1]  7.50441406  4.48768771  1.87656608  5.49416330 11.17272978  8.27452198
##   [7] -0.77513155  2.04173371  5.22191454  8.36571040  7.88263285  3.48436500
##  [13]  5.79937055  3.14571357  4.52333957  6.86892558  4.53441848  6.85757823
##  [19]  4.11268161  5.93092251  5.33617720  6.50854652  3.09765631  5.33393888
##  [25]  3.51767850  4.82702747  3.21961696  5.40056279  4.09289277  6.25620301
##  [31]  4.93687750  5.27143305  2.19482119  7.78695550  0.73805248  0.47522478
##  [37]  6.37441819  4.05036849  2.57386504  6.39343559  3.09313186  9.04355456
##  [43]  2.89568546  8.22482417  3.59175604  7.35757345  4.80497071  6.13423847
##  [49]  4.01791636  1.02937453  6.26957222  6.67126728  3.23448309 -0.09005929
##  [55]  6.26525599  5.73968220  2.56345050  7.52340559  7.31838339  1.74471455
##  [61] 10.02439306  3.46838991  6.66666709  1.24051006  7.46171053  7.81840962
##  [67]  8.91631575  4.02728455  5.73145761  3.42620015  4.39803775 -2.71305837
##  [73]  4.17625089  5.33875054  4.11059847  2.33337528  6.23081581  1.26944475
##  [79]  0.94520168  3.71720459  8.81098498  5.62966928  5.81314319  3.49488725
##  [85] -3.14858039 -5.48706638  2.44740031  4.34356261  2.86227112  3.99655948
##  [91]  4.62585256  4.33545136  5.69670134  7.43201110  2.82198184  5.59991515
##  [97]  5.96462149  7.02292795  4.47281780  1.03861945  0.44638581  4.23463593
## [103]  0.52918311  6.14451164  4.17450729  1.88112222  0.77928873  2.27216480
## [109]  6.25359654  9.81877882  8.14008264  6.31769926  7.20016567  5.92732307
## [115]  2.12476459  8.09919802  3.72317342 11.94048314 10.34082861  7.93634860
## [121] 10.71116700  4.71051341  6.75596537 -1.23048835  5.41732087  6.27166134
## [127]  3.14609395  3.33483082  1.35036165 -0.95227340  3.02142526 10.69545099
## [133]  5.92232737  3.76053460  2.11626652  5.06168707  3.43251300  3.23291632
## [139]  3.84461856 10.31536717  6.35762523  7.03072232  7.96848031  2.99484821
## [145] 10.24695307  1.01884015  1.16842106  6.53672574  4.69801054  3.46351328
## [151]  4.15847960  3.16350321  4.59862691  3.42733685 -0.40702286  7.83418583
## [157]  1.79717329  2.62707732  5.80731494  4.01137384  7.09633884  3.52848356
## [163]  6.38003289 12.14991392  5.77092663  8.06850629  3.75193910  3.31427972
## [169]  7.41381435 13.37675892  0.62722863  7.60363644  9.57672863  1.00060781
## [175]  5.14915354  3.39911818  3.05186614  6.26507543  5.95966116  3.68801897
## [181]  5.39601996  9.18396009  5.84544794  2.29930154 -1.38912345  7.19931211
## [187]  6.17246210  1.77443943  2.56500337 10.15263285 -0.46727032 -1.17113232
## [193]  7.26129391  4.87596388 10.45160097  5.67278682  3.52352054  7.51443284
## [199]  0.94389727  7.00378428
y
##   [1]  6.737947e-03  3.368973e-02  8.422434e-02  1.403739e-01  1.754674e-01
##   [6]  1.754674e-01  1.462228e-01  1.044449e-01  6.527804e-02  3.626558e-02
##  [11]  1.813279e-02  8.242177e-03  3.434240e-03  1.320862e-03  4.717363e-04
##  [16]  1.572454e-04  4.913920e-05  1.445271e-05  4.014640e-06  1.056484e-06
##  [21]  2.641211e-07  6.288597e-08  1.429227e-08  3.107014e-09  6.472947e-10
##  [26]  1.294589e-10  2.489595e-11  4.610361e-12  8.232787e-13  1.419446e-13
##  [31]  2.365743e-14  3.815715e-15  5.962055e-16  9.033417e-17  1.328444e-17
##  [36]  1.897777e-18  2.635801e-19  3.561893e-20  4.686701e-21  6.008592e-22
##  [41]  7.510739e-23  9.159438e-24  1.090409e-24  1.267918e-25  1.440816e-26
##  [46]  1.600906e-27  1.740116e-28  1.851187e-29  1.928320e-30  1.967673e-31
##  [51]  1.967673e-32  1.929091e-33  1.854895e-34  1.749901e-35  1.620279e-36
##  [56]  1.472981e-37  1.315162e-38  1.153650e-39  9.945263e-41  8.428189e-42
##  [61]  7.023491e-43  5.756959e-44  4.642709e-45  3.684690e-46  2.878664e-47
##  [66]  2.214357e-48  1.677543e-49  1.251898e-50  9.205131e-52  6.670385e-53
##  [71]  4.764561e-54  3.355324e-55  2.330086e-56  1.595950e-57  1.078344e-58
##  [76]  7.188962e-60  4.729580e-61  3.071156e-62  1.968690e-63  1.246006e-64
##  [81]  7.787539e-66  4.807123e-67  2.931172e-68  1.765766e-69  1.051051e-70
##  [86]  6.182656e-72  3.594567e-73  2.065843e-74  1.173775e-75  6.594239e-77
##  [91]  3.663466e-78  2.012894e-79  1.093964e-80  5.881526e-82  3.128471e-83
##  [96]  1.646564e-84  8.575854e-86  4.420543e-87  2.255379e-88  1.139080e-89
## [101]  5.695402e-91  2.819506e-92  1.382111e-93  6.709275e-95  3.225613e-96
## [106]  1.536006e-97  7.245312e-99 3.385660e-100 1.567435e-101 7.190070e-103
## [111] 3.268214e-104 1.472168e-105 6.572180e-107 2.908044e-108 1.275458e-109
## [116] 5.545469e-111 2.390289e-112 1.021491e-113 4.328351e-115 1.818635e-116
## [121] 7.577645e-118 3.131258e-119 1.283303e-120 5.216677e-122 2.103499e-123
## [126] 8.413995e-125 3.338887e-126 1.314522e-127 5.134853e-129 1.990253e-130
## [131] 7.654820e-132 2.921687e-133 1.106700e-134 4.160525e-136 1.552435e-137
## [136] 5.749758e-139 2.113882e-140 7.714897e-142 2.795252e-143 1.005486e-144
## [141] 3.591023e-146 1.273412e-147 4.483847e-149 1.567779e-150 5.443676e-152
## [146] 1.877130e-153 6.428526e-155 2.186573e-156 7.387072e-158 2.478883e-159
## [151] 8.262944e-161 2.736074e-162 9.000244e-164 2.941256e-165 9.549534e-167
## [156] 3.080495e-168 9.873380e-170 3.144389e-171 9.950597e-173 3.129119e-174
## [161] 9.778496e-176 3.036800e-177 9.372839e-179 2.875104e-180 8.765561e-182
## [166] 2.656231e-183 8.000695e-185 2.395418e-186 7.129219e-188 2.109236e-189
## [171] 6.203636e-191 1.813929e-192 5.273049e-194 1.524003e-195 4.379318e-197
## [176] 1.251234e-198 3.554641e-200 1.004136e-201 2.820606e-203 7.878789e-205
## [181] 2.188552e-206 6.045725e-208 1.660913e-209 4.538015e-211 1.233156e-212
## [186] 3.332855e-214 8.959286e-216 2.395531e-217 6.371093e-219 1.685474e-220
## [191] 4.435459e-222 1.161115e-223 3.023737e-225 7.833515e-227 2.018947e-228
## [196] 5.176788e-230 1.320609e-231 3.351800e-233 8.464141e-235 2.126669e-236
plot(x, y, 
     type="p", lty=1, xlab="k", ylab="", 
     main="Poisson Distribution: Mean Rate = 5",
     cex.lab=1.4, cex.axis=1.4)
mtext(side=2, line=3, cex=1.4, 'Probability f(k)')
par(new=TRUE)
plot(x, ppois(x, lambda=5), type="p", pch=16, 
     lty=2, col="blue",
     axes=FALSE, ylab="", xlab="", cex.axis=1.4)
axis(4, col="blue", col.axis = "blue", cex.axis=1.4)
mtext("Cumulative Probability F(k)", col="blue",
      side=4, line=3, cex=1.4)

##
dpois(10, lambda=5)
## [1] 0.01813279
##

#Plot Fig. 2.8: R code
x=seq(0,20, len=201)
ycauchy=dcauchy(x, location = 10, scale=1)
ygauss=dnorm(x, mean=10, sd=sqrt(pi/2))
plot(x, ycauchy, type="l", lwd=2, 
     ylab="Density", 
     main="Comparison between Cauchy and Gaussian Distributions",
     cex.lab=1.4, cex.axis=1.4)
legend(-1,0.35, legend="Cauchy distribution", 
       cex=1.2, bty="n", lty=1, lwd=2)
lines(x, ygauss, lty=2)
legend(-1,0.32, legend="Normal distribution", 
       cex=1.2, bty="n", lty=2)

##
x= seq(1,10, by=0.2)
plot(x,dchisq(x, df = 4))

##
#
#Plot Fig. 2.9: R code
setwd("/rCode")
da1 =read.csv("data/EdmontonT.csv", header=TRUE)
x=da1[,3]
m1 = mean(x)
s1 = sd(x)
xa = (x- m1)/s1
y = matrix(xa[1:1632], ncol=6, byrow=TRUE)
w = rowSums(y^2)
hist(w, breaks= seq(0,40,by=1),
     xlim=c(0,40), ylim=c(0,50),
     main="Histogram and its Chi-square Fit
for Edmonton temperature data", 
     xlab=expression("Chi-square data ["~degree~C~"]"^2),
     cex.lab=1.4, cex.axis=1.4)
par(new=TRUE)
density = function(x) dchisq(x, df=6)
x=seq(0,40, by=0.1)
plot(x, density(x), type="l", lwd=3,
     col="blue", ylim=c(0,0.15),
     axes=FALSE, bty="n", xlab="", ylab="")
axis(4, cex.axis=1.4, col='blue', col.axis='blue')
mtext("Chi-square Density", cex=1.4,
      side = 4, line = 3, col="blue")
text(20, 0.1, cex=1.4, col="blue",
     "Chi-square distribution: df=6")

#Lognormal distribution of the June Madison precipitation
  
Madison = read.csv("data/MadisonP.csv", header=TRUE)
Madison[1:2,]#Take a look at the first two lines of the data
##       STATION                                    NAME LATITUDE LONGITUDE
## 1 USW00014837 MADISON DANE CO REGIONAL AIRPORT, WI US  43.1405  -89.3452
## 2 USW00014837 MADISON DANE CO REGIONAL AIRPORT, WI US  43.1405  -89.3452
##   ELEVATION    DATE PRCP
## 1       264 1941-01 69.4
## 2       264 1941-02 20.1
daP = matrix(Madison[,7], ncol=12, byrow=TRUE)
x = daP[,6] #The June precipitation data
n=length(x)
m1=mean(x)
A = log(m1)- sum(log(x))/n
c = (1/(4*A))*(1 + sqrt(1 + 4*A/3))
beta = c/m1
mu=mean(log(x))
sig = sd(log(x))
round(c(mu, sig), digits = 2)
## [1] 4.52 0.65
#[1] 4.52 0.65
modelmean = exp(mu + 0.5*sig^2)
modelsd = sqrt((exp(sig^2)-1)*exp(2*mu + sig^2))
datamean = mean(x)
datasd = sd(x)
round(c(modelmean, datamean, modelsd, datasd), digits =2)
## [1] 112.81 109.79  81.26  63.94
#[1] 112.81 109.79  81.26  63.94