Set the working directory by running setwd("/Users/alyss/OneDrive/Documents/LinAlg"), replacing this path with the directory on your own computer.
Run getwd() to verify that you are in the intended directory.
Before plotting a figure, run setEPS(), then run postscript("figname.eps", width = w, height = h), replacing figname, w, and h with the desired file name, width, and height in inches. The figure will not appear on screen; it is instead written to an EPS file in your working directory. After the plotting commands, run dev.off() to close the device and finish writing the file.
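For example, the full EPS workflow looks like the short sketch below; the file name "fig0401.eps", the 8-by-6-inch size, and the toy plot are placeholders for your own choices.
setEPS() #Request EPS output
postscript("fig0401.eps", width = 8, height = 6) #Open the EPS file
plot(1:10, (1:10)^2, xlab = "x", ylab = "y") #Any plotting commands go here
dev.off() #Close the device so the EPS file is written to disk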
#Fig 4.1
x= c(
1671.5, 1635.6, 2097.0, 1295.4, 1822.7, 2396.9, 2763.0, 1284.7,
1525.2, 1328.6, 1378.9, 2323.8, 2757.8, 1033.3, 1105.5, 1185.7,
2343.9, 1764.5, 1271.0, 2347.3, 2094.0, 2643.2, 1837.9, 1121.7)
y= c(
22.064, 23.591, 18.464, 23.995, 20.645, 17.175, 13.582, 24.635,
22.178, 24.002, 23.952, 16.613, 13.588, 25.645, 25.625, 25.828,
17.626, 22.433, 24.539, 17.364, 17.327, 15.413, 22.174, 24.549)
par(mar=c(4.5,4.5,2.5,0.5))
plot(x,y,
xlab="Elevation [m]",
ylab=expression("Temperature ["~degree~"C]"),
main="Colorado Elevation and July Tmean: 1981-2010 Average",
cex.lab=1.5, cex.axis=1.5, cex.main =1.2)
reg=lm(y~x)
reg
##
## Call:
## lm(formula = y ~ x)
##
## Coefficients:
## (Intercept) x
## 33.476300 -0.006982
summary(reg)
##
## Call:
## lm(formula = y ~ x)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.52923 -0.60674 -0.08437 0.40181 1.53427
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 33.4763004 0.5460279 61.31 <2e-16 ***
## x -0.0069819 0.0002913 -23.97 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.7808 on 22 degrees of freedom
## Multiple R-squared: 0.9631, Adjusted R-squared: 0.9614
## F-statistic: 574.5 on 1 and 22 DF, p-value: < 2.2e-16
abline(reg,lwd=3)
text(2100, 25.5,
expression("Temperature lapse rate: 7.0"~degree~"C/1.0km"),
cex=1.5)
text(2350, 24, "y = 33.48 - 0.0070 x", cex=1.5)
text(2350, 22.5,"R-squared = 0.96", cex=1.5)
#dev.off()
#Colorado TLR regression analysis
lm(y ~ x)
##
## Call:
## lm(formula = y ~ x)
##
## Coefficients:
## (Intercept) x
## 33.476300 -0.006982
reg = lm(y ~ x)
round(reg$residuals, digits = 5)
## 1 2 3 4 5 6 7 8
## 0.25792 1.53427 -0.37129 -0.43697 -0.10542 0.43358 -0.60335 0.12833
## 9 10 11 12 13 14 15 16
## -0.64953 -0.19817 0.10302 -0.63880 -0.63366 -0.61692 -0.13283 0.63012
## 17 18 19 20 21 22 23 24
## 0.51454 1.27623 -0.06333 0.27628 -1.52923 0.39122 1.52971 -1.09572
mean(reg$residuals)
## [1] 1.62043e-17
xa = x - mean(x)
sum(xa*reg$residuals)
## [1] -2.83773e-13
sum((reg$residuals)^2)/(length(y) -2)
## [1] 0.6096193
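#Optional cross-check: summary(reg)$sigma is the residual standard
#error, so its square should reproduce the estimate above
summary(reg)$sigma^2 #Should be approximately 0.6096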
#Fig 4.2
par(mar=c(0.0,0.5,0.0,0.5))
plot(0,0, xlim=c(0,5.2), ylim=c(0,2.2),
axes = FALSE, xlab="", ylab="")
arrows(0,0,4,0, angle=5, code=2, lwd=3, length=0.5)
arrows(4,0,4,2, angle=5, code=2, lwd=3, length=0.5)
arrows(0,0,4,2, angle=5, code=2, lwd=3, length=0.5)
arrows(5,0,4,2, angle=7, code=2, lwd=2, lty=3, length=0.5)
arrows(0,0,5,0, angle=7, code=2, lwd=2, lty=3, length=0.5)
arrows(3,0,4,2, angle=7, code=2, lwd=2, lty=3, length=0.5)
arrows(0,0,3,0, angle=7, code=2, lwd=2, lty=3, length=0.5)
segments(3.9,0, 3.9, 0.1)
segments(3.9, 0.1, 4.0, 0.1)
text(2,0.2, expression(hat(b)~bold(x)[a]), cex=2)
text(2,1.2, expression(bold(y)[a]), cex=2)
text(4.1,1, expression(bold(e)), cex=2)
text(3.8,0.6, expression(paste("Shortest ",bold(e))),
cex=1.5, srt=90)
text(3.4,1.1, expression(paste("Longer ",bold(e))),
cex=1.5, srt=71)
text(4.6,1.1, expression(paste("Longer ",bold(e))),
cex=1.5, srt=-71)
#dev.off()
#estimating regression slope b
#Method 1: Using vector projection
xa = x - mean(x) #Compute the x anomalies (deviations from the mean)
nxa = sqrt(sum(xa^2)) #Norm of the anomaly data vector
ya = y - mean(y)
nya=sqrt(sum(ya^2))
sum(ya*(xa/nxa))/nxa #Compute an estimate for b
## [1] -0.006981885
#Method 2: Using correlation
corxy=cor(xa, ya) #Compute the correlation between xa and ya
corxy #A strong negative correlation
## [1] -0.9813858
corxy*nya/nxa #Compute an estimate for b
## [1] -0.006981885
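#Another option: the slope also equals the sample covariance of x and y
#divided by the sample variance of x
cov(x, y)/var(x) #Should again give approximately -0.006982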
#computing MV
var(reg$fitted.values)
## [1] 15.22721
#Or another way
yhat = reg$fitted.values
var(yhat)
## [1] 15.22721
#Or still another way
n = 24
sum((yhat - mean(yhat))^2)/(n-1)
## [1] 15.22721
#computing YV
sum((y - mean(y))^2)/(n-1)
## [1] 15.81033
#Or another way
var(y)
## [1] 15.81033
#computing R-squared value
var(reg$fitted.values)/var(y) #This is the R-squared value
## [1] 0.9631181
cor(x,y)
## [1] -0.9813858
(cor(x,y))^2 #This is the R-squared value
## [1] 0.9631181
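#Optional cross-check of the R-squared value reported by summary()
summary(reg)$r.squared #Should be approximately 0.9631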
qt(c(.025, .975), df=22)
## [1] -2.073873 2.073873
summary(reg)
##
## Call:
## lm(formula = y ~ x)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.52923 -0.60674 -0.08437 0.40181 1.53427
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 33.4763004 0.5460279 61.31 <2e-16 ***
## x -0.0069819 0.0002913 -23.97 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.7808 on 22 degrees of freedom
## Multiple R-squared: 0.9631, Adjusted R-squared: 0.9614
## F-statistic: 574.5 on 1 and 22 DF, p-value: < 2.2e-16
-0.0069818 - 2.073873 * 0.0002913
## [1] -0.007585919
-0.0069818 + 2.073873 * 0.0002913
## [1] -0.006377681
33.4762157 - 2.073873*0.5460279
## [1] 32.34382
33.4762157 + 2.073873*0.5460279
## [1] 34.60861
(-0.0069818-(-0.0073))/0.0002913
## [1] 1.092345
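#The same 95% confidence intervals can be obtained directly with confint()
confint(reg, level = 0.95)
#Expected approximately: intercept (32.34, 34.61), slope (-0.00758, -0.00638)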
#Fig. 4.3: Confidence interval of a regression model
setwd("/Users/alyss/OneDrive/Documents/LinAlg")
#Confidence interval of the linear model
x1 = seq(max(x), min(x),len=100)
n = 24
xbar = mean(x)
reg = lm(y ~ x)
SSE = sum((reg$residuals)^2)
s_squared = SSE/(length(y)-2)
s = sqrt(s_squared)
modTLR = 33.476216 - 0.006982*x1 #Fitted line from the estimated coefficients
xbar = mean(x)
Sxx = sum((x-xbar)^2)
CIupperModel= modTLR + qt(.975, df=n-2)*s*sqrt((1/n)+(x1-xbar)^2/Sxx)
CIlowerModel= modTLR - qt(.975, df=n-2)*s*sqrt((1/n)+(x1-xbar)^2/Sxx)
CIupperResponse= modTLR + qt(.975, df=n-2)*s*sqrt(1+(1/n)+(x1-xbar)^2/Sxx)
CIlowerResponse= modTLR - qt(.975, df=n-2)*s*sqrt(1+(1/n)+(x1-xbar)^2/Sxx)
par(mar=c(4.5,4.5,2.0,0.5))
plot(x,y,
ylim=c(10,30), xlim=c(1000,3000),
xlab="Elevation [m]",
ylab=bquote("Temperature ["~degree~"C]"),
main="Colorado Elevation and July Tmean: 1981-2010 Average",
cex.lab=1.5, cex.axis=1.5)
lines(x1,CIupperModel,type="l",col='red')
lines(x1,CIlowerModel,type="l",col='red')
lines(x1,CIupperResponse,type="l",col='blue')
lines(x1,CIlowerResponse,type="l",col='blue')
abline(reg,lwd=3)
text(2280, 26,
bquote("Temperature lapse rate: 7.0"~degree~"C/km"),
cex=1.5)
text(2350, 27.5,"R-squared = 0.96", cex=1.5)
text(2350, 29, "y= 33.48 - 0.0070 x", cex=1.5)
text(1600, 15,"Blue lines: CI of July Tmean RV",
col="blue", cex=1.5)
text(1600, 13.5,"Red lines: CI of the fitted model",
col="red", cex=1.5)
#dev.off()
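#The two sets of bands can also be generated with predict(); a sketch,
#assuming reg and x1 are as defined above
bandM = predict(reg, newdata = data.frame(x = x1),
                interval = "confidence", level = 0.95)
bandR = predict(reg, newdata = data.frame(x = x1),
                interval = "prediction", level = 0.95)
#The "lwr" and "upr" columns of bandM should closely match
#CIlowerModel and CIupperModel; those of bandR should closely match
#CIlowerResponse and CIupperResponse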
#Fig. 4.4
reg = lm(y~x)
par(mar=c(4.5,4.5,2.0,0.5))
plot(x, reg$residuals, pch=5,
ylim=c(-2,2), xlim=c(1000,2800),
xlab="Elevation [m]",
ylab=bquote("Residual Temp ["~degree~"C]"),
main="Residuals of the Colorado 1981-2010 July Tmean vs. Elevation",
cex.lab=1.5, cex.axis=1.5, cex.main = 1.2)
#dev.off()
#Plot Fig. 4.5: R code
reg = lm(y ~ x)
par(mar=c(4.5,4.5,2.0,0.5))
qqnorm(reg$residuals, pch=5,
main="QQ-Normal Plot for the Colorado TLR Residuals",
cex.lab = 1.4, cex.axis = 1.4)
qqline(reg$residuals, lty=2)
#dev.off()
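#Optional numerical complement to the Q-Q plot: the Shapiro-Wilk
#normality test on the same residuals; a large p-value means
#normality is not rejected
shapiro.test(reg$residuals)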
#DW-test for independence
#install.packages("lmtest")
library(lmtest)
## Loading required package: zoo
##
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
ElevTemp=cbind(x,y, 1:24)
#Sort the data for ascending elevation
ElevTemp=ElevTemp[order(x),]
reg1=lm(ElevTemp[,2] ~ ElevTemp[,1])
dwtest(reg1)
##
## Durbin-Watson test
##
## data: reg1
## DW = 2.3072, p-value = 0.7062
## alternative hypothesis: true autocorrelation is greater than 0
#DW = 2.3072, p-value = 0.7062
#Mann-Kendall test
#install.packages("trend")
library(trend)
ElevTemp=cbind(x, y, 1:24)
#Sort the data for ascending elevation
ElevTemp=ElevTemp[order(x),]
reg1=lm(ElevTemp[,2] ~ ElevTemp[,1])
ElevTemp[,3]=reg1$residuals
mk.test(ElevTemp[,3]) #MK test on the regression residuals
##
## Mann-Kendall trend test
##
## data: ElevTemp[, 3]
## z = 0.47128, n = 24, p-value = 0.6374
## alternative hypothesis: true S is not equal to 0
## sample estimates:
## S varS tau
## 2.000000e+01 1.625333e+03 7.246377e-02
mk.test(ElevTemp[,2])
##
## Mann-Kendall trend test
##
## data: ElevTemp[, 2]
## z = -5.9779, n = 24, p-value = 2.261e-09
## alternative hypothesis: true S is not equal to 0
## sample estimates:
## S varS tau
## -242.0000000 1625.3333333 -0.8768116
lat=c(39.9919, 38.4600, 39.2203, 38.8236, 39.2425, 37.6742,
39.6261, 38.4775, 40.6147, 40.2600, 39.1653, 38.5258,
37.7717, 38.0494, 38.0936, 38.0636, 37.1742, 38.4858,
38.0392, 38.0858, 40.4883, 37.9492, 37.1786, 40.0583)
lon=c(
-105.2667, -105.2256, -105.2783, -102.3486, -107.9631, -106.3247,
-106.0353, -102.7808, -105.1314, -103.8156, -108.7331, -106.9675,
-107.1097, -102.1236, -102.6306, -103.2153, -105.9392, -107.8792,
-103.6933, -106.1444, -106.8233, -107.8733, -104.4869, -102.2189)
#TLR multivariate linear regression
elev = x; temp = y #The x and y data were entered earlier
dat = cbind(lat, lon, elev, temp)
datdf = data.frame(dat)
datdf[1:2,] #Show the data of the first two stations
## lat lon elev temp
## 1 39.9919 -105.2667 1671.5 22.064
## 2 38.4600 -105.2256 1635.6 23.591
#Multivariate linear regression
reg=lm(temp ~ lat + lon + elev, data = datdf)
summary(reg) #Display the regression results
##
## Call:
## lm(formula = temp ~ lat + lon + elev, data = datdf)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.88838 -0.43114 0.06135 0.27781 1.31295
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 36.4399561 9.4355746 3.862 0.000971 ***
## lat -0.4925051 0.1320096 -3.731 0.001319 **
## lon -0.1630799 0.0889159 -1.834 0.081564 .
## elev -0.0075693 0.0003298 -22.953 7.67e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.6176 on 20 degrees of freedom
## Multiple R-squared: 0.979, Adjusted R-squared: 0.9759
## F-statistic: 311.1 on 3 and 20 DF, p-value: < 2.2e-16
round(reg$coefficients, digits=5)
## (Intercept) lat lon elev
## 36.43996 -0.49251 -0.16308 -0.00757
colnames(datdf) <- c('x1', 'x2', 'x3', 'y')
reg=lm(y ~ x1 + x2 + x3, data = datdf)
reg
##
## Call:
## lm(formula = y ~ x1 + x2 + x3, data = datdf)
##
## Coefficients:
## (Intercept) x1 x2 x3
## 36.439956 -0.492505 -0.163080 -0.007569
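#Optional linear-algebra check: solve the normal equations
#(X^T X) b = X^T y directly to reproduce the coefficients above
X = cbind(1, datdf$x1, datdf$x2, datdf$x3) #Design matrix with intercept
b = solve(t(X) %*% X, t(X) %*% datdf$y)
round(b, digits = 5)
#Expected approximately: 36.43996, -0.49251, -0.16308, -0.00757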
#Fig. 4.6 and make regression diagnostics
setwd("/Users/alyss/OneDrive/Documents/LinAlg")
dtmean<-read.table(
"data/aravg.ann.land_ocean.90S.90N.v4.0.1.201907.txt",
header=F)
dim(dtmean)
## [1] 140 6
x = dtmean[1:139,1]
y = dtmean[1:139,2]
reg = lm(y ~ x) #linear regression
reg
##
## Call:
## lm(formula = y ~ x)
##
## Coefficients:
## (Intercept) x
## -13.872921 0.007023
#Confidence interval of the linear model
xbar = mean(x)
SSE = sum((reg$residuals)^2)
s_squared = SSE/(length(y)-2)
s = sqrt(s_squared)
modT = -13.872921 + 0.007023*x #Fitted line using the coefficients estimated above
xbar = mean(x)
Sxx = sum((x-xbar)^2)
n = length(y)
CIupperModel= modT +
qt(.975, df=n-2)*s*sqrt((1/n)+(x-xbar)^2/Sxx)
CIlowerModel= modT -
qt(.975, df=n-2)*s*sqrt((1/n)+(x-xbar)^2/Sxx)
CIupperResponse= modT +
qt(.975, df=n-2)*s*sqrt(1+(1/n)+(x-xbar)^2/Sxx)
CIlowerResponse= modT -
qt(.975, df=n-2)*s*sqrt(1+(1/n)+(x-xbar)^2/Sxx)
#The "r" bands below use df = 5, the reduced effective degrees of
#freedom due to serial correlation (computed as edof further below)
CIupperModelr= modT +
qt(.975, df=5)*s*sqrt((1/n)+(x-xbar)^2/Sxx)
CIlowerModelr= modT -
qt(.975, df=5)*s*sqrt((1/n)+(x-xbar)^2/Sxx)
CIupperResponser= modT +
qt(.975, df=5)*s*sqrt(1+(1/n)+(x-xbar)^2/Sxx)
CIlowerResponser= modT -
qt(.975, df=5)*s*sqrt(1+(1/n)+(x-xbar)^2/Sxx)
par(mfrow=c(2,1))
par(mar=c(0,4.5,2.5,0.7))
plot(x, y, ylim = c(-1.5, 1),
type="o", xaxt="n", yaxt="n",
xlab="Year", ylab=bquote("Temperature ["~degree~"C]"),
main="Global Annual Mean Surface Temperature Anomalies",
cex.lab=1.4, cex.axis=1.4
)
axis(side = 2, at = c(-1.0, 0, 1.0), cex.axis = 1.4)
abline(reg, col="black", lwd=3)
lines(x,CIupperModel,type="l",col='red')
lines(x,CIlowerModel,type="l",col='red')
lines(x,CIupperResponse,type="l",col='blue')
lines(x,CIlowerResponse,type="l",col='blue')
lines(x,CIupperModelr,type="l", lty = 3, col='red')
lines(x,CIlowerModelr,type="l", lty = 3, col='red')
lines(x,CIupperResponser,type="l",lty = 3, col='blue')
lines(x,CIlowerResponser,type="l",lty = 3, col='blue')
text(1940, 0.5,
bquote("Linear trend: 0.7023"~degree~"C per century"),
col="black",cex=1.4)
text(1880, 0.9, "(a)", cex=1.4)
par(mar=c(4.5,4.5,0,0.7))
plot(x, reg$residuals, ylim = c(-0.6,0.6),
pch=5, cex.lab=1.4, cex.axis=1.4,
yaxt = 'n', xlab="Year",
ylab=bquote("Residuals ["~degree~"C]"))
axis(side = 2, at = c(-0.3, 0, 0.3), cex.axis = 1.4)
text(1880, 0.5, "(b)", cex=1.4)
#dev.off()
#Kolmogorov-Smirnov (KS) test for normality
ks.test(reg$residuals, pnorm) #Rejected here: the residuals are compared with the standard normal N(0,1) without being standardized first
##
## Asymptotic one-sample Kolmogorov-Smirnov test
##
## data: reg$residuals
## D = 0.37221, p-value < 2.2e-16
## alternative hypothesis: two-sided
#Diagnostics on independence and normality
# Durbin-Watson (DW) test for independence
#install.packages("lmtest")
library(lmtest)
dwtest(reg) #The independence assumption is rejected
##
## Durbin-Watson test
##
## data: reg
## DW = 0.40484, p-value < 2.2e-16
## alternative hypothesis: true autocorrelation is greater than 0
rho1 = acf(y)[[1]][2] #Lag-1 autocorrelation of y
rho1
## [1] 0.9317402
edof = (length(y) - 2)*(1 - rho1)/(1 + rho1)
edof
## [1] 4.841017
qt(.975, df=137)
## [1] 1.977431
qt(.975, df=5)
## [1] 2.570582
yautoc = acf(y) #Auto-correlation function
yautoc$acf[2]
## [1] 0.9317402
cor(y[1:138], y[2:139])
## [1] 0.9502565
length(y)
## [1] 139
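#A rough 95% CI of the trend using the reduced effective dof; a sketch,
#where se1 is the standard error of the slope from summary()
b1 = reg$coefficients[2]
se1 = summary(reg)$coefficients[2, 2]
b1 + c(-1, 1)*qt(.975, df = round(edof))*se1 #Wider than with df = 137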
#Confidence interval of the linear model
x1 = x
xbar = mean(x1)
reg8018 = lm(y ~ x1)
SSE = sum((reg8018$residuals)^2)
s_squared = SSE/(length(y)-2)
s = sqrt(s_squared)
modT = -13.872921 + 0.007023*x1 #Coefficients estimated above
xbar = mean(x)
Sxx = sum((x-xbar)^2)
CIupperModel= modT + qt(.975, df=n-2)*s*sqrt((1/n)+(x1-xbar)^2/Sxx)
CIlowerModel= modT - qt(.975, df=n-2)*s*sqrt((1/n)+(x1-xbar)^2/Sxx)
CIupperResponse= modT + qt(.975, df=n-2)*s*sqrt(1+(1/n)+(x1-xbar)^2/Sxx)
CIlowerResponse= modT - qt(.975, df=n-2)*s*sqrt(1+(1/n)+(x1-xbar)^2/Sxx)
plot(x1, y,
xlab="Year",
ylab=bquote("Temperature ["~degree~"C]"),
main="Global Annual Mean Surface Temperature Anomalies: 1880-2018",
cex.lab=1.5, cex.axis=1.5)
lines(x1,CIupperModel,type="l",col='red')
lines(x1,CIlowerModel,type="l",col='red')
lines(x1,CIupperResponse,type="l",col='blue')
lines(x1,CIlowerResponse,type="l",col='blue')
#Linear regression diagnostics: Check the assumptions
#Normality test by Q-Q plot
par(mfrow=c(1,1))
par(mar=c(4.5,4.5,2.5,0.7))
qqnorm(reg8018$residuals, pch=5,
ylim=c(-0.4,0.4),xlim=c(-3,3),
main="QQ-Normal Plot for the NOAAGlobalTemp: 1880-2018",
cex.lab=1.4, cex.axis=1.4)
qqline(reg8018$residuals, lty=2, col = 'red')
#Kolmogorov-Smirnov (KS) test for normality
resi_sd=sd(reg8018$residuals)
resi_mean=mean(reg8018$residuals)
test_norm = rnorm(length(x1), mean = resi_mean,
sd=resi_sd)
ks.test(reg8018$residuals,test_norm) #Conclusion: Normal distribution is not rejected.
##
## Asymptotic two-sample Kolmogorov-Smirnov test
##
## data: reg8018$residuals and test_norm
## D = 0.1223, p-value = 0.2496
## alternative hypothesis: two-sided
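#The one-sample KS test with the estimated mean and sd gives a similar
#result; plugging in estimated parameters makes the p-value only
#approximate (the Lilliefors test is the exact variant)
ks.test(reg8018$residuals, "pnorm", mean = resi_mean, sd = resi_sd)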
#Check independence by Durbin-Watson (DW) test
dwtest(reg8018) #Conclusion: There is a significant serial correlation.
##
## Durbin-Watson test
##
## data: reg8018
## DW = 0.40484, p-value < 2.2e-16
## alternative hypothesis: true autocorrelation is greater than 0
yautoc = acf(y)
yautoc$acf[2]
## [1] 0.9317402
cor(y,y) #Trivially equal to 1; compare with the lag-1 correlation above
## [1] 1
#rm(list=ls()) #Remove all variables defined in this session