Chapter 3: Linear Models Using Regression and Data
A Linear Model for the Life Expectancy in France
#Life expectancy in France
dat <- read.csv("~/Desktop/RMathModel/data/LifeExpectancyWorldBank2018clean.csv",
header=TRUE)
dat[1:3,1:5] # yields first 3 rows and first 5 columns
## Country.Name Country.Code X1960 X1961 X1962
## 1 Aruba ABW 65.662 66.074 66.444
## 2 Afghanistan AFG 32.292 32.742 33.185
## 3 Angola AGO 33.251 33.573 33.914
dim(dat) #displays dimension of data frame
## [1] 264 60
#data from 1960 to 2017, 58 years
dat[1:5,55:60]
## X2012 X2013 X2014 X2015 X2016 X2017
## 1 75.299 75.440 75.582 75.725 75.867 NA
## 2 62.086 62.494 62.895 63.288 63.673 NA
## 3 59.770 60.373 60.858 61.241 61.547 NA
## 4 77.389 77.702 77.963 78.174 78.345 NA
## 5 NA NA NA NA NA NA
which(dat == "France") # find the index at which "France" exists
## [1] 76
dat[76,1:5]
## Country.Name Country.Code X1960 X1961 X1962
## 76 France FRA 69.86829 70.11707 70.31463
yr <- 1960:1989 # create a sequence of years
le <- dat[76,3:32]
le
## X1960 X1961 X1962 X1963 X1964 X1965 X1966 X1967
## 76 69.86829 70.11707 70.31463 70.51463 70.66341 70.8122 70.96098 71.16098
## X1968 X1969 X1970 X1971 X1972 X1973 X1974 X1975
## 76 71.30976 71.45854 71.65854 71.90732 72.10732 72.3561 72.60488 72.85366
## X1976 X1977 X1978 X1979 X1980 X1981 X1982 X1983 X1984 X1985
## 76 73.10244 73.35122 73.60244 73.85122 74.05122 74.3 74.5 74.8 75 75.3
## X1986 X1987 X1988 X1989
## 76 75.6 75.8 76.1 76.34878
ler <- as.numeric(le) #Converts characters values into numeric values
par(mar=c(4.5,4.5,2,1.5)) #Allows user to modify and set graph parameters
plot(yr, ler, ylim=c(70,77),
xlab="Time", ylab="Life Expectancy",
main="Life Expectancy in France",
cex.main=1.5, cex.lab=1.5, cex.axis=1.5)
lm(ler ~ yr) #Creates linear regression model of ler to yr
##
## Call:
## lm(formula = ler ~ yr)
##
## Coefficients:
## (Intercept) yr
## -367.9487 0.2233
abline(lm(ler ~ yr), col="blue", lwd=2)
#Adds fitted linear model to the graph
text(1968,76, "Trend = 0.2233 per year", cex=1.5, col="blue")
#Adds text to graph
grid(nx = NULL, ny = NULL)

#Adds grid lines to graph
Energy Consumption and Heating Degree Data
#HDD and kWh: United States Monthly data from Oct. to Sept.
#setwd("~/sshen/mathmodel")
# uncomment next two lines and 'dev.off()' to save plot/figure
# on to your working directory
#setEPS()
#postscript("figm0302.eps", height=6, width=8)
par(mar=c(4.2,4.7,3.5,0.8))
hdd <- c(163 ,228 ,343 ,373, 301, 238 ,137 ,84 ,38 ,15,14,34)
kwh <- c(593 ,676 ,1335 ,1149 , 1127, 892 , 538 ,289 , 172, 131,134,134)
lm(kwh ~ hdd)
##
## Call:
## lm(formula = kwh ~ hdd)
##
## Coefficients:
## (Intercept) hdd
## 53.505 3.317
plot(hdd, kwh, xlim=c(0,400),ylim=c(0,1500),
xlab="Climate Index Data: HDD",
ylab="Energy Index Data: kWh",
main="Monthly Household Energy Consumption\n vs. Climate Data",
cex.lab=1.5, cex.axis=1.5, cex.main=1.5)
abline(lm(kwh ~ hdd), col="red",lwd=2)
text(140,1300, col="red", cex=1.3,
"Linear Model: kWh = 3.317HDD + 53.505")
text(110,1100, col="red", cex=1.3,
expression(R^2==0.9663))
grid(nx = NULL, ny = NULL)

#dev.off()
An Example of Linear Model and Data Analysis Using R: A Global
Warming Dataset
#Read the txt data into R by
dtmean <- read.table("~/Desktop/RMathModel/data/gl_land_nohead.txt",
header=F)
#Another way to import the data is to set the R working directory
#to the same directory where the dataset is located.
#setwd("~/sshen/mathmodel")
#Then import the data by reading the file name
dtmean <- read.table("~/Desktop/RMathModel/data/gl_land_nohead.txt", header=F)
dtmean
## V1 V2 V3
## 1 1880 -0.41 -99.99
## 2 1881 -0.39 -99.99
## 3 1882 -0.30 -0.40
## 4 1883 -0.32 -0.41
## 5 1884 -0.60 -0.46
## 6 1885 -0.46 -0.52
## 7 1886 -0.60 -0.55
## 8 1887 -0.64 -0.47
## 9 1888 -0.44 -0.49
## 10 1889 -0.20 -0.48
## 11 1890 -0.56 -0.45
## 12 1891 -0.59 -0.47
## 13 1892 -0.49 -0.51
## 14 1893 -0.51 -0.47
## 15 1894 -0.41 -0.41
## 16 1895 -0.33 -0.35
## 17 1896 -0.30 -0.33
## 18 1897 -0.22 -0.31
## 19 1898 -0.39 -0.27
## 20 1899 -0.32 -0.24
## 21 1900 -0.15 -0.27
## 22 1901 -0.14 -0.28
## 23 1902 -0.37 -0.33
## 24 1903 -0.43 -0.37
## 25 1904 -0.54 -0.39
## 26 1905 -0.37 -0.43
## 27 1906 -0.26 -0.43
## 28 1907 -0.55 -0.40
## 29 1908 -0.41 -0.40
## 30 1909 -0.43 -0.42
## 31 1910 -0.36 -0.39
## 32 1911 -0.38 -0.37
## 33 1912 -0.36 -0.30
## 34 1913 -0.34 -0.25
## 35 1914 -0.08 -0.23
## 36 1915 -0.07 -0.26
## 37 1916 -0.31 -0.28
## 38 1917 -0.52 -0.30
## 39 1918 -0.43 -0.35
## 40 1919 -0.19 -0.32
## 41 1920 -0.29 -0.27
## 42 1921 -0.17 -0.24
## 43 1922 -0.26 -0.25
## 44 1923 -0.28 -0.24
## 45 1924 -0.23 -0.22
## 46 1925 -0.26 -0.20
## 47 1926 -0.05 -0.17
## 48 1927 -0.19 -0.19
## 49 1928 -0.11 -0.17
## 50 1929 -0.34 -0.18
## 51 1930 -0.16 -0.16
## 52 1931 -0.09 -0.19
## 53 1932 -0.12 -0.14
## 54 1933 -0.24 -0.15
## 55 1934 -0.10 -0.16
## 56 1935 -0.22 -0.13
## 57 1936 -0.09 -0.07
## 58 1937 0.00 -0.07
## 59 1938 0.07 -0.01
## 60 1939 -0.10 0.02
## 61 1940 0.08 0.03
## 62 1941 0.04 0.02
## 63 1942 0.05 0.05
## 64 1943 0.01 0.02
## 65 1944 0.06 -0.01
## 66 1945 -0.08 -0.01
## 67 1946 -0.07 -0.02
## 68 1947 0.06 -0.06
## 69 1948 -0.09 -0.09
## 70 1949 -0.13 -0.09
## 71 1950 -0.23 -0.11
## 72 1951 -0.08 -0.08
## 73 1952 -0.02 -0.08
## 74 1953 0.07 -0.05
## 75 1954 -0.13 -0.08
## 76 1955 -0.11 -0.07
## 77 1956 -0.22 -0.07
## 78 1957 0.04 -0.04
## 79 1958 0.05 -0.02
## 80 1959 0.04 0.04
## 81 1960 -0.02 0.03
## 82 1961 0.08 0.02
## 83 1962 0.01 -0.04
## 84 1963 0.01 -0.06
## 85 1964 -0.26 -0.09
## 86 1965 -0.17 -0.10
## 87 1966 -0.07 -0.12
## 88 1967 -0.02 -0.07
## 89 1968 -0.10 -0.02
## 90 1969 0.01 -0.02
## 91 1970 0.07 -0.03
## 92 1971 -0.08 0.04
## 93 1972 -0.03 0.03
## 94 1973 0.22 0.02
## 95 1974 -0.04 0.00
## 96 1975 0.02 0.05
## 97 1976 -0.17 0.03
## 98 1977 0.21 0.08
## 99 1978 0.14 0.14
## 100 1979 0.20 0.27
## 101 1980 0.34 0.25
## 102 1981 0.44 0.30
## 103 1982 0.13 0.30
## 104 1983 0.39 0.27
## 105 1984 0.22 0.24
## 106 1985 0.20 0.29
## 107 1986 0.24 0.31
## 108 1987 0.40 0.34
## 109 1988 0.49 0.41
## 110 1989 0.38 0.46
## 111 1990 0.52 0.43
## 112 1991 0.52 0.38
## 113 1992 0.23 0.38
## 114 1993 0.27 0.39
## 115 1994 0.37 0.38
## 116 1995 0.56 0.44
## 117 1996 0.47 0.55
## 118 1997 0.53 0.60
## 119 1998 0.84 0.60
## 120 1999 0.60 0.65
## 121 2000 0.57 0.70
## 122 2001 0.68 0.68
## 123 2002 0.79 0.70
## 124 2003 0.77 0.76
## 125 2004 0.68 0.78
## 126 2005 0.87 0.79
## 127 2006 0.77 0.77
## 128 2007 0.85 0.79
## 129 2008 0.65 0.80
## 130 2009 0.79 0.80
## 131 2010 0.92 0.78
## 132 2011 0.79 0.82
## 133 2012 0.78 0.84
## 134 2013 0.82 0.85
## 135 2014 0.88 -99.99
## 136 2015 0.99 -99.99
dim(dtmean)
## [1] 136 3
yrtime <- dtmean[,1]#Column 1 is time in Year
tmean <- dtmean[,2] #Column 2 is mean temp.
summary(tmean) #Displays summary statistics of data
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## -0.64000 -0.28250 -0.08000 0.01243 0.22000 0.99000
summary(yrtime)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 1880 1914 1948 1948 1981 2015
boxplot(tmean, main="Land Temp. Anomalies")
grid(nx = NULL, ny = NULL)

hist(tmean,
main="Histogram: Land Temperature Anomalies",
xlab="Temp. Anomalies")

tmean
## [1] -0.41 -0.39 -0.30 -0.32 -0.60 -0.46 -0.60 -0.64 -0.44 -0.20 -0.56 -0.59
## [13] -0.49 -0.51 -0.41 -0.33 -0.30 -0.22 -0.39 -0.32 -0.15 -0.14 -0.37 -0.43
## [25] -0.54 -0.37 -0.26 -0.55 -0.41 -0.43 -0.36 -0.38 -0.36 -0.34 -0.08 -0.07
## [37] -0.31 -0.52 -0.43 -0.19 -0.29 -0.17 -0.26 -0.28 -0.23 -0.26 -0.05 -0.19
## [49] -0.11 -0.34 -0.16 -0.09 -0.12 -0.24 -0.10 -0.22 -0.09 0.00 0.07 -0.10
## [61] 0.08 0.04 0.05 0.01 0.06 -0.08 -0.07 0.06 -0.09 -0.13 -0.23 -0.08
## [73] -0.02 0.07 -0.13 -0.11 -0.22 0.04 0.05 0.04 -0.02 0.08 0.01 0.01
## [85] -0.26 -0.17 -0.07 -0.02 -0.10 0.01 0.07 -0.08 -0.03 0.22 -0.04 0.02
## [97] -0.17 0.21 0.14 0.20 0.34 0.44 0.13 0.39 0.22 0.20 0.24 0.40
## [109] 0.49 0.38 0.52 0.52 0.23 0.27 0.37 0.56 0.47 0.53 0.84 0.60
## [121] 0.57 0.68 0.79 0.77 0.68 0.87 0.77 0.85 0.65 0.79 0.92 0.79
## [133] 0.78 0.82 0.88 0.99
yrtime
## [1] 1880 1881 1882 1883 1884 1885 1886 1887 1888 1889 1890 1891 1892 1893 1894
## [16] 1895 1896 1897 1898 1899 1900 1901 1902 1903 1904 1905 1906 1907 1908 1909
## [31] 1910 1911 1912 1913 1914 1915 1916 1917 1918 1919 1920 1921 1922 1923 1924
## [46] 1925 1926 1927 1928 1929 1930 1931 1932 1933 1934 1935 1936 1937 1938 1939
## [61] 1940 1941 1942 1943 1944 1945 1946 1947 1948 1949 1950 1951 1952 1953 1954
## [76] 1955 1956 1957 1958 1959 1960 1961 1962 1963 1964 1965 1966 1967 1968 1969
## [91] 1970 1971 1972 1973 1974 1975 1976 1977 1978 1979 1980 1981 1982 1983 1984
## [106] 1985 1986 1987 1988 1989 1990 1991 1992 1993 1994 1995 1996 1997 1998 1999
## [121] 2000 2001 2002 2003 2004 2005 2006 2007 2008 2009 2010 2011 2012 2013 2014
## [136] 2015
reg8014 <- lm(tmean ~ yrtime) #Linear regression
summary(reg8014) #Statistics summary of linear regression model
##
## Call:
## lm(formula = tmean ~ yrtime)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.44528 -0.11753 -0.01427 0.11089 0.36181
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -1.795e+01 7.254e-01 -24.75 <2e-16 ***
## yrtime 9.223e-03 3.724e-04 24.77 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.1705 on 134 degrees of freedom
## Multiple R-squared: 0.8207, Adjusted R-squared: 0.8194
## F-statistic: 613.4 on 1 and 134 DF, p-value: < 2.2e-16
plot(yrtime, tmean, xlab="Year", ylab="Land Temperature Anomalies",
main="Global Annual Mean Land Surface Air Temperature",
type="o")
abline(reg8014, col="red") #Plot the regression line
text(1930, 0.6, "Linear Temp. Trend = 0.88 °C per century",
col="red",cex=1.2)
grid(nx = NULL, ny = NULL)

reg8014$residuals #shows the values of all the residuals
## 1 2 3 4 5 6
## 0.200123444 0.210900482 0.291677520 0.262454558 -0.026768403 0.104008635
## 7 8 9 10 11 12
## -0.045214327 -0.094437288 0.096339750 0.327116788 -0.042106173 -0.081329135
## 13 14 15 16 17 18
## 0.009447903 -0.019775058 0.071001980 0.141779018 0.162556056 0.233333095
## 19 20 21 22 23 24
## 0.054110133 0.114887171 0.275664210 0.276441248 0.037218286 -0.032004675
## 25 26 27 28 29 30
## -0.151227637 0.009549401 0.110326440 -0.188896522 -0.058119484 -0.087342445
## 31 32 33 34 35 36
## -0.026565407 -0.055788369 -0.045011331 -0.034234292 0.216542746 0.217319784
## 37 38 39 40 41 42
## -0.031903177 -0.251126139 -0.170349101 0.060427938 -0.048795024 0.061982014
## 43 44 45 46 47 48
## -0.037240947 -0.066463909 -0.025686871 -0.064909833 0.135867206 -0.013355756
## 49 50 51 52 53 54
## 0.057421282 -0.181801679 -0.011024641 0.049752397 0.010529436 -0.118693526
## 55 56 57 58 59 60
## 0.012083512 -0.117139449 0.003637589 0.084414627 0.145191665 -0.034031296
## 61 62 63 64 65 66
## 0.136745742 0.087522780 0.088299819 0.039076857 0.079853895 -0.069369066
## 67 68 69 70 71 72
## -0.068592028 0.052185010 -0.107037951 -0.156260913 -0.265483875 -0.124706837
## 73 74 75 76 77 78
## -0.073929798 0.006847240 -0.202375722 -0.191598683 -0.310821645 -0.060044607
## 79 80 81 82 83 84
## -0.059267568 -0.078490530 -0.147713492 -0.056936453 -0.136159415 -0.145382377
## 85 86 87 88 89 90
## -0.424605338 -0.343828300 -0.253051262 -0.212274224 -0.301497185 -0.200720147
## 91 92 93 94 95 96
## -0.149943109 -0.309166070 -0.268389032 -0.027611994 -0.296834955 -0.246057917
## 97 98 99 100 101 102
## -0.445280879 -0.074503840 -0.153726802 -0.102949764 0.027827274 0.118604313
## 103 104 105 106 107 108
## -0.200618649 0.050158389 -0.129064572 -0.158287534 -0.127510496 0.023266543
## 109 110 111 112 113 114
## 0.104043581 -0.015179381 0.115597658 0.106374696 -0.192848266 -0.162071228
## 115 116 117 118 119 120
## -0.071294189 0.109482849 0.010259887 0.061036926 0.361813964 0.112591002
## 121 122 123 124 125 126
## 0.073368041 0.174145079 0.274922117 0.245699156 0.146476194 0.327253232
## 127 128 129 130 131 132
## 0.218030271 0.288807309 0.079584347 0.210361385 0.331138424 0.191915462
## 133 134 135 136
## 0.172692500 0.203469539 0.254246577 0.355023615
re1 <- reg8014$residuals
#Residual standard error, approximately the SD of residuals
sd(re1)*sqrt((135-1)/(135-2))
## [1] 0.1704966
Research Level Exploration for Analyzing the Global Warming
Data
#Hansen's data analysis
#setwd("~/sshen/mathmodel")
dtmean <- read.table("~/Desktop/RMathModel/data/gl_land_oceanHansen.txt",
header=TRUE)
colnames(dtmean)
## [1] "Year" "Anomaly" "mean"
#a) Statistical summary
summary(dtmean$Anomaly)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## -0.47000 -0.21000 -0.08000 0.01185 0.17500 0.74000
#b) Boxplot
boxplot(dtmean$Anomaly,
main="Boxplot of Hansen's Global Temp. Data 1880-2014",
ylab="Temp. [°C]")
grid(nx = NULL, ny = NULL)

#c) Histogram
hist(dtmean$Anomaly,
breaks=c(0.1, seq(-0.5, 0.8, 0.1)),
xlab="Temp. Anomaly [°C]",
main="Histogram of Hansen's Global Temp.")

#d) Linear regression models
y18802014 <- dtmean$Anomaly
x18802014 <- seq(1880,2014)
#setEPS()
#postscript("figm0306.eps", height=6, width=8)
par(mar=c(4.2,4.7,2.0,0.8))
plot(x18802014, y18802014,
cex.lab=1.5, cex.axis=1.5, cex.main=1.5,
xlab="Year",ylab="Temperature Anomalies [°C]",
main="Global Annual Mean Surface Air Temp.: Hansen Data",
type="o")
grid(nx = NULL, ny = NULL)
reg18802014 <- lm(y18802014 ~ x18802014)
reg18802014
##
## Call:
## lm(formula = y18802014 ~ x18802014)
##
## Coefficients:
## (Intercept) x18802014
## -13.183247 0.006777
abline(reg18802014, col="red", lwd=4)
text(1920, 0.70,
"1880-2014 Trend = 0.68 °C per century",
col="red", cex=1.2)
#Linear regression for the first 31 years
y18801910 <- dtmean$Anomaly[1:31]
x18801910 <- seq(1880,1910)
reg18801910 <- lm(y18801910 ~ x18801910)
reg18801910
##
## Call:
## lm(formula = y18801910 ~ x18801910)
##
## Coefficients:
## (Intercept) x18801910
## 10.990000 -0.005935
segments(x18801910[1], fitted(reg18801910)[1],
x18801910[31], fitted(reg18801910)[31],
col="blue", lwd=4)
#This allows to plot a line segment for the first 31 years.
#But abline(reg18801910,col="blue",lwd=2,xlim=c(1880, 1910))
#plots the regression line for the entire period 1880 - 2014.
text(1920, 0.3, "1880-1910 Trend = -0.60 °C per century",
col="blue",cex=1.2)
#Linear regression for the first 71 years
y18801950 <- dtmean$Anomaly[1:71]
x18801950 <- seq(1880,1950)
reg18801950 <- lm(y18801950 ~ x18801950)
reg18801950
##
## Call:
## lm(formula = y18801950 ~ x18801950)
##
## Coefficients:
## (Intercept) x18801950
## -7.217396 0.003668
segments(x18801950[1], fitted(reg18801950)[1],
x18801950[71], fitted(reg18801950)[71],
col="green",lwd=5)
text(1920, 0.4, "1880-1950 Trend = 0.37 °C per century",
col="green", cex=1.2)
#Linear regression for the first 96 years
y18801975 <- dtmean$Anomaly[1:96]
x18801975 <- seq(1880,1975)
reg18801975 <- lm(y18801975 ~ x18801975)
reg18801975
##
## Call:
## lm(formula = y18801975 ~ x18801975)
##
## Coefficients:
## (Intercept) x18801975
## -7.025833 0.003568
segments(x18801975[1], fitted(reg18801975)[1],
x18801975[96], fitted(reg18801975)[96],
col="black",lwd=2)
text(1920, 0.5, "1880-1975 Trend = 0.36 °C per century",
col="black", cex=1.2)
#Linear regression for the first 121 years
y18802000 <- dtmean$Anomaly[1:121]
x18802000 <- seq(1880,2000)
reg18802000 <- lm(y18802000 ~ x18802000)
reg18802000
##
## Call:
## lm(formula = y18802000 ~ x18802000)
##
## Coefficients:
## (Intercept) x18802000
## -10.681780 0.005476
segments(x18802000[1],fitted(reg18802000)[1],
x18802000[121],fitted(reg18802000)[121],
col="purple",lwd=4)
text(1920, 0.6, "1880-2000 Trend = 0.55 °C per century",
col="purple", cex=1.2)

#dev.off()