Introduction to Modern Mathematical Modeling with R:

A User’s Manual to Train Mathematical Consultants

 

A Cambridge University Press book by

SSP Shen

 

 

R Code written by Dr. Samuel Shen, Distinguished Professor
San Diego State University, California, USA
https://shen.sdsu.edu
Email:

 

Compiled and Edited by Joaquin Stawsky
San Diego State University, August 2022

 

 

 

Chapter 3: Linear Models Using Regression and Data

A Linear Model for the Life Expectancy in France

#Life expectancy in France 
dat <- read.csv("~/Desktop/RMathModel/data/LifeExpectancyWorldBank2018clean.csv",
               header=TRUE)

dat[1:3,1:5] # yields first 3 rows and first 5 columns
##   Country.Name Country.Code  X1960  X1961  X1962
## 1        Aruba          ABW 65.662 66.074 66.444
## 2  Afghanistan          AFG 32.292 32.742 33.185
## 3       Angola          AGO 33.251 33.573 33.914
dim(dat) #displays dimension of data frame
## [1] 264  60
#data from 1960 to 2017, 58 years 
dat[1:5,55:60]
##    X2012  X2013  X2014  X2015  X2016 X2017
## 1 75.299 75.440 75.582 75.725 75.867    NA
## 2 62.086 62.494 62.895 63.288 63.673    NA
## 3 59.770 60.373 60.858 61.241 61.547    NA
## 4 77.389 77.702 77.963 78.174 78.345    NA
## 5     NA     NA     NA     NA     NA    NA
which(dat == "France") # find the index at which "France" exists
## [1] 76
dat[76,1:5]
##    Country.Name Country.Code    X1960    X1961    X1962
## 76       France          FRA 69.86829 70.11707 70.31463
yr <- 1960:1989 # create a sequence of years
le <- dat[76,3:32]
le
##       X1960    X1961    X1962    X1963    X1964   X1965    X1966    X1967
## 76 69.86829 70.11707 70.31463 70.51463 70.66341 70.8122 70.96098 71.16098
##       X1968    X1969    X1970    X1971    X1972   X1973    X1974    X1975
## 76 71.30976 71.45854 71.65854 71.90732 72.10732 72.3561 72.60488 72.85366
##       X1976    X1977    X1978    X1979    X1980 X1981 X1982 X1983 X1984 X1985
## 76 73.10244 73.35122 73.60244 73.85122 74.05122  74.3  74.5  74.8    75  75.3
##    X1986 X1987 X1988    X1989
## 76  75.6  75.8  76.1 76.34878
ler <- as.numeric(le) #Converts characters values into numeric values
par(mar=c(4.5,4.5,2,1.5))  #Allows user to modify and set graph parameters
plot(yr, ler, ylim=c(70,77),
     xlab="Time", ylab="Life Expectancy",
     main="Life Expectancy in France", 
     cex.main=1.5, cex.lab=1.5, cex.axis=1.5)
lm(ler ~ yr) #Creates linear regression model of ler to yr
## 
## Call:
## lm(formula = ler ~ yr)
## 
## Coefficients:
## (Intercept)           yr  
##   -367.9487       0.2233
abline(lm(ler  ~ yr), col="blue", lwd=2) 
#Adds fitted linear model to the graph

text(1968,76, "Trend = 0.2233 per year", cex=1.5, col="blue")
#Adds text to graph

grid(nx = NULL, ny = NULL)

#Adds grid lines to graph

 

Energy Consumption and Heating Degree Data

#HDD and kWh: United States Monthly data from Oct. to Sept.
#setwd("~/sshen/mathmodel")

# uncomment next two lines and 'dev.off()' to save plot/figure 
# on to your working directory
#setEPS()
#postscript("figm0302.eps", height=6, width=8) 

par(mar=c(4.2,4.7,3.5,0.8))
hdd <- c(163 ,228 ,343 ,373, 301, 238 ,137 ,84 ,38 ,15,14,34)
kwh <- c(593 ,676 ,1335 ,1149 , 1127, 892 , 538 ,289 , 172, 131,134,134) 
lm(kwh ~ hdd)
## 
## Call:
## lm(formula = kwh ~ hdd)
## 
## Coefficients:
## (Intercept)          hdd  
##      53.505        3.317
plot(hdd, kwh, xlim=c(0,400),ylim=c(0,1500),
     xlab="Climate Index Data: HDD",
     ylab="Energy Index Data: kWh",
     main="Monthly Household Energy Consumption\n vs. Climate Data", 
     cex.lab=1.5, cex.axis=1.5, cex.main=1.5)
abline(lm(kwh ~ hdd), col="red",lwd=2) 
text(140,1300, col="red", cex=1.3, 
     "Linear Model: kWh = 3.317HDD + 53.505") 
text(110,1100, col="red", cex=1.3,
     expression(R^2==0.9663)) 
grid(nx = NULL, ny = NULL)

#dev.off()

 

An Example of Linear Model and Data Analysis Using R: A Global Warming Dataset

#Read the txt data into R by
dtmean <- read.table("~/Desktop/RMathModel/data/gl_land_nohead.txt",
                   header=F)

#Another way to import the data is to set the R working directory 
#to the same directory where the dataset is located.
#setwd("~/sshen/mathmodel")
#Then import the data by reading the file name
dtmean <- read.table("~/Desktop/RMathModel/data/gl_land_nohead.txt", header=F)
dtmean
##       V1    V2     V3
## 1   1880 -0.41 -99.99
## 2   1881 -0.39 -99.99
## 3   1882 -0.30  -0.40
## 4   1883 -0.32  -0.41
## 5   1884 -0.60  -0.46
## 6   1885 -0.46  -0.52
## 7   1886 -0.60  -0.55
## 8   1887 -0.64  -0.47
## 9   1888 -0.44  -0.49
## 10  1889 -0.20  -0.48
## 11  1890 -0.56  -0.45
## 12  1891 -0.59  -0.47
## 13  1892 -0.49  -0.51
## 14  1893 -0.51  -0.47
## 15  1894 -0.41  -0.41
## 16  1895 -0.33  -0.35
## 17  1896 -0.30  -0.33
## 18  1897 -0.22  -0.31
## 19  1898 -0.39  -0.27
## 20  1899 -0.32  -0.24
## 21  1900 -0.15  -0.27
## 22  1901 -0.14  -0.28
## 23  1902 -0.37  -0.33
## 24  1903 -0.43  -0.37
## 25  1904 -0.54  -0.39
## 26  1905 -0.37  -0.43
## 27  1906 -0.26  -0.43
## 28  1907 -0.55  -0.40
## 29  1908 -0.41  -0.40
## 30  1909 -0.43  -0.42
## 31  1910 -0.36  -0.39
## 32  1911 -0.38  -0.37
## 33  1912 -0.36  -0.30
## 34  1913 -0.34  -0.25
## 35  1914 -0.08  -0.23
## 36  1915 -0.07  -0.26
## 37  1916 -0.31  -0.28
## 38  1917 -0.52  -0.30
## 39  1918 -0.43  -0.35
## 40  1919 -0.19  -0.32
## 41  1920 -0.29  -0.27
## 42  1921 -0.17  -0.24
## 43  1922 -0.26  -0.25
## 44  1923 -0.28  -0.24
## 45  1924 -0.23  -0.22
## 46  1925 -0.26  -0.20
## 47  1926 -0.05  -0.17
## 48  1927 -0.19  -0.19
## 49  1928 -0.11  -0.17
## 50  1929 -0.34  -0.18
## 51  1930 -0.16  -0.16
## 52  1931 -0.09  -0.19
## 53  1932 -0.12  -0.14
## 54  1933 -0.24  -0.15
## 55  1934 -0.10  -0.16
## 56  1935 -0.22  -0.13
## 57  1936 -0.09  -0.07
## 58  1937  0.00  -0.07
## 59  1938  0.07  -0.01
## 60  1939 -0.10   0.02
## 61  1940  0.08   0.03
## 62  1941  0.04   0.02
## 63  1942  0.05   0.05
## 64  1943  0.01   0.02
## 65  1944  0.06  -0.01
## 66  1945 -0.08  -0.01
## 67  1946 -0.07  -0.02
## 68  1947  0.06  -0.06
## 69  1948 -0.09  -0.09
## 70  1949 -0.13  -0.09
## 71  1950 -0.23  -0.11
## 72  1951 -0.08  -0.08
## 73  1952 -0.02  -0.08
## 74  1953  0.07  -0.05
## 75  1954 -0.13  -0.08
## 76  1955 -0.11  -0.07
## 77  1956 -0.22  -0.07
## 78  1957  0.04  -0.04
## 79  1958  0.05  -0.02
## 80  1959  0.04   0.04
## 81  1960 -0.02   0.03
## 82  1961  0.08   0.02
## 83  1962  0.01  -0.04
## 84  1963  0.01  -0.06
## 85  1964 -0.26  -0.09
## 86  1965 -0.17  -0.10
## 87  1966 -0.07  -0.12
## 88  1967 -0.02  -0.07
## 89  1968 -0.10  -0.02
## 90  1969  0.01  -0.02
## 91  1970  0.07  -0.03
## 92  1971 -0.08   0.04
## 93  1972 -0.03   0.03
## 94  1973  0.22   0.02
## 95  1974 -0.04   0.00
## 96  1975  0.02   0.05
## 97  1976 -0.17   0.03
## 98  1977  0.21   0.08
## 99  1978  0.14   0.14
## 100 1979  0.20   0.27
## 101 1980  0.34   0.25
## 102 1981  0.44   0.30
## 103 1982  0.13   0.30
## 104 1983  0.39   0.27
## 105 1984  0.22   0.24
## 106 1985  0.20   0.29
## 107 1986  0.24   0.31
## 108 1987  0.40   0.34
## 109 1988  0.49   0.41
## 110 1989  0.38   0.46
## 111 1990  0.52   0.43
## 112 1991  0.52   0.38
## 113 1992  0.23   0.38
## 114 1993  0.27   0.39
## 115 1994  0.37   0.38
## 116 1995  0.56   0.44
## 117 1996  0.47   0.55
## 118 1997  0.53   0.60
## 119 1998  0.84   0.60
## 120 1999  0.60   0.65
## 121 2000  0.57   0.70
## 122 2001  0.68   0.68
## 123 2002  0.79   0.70
## 124 2003  0.77   0.76
## 125 2004  0.68   0.78
## 126 2005  0.87   0.79
## 127 2006  0.77   0.77
## 128 2007  0.85   0.79
## 129 2008  0.65   0.80
## 130 2009  0.79   0.80
## 131 2010  0.92   0.78
## 132 2011  0.79   0.82
## 133 2012  0.78   0.84
## 134 2013  0.82   0.85
## 135 2014  0.88 -99.99
## 136 2015  0.99 -99.99
dim(dtmean)
## [1] 136   3
yrtime <- dtmean[,1]#Column 1 is time in Year
tmean <- dtmean[,2] #Column 2 is mean temp.
summary(tmean) #Displays summary statistics of data
##     Min.  1st Qu.   Median     Mean  3rd Qu.     Max. 
## -0.64000 -0.28250 -0.08000  0.01243  0.22000  0.99000
summary(yrtime)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    1880    1914    1948    1948    1981    2015
boxplot(tmean, main="Land Temp. Anomalies")
grid(nx = NULL, ny = NULL)

hist(tmean, 
     main="Histogram: Land Temperature Anomalies",
     xlab="Temp. Anomalies")

tmean
##   [1] -0.41 -0.39 -0.30 -0.32 -0.60 -0.46 -0.60 -0.64 -0.44 -0.20 -0.56 -0.59
##  [13] -0.49 -0.51 -0.41 -0.33 -0.30 -0.22 -0.39 -0.32 -0.15 -0.14 -0.37 -0.43
##  [25] -0.54 -0.37 -0.26 -0.55 -0.41 -0.43 -0.36 -0.38 -0.36 -0.34 -0.08 -0.07
##  [37] -0.31 -0.52 -0.43 -0.19 -0.29 -0.17 -0.26 -0.28 -0.23 -0.26 -0.05 -0.19
##  [49] -0.11 -0.34 -0.16 -0.09 -0.12 -0.24 -0.10 -0.22 -0.09  0.00  0.07 -0.10
##  [61]  0.08  0.04  0.05  0.01  0.06 -0.08 -0.07  0.06 -0.09 -0.13 -0.23 -0.08
##  [73] -0.02  0.07 -0.13 -0.11 -0.22  0.04  0.05  0.04 -0.02  0.08  0.01  0.01
##  [85] -0.26 -0.17 -0.07 -0.02 -0.10  0.01  0.07 -0.08 -0.03  0.22 -0.04  0.02
##  [97] -0.17  0.21  0.14  0.20  0.34  0.44  0.13  0.39  0.22  0.20  0.24  0.40
## [109]  0.49  0.38  0.52  0.52  0.23  0.27  0.37  0.56  0.47  0.53  0.84  0.60
## [121]  0.57  0.68  0.79  0.77  0.68  0.87  0.77  0.85  0.65  0.79  0.92  0.79
## [133]  0.78  0.82  0.88  0.99
yrtime
##   [1] 1880 1881 1882 1883 1884 1885 1886 1887 1888 1889 1890 1891 1892 1893 1894
##  [16] 1895 1896 1897 1898 1899 1900 1901 1902 1903 1904 1905 1906 1907 1908 1909
##  [31] 1910 1911 1912 1913 1914 1915 1916 1917 1918 1919 1920 1921 1922 1923 1924
##  [46] 1925 1926 1927 1928 1929 1930 1931 1932 1933 1934 1935 1936 1937 1938 1939
##  [61] 1940 1941 1942 1943 1944 1945 1946 1947 1948 1949 1950 1951 1952 1953 1954
##  [76] 1955 1956 1957 1958 1959 1960 1961 1962 1963 1964 1965 1966 1967 1968 1969
##  [91] 1970 1971 1972 1973 1974 1975 1976 1977 1978 1979 1980 1981 1982 1983 1984
## [106] 1985 1986 1987 1988 1989 1990 1991 1992 1993 1994 1995 1996 1997 1998 1999
## [121] 2000 2001 2002 2003 2004 2005 2006 2007 2008 2009 2010 2011 2012 2013 2014
## [136] 2015
reg8014 <- lm(tmean ~ yrtime) #Linear regression
summary(reg8014) #Statistics summary of linear regression model
## 
## Call:
## lm(formula = tmean ~ yrtime)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.44528 -0.11753 -0.01427  0.11089  0.36181 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -1.795e+01  7.254e-01  -24.75   <2e-16 ***
## yrtime       9.223e-03  3.724e-04   24.77   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.1705 on 134 degrees of freedom
## Multiple R-squared:  0.8207, Adjusted R-squared:  0.8194 
## F-statistic: 613.4 on 1 and 134 DF,  p-value: < 2.2e-16
plot(yrtime, tmean, xlab="Year", ylab="Land Temperature Anomalies", 
     main="Global Annual Mean Land Surface Air Temperature",
     type="o")
abline(reg8014, col="red") #Plot the regression line
text(1930, 0.6, "Linear Temp. Trend = 0.88 °C per century", 
     col="red",cex=1.2)
grid(nx = NULL, ny = NULL)

reg8014$residuals #shows the values of all the residuals
##            1            2            3            4            5            6 
##  0.200123444  0.210900482  0.291677520  0.262454558 -0.026768403  0.104008635 
##            7            8            9           10           11           12 
## -0.045214327 -0.094437288  0.096339750  0.327116788 -0.042106173 -0.081329135 
##           13           14           15           16           17           18 
##  0.009447903 -0.019775058  0.071001980  0.141779018  0.162556056  0.233333095 
##           19           20           21           22           23           24 
##  0.054110133  0.114887171  0.275664210  0.276441248  0.037218286 -0.032004675 
##           25           26           27           28           29           30 
## -0.151227637  0.009549401  0.110326440 -0.188896522 -0.058119484 -0.087342445 
##           31           32           33           34           35           36 
## -0.026565407 -0.055788369 -0.045011331 -0.034234292  0.216542746  0.217319784 
##           37           38           39           40           41           42 
## -0.031903177 -0.251126139 -0.170349101  0.060427938 -0.048795024  0.061982014 
##           43           44           45           46           47           48 
## -0.037240947 -0.066463909 -0.025686871 -0.064909833  0.135867206 -0.013355756 
##           49           50           51           52           53           54 
##  0.057421282 -0.181801679 -0.011024641  0.049752397  0.010529436 -0.118693526 
##           55           56           57           58           59           60 
##  0.012083512 -0.117139449  0.003637589  0.084414627  0.145191665 -0.034031296 
##           61           62           63           64           65           66 
##  0.136745742  0.087522780  0.088299819  0.039076857  0.079853895 -0.069369066 
##           67           68           69           70           71           72 
## -0.068592028  0.052185010 -0.107037951 -0.156260913 -0.265483875 -0.124706837 
##           73           74           75           76           77           78 
## -0.073929798  0.006847240 -0.202375722 -0.191598683 -0.310821645 -0.060044607 
##           79           80           81           82           83           84 
## -0.059267568 -0.078490530 -0.147713492 -0.056936453 -0.136159415 -0.145382377 
##           85           86           87           88           89           90 
## -0.424605338 -0.343828300 -0.253051262 -0.212274224 -0.301497185 -0.200720147 
##           91           92           93           94           95           96 
## -0.149943109 -0.309166070 -0.268389032 -0.027611994 -0.296834955 -0.246057917 
##           97           98           99          100          101          102 
## -0.445280879 -0.074503840 -0.153726802 -0.102949764  0.027827274  0.118604313 
##          103          104          105          106          107          108 
## -0.200618649  0.050158389 -0.129064572 -0.158287534 -0.127510496  0.023266543 
##          109          110          111          112          113          114 
##  0.104043581 -0.015179381  0.115597658  0.106374696 -0.192848266 -0.162071228 
##          115          116          117          118          119          120 
## -0.071294189  0.109482849  0.010259887  0.061036926  0.361813964  0.112591002 
##          121          122          123          124          125          126 
##  0.073368041  0.174145079  0.274922117  0.245699156  0.146476194  0.327253232 
##          127          128          129          130          131          132 
##  0.218030271  0.288807309  0.079584347  0.210361385  0.331138424  0.191915462 
##          133          134          135          136 
##  0.172692500  0.203469539  0.254246577  0.355023615
re1 <- reg8014$residuals
#Residual standard error, approximately the SD of residuals
sd(re1)*sqrt((135-1)/(135-2))
## [1] 0.1704966

 

Research Level Exploration for Analyzing the Global Warming Data

#Hansen's data analysis
#setwd("~/sshen/mathmodel")
dtmean <- read.table("~/Desktop/RMathModel/data/gl_land_oceanHansen.txt",
                     header=TRUE)
colnames(dtmean)
## [1] "Year"    "Anomaly" "mean"
#a) Statistical summary
summary(dtmean$Anomaly)
##     Min.  1st Qu.   Median     Mean  3rd Qu.     Max. 
## -0.47000 -0.21000 -0.08000  0.01185  0.17500  0.74000
#b) Boxplot
boxplot(dtmean$Anomaly,
        main="Boxplot of Hansen's Global Temp. Data 1880-2014", 
        ylab="Temp. [°C]")
grid(nx = NULL, ny = NULL)

#c) Histogram
hist(dtmean$Anomaly, 
     breaks=c(0.1, seq(-0.5, 0.8, 0.1)),
     xlab="Temp. Anomaly [°C]",
     main="Histogram of Hansen's Global Temp.")

#d) Linear regression models
y18802014 <- dtmean$Anomaly
x18802014 <- seq(1880,2014)

#setEPS()
#postscript("figm0306.eps", height=6, width=8) 
par(mar=c(4.2,4.7,2.0,0.8))
plot(x18802014, y18802014, 
     cex.lab=1.5, cex.axis=1.5, cex.main=1.5,
     xlab="Year",ylab="Temperature Anomalies [°C]",
     main="Global Annual Mean Surface Air Temp.: Hansen Data", 
     type="o")
grid(nx = NULL, ny = NULL)

reg18802014 <- lm(y18802014  ~ x18802014)
reg18802014
## 
## Call:
## lm(formula = y18802014 ~ x18802014)
## 
## Coefficients:
## (Intercept)    x18802014  
##  -13.183247     0.006777
abline(reg18802014, col="red", lwd=4)
text(1920, 0.70, 
     "1880-2014 Trend = 0.68 °C per century", 
     col="red", cex=1.2) 


#Linear regression for the first 31 years
y18801910 <- dtmean$Anomaly[1:31]
x18801910 <- seq(1880,1910) 
reg18801910 <- lm(y18801910  ~ x18801910) 
reg18801910
## 
## Call:
## lm(formula = y18801910 ~ x18801910)
## 
## Coefficients:
## (Intercept)    x18801910  
##   10.990000    -0.005935
segments(x18801910[1], fitted(reg18801910)[1],
         x18801910[31], fitted(reg18801910)[31],
         col="blue", lwd=4) 

#This allows to plot a line segment for the first 31 years.
#But abline(reg18801910,col="blue",lwd=2,xlim=c(1880, 1910))
#plots the regression line for the entire period 1880 - 2014.
text(1920, 0.3, "1880-1910 Trend = -0.60 °C per century", 
     col="blue",cex=1.2)

#Linear regression for the first 71 years
y18801950 <- dtmean$Anomaly[1:71] 
x18801950 <- seq(1880,1950) 
reg18801950 <- lm(y18801950  ~ x18801950)
reg18801950
## 
## Call:
## lm(formula = y18801950 ~ x18801950)
## 
## Coefficients:
## (Intercept)    x18801950  
##   -7.217396     0.003668
segments(x18801950[1], fitted(reg18801950)[1],
         x18801950[71], fitted(reg18801950)[71],
         col="green",lwd=5)
text(1920, 0.4, "1880-1950 Trend = 0.37 °C per century", 
     col="green", cex=1.2)

#Linear regression for the first 96 years
y18801975 <- dtmean$Anomaly[1:96] 
x18801975 <- seq(1880,1975) 
reg18801975 <- lm(y18801975  ~  x18801975) 
reg18801975
## 
## Call:
## lm(formula = y18801975 ~ x18801975)
## 
## Coefficients:
## (Intercept)    x18801975  
##   -7.025833     0.003568
segments(x18801975[1], fitted(reg18801975)[1],
         x18801975[96], fitted(reg18801975)[96],
         col="black",lwd=2)
text(1920, 0.5, "1880-1975 Trend = 0.36 °C per century", 
     col="black", cex=1.2)

#Linear regression for the first 121 years
y18802000 <- dtmean$Anomaly[1:121] 
x18802000 <- seq(1880,2000) 
reg18802000 <- lm(y18802000  ~ x18802000) 
reg18802000
## 
## Call:
## lm(formula = y18802000 ~ x18802000)
## 
## Coefficients:
## (Intercept)    x18802000  
##  -10.681780     0.005476
segments(x18802000[1],fitted(reg18802000)[1],
         x18802000[121],fitted(reg18802000)[121],
         col="purple",lwd=4)
text(1920, 0.6, "1880-2000 Trend = 0.55 °C per century", 
     col="purple", cex=1.2) 

#dev.off()