Linear Regression Exercise

In Class LA Rain – Linear Regression Exercise

# Load the LA rain data set from a CSV file picked interactively.
# file.choose() is used rather than choose.files(): choose.files() is only
# available in Windows builds of R, while file.choose() works on every
# platform and returns the single path that read.csv() needs.
LARain <- read.csv(file.choose(), header = TRUE)

# List the column names to see which variables are available.
names(LARain)
## [1] "Year"    "APMAM"   "APSAB"   "APSLAKE" "OPBPC"   "OPRC"    "OPSLAKE"
## [8] "BSAAM"

# Default plot of the whole data frame for a first look at the variables.
plot(LARain)

plot of chunk unnamed-chunk-1

# Scatterplot of BSAAM against APMAM. The columns are looked up through the
# `data` argument instead of writing `LARain$` inside the formula, so the axes
# are labelled with the plain column names.
plot(BSAAM ~ APMAM, data = LARain)

plot of chunk unnamed-chunk-1


# Simple linear regression of BSAAM on APMAM.
# Printing the unassigned fit shows only the call and the coefficients.
lm(LARain$BSAAM ~ LARain$APMAM)
## 
## Call:
## lm(formula = LARain$BSAAM ~ LARain$APMAM)
## 
## Coefficients:
##  (Intercept)  LARain$APMAM  
##        63363          1965
# Fit the same model again, keep it in `a`, and print the full summary.
# APMAM is a weak predictor in this fit: its slope is not significant at the
# 0.05 level (p = 0.12) and R-squared is only 0.0569.
a <- lm(LARain$BSAAM ~ LARain$APMAM)
summary(a)
## 
## Call:
## lm(formula = LARain$BSAAM ~ LARain$APMAM)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -37043 -16339  -5457  17158  72467 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept)     63364       9917    6.39  1.2e-07 ***
## LARain$APMAM     1965       1249    1.57     0.12    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 
## 
## Residual standard error: 25100 on 41 degrees of freedom
## Multiple R-squared: 0.0569,  Adjusted R-squared: 0.0339 
## F-statistic: 2.47 on 1 and 41 DF,  p-value: 0.123

# Simple linear regression of BSAAM on OPBPC, stored in `b`.
b <- lm(LARain$BSAAM ~ LARain$OPBPC)
summary(b)  # R-squared: 0.785; p-value: 2.996e-15
## 
## Call:
## lm(formula = LARain$BSAAM ~ LARain$OPBPC)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -21183  -7298   -819   4731  38430 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept)     40017       3589    11.2  5.5e-14 ***
## LARain$OPBPC     2940        241    12.2  3.0e-15 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 
## 
## Residual standard error: 12000 on 41 degrees of freedom
## Multiple R-squared: 0.785,   Adjusted R-squared: 0.779 
## F-statistic:  149 on 1 and 41 DF,  p-value: 3e-15

# Simple linear regression of BSAAM on OPRC, stored in `c`.
# NOTE(review): the object name `c` reuses the name of the base function c();
# a more descriptive name would be clearer.
c <- lm(LARain$BSAAM ~ LARain$OPRC)
summary(c)  # R-squared: 0.846; p-value: < 2.2e-16
## 
## Call:
## lm(formula = LARain$BSAAM ~ LARain$OPRC)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -24356  -5514   -522   7448  24854 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)    21741       4044    5.38  3.3e-06 ***
## LARain$OPRC     4667        311   14.99  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 
## 
## Residual standard error: 10100 on 41 degrees of freedom
## Multiple R-squared: 0.846,   Adjusted R-squared: 0.842 
## F-statistic:  225 on 1 and 41 DF,  p-value: <2e-16

# Simple linear regression of BSAAM on OPSLAKE, stored in `d`. This fit is then
# examined further with confidence intervals, an ANOVA table and residual plots.
d <- lm(LARain$BSAAM ~ LARain$OPSLAKE)
summary(d)  # R-squared: 0.881; p-value: < 2.2e-16
## 
## Call:
## lm(formula = LARain$BSAAM ~ LARain$OPSLAKE)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -17604  -5338    332   3411  20876 
## 
## Coefficients:
##                Estimate Std. Error t value Pr(>|t|)    
## (Intercept)       27015       3219    8.39  1.9e-10 ***
## LARain$OPSLAKE     3752        216   17.39  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 
## 
## Residual standard error: 8920 on 41 degrees of freedom
## Multiple R-squared: 0.881,   Adjusted R-squared: 0.878 
## F-statistic:  303 on 1 and 41 DF,  p-value: <2e-16
confint(d)  # 95% confidence intervals (2.5 % / 97.5 % bounds) for intercept and slope
##                2.5 % 97.5 %
## (Intercept)    20514  33515
## LARain$OPSLAKE  3317   4188
anova(d)  # the null hypothesis of no association between predictor and response is clearly rejected (p < 2e-16)
## Analysis of Variance Table
## 
## Response: LARain$BSAAM
##                Df   Sum Sq  Mean Sq F value Pr(>F)    
## LARain$OPSLAKE  1 2.41e+10 2.41e+10     303 <2e-16 ***
## Residuals      41 3.26e+09 7.96e+07                   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
plot(d, which = 1:2)  # residual diagnostics: the first two plot.lm panels (residuals vs fitted, normal Q-Q)

plot of chunk unnamed-chunk-1 plot of chunk unnamed-chunk-1

# It seems that the OPSLAKE weather station is the best single predictor of
# runoff: its model has the highest R-squared (0.881) of the single-station
# fits above, with a p-value below 2e-16.

# Two-predictor regression of BSAAM on OPBPC and OPRC, stored in `bc`.
# Both slopes are significant at the 0.01 level in this fit.
bc <- lm(LARain$BSAAM ~ LARain$OPBPC + LARain$OPRC)
summary(bc)  # R-squared: 0.878; p-value: < 2.2e-16
## 
## Call:
## lm(formula = LARain$BSAAM ~ LARain$OPBPC + LARain$OPRC)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -17377  -6600    -10   6240  18771 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept)     25353       3802    6.67  5.4e-08 ***
## LARain$OPBPC     1191        365    3.27   0.0022 ** 
## LARain$OPRC      3092        558    5.55  2.1e-06 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 
## 
## Residual standard error: 9130 on 40 degrees of freedom
## Multiple R-squared: 0.878,   Adjusted R-squared: 0.872 
## F-statistic:  144 on 2 and 40 DF,  p-value: <2e-16

# Two-predictor regression of BSAAM on OPRC and OPSLAKE, stored in `cd`.
# Both slopes are significant at the 0.01 level, and this fit has the highest
# R-squared of the three two-predictor models in this script.
cd <- lm(LARain$BSAAM ~ LARain$OPRC + LARain$OPSLAKE)
summary(cd)  # R-squared: 0.902; p-value: < 2.2e-16
## 
## Call:
## lm(formula = LARain$BSAAM ~ LARain$OPRC + LARain$OPSLAKE)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -15991  -6485   -498   4700  19946 
## 
## Coefficients:
##                Estimate Std. Error t value Pr(>|t|)    
## (Intercept)       22891       3278    6.98  2.0e-08 ***
## LARain$OPRC        1866        639    2.92   0.0057 ** 
## LARain$OPSLAKE     2401        503    4.77  2.5e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 
## 
## Residual standard error: 8200 on 40 degrees of freedom
## Multiple R-squared: 0.902,   Adjusted R-squared: 0.897 
## F-statistic:  183 on 2 and 40 DF,  p-value: <2e-16

# Two-predictor regression of BSAAM on OPBPC and OPSLAKE, stored in `bd`.
# With OPSLAKE in the model, the OPBPC slope is not significant (p = 0.98), and
# R-squared (0.881) is no higher than for OPSLAKE alone.
bd <- lm(LARain$BSAAM ~ LARain$OPBPC + LARain$OPSLAKE)
summary(bd)  # R-squared: 0.881 (adjusted: 0.875); p-value: < 2.2e-16
## 
## Call:
## lm(formula = LARain$BSAAM ~ LARain$OPBPC + LARain$OPSLAKE)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -17591  -5277    276   3381  20867 
## 
## Coefficients:
##                Estimate Std. Error t value Pr(>|t|)    
## (Intercept)     27051.0     3540.1    7.64  2.4e-09 ***
## LARain$OPBPC       14.4      546.4    0.03     0.98    
## LARain$OPSLAKE   3736.2      658.2    5.68  1.3e-06 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 
## 
## Residual standard error: 9030 on 40 degrees of freedom
## Multiple R-squared: 0.881,   Adjusted R-squared: 0.875 
## F-statistic:  148 on 2 and 40 DF,  p-value: <2e-16

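# If only two stations can stay open, keep OPRC and OPSLAKE: that pair gives
# the highest R-squared (0.902) of the three two-station models fitted above.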