Exercise from lecture 4

The LA_RAIN.csv dataset contains 43 years' worth of precipitation measurements (in inches) taken at six sites in the Owens Valley, labeled APMAM (Mammoth Lake), APSAB (Lake Sabrina), APSLAKE (South Lake), OPBPC (Big Pine Creek), OPRC (Rock Creek), and OPSLAKE, and stream runoff volume (measured in acre-feet) at a site near Bishop, California (labeled BSAAM).

Which weather station (or sets of weather stations) is the best predictor of runoff?

## Load the data set
## NOTE(review): the path is absolute and machine-specific; the script only
## runs where this exact file exists.
larain <- read.csv(
  file = "C:/Users/Claudio Alvarez/Desktop/LA_RAIN.csv",
  header = TRUE
)
colnames(larain)
## [1] "Year"    "APMAM"   "APSAB"   "APSLAKE" "OPBPC"   "OPRC"    "OPSLAKE"
## [8] "BSAAM"

## Overview plot of every column of the data frame
plot(x = larain, main = "LA Rainfall and Station Data")

plot of chunk unnamed-chunk-1


## Simple linear regression: runoff (BSAAM) explained by the OPBPC station
lm1 <- lm(formula = BSAAM ~ OPBPC, data = larain)
summary(object = lm1)  # adjusted R-squared = 0.779
## 
## Call:
## lm(formula = BSAAM ~ OPBPC, data = larain)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -21183  -7298   -819   4731  38430 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)    40017       3589    11.2  5.5e-14 ***
## OPBPC           2940        241    12.2  3.0e-15 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 
## 
## Residual standard error: 12000 on 41 degrees of freedom
## Multiple R-squared: 0.785,   Adjusted R-squared: 0.779 
## F-statistic:  149 on 1 and 41 DF,  p-value: 3e-15

## Simple linear regression: runoff (BSAAM) explained by the OPRC station
lm2 <- lm(formula = BSAAM ~ OPRC, data = larain)
summary(object = lm2)  # adjusted R-squared = 0.842
## 
## Call:
## lm(formula = BSAAM ~ OPRC, data = larain)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -24356  -5514   -522   7448  24854 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)    21741       4044    5.38  3.3e-06 ***
## OPRC            4667        311   14.99  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 
## 
## Residual standard error: 10100 on 41 degrees of freedom
## Multiple R-squared: 0.846,   Adjusted R-squared: 0.842 
## F-statistic:  225 on 1 and 41 DF,  p-value: <2e-16

## Simple linear regression: runoff (BSAAM) explained by the OPSLAKE station
lm3 <- lm(formula = BSAAM ~ OPSLAKE, data = larain)
summary(object = lm3)  # adjusted R-squared = 0.878
## 
## Call:
## lm(formula = BSAAM ~ OPSLAKE, data = larain)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -17604  -5338    332   3411  20876 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)    27015       3219    8.39  1.9e-10 ***
## OPSLAKE         3752        216   17.39  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 
## 
## Residual standard error: 8920 on 41 degrees of freedom
## Multiple R-squared: 0.881,   Adjusted R-squared: 0.878 
## F-statistic:  303 on 1 and 41 DF,  p-value: <2e-16

## Comparison between the three single-station models with anova()
## NOTE: these models are not nested and all have 41 residual df, so the
## table reports only each model's RSS (Df = 0, no F statistic or p-value).
## The comparison therefore rests on RSS / adjusted R-squared, not on a test.

anova(lm1, lm2, lm3)
## Analysis of Variance Table
## 
## Model 1: BSAAM ~ OPBPC
## Model 2: BSAAM ~ OPRC
## Model 3: BSAAM ~ OPSLAKE
##   Res.Df      RSS Df Sum of Sq F Pr(>F)
## 1     41 5.89e+09                      
## 2     41 4.22e+09  0  1.67e+09         
## 3     41 3.26e+09  0  9.56e+08
## Scatter plot of stream runoff (BSAAM) against precipitation at OPSLAKE.
## The response is runoff in acre-feet, not rainfall, so the title and the
## y-axis label name it accordingly.
plot(BSAAM ~ OPSLAKE, data = larain,
     main = "Runoff Prediction from OPSLAKE",
     xlab = "OPSLAKE precipitation (inches)",
     ylab = "BSAAM runoff (acre-feet)")

plot of chunk unnamed-chunk-2

## Overlay the fitted regression line of the OPSLAKE model on the current plot.
## The fitted model is stored as lm3; this script never creates an object
## called 'opslake'.
abline(reg = lm3)

## Residuals versus fitted values for the OPSLAKE model (stored as lm3)
plot(residuals(lm3) ~ fitted(lm3), main = "opslake Residuals",
    xlab = "Fitted Values", ylab = "Residuals")

## Quadratic model: OPSLAKE plus its square as predictors of runoff (BSAAM)

lm4 <- lm(formula = BSAAM ~ OPSLAKE + I(OPSLAKE^2), data = larain)
summary(object = lm4)  # adjusted R-squared = 0.876
## 
## Call:
## lm(formula = BSAAM ~ OPSLAKE + I(OPSLAKE^2), data = larain)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -18060  -5452    290   3790  20250 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   23800.2     7089.2    3.36   0.0017 ** 
## OPSLAKE        4220.2      942.3    4.48  6.1e-05 ***
## I(OPSLAKE^2)    -14.0       27.4   -0.51   0.6128    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 
## 
## Residual standard error: 9000 on 40 degrees of freedom
## Multiple R-squared: 0.881,   Adjusted R-squared: 0.876 
## F-statistic:  149 on 2 and 40 DF,  p-value: <2e-16

## Comparison of the best simple linear model (lm3) with the quadratic model
## (lm4). lm3 is nested in lm4, so anova() gives an F-test for the added
## I(OPSLAKE^2) term.

anova(lm3, lm4)
## Analysis of Variance Table
## 
## Model 1: BSAAM ~ OPSLAKE
## Model 2: BSAAM ~ OPSLAKE + I(OPSLAKE^2)
##   Res.Df      RSS Df Sum of Sq    F Pr(>F)
## 1     41 3.26e+09                         
## 2     40 3.24e+09  1  21097409 0.26   0.61
## no significant difference between the two models (p = 0.61)

## Residuals versus fitted values for the linear (lm3) and quadratic (lm4)
## models, drawn side by side

par(mfrow = c(1, 2))  # one row, two panels

plot(
  x = fitted(lm3), y = residuals(lm3),
  main = "opslake Residuals",
  xlab = "Fitted Values", ylab = "Residuals"
)
plot(
  x = fitted(lm4), y = residuals(lm4),
  main = "opslake2 Residuals",
  xlab = "Fitted Values", ylab = "Residuals"
)

plot of chunk unnamed-chunk-2

There is no significant difference between the two models, so the simpler linear regression model (BSAAM ~ OPSLAKE) is the best choice for predicting runoff.