Intro to STAN Homework Part #1

After our Intro to Stan lecture I think it would be valuable to have you go through a similar exercise. Let’s test a second research question.

Research question: Is sea ice extent declining in the Southern Hemisphere over time? Is the same pattern happening in the Antarctic as in the Arctic? Fit a Stan model to find out!

Make sure you follow the steps we used in class.

What do your Stan model results indicate so far?

1. Load and Inspect Data

#place the code here
df<-read.csv("/Users/joanneyang/Documents/HU/505-90-O-2020:Spring-ModelingSimulation&Game/Assign5_ice/seaice.csv")

summary(df)
##       year       extent_north    extent_south  
##  Min.   :1979   Min.   :10.15   Min.   :10.69  
##  1st Qu.:1988   1st Qu.:10.90   1st Qu.:11.42  
##  Median :1998   Median :11.67   Median :11.67  
##  Mean   :1998   Mean   :11.46   Mean   :11.68  
##  3rd Qu.:2008   3rd Qu.:11.98   3rd Qu.:11.88  
##  Max.   :2017   Max.   :12.45   Max.   :12.78
head(df)
##   year extent_north extent_south
## 1 1979       12.328       11.700
## 2 1980       12.337       11.230
## 3 1981       12.127       11.435
## 4 1982       12.447       11.640
## 5 1983       12.332       11.389
## 6 1984       11.910       11.454

2. Plot the data

#plot data

year <- df$year
extent_south <- df$extent_south

ggplot(df, aes(year, extent_south)) +
   geom_point(aes(color= extent_south)) +
   geom_smooth(method = "lm") +
   coord_cartesian() +
   scale_color_gradient() +
   theme_bw()

3. Run a general linear model using lm()

#write the code

lm <- lm(df$year~df$extent_south)
summary(lm)
## 
## Call:
## lm(formula = df$year ~ df$extent_south)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -19.1971  -6.1934   0.1828   7.0543  29.0404 
## 
## Coefficients:
##                 Estimate Std. Error t value Pr(>|t|)    
## (Intercept)     1879.251     50.160  37.465   <2e-16 ***
## df$extent_south   10.166      4.292   2.369   0.0232 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 10.77 on 37 degrees of freedom
## Multiple R-squared:  0.1317, Adjusted R-squared:  0.1082 
## F-statistic: 5.611 on 1 and 37 DF,  p-value: 0.02318

4. Index the data, re-run the lm(), extract summary statistics and turn the indexed data into a dataframe to pass into Stan

#write the code here

x<- df$year
y<- df$extent_south
N <- length(df$year)

df <- as.data.frame(df)

lm1 <- lm(y ~ x)
summary(lm1)
## 
## Call:
## lm(formula = y ~ x)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1.23372 -0.18142  0.01587  0.18465  0.88814 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)  
## (Intercept) -14.199551  10.925576  -1.300   0.2018  
## x             0.012953   0.005468   2.369   0.0232 *
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.3843 on 37 degrees of freedom
## Multiple R-squared:  0.1317, Adjusted R-squared:  0.1082 
## F-statistic: 5.611 on 1 and 37 DF,  p-value: 0.02318
#write the code here

stan_input_data <- list(N=N, x=x, y=y)

5. Write the Stan model

#write the code

 write("// Stan model for simple linear regression

data {
 int < lower = 1 > N; // Sample size
 vector[N] x; // Predictor
 vector[N] y; // Outcome
}

parameters {
 real alpha; // Intercept
 real beta; // Slope (regression coefficients)
 real < lower = 0 > sigma; // Error SD
}

model {
 y ~ normal(alpha + x * beta , sigma);
}

generated quantities {
} // The posterior predictive distribution",

"stan_model1.stan")


stan_model1 <- "stan_model1.stan"

6. Run the Stan model and inspect the results

#code here

fit <- stan(file = stan_model1, data = stan_input_data, warmup = 500, iter = 1000, chains = 4, cores = 2, thin = 1)
## Trying to compile a simple C file
## Running /Library/Frameworks/R.framework/Resources/bin/R CMD SHLIB foo.c
## clang -I"/Library/Frameworks/R.framework/Resources/include" -DNDEBUG   -I"/Library/Frameworks/R.framework/Versions/3.6/Resources/library/Rcpp/include/"  -I"/Library/Frameworks/R.framework/Versions/3.6/Resources/library/RcppEigen/include/"  -I"/Library/Frameworks/R.framework/Versions/3.6/Resources/library/RcppEigen/include/unsupported"  -I"/Library/Frameworks/R.framework/Versions/3.6/Resources/library/BH/include" -I"/Library/Frameworks/R.framework/Versions/3.6/Resources/library/StanHeaders/include/src/"  -I"/Library/Frameworks/R.framework/Versions/3.6/Resources/library/StanHeaders/include/"  -I"/Library/Frameworks/R.framework/Versions/3.6/Resources/library/rstan/include" -DEIGEN_NO_DEBUG  -D_REENTRANT  -DBOOST_DISABLE_ASSERTS -DBOOST_PENDING_INTEGER_LOG2_HPP -include stan/math/prim/mat/fun/Eigen.hpp   -isysroot /Library/Developer/CommandLineTools/SDKs/MacOSX.sdk -I/usr/local/include  -fPIC  -Wall -g -O2  -c foo.c -o foo.o
## In file included from <built-in>:1:
## In file included from /Library/Frameworks/R.framework/Versions/3.6/Resources/library/StanHeaders/include/stan/math/prim/mat/fun/Eigen.hpp:13:
## In file included from /Library/Frameworks/R.framework/Versions/3.6/Resources/library/RcppEigen/include/Eigen/Dense:1:
## In file included from /Library/Frameworks/R.framework/Versions/3.6/Resources/library/RcppEigen/include/Eigen/Core:88:
## /Library/Frameworks/R.framework/Versions/3.6/Resources/library/RcppEigen/include/Eigen/src/Core/util/Macros.h:613:1: error: unknown type name 'namespace'
## namespace Eigen {
## ^
## /Library/Frameworks/R.framework/Versions/3.6/Resources/library/RcppEigen/include/Eigen/src/Core/util/Macros.h:613:16: error: expected ';' after top level declarator
## namespace Eigen {
##                ^
##                ;
## In file included from <built-in>:1:
## In file included from /Library/Frameworks/R.framework/Versions/3.6/Resources/library/StanHeaders/include/stan/math/prim/mat/fun/Eigen.hpp:13:
## In file included from /Library/Frameworks/R.framework/Versions/3.6/Resources/library/RcppEigen/include/Eigen/Dense:1:
## /Library/Frameworks/R.framework/Versions/3.6/Resources/library/RcppEigen/include/Eigen/Core:96:10: fatal error: 'complex' file not found
## #include <complex>
##          ^~~~~~~~~
## 3 errors generated.
## make: *** [foo.o] Error 1
## Warning: There were 493 transitions after warmup that exceeded the maximum treedepth. Increase max_treedepth above 10. See
## http://mc-stan.org/misc/warnings.html#maximum-treedepth-exceeded
## Warning: Examine the pairs() plot to diagnose sampling problems
## Warning: Bulk Effective Samples Size (ESS) is too low, indicating posterior means and medians may be unreliable.
## Running the chains for more iterations may help. See
## http://mc-stan.org/misc/warnings.html#bulk-ess
fit
## Inference for Stan model: stan_model1.
## 4 chains, each with iter=1000; warmup=500; thin=1; 
## post-warmup draws per chain=500, total post-warmup draws=2000.
## 
##         mean se_mean    sd   2.5%    25%    50%   75% 97.5% n_eff Rhat
## alpha -14.18    0.61 11.50 -36.86 -21.82 -14.38 -6.52 10.19   357 1.01
## beta    0.01    0.00  0.01   0.00   0.01   0.01  0.02  0.02   358 1.01
## sigma   0.40    0.00  0.05   0.31   0.36   0.39  0.43  0.49   613 1.00
## lp__   16.27    0.05  1.26  13.00  15.66  16.60 17.18 17.72   607 1.00
## 
## Samples were drawn using NUTS(diag_e) at Tue Mar 24 22:33:56 2020.
## For each parameter, n_eff is a crude measure of effective sample size,
## and Rhat is the potential scale reduction factor on split chains (at 
## convergence, Rhat=1).

7. Extract the posterior estimates into a list so we can plot them

#code here
posterior <- rstan::extract(fit)
str(posterior)
## List of 4
##  $ alpha: num [1:2000(1d)] -22.79 4.41 -11.97 -10.67 -14.19 ...
##   ..- attr(*, "dimnames")=List of 1
##   .. ..$ iterations: NULL
##  $ beta : num [1:2000(1d)] 0.01722 0.00367 0.01185 0.01118 0.01296 ...
##   ..- attr(*, "dimnames")=List of 1
##   .. ..$ iterations: NULL
##  $ sigma: num [1:2000(1d)] 0.399 0.373 0.316 0.293 0.383 ...
##   ..- attr(*, "dimnames")=List of 1
##   .. ..$ iterations: NULL
##  $ lp__ : num [1:2000(1d)] 17.1 15.8 16.3 14.8 17.7 ...
##   ..- attr(*, "dimnames")=List of 1
##   .. ..$ iterations: NULL

8. Compare your results to our results to “lm”

#code here

plot(y~x, pch = 20)
abline(lm, col=2, lty=2, lw=3)
abline(mean(posterior$alpha), mean(posterior$beta), col=6, lw=2)

9. Plot multiple estimates from the posterior

#code here
plot(y ~ x, pch = 20)
for (i in 1:500) {
  abline(posterior$alpha[i], posterior$beta[i], col = "pink", lty =1)
}
abline(mean(posterior$alpha), mean(posterior$beta), col = 6, lw = 2)

traceplot(fit)