After our Intro to Stan lecture I think it would be valuable to have you go through a similar exercise. Let’s test a second research question.
Research question: Is sea ice extent declining in the Southern Hemisphere over time? Is the same pattern happening in the Antarctic as in the Arctic? Fit a Stan model to find out!
Make sure you follow the steps we used in class.
What do your Stan model results indicate so far?
#place the code here
df<-read.csv("/Users/joanneyang/Documents/HU/505-90-O-2020:Spring-ModelingSimulation&Game/Assign5_ice/seaice.csv")
summary(df)
## year extent_north extent_south
## Min. :1979 Min. :10.15 Min. :10.69
## 1st Qu.:1988 1st Qu.:10.90 1st Qu.:11.42
## Median :1998 Median :11.67 Median :11.67
## Mean :1998 Mean :11.46 Mean :11.68
## 3rd Qu.:2008 3rd Qu.:11.98 3rd Qu.:11.88
## Max. :2017 Max. :12.45 Max. :12.78
head(df)
## year extent_north extent_south
## 1 1979 12.328 11.700
## 2 1980 12.337 11.230
## 3 1981 12.127 11.435
## 4 1982 12.447 11.640
## 5 1983 12.332 11.389
## 6 1984 11.910 11.454
#plot data
year <- df$year
extent_south <- df$extent_south
ggplot(df, aes(year, extent_south)) +
geom_point(aes(color= extent_south)) +
geom_smooth(method = "lm") +
coord_cartesian() +
scale_color_gradient() +
theme_bw()
#write the code
lm <- lm(df$year~df$extent_south)
summary(lm)
##
## Call:
## lm(formula = df$year ~ df$extent_south)
##
## Residuals:
## Min 1Q Median 3Q Max
## -19.1971 -6.1934 0.1828 7.0543 29.0404
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1879.251 50.160 37.465 <2e-16 ***
## df$extent_south 10.166 4.292 2.369 0.0232 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 10.77 on 37 degrees of freedom
## Multiple R-squared: 0.1317, Adjusted R-squared: 0.1082
## F-statistic: 5.611 on 1 and 37 DF, p-value: 0.02318
#write the code here
x<- df$year
y<- df$extent_south
N <- length(df$year)
df <- as.data.frame(df)
lm1 <- lm(y ~ x)
summary(lm1)
##
## Call:
## lm(formula = y ~ x)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.23372 -0.18142 0.01587 0.18465 0.88814
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -14.199551 10.925576 -1.300 0.2018
## x 0.012953 0.005468 2.369 0.0232 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.3843 on 37 degrees of freedom
## Multiple R-squared: 0.1317, Adjusted R-squared: 0.1082
## F-statistic: 5.611 on 1 and 37 DF, p-value: 0.02318
#write the code here
stan_input_data <- list(N=N, x=x, y=y)
#write the code
write("// Stan model for simple linear regression
data {
int < lower = 1 > N; // Sample size
vector[N] x; // Predictor
vector[N] y; // Outcome
}
parameters {
real alpha; // Intercept
real beta; // Slope (regression coefficients)
real < lower = 0 > sigma; // Error SD
}
model {
y ~ normal(alpha + x * beta , sigma);
}
generated quantities {
} // The posterior predictive distribution",
"stan_model1.stan")
stan_model1 <- "stan_model1.stan"
#code here
fit <- stan(file = stan_model1, data = stan_input_data, warmup = 500, iter = 1000, chains = 4, cores = 2, thin = 1)
## Trying to compile a simple C file
## Running /Library/Frameworks/R.framework/Resources/bin/R CMD SHLIB foo.c
## clang -I"/Library/Frameworks/R.framework/Resources/include" -DNDEBUG -I"/Library/Frameworks/R.framework/Versions/3.6/Resources/library/Rcpp/include/" -I"/Library/Frameworks/R.framework/Versions/3.6/Resources/library/RcppEigen/include/" -I"/Library/Frameworks/R.framework/Versions/3.6/Resources/library/RcppEigen/include/unsupported" -I"/Library/Frameworks/R.framework/Versions/3.6/Resources/library/BH/include" -I"/Library/Frameworks/R.framework/Versions/3.6/Resources/library/StanHeaders/include/src/" -I"/Library/Frameworks/R.framework/Versions/3.6/Resources/library/StanHeaders/include/" -I"/Library/Frameworks/R.framework/Versions/3.6/Resources/library/rstan/include" -DEIGEN_NO_DEBUG -D_REENTRANT -DBOOST_DISABLE_ASSERTS -DBOOST_PENDING_INTEGER_LOG2_HPP -include stan/math/prim/mat/fun/Eigen.hpp -isysroot /Library/Developer/CommandLineTools/SDKs/MacOSX.sdk -I/usr/local/include -fPIC -Wall -g -O2 -c foo.c -o foo.o
## In file included from <built-in>:1:
## In file included from /Library/Frameworks/R.framework/Versions/3.6/Resources/library/StanHeaders/include/stan/math/prim/mat/fun/Eigen.hpp:13:
## In file included from /Library/Frameworks/R.framework/Versions/3.6/Resources/library/RcppEigen/include/Eigen/Dense:1:
## In file included from /Library/Frameworks/R.framework/Versions/3.6/Resources/library/RcppEigen/include/Eigen/Core:88:
## /Library/Frameworks/R.framework/Versions/3.6/Resources/library/RcppEigen/include/Eigen/src/Core/util/Macros.h:613:1: error: unknown type name 'namespace'
## namespace Eigen {
## ^
## /Library/Frameworks/R.framework/Versions/3.6/Resources/library/RcppEigen/include/Eigen/src/Core/util/Macros.h:613:16: error: expected ';' after top level declarator
## namespace Eigen {
## ^
## ;
## In file included from <built-in>:1:
## In file included from /Library/Frameworks/R.framework/Versions/3.6/Resources/library/StanHeaders/include/stan/math/prim/mat/fun/Eigen.hpp:13:
## In file included from /Library/Frameworks/R.framework/Versions/3.6/Resources/library/RcppEigen/include/Eigen/Dense:1:
## /Library/Frameworks/R.framework/Versions/3.6/Resources/library/RcppEigen/include/Eigen/Core:96:10: fatal error: 'complex' file not found
## #include <complex>
## ^~~~~~~~~
## 3 errors generated.
## make: *** [foo.o] Error 1
## Warning: There were 493 transitions after warmup that exceeded the maximum treedepth. Increase max_treedepth above 10. See
## http://mc-stan.org/misc/warnings.html#maximum-treedepth-exceeded
## Warning: Examine the pairs() plot to diagnose sampling problems
## Warning: Bulk Effective Samples Size (ESS) is too low, indicating posterior means and medians may be unreliable.
## Running the chains for more iterations may help. See
## http://mc-stan.org/misc/warnings.html#bulk-ess
fit
## Inference for Stan model: stan_model1.
## 4 chains, each with iter=1000; warmup=500; thin=1;
## post-warmup draws per chain=500, total post-warmup draws=2000.
##
## mean se_mean sd 2.5% 25% 50% 75% 97.5% n_eff Rhat
## alpha -14.18 0.61 11.50 -36.86 -21.82 -14.38 -6.52 10.19 357 1.01
## beta 0.01 0.00 0.01 0.00 0.01 0.01 0.02 0.02 358 1.01
## sigma 0.40 0.00 0.05 0.31 0.36 0.39 0.43 0.49 613 1.00
## lp__ 16.27 0.05 1.26 13.00 15.66 16.60 17.18 17.72 607 1.00
##
## Samples were drawn using NUTS(diag_e) at Tue Mar 24 22:33:56 2020.
## For each parameter, n_eff is a crude measure of effective sample size,
## and Rhat is the potential scale reduction factor on split chains (at
## convergence, Rhat=1).
#code here
posterior <- rstan::extract(fit)
str(posterior)
## List of 4
## $ alpha: num [1:2000(1d)] -22.79 4.41 -11.97 -10.67 -14.19 ...
## ..- attr(*, "dimnames")=List of 1
## .. ..$ iterations: NULL
## $ beta : num [1:2000(1d)] 0.01722 0.00367 0.01185 0.01118 0.01296 ...
## ..- attr(*, "dimnames")=List of 1
## .. ..$ iterations: NULL
## $ sigma: num [1:2000(1d)] 0.399 0.373 0.316 0.293 0.383 ...
## ..- attr(*, "dimnames")=List of 1
## .. ..$ iterations: NULL
## $ lp__ : num [1:2000(1d)] 17.1 15.8 16.3 14.8 17.7 ...
## ..- attr(*, "dimnames")=List of 1
## .. ..$ iterations: NULL
#code here
plot(y~x, pch = 20)
abline(lm, col=2, lty=2, lw=3)
abline(mean(posterior$alpha), mean(posterior$beta), col=6, lw=2)
#code here
plot(y ~ x, pch = 20)
for (i in 1:500) {
abline(posterior$alpha[i], posterior$beta[i], col = "pink", lty =1)
}
abline(mean(posterior$alpha), mean(posterior$beta), col = 6, lw = 2)
traceplot(fit)