After our Intro to Stan lecture I think it would be valuable to have you go through a similar exercise. Let’s test a second research question.
Research question: Is sea ice extent declining in the Southern Hemisphere over time? Is the same pattern happening in the Antarctic as in the Arctic? Fit a Stan model to find out!
Make sure you follow the steps we used in class.
What do your Stan model results indicate so far?
seaice <- read.csv("C:/Users/yanmi/OneDrive/Desktop/Alice/seaice.csv")
head(seaice)
## year extent_north extent_south
## 1 1979 12.328 11.700
## 2 1980 12.337 11.230
## 3 1981 12.127 11.435
## 4 1982 12.447 11.640
## 5 1983 12.332 11.389
## 6 1984 11.910 11.454
View(seaice)
plot(extent_south ~ year, pch = 20, data = seaice)
library(ggplot2)
plot(extent_south ~ year, pch = 25, data = seaice,
main = "Sea ice extent in the South, over time",
xlab = "Year", ylab = "Sea ice in the south",
col = "blue")
abline(lm(seaice$extent_south ~ seaice$year),
col = "red", lty = 2, lwd = 2)
lm1 <- lm(extent_south ~ year, data = seaice)
summary(lm1)
##
## Call:
## lm(formula = extent_south ~ year, data = seaice)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.23372 -0.18142 0.01587 0.18465 0.88814
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -14.199551 10.925576 -1.300 0.2018
## year 0.012953 0.005468 2.369 0.0232 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.3843 on 37 degrees of freedom
## Multiple R-squared: 0.1317, Adjusted R-squared: 0.1082
## F-statistic: 5.611 on 1 and 37 DF, p-value: 0.02318
plot(extent_south ~ year, pch = 20, data = seaice)
abline(lm1, col = 2, lty = 2, lw = 3)
library(datasets)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:gdata':
##
## combine, first, last
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
seaice <- seaice %>%
mutate("index"=I(year)-1978) #index, regarding 1978 as 0
lm2 <- lm(extent_south~index, data=seaice)
summary(lm2)
##
## Call:
## lm(formula = extent_south ~ index, data = seaice)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.23372 -0.18142 0.01587 0.18465 0.88814
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 11.421555 0.125490 91.015 <2e-16 ***
## index 0.012953 0.005468 2.369 0.0232 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.3843 on 37 degrees of freedom
## Multiple R-squared: 0.1317, Adjusted R-squared: 0.1082
## F-statistic: 5.611 on 1 and 37 DF, p-value: 0.02318
x <- I(seaice$year - 1978)
y <- seaice$extent_south
N <- length(seaice$year)
lm1 <- lm(y ~ x)
summary(lm1)
##
## Call:
## lm(formula = y ~ x)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.23372 -0.18142 0.01587 0.18465 0.88814
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 11.421555 0.125490 91.015 <2e-16 ***
## x 0.012953 0.005468 2.369 0.0232 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.3843 on 37 degrees of freedom
## Multiple R-squared: 0.1317, Adjusted R-squared: 0.1082
## F-statistic: 5.611 on 1 and 37 DF, p-value: 0.02318
stan_data <- list(N=nrow(seaice), y=seaice$extent_south, x=seaice$index)
write("// Stan model for simple linear regression
data {
int < lower = 1 > N; // Sample size
vector[N] x; // Predictor variable
vector[N] y; // Outcome varaiable
}
parameters {
real alpha; // Declare variable for intercept
real beta; // Declare variable for slope/regression coefficients
real < lower = 0 > sigma; // Error SD
}
model {
y ~ normal(alpha + x * beta , sigma);
}
generated quantities {
} ",
"stanmod.stan")
stanc("stanmod.stan")
## $status
## [1] TRUE
##
## $model_cppname
## [1] "model620815ef3f23_stanmod"
##
## $cppcode
## [1] "// Code generated by Stan version 2.21.0\n\n#include <stan/model/model_header.hpp>\n\nnamespace model620815ef3f23_stanmod_namespace {\n\nusing std::istream;\nusing std::string;\nusing std::stringstream;\nusing std::vector;\nusing stan::io::dump;\nusing stan::math::lgamma;\nusing stan::model::prob_grad;\nusing namespace stan::math;\n\nstatic int current_statement_begin__;\n\nstan::io::program_reader prog_reader__() {\n stan::io::program_reader reader;\n reader.add_event(0, 0, \"start\", \"model620815ef3f23_stanmod\");\n reader.add_event(22, 20, \"end\", \"model620815ef3f23_stanmod\");\n return reader;\n}\n\nclass model620815ef3f23_stanmod\n : public stan::model::model_base_crtp<model620815ef3f23_stanmod> {\nprivate:\n int N;\n vector_d x;\n vector_d y;\npublic:\n model620815ef3f23_stanmod(stan::io::var_context& context__,\n std::ostream* pstream__ = 0)\n : model_base_crtp(0) {\n ctor_body(context__, 0, pstream__);\n }\n\n model620815ef3f23_stanmod(stan::io::var_context& context__,\n unsigned int random_seed__,\n std::ostream* pstream__ = 0)\n : model_base_crtp(0) {\n ctor_body(context__, random_seed__, pstream__);\n }\n\n void ctor_body(stan::io::var_context& context__,\n unsigned int random_seed__,\n std::ostream* pstream__) {\n typedef double local_scalar_t__;\n\n boost::ecuyer1988 base_rng__ =\n stan::services::util::create_rng(random_seed__, 0);\n (void) base_rng__; // suppress unused var warning\n\n current_statement_begin__ = -1;\n\n static const char* function__ = \"model620815ef3f23_stanmod_namespace::model620815ef3f23_stanmod\";\n (void) function__; // dummy to suppress unused var warning\n size_t pos__;\n (void) pos__; // dummy to suppress unused var warning\n std::vector<int> vals_i__;\n std::vector<double> vals_r__;\n local_scalar_t__ DUMMY_VAR__(std::numeric_limits<double>::quiet_NaN());\n (void) DUMMY_VAR__; // suppress unused var warning\n\n try {\n // initialize data block variables from context__\n current_statement_begin__ = 4;\n context__.validate_dims(\"data initialization\", \"N\", \"int\", context__.to_vec());\n N = int(0);\n vals_i__ = context__.vals_i(\"N\");\n pos__ = 0;\n N = vals_i__[pos__++];\n check_greater_or_equal(function__, \"N\", N, 1);\n\n current_statement_begin__ = 5;\n validate_non_negative_index(\"x\", \"N\", N);\n context__.validate_dims(\"data initialization\", \"x\", \"vector_d\", context__.to_vec(N));\n x = Eigen::Matrix<double, Eigen::Dynamic, 1>(N);\n vals_r__ = context__.vals_r(\"x\");\n pos__ = 0;\n size_t x_j_1_max__ = N;\n for (size_t j_1__ = 0; j_1__ < x_j_1_max__; ++j_1__) {\n x(j_1__) = vals_r__[pos__++];\n }\n\n current_statement_begin__ = 6;\n validate_non_negative_index(\"y\", \"N\", N);\n context__.validate_dims(\"data initialization\", \"y\", \"vector_d\", context__.to_vec(N));\n y = Eigen::Matrix<double, Eigen::Dynamic, 1>(N);\n vals_r__ = context__.vals_r(\"y\");\n pos__ = 0;\n size_t y_j_1_max__ = N;\n for (size_t j_1__ = 0; j_1__ < y_j_1_max__; ++j_1__) {\n y(j_1__) = vals_r__[pos__++];\n }\n\n\n // initialize transformed data variables\n // execute transformed data statements\n\n // validate transformed data\n\n // validate, set parameter ranges\n num_params_r__ = 0U;\n param_ranges_i__.clear();\n current_statement_begin__ = 10;\n num_params_r__ += 1;\n current_statement_begin__ = 11;\n num_params_r__ += 1;\n current_statement_begin__ = 12;\n num_params_r__ += 1;\n } catch (const std::exception& e) {\n stan::lang::rethrow_located(e, current_statement_begin__, prog_reader__());\n // Next line prevents compiler griping about no return\n throw std::runtime_error(\"*** IF YOU SEE THIS, PLEASE REPORT A BUG ***\");\n }\n }\n\n ~model620815ef3f23_stanmod() { }\n\n\n void transform_inits(const stan::io::var_context& context__,\n std::vector<int>& params_i__,\n std::vector<double>& params_r__,\n std::ostream* pstream__) const {\n typedef double local_scalar_t__;\n stan::io::writer<double> writer__(params_r__, params_i__);\n size_t pos__;\n (void) pos__; // dummy call to supress warning\n std::vector<double> vals_r__;\n std::vector<int> vals_i__;\n\n current_statement_begin__ = 10;\n if (!(context__.contains_r(\"alpha\")))\n stan::lang::rethrow_located(std::runtime_error(std::string(\"Variable alpha missing\")), current_statement_begin__, prog_reader__());\n vals_r__ = context__.vals_r(\"alpha\");\n pos__ = 0U;\n context__.validate_dims(\"parameter initialization\", \"alpha\", \"double\", context__.to_vec());\n double alpha(0);\n alpha = vals_r__[pos__++];\n try {\n writer__.scalar_unconstrain(alpha);\n } catch (const std::exception& e) {\n stan::lang::rethrow_located(std::runtime_error(std::string(\"Error transforming variable alpha: \") + e.what()), current_statement_begin__, prog_reader__());\n }\n\n current_statement_begin__ = 11;\n if (!(context__.contains_r(\"beta\")))\n stan::lang::rethrow_located(std::runtime_error(std::string(\"Variable beta missing\")), current_statement_begin__, prog_reader__());\n vals_r__ = context__.vals_r(\"beta\");\n pos__ = 0U;\n context__.validate_dims(\"parameter initialization\", \"beta\", \"double\", context__.to_vec());\n double beta(0);\n beta = vals_r__[pos__++];\n try {\n writer__.scalar_unconstrain(beta);\n } catch (const std::exception& e) {\n stan::lang::rethrow_located(std::runtime_error(std::string(\"Error transforming variable beta: \") + e.what()), current_statement_begin__, prog_reader__());\n }\n\n current_statement_begin__ = 12;\n if (!(context__.contains_r(\"sigma\")))\n stan::lang::rethrow_located(std::runtime_error(std::string(\"Variable sigma missing\")), current_statement_begin__, prog_reader__());\n vals_r__ = context__.vals_r(\"sigma\");\n pos__ = 0U;\n context__.validate_dims(\"parameter initialization\", \"sigma\", \"double\", context__.to_vec());\n double sigma(0);\n sigma = vals_r__[pos__++];\n try {\n writer__.scalar_lb_unconstrain(0, sigma);\n } catch (const std::exception& e) {\n stan::lang::rethrow_located(std::runtime_error(std::string(\"Error transforming variable sigma: \") + e.what()), current_statement_begin__, prog_reader__());\n }\n\n params_r__ = writer__.data_r();\n params_i__ = writer__.data_i();\n }\n\n void transform_inits(const stan::io::var_context& context,\n Eigen::Matrix<double, Eigen::Dynamic, 1>& params_r,\n std::ostream* pstream__) const {\n std::vector<double> params_r_vec;\n std::vector<int> params_i_vec;\n transform_inits(context, params_i_vec, params_r_vec, pstream__);\n params_r.resize(params_r_vec.size());\n for (int i = 0; i < params_r.size(); ++i)\n params_r(i) = params_r_vec[i];\n }\n\n\n template <bool propto__, bool jacobian__, typename T__>\n T__ log_prob(std::vector<T__>& params_r__,\n std::vector<int>& params_i__,\n std::ostream* pstream__ = 0) const {\n\n typedef T__ local_scalar_t__;\n\n local_scalar_t__ DUMMY_VAR__(std::numeric_limits<double>::quiet_NaN());\n (void) DUMMY_VAR__; // dummy to suppress unused var warning\n\n T__ lp__(0.0);\n stan::math::accumulator<T__> lp_accum__;\n try {\n stan::io::reader<local_scalar_t__> in__(params_r__, params_i__);\n\n // model parameters\n current_statement_begin__ = 10;\n local_scalar_t__ alpha;\n (void) alpha; // dummy to suppress unused var warning\n if (jacobian__)\n alpha = in__.scalar_constrain(lp__);\n else\n alpha = in__.scalar_constrain();\n\n current_statement_begin__ = 11;\n local_scalar_t__ beta;\n (void) beta; // dummy to suppress unused var warning\n if (jacobian__)\n beta = in__.scalar_constrain(lp__);\n else\n beta = in__.scalar_constrain();\n\n current_statement_begin__ = 12;\n local_scalar_t__ sigma;\n (void) sigma; // dummy to suppress unused var warning\n if (jacobian__)\n sigma = in__.scalar_lb_constrain(0, lp__);\n else\n sigma = in__.scalar_lb_constrain(0);\n\n // model body\n\n current_statement_begin__ = 16;\n lp_accum__.add(normal_log<propto__>(y, add(alpha, multiply(x, beta)), sigma));\n\n } catch (const std::exception& e) {\n stan::lang::rethrow_located(e, current_statement_begin__, prog_reader__());\n // Next line prevents compiler griping about no return\n throw std::runtime_error(\"*** IF YOU SEE THIS, PLEASE REPORT A BUG ***\");\n }\n\n lp_accum__.add(lp__);\n return lp_accum__.sum();\n\n } // log_prob()\n\n template <bool propto, bool jacobian, typename T_>\n T_ log_prob(Eigen::Matrix<T_,Eigen::Dynamic,1>& params_r,\n std::ostream* pstream = 0) const {\n std::vector<T_> vec_params_r;\n vec_params_r.reserve(params_r.size());\n for (int i = 0; i < params_r.size(); ++i)\n vec_params_r.push_back(params_r(i));\n std::vector<int> vec_params_i;\n return log_prob<propto,jacobian,T_>(vec_params_r, vec_params_i, pstream);\n }\n\n\n void get_param_names(std::vector<std::string>& names__) const {\n names__.resize(0);\n names__.push_back(\"alpha\");\n names__.push_back(\"beta\");\n names__.push_back(\"sigma\");\n }\n\n\n void get_dims(std::vector<std::vector<size_t> >& dimss__) const {\n dimss__.resize(0);\n std::vector<size_t> dims__;\n dims__.resize(0);\n dimss__.push_back(dims__);\n dims__.resize(0);\n dimss__.push_back(dims__);\n dims__.resize(0);\n dimss__.push_back(dims__);\n }\n\n template <typename RNG>\n void write_array(RNG& base_rng__,\n std::vector<double>& params_r__,\n std::vector<int>& params_i__,\n std::vector<double>& vars__,\n bool include_tparams__ = true,\n bool include_gqs__ = true,\n std::ostream* pstream__ = 0) const {\n typedef double local_scalar_t__;\n\n vars__.resize(0);\n stan::io::reader<local_scalar_t__> in__(params_r__, params_i__);\n static const char* function__ = \"model620815ef3f23_stanmod_namespace::write_array\";\n (void) function__; // dummy to suppress unused var warning\n\n // read-transform, write parameters\n double alpha = in__.scalar_constrain();\n vars__.push_back(alpha);\n\n double beta = in__.scalar_constrain();\n vars__.push_back(beta);\n\n double sigma = in__.scalar_lb_constrain(0);\n vars__.push_back(sigma);\n\n double lp__ = 0.0;\n (void) lp__; // dummy to suppress unused var warning\n stan::math::accumulator<double> lp_accum__;\n\n local_scalar_t__ DUMMY_VAR__(std::numeric_limits<double>::quiet_NaN());\n (void) DUMMY_VAR__; // suppress unused var warning\n\n if (!include_tparams__ && !include_gqs__) return;\n\n try {\n if (!include_gqs__ && !include_tparams__) return;\n if (!include_gqs__) return;\n } catch (const std::exception& e) {\n stan::lang::rethrow_located(e, current_statement_begin__, prog_reader__());\n // Next line prevents compiler griping about no return\n throw std::runtime_error(\"*** IF YOU SEE THIS, PLEASE REPORT A BUG ***\");\n }\n }\n\n template <typename RNG>\n void write_array(RNG& base_rng,\n Eigen::Matrix<double,Eigen::Dynamic,1>& params_r,\n Eigen::Matrix<double,Eigen::Dynamic,1>& vars,\n bool include_tparams = true,\n bool include_gqs = true,\n std::ostream* pstream = 0) const {\n std::vector<double> params_r_vec(params_r.size());\n for (int i = 0; i < params_r.size(); ++i)\n params_r_vec[i] = params_r(i);\n std::vector<double> vars_vec;\n std::vector<int> params_i_vec;\n write_array(base_rng, params_r_vec, params_i_vec, vars_vec, include_tparams, include_gqs, pstream);\n vars.resize(vars_vec.size());\n for (int i = 0; i < vars.size(); ++i)\n vars(i) = vars_vec[i];\n }\n\n std::string model_name() const {\n return \"model620815ef3f23_stanmod\";\n }\n\n\n void constrained_param_names(std::vector<std::string>& param_names__,\n bool include_tparams__ = true,\n bool include_gqs__ = true) const {\n std::stringstream param_name_stream__;\n param_name_stream__.str(std::string());\n param_name_stream__ << \"alpha\";\n param_names__.push_back(param_name_stream__.str());\n param_name_stream__.str(std::string());\n param_name_stream__ << \"beta\";\n param_names__.push_back(param_name_stream__.str());\n param_name_stream__.str(std::string());\n param_name_stream__ << \"sigma\";\n param_names__.push_back(param_name_stream__.str());\n\n if (!include_gqs__ && !include_tparams__) return;\n\n if (include_tparams__) {\n }\n\n if (!include_gqs__) return;\n }\n\n\n void unconstrained_param_names(std::vector<std::string>& param_names__,\n bool include_tparams__ = true,\n bool include_gqs__ = true) const {\n std::stringstream param_name_stream__;\n param_name_stream__.str(std::string());\n param_name_stream__ << \"alpha\";\n param_names__.push_back(param_name_stream__.str());\n param_name_stream__.str(std::string());\n param_name_stream__ << \"beta\";\n param_names__.push_back(param_name_stream__.str());\n param_name_stream__.str(std::string());\n param_name_stream__ << \"sigma\";\n param_names__.push_back(param_name_stream__.str());\n\n if (!include_gqs__ && !include_tparams__) return;\n\n if (include_tparams__) {\n }\n\n if (!include_gqs__) return;\n }\n\n}; // model\n\n} // namespace\n\ntypedef model620815ef3f23_stanmod_namespace::model620815ef3f23_stanmod stan_model;\n\n#ifndef USING_R\n\nstan::model::model_base& new_model(\n stan::io::var_context& data_context,\n unsigned int seed,\n std::ostream* msg_stream) {\n stan_model* m = new stan_model(data_context, seed, msg_stream);\n return *m;\n}\n\n#endif\n\n"
##
## $model_name
## [1] "stanmod"
##
## $model_code
## [1] "// Stan model for simple linear regression\n\n data {\n int < lower = 1 > N; // Sample size\n vector[N] x; // Predictor variable\n vector[N] y; // Outcome varaiable\n }\n \n parameters {\n real alpha; // Declare variable for intercept\n real beta; // Declare variable for slope/regression coefficients\n real < lower = 0 > sigma; // Error SD\n }\n \n model {\n y ~ normal(alpha + x * beta , sigma);\n }\n \n generated quantities {\n } "
## attr(,"model_name2")
## [1] "stanmod"
stan_model1 <- "stan_model1.stan"
#library(gridExtra)
library(modelr)
##
## Attaching package: 'modelr'
## The following object is masked from 'package:gdata':
##
## resample
p1 <- seaice %>%
add_predictions(lm2,"pred_south") %>%
ggplot(aes(index,extent_south))+
geom_point()+
geom_line(aes(y=pred_south),color="red",size=1)+
labs(x="Index",y="Sea ice extent in the Southern Hemisphere (10^6\ km^2)")+
ggtitle("A general lm fit")
p2 <- seaice %>%
ggplot(aes(index,extent_south))+
labs(x="Index",y="Sea ice extent in the Southern Hemisphere (10^6\ km^2)")+
ggtitle("A Stan fit with non-informative priors")
library(gridExtra)
##
## Attaching package: 'gridExtra'
## The following object is masked from 'package:dplyr':
##
## combine
## The following object is masked from 'package:gdata':
##
## combine
library(modelr)
p1 <- seaice %>%
add_predictions(lm2,"pred_south") %>%
ggplot(aes(index,extent_south))+
geom_point()+
geom_line(aes(y=pred_south),color="red",size=1)+
labs(x="Index",y="Sea ice extent in the Southern Hemisphere (10^6\ km^2)")+
ggtitle("A general lm fit")
p2 <- seaice %>%
ggplot(aes(index,extent_south))+
labs(x="Index",y="Sea ice extent in the Southern Hemisphere (10^6\ km^2)")+
ggtitle("A Stan fit with non-informative priors")
gridExtra::grid.arrange(p1,p2, nrow=1, bottom="Models comparison")
```