R Markdown

This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown see http://rmarkdown.rstudio.com.

When you click the Knit button a document will be generated that includes both content as well as the output of any embedded R code chunks within the document. You can embed an R code chunk like this:

library(data.table)  # provides fread()

data_seaice <- fread("C:/Users/HPi/OneDrive - HP Inc/Pradosh/Personal/Harrisburg/Course 505/seaice.csv")
head(data_seaice)
##    year extent_north extent_south
## 1: 1979       12.328       11.700
## 2: 1980       12.337       11.230
## 3: 1981       12.127       11.435
## 4: 1982       12.447       11.640
## 5: 1983       12.332       11.389
## 6: 1984       11.910       11.454
# Let's create a graph to understand whether the sea ice extent is declining in the Northern Hemisphere
plot(extent_north ~ year, pch = 20, data = data_seaice)

# Fitting a linear model using lm()
lm_1 <- lm(extent_north ~ year, data = data_seaice)
lm_1
## 
## Call:
## lm(formula = extent_north ~ year, data = data_seaice)
## 
## Coefficients:
## (Intercept)         year  
##   120.50304     -0.05457
# Extracting summary statistics from the fitted model
summary(lm_1)
## 
## Call:
## lm(formula = extent_north ~ year, data = data_seaice)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.49925 -0.17713  0.04898  0.16923  0.32829 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 120.503036   6.267203   19.23   <2e-16 ***
## year         -0.054574   0.003137  -17.40   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.2205 on 37 degrees of freedom
## Multiple R-squared:  0.8911, Adjusted R-squared:  0.8881 
## F-statistic: 302.7 on 1 and 37 DF,  p-value: < 2.2e-16
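As a small aside (not part of the original chunk), frequentist 95% confidence intervals for the lm() coefficients can be pulled with confint(); these are useful to compare against the Bayesian credible intervals later.

# Aside: 95% confidence intervals for the lm() fit
confint(lm_1, level = 0.95)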
plot(extent_north ~ year, pch = 20, data = data_seaice)
abline(lm_1, col = 2, lty = 2, lwd = 3)

Let's rename the variables and index the years from 1 to 39. We aren't interested in the calendar years 1979 to 2017 themselves, only in the change over the study period, so we re-index the year relative to 1978 (i.e., 1979 becomes year 1).

x <- I(data_seaice$year - 1978)
y <- data_seaice$extent_north
N <- length(data_seaice$year)

# Rerun the linear model with the new data

lm_1 <- lm(y ~ x)
summary(lm_1)
## 
## Call:
## lm(formula = y ~ x)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.49925 -0.17713  0.04898  0.16923  0.32829 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 12.555888   0.071985   174.4   <2e-16 ***
## x           -0.054574   0.003137   -17.4   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.2205 on 37 degrees of freedom
## Multiple R-squared:  0.8911, Adjusted R-squared:  0.8881 
## F-statistic: 302.7 on 1 and 37 DF,  p-value: < 2.2e-16
lm_alpha <- summary(lm_1)$coeff[1]  # intercept
lm_beta <- summary(lm_1)$coeff[2]   # slope
lm_sigma <- sigma(lm_1)             # residual standard error

stan_data <- list(N = N, x = x, y = y)

Write the Stan model

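The chunk that writes this first model is not echoed above; below is a reconstruction from the $model_code shown in the output, assuming the file name stan_model1.stan (the name used when fitting later) and that the printed list comes from stanc().

library(rstan)  # assumed to be loaded; provides stanc(), stan(), extract(), etc.

write("// Stan model for simple linear regression

data {
 int < lower = 1 > N; // Sample size
 vector[N] x; // Predictor
 vector[N] y; // Outcome
}

parameters {
 real alpha; // Intercept
 real beta; // Slope (regression coefficients)
 real < lower = 0 > sigma; // Error SD
}

model {
 y ~ normal(alpha + x * beta, sigma);
}

generated quantities {
} // The posterior predictive distribution",

"stan_model1.stan")

stan_model1 <- "stan_model1.stan"
stanc(stan_model1)  # translate and check the model; its output is printed below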
## $status
## [1] TRUE
## 
## $model_cppname
## [1] "modelf545553971_stan_model1"
## 
## $cppcode
## [1] "// Code generated by Stan version 2.19.1 ... (several hundred lines of auto-generated C++ omitted for brevity)"
## 
## $model_name
## [1] "stan_model1"
## 
## $model_code
## [1] "// Stan model for simple linear regression\n  \n  data {\n   int < lower = 1 > N; // Sample size\n   vector[N] x; // Predictor\n   vector[N] y; // Outcome\n  }\n  \n  parameters {\n   real alpha; // Intercept\n   real beta; // Slope (regression coefficients)\n   real < lower = 0 > sigma; // Error SD\n  }\n  \n  model {\n   y ~ normal(alpha + x * beta , sigma);\n  }\n  \n  generated quantities {\n  } // The posterior predictive distribution"
## attr(,"model_name2")
## [1] "stan_model1"

## Running the Stan model

fit <- stan(file = stan_model1, data = stan_data, warmup = 500, iter = 1000, chains = 4, cores = 2, thin = 1)

fit
## Inference for Stan model: stan_model1.
## 4 chains, each with iter=1000; warmup=500; thin=1; 
## post-warmup draws per chain=500, total post-warmup draws=2000.
## 
##        mean se_mean   sd  2.5%   25%   50%   75% 97.5% n_eff Rhat
## alpha 12.56    0.00 0.08 12.40 12.51 12.56 12.61 12.70   729 1.00
## beta  -0.05    0.00 0.00 -0.06 -0.06 -0.05 -0.05 -0.05   789 1.00
## sigma  0.23    0.00 0.03  0.18  0.21  0.23  0.25  0.29   900 1.00
## lp__  37.35    0.05 1.30 33.98 36.76 37.68 38.31 38.85   613 1.02
## 
## Samples were drawn using NUTS(diag_e) at Mon Oct 14 21:28:43 2019.
## For each parameter, n_eff is a crude measure of effective sample size,
## and Rhat is the potential scale reduction factor on split chains (at 
## convergence, Rhat=1).
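The printed summary already reports n_eff and Rhat for each parameter; as a small aside (not in the original), they can also be pulled programmatically from the fit object.

# Extract effective sample size and Rhat for all parameters
round(summary(fit)$summary[, c("n_eff", "Rhat")], 2)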
# By looking at the Rhat values, we can assess model convergence: the chains have converged when the values are close to or at 1. Let's look at the full posterior of our parameters by extracting them from the model object.

posterior <- extract(fit)
str(posterior)
## List of 4
##  $ alpha: num [1:2000(1d)] 12.6 12.6 12.5 12.6 12.7 ...
##   ..- attr(*, "dimnames")=List of 1
##   .. ..$ iterations: NULL
##  $ beta : num [1:2000(1d)] -0.0539 -0.0546 -0.055 -0.0551 -0.0621 ...
##   ..- attr(*, "dimnames")=List of 1
##   .. ..$ iterations: NULL
##  $ sigma: num [1:2000(1d)] 0.207 0.204 0.224 0.191 0.264 ...
##   ..- attr(*, "dimnames")=List of 1
##   .. ..$ iterations: NULL
##  $ lp__ : num [1:2000(1d)] 38.6 38 38.7 37.7 35.1 ...
##   ..- attr(*, "dimnames")=List of 1
##   .. ..$ iterations: NULL

# Comparing our results to lm()

# Since we are fitting a simple model, we put non-informative (flat) priors on the parameters. One way of visualizing the variability in the estimated regression line is to plot multiple draws from the posterior.

plot(y ~ x, pch = 20)

abline(lm_1, col = 2, lty = 2, lwd = 3)
abline(mean(posterior$alpha), mean(posterior$beta), col = 6, lwd = 2)

plot(y ~ x, pch = 20)

for (i in 1:500) {
 abline(posterior$alpha[i], posterior$beta[i], col = "gray", lty = 1)
}

abline(mean(posterior$alpha), mean(posterior$beta), col = 6, lwd = 2)

write("// Stan model for simple linear regression

data {
 int < lower = 1 > N; // Sample size
 vector[N] x; // Predictor
 vector[N] y; // Outcome
}

parameters {
 real alpha; // Intercept
 real beta; // Slope (regression coefficients)
 real < lower = 0 > sigma; // Error SD
}

model {
 alpha ~ normal(10, 0.1);
 beta ~ normal(1, 0.1);
 y ~ normal(alpha + x * beta , sigma);
}

generated quantities {}",

"stan_model2.stan")

stan_model2 <- "stan_model2.stan"


fit2 <- stan(stan_model2, data = stan_data, warmup = 500, iter = 1000, chains = 4, cores = 2, thin = 1)

posterior2 <- extract(fit2)


plot(y ~ x, pch = 20)
abline(lm_alpha, lm_beta, col = 4, lty = 2, lwd = 2)
abline(mean(posterior2$alpha), mean(posterior2$beta), col = 3, lwd = 2)
abline(mean(posterior$alpha), mean(posterior$beta), col = 36, lwd = 3)
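As a quick side-by-side check (an illustrative sketch, not in the original), the intercept and slope point estimates from lm() and from the two posteriors can be tabulated:

# Illustrative comparison of point estimates across the three fits
est <- cbind(lm            = c(lm_alpha, lm_beta),
             flat_priors   = c(mean(posterior$alpha),  mean(posterior$beta)),
             strong_priors = c(mean(posterior2$alpha), mean(posterior2$beta)))
rownames(est) <- c("alpha (intercept)", "beta (slope)")
round(est, 3)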

#Convergence diagnostic

# We can view trace plots directly from the extracted posterior draws
plot(posterior$alpha, type = "l")

plot(posterior$beta, type = "l")

plot(posterior$sigma, type = "l")

# To see what poor convergence looks like, let's re-run the model for only 50 iterations and check the trace plots

fit_bad <- stan(stan_model1, data = stan_data, warmup = 25, iter = 50, chains = 4, cores = 2, thin = 1)
## Warning: There were 8 divergent transitions after warmup. Increasing adapt_delta above 0.8 may help. See
## http://mc-stan.org/misc/warnings.html#divergent-transitions-after-warmup
## Warning: There were 4 chains where the estimated Bayesian Fraction of Missing Information was low. See
## http://mc-stan.org/misc/warnings.html#bfmi-low
## Warning: Examine the pairs() plot to diagnose sampling problems
## Warning: The largest R-hat is 3.31, indicating chains have not mixed.
## Running the chains for more iterations may help. See
## http://mc-stan.org/misc/warnings.html#r-hat
## Warning: Bulk Effective Samples Size (ESS) is too low, indicating posterior means and medians may be unreliable.
## Running the chains for more iterations may help. See
## http://mc-stan.org/misc/warnings.html#bulk-ess
## Warning: Tail Effective Samples Size (ESS) is too low, indicating posterior variances and tail quantiles may be unreliable.
## Running the chains for more iterations may help. See
## http://mc-stan.org/misc/warnings.html#tail-ess
posterior_bad <- extract(fit_bad)
plot(posterior_bad$alpha, type = "l")

plot(posterior_bad$beta, type = "l")

plot(posterior_bad$sigma, type = "l")
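To confirm numerically what the trace plots show (a small sketch, same approach as before), compare the diagnostics of this short run:

# Rhat far above 1 and tiny n_eff confirm the poor convergence of fit_bad
round(summary(fit_bad)$summary[, c("n_eff", "Rhat")], 2)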

# Plotting the posterior densities against the (non-Bayesian) lm() estimates to check that the model is behaving as expected

par(mfrow = c(1,3))

plot(density(posterior$alpha), main = "Alpha")
abline(v = lm_alpha, col = 4, lty = 2)

plot(density(posterior$beta), main = "Beta")
abline(v = lm_beta, col = 4, lty = 2)

plot(density(posterior$sigma), main = "Sigma")
abline(v = lm_sigma, col = 4, lty = 2)

# Calculating the probability of a parameter being above or below a certain value of interest.

sum(posterior$beta>0)/length(posterior$beta)
## [1] 0
sum(posterior$beta>0.2)/length(posterior$beta)
## [1] 0
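Related to these threshold probabilities, a posterior credible interval can be computed directly from the draws with quantile() (a minimal sketch):

# 95% credible interval for the slope
quantile(posterior$beta, probs = c(0.025, 0.975))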
# rstan also provides built-in diagnostic plots, e.g. trace plots for all parameters
traceplot(fit)

#Densities and histograms
stan_dens(fit)

stan_hist(fit)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

# Plot mean parameter estimates with their credible intervals
plot(fit, show_density = FALSE, ci_level = 0.5, outer_level = 0.95, fill_color = "salmon")
## ci_level: 0.5 (50% intervals)
## outer_level: 0.95 (95% intervals)

## Posterior predictive checks

To check the model against the observed data, we generate replicated data y_rep in a generated quantities block and write the extended model to a new file:

write("// Stan model for simple linear regression

data {
 int < lower = 1 > N; // Sample size
 vector[N] x; // Predictor
 vector[N] y; // Outcome
}

parameters {
 real alpha; // Intercept
 real beta; // Slope (regression coefficients)
 real < lower = 0 > sigma; // Error SD
}

model {
 y ~ normal(x * beta + alpha, sigma);
}

generated quantities {
 real y_rep[N];

 for (n in 1:N) {
 y_rep[n] = normal_rng(x[n] * beta + alpha, sigma);
 }

}",

"stan_model2_GQ.stan")

stan_model2_GQ <- "stan_model2_GQ.stan"

fit3 <- stan(stan_model2_GQ, data = stan_data, iter = 1000, chains = 4, cores = 2, thin = 1)

y_rep <- as.matrix(fit3, pars = "y_rep")
dim(y_rep)
## [1] 2000   39
# Comparing the density of y with the densities of y_rep from 200 posterior draws

library(bayesplot)  # provides the ppc_* and color_scheme_* functions used below

ppc_dens_overlay(y, y_rep[1:200, ])

#Comparing estimates of summary statistics
ppc_stat(y = y, yrep = y_rep, stat = "mean")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
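The same check works for other summary statistics; for instance (an illustrative example, not in the original), the standard deviation:

# Posterior predictive check on the standard deviation
ppc_stat(y = y, yrep = y_rep, stat = "sd")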

# Average prediction per data point vs the observed value for each data point
ppc_scatter_avg(y = y, yrep = y_rep)

available_ppc()
## bayesplot PPC module:
##   ppc_bars
##   ppc_bars_grouped
##   ppc_boxplot
##   ppc_data
##   ppc_dens
##   ppc_dens_overlay
##   ppc_ecdf_overlay
##   ppc_error_binned
##   ppc_error_hist
##   ppc_error_hist_grouped
##   ppc_error_scatter
##   ppc_error_scatter_avg
##   ppc_error_scatter_avg_vs_x
##   ppc_freqpoly
##   ppc_freqpoly_grouped
##   ppc_hist
##   ppc_intervals
##   ppc_intervals_data
##   ppc_intervals_grouped
##   ppc_loo_intervals
##   ppc_loo_pit
##   ppc_loo_pit_overlay
##   ppc_loo_pit_qq
##   ppc_loo_ribbon
##   ppc_ribbon
##   ppc_ribbon_data
##   ppc_ribbon_grouped
##   ppc_rootogram
##   ppc_scatter
##   ppc_scatter_avg
##   ppc_scatter_avg_grouped
##   ppc_stat
##   ppc_stat_2d
##   ppc_stat_freqpoly_grouped
##   ppc_stat_grouped
##   ppc_violin_grouped
color_scheme_view(c("blue", "gray", "green", "pink", "purple",
 "red","teal","yellow"))

color_scheme_view("mix-blue-red")

Note that the echo = FALSE parameter was added to the code chunk to prevent printing of the R code that generated the plot.