This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown see http://rmarkdown.rstudio.com.
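The analysis below uses fread() from data.table, stan() and friends from rstan, and the ppc_* functions from bayesplot, so a setup chunk along these lines is assumed (a minimal sketch; it is not echoed in the original):
library(data.table)   # fread()
library(rstan)        # stan(), stanc(), traceplot(), stan_dens(), stan_hist()
library(bayesplot)    # ppc_dens_overlay(), ppc_stat(), ppc_scatter_avg(), color_scheme_view()
rstan_options(auto_write = TRUE)             # cache compiled Stan models
options(mc.cores = parallel::detectCores())  # allow chains to run in parallel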
When you click the Knit button, a document is generated that includes both the content and the output of any embedded R code chunks within the document. You can embed an R code chunk like this:
data_seaice <- fread("C:/Users/HPi/OneDrive - HP Inc/Pradosh/Personal/Harrisburg/Course 505/seaice.csv")
head(data_seaice)
## year extent_north extent_south
## 1: 1979 12.328 11.700
## 2: 1980 12.337 11.230
## 3: 1981 12.127 11.435
## 4: 1982 12.447 11.640
## 5: 1983 12.332 11.389
## 6: 1984 11.910 11.454
#Let's create a graph to see whether the sea ice extent is declining in the Northern Hemisphere
plot(extent_north ~ year, pch = 20, data = data_seaice)
#Fitting a linear model using lm()
lm_1 <- lm(extent_north ~ year, data = data_seaice)
lm_1
##
## Call:
## lm(formula = extent_north ~ year, data = data_seaice)
##
## Coefficients:
## (Intercept) year
## 120.50304 -0.05457
#Extracting summary statistics and plotting the fitted regression line
summary(lm_1)
##
## Call:
## lm(formula = extent_north ~ year, data = data_seaice)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.49925 -0.17713 0.04898 0.16923 0.32829
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 120.503036 6.267203 19.23 <2e-16 ***
## year -0.054574 0.003137 -17.40 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.2205 on 37 degrees of freedom
## Multiple R-squared: 0.8911, Adjusted R-squared: 0.8881
## F-statistic: 302.7 on 1 and 37 DF, p-value: < 2.2e-16
plot(extent_north ~ year, pch = 20, data = data_seaice)
abline(lm_1, col = 2, lty = 2, lwd = 3)
Next, let's rename the variables and re-index the years. We aren't really interested in the calendar labels 1979 through 2017 themselves, only in the trend across the 39-year record, so let's index time relative to 1978: year 1979 becomes 1 and year 2017 becomes 39.
x <- I(data_seaice$year - 1978)
y <- data_seaice$extent_north
N <- length(data_seaice$year)
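# Quick sanity check (not in the original): after re-indexing, x should run
# from 1 to 39 (1979-2017 spans 39 years) and N should equal 39
range(x)
N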
#Rerun the linear model with the new data
lm_1 <- lm(y ~ x)
summary(lm_1)
##
## Call:
## lm(formula = y ~ x)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.49925 -0.17713 0.04898 0.16923 0.32829
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 12.555888 0.071985 174.4 <2e-16 ***
## x -0.054574 0.003137 -17.4 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.2205 on 37 degrees of freedom
## Multiple R-squared: 0.8911, Adjusted R-squared: 0.8881
## F-statistic: 302.7 on 1 and 37 DF, p-value: < 2.2e-16
lm_alpha <- summary(lm_1)$coeff[1]  # intercept
lm_beta <- summary(lm_1)$coeff[2]  # slope
lm_sigma <- sigma(lm_1)  # residual standard error
stan_data <- list(N = N, x = x, y = y)
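The chunk that writes out and parses the first Stan model file is not echoed above. Based on the $model_code element printed in the output below, it would look roughly like this (a sketch; the printed list with $status, $cppcode, and $model_code is consistent with the return value of rstan's stanc()):
write("// Stan model for simple linear regression
data {
int < lower = 1 > N; // Sample size
vector[N] x; // Predictor
vector[N] y; // Outcome
}
parameters {
real alpha; // Intercept
real beta; // Slope (regression coefficients)
real < lower = 0 > sigma; // Error SD
}
model {
y ~ normal(alpha + x * beta , sigma);
}
generated quantities {
} // The posterior predictive distribution",
"stan_model1.stan")
stan_model1 <- "stan_model1.stan"
stanc(stan_model1)  # assumed call; parsing the model prints the list shown below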
## $status
## [1] TRUE
##
## $model_cppname
## [1] "modelf545553971_stan_model1"
##
## $cppcode
## [1] "// Code generated by Stan version 2.19.1 ..." (auto-generated C++ translation of the model, several hundred lines, omitted for brevity)
##
## $model_name
## [1] "stan_model1"
##
## $model_code
## [1] "// Stan model for simple linear regression\n \n data {\n int < lower = 1 > N; // Sample size\n vector[N] x; // Predictor\n vector[N] y; // Outcome\n }\n \n parameters {\n real alpha; // Intercept\n real beta; // Slope (regression coefficients)\n real < lower = 0 > sigma; // Error SD\n }\n \n model {\n y ~ normal(alpha + x * beta , sigma);\n }\n \n generated quantities {\n } // The posterior predictive distribution"
## attr(,"model_name2")
## [1] "stan_model1"
#Running the Stan model
fit <- stan(file = stan_model1, data = stan_data, warmup = 500, iter = 1000, chains = 4, cores = 2, thin = 1)
fit
## Inference for Stan model: stan_model1.
## 4 chains, each with iter=1000; warmup=500; thin=1;
## post-warmup draws per chain=500, total post-warmup draws=2000.
##
## mean se_mean sd 2.5% 25% 50% 75% 97.5% n_eff Rhat
## alpha 12.56 0.00 0.08 12.40 12.51 12.56 12.61 12.70 729 1.00
## beta -0.05 0.00 0.00 -0.06 -0.06 -0.05 -0.05 -0.05 789 1.00
## sigma 0.23 0.00 0.03 0.18 0.21 0.23 0.25 0.29 900 1.00
## lp__ 37.35 0.05 1.30 33.98 36.76 37.68 38.31 38.85 613 1.02
##
## Samples were drawn using NUTS(diag_e) at Mon Oct 14 21:28:43 2019.
## For each parameter, n_eff is a crude measure of effective sample size,
## and Rhat is the potential scale reduction factor on split chains (at
## convergence, Rhat=1).
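Rather than reading Rhat and n_eff off the printed table, they can also be pulled directly from the fit object; a small sketch using rstan's summary() method for stanfit objects:
round(summary(fit)$summary[, c("n_eff", "Rhat")], 2)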
#By looking at the Rhat values, we can assess model convergence. The chains have converged when the values are at or very close to 1. Let's look at the full posteriors of our parameters by extracting them from the model object.
posterior <- extract(fit)
str(posterior)
## List of 4
## $ alpha: num [1:2000(1d)] 12.6 12.6 12.5 12.6 12.7 ...
## ..- attr(*, "dimnames")=List of 1
## .. ..$ iterations: NULL
## $ beta : num [1:2000(1d)] -0.0539 -0.0546 -0.055 -0.0551 -0.0621 ...
## ..- attr(*, "dimnames")=List of 1
## .. ..$ iterations: NULL
## $ sigma: num [1:2000(1d)] 0.207 0.204 0.224 0.191 0.264 ...
## ..- attr(*, "dimnames")=List of 1
## .. ..$ iterations: NULL
## $ lp__ : num [1:2000(1d)] 38.6 38 38.7 37.7 35.1 ...
## ..- attr(*, "dimnames")=List of 1
## .. ..$ iterations: NULL
#Comparing our results to lm()
#Since this is a simple model with non-informative (flat) priors on the parameters, the posterior means should be very close to the lm() estimates; a quick numeric check is sketched below. One way of visualizing the uncertainty in the estimated regression line is to plot multiple draws from the posterior.
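# Quick numeric comparison (not in the original): posterior means vs lm() estimates
c(alpha = mean(posterior$alpha), beta = mean(posterior$beta), sigma = mean(posterior$sigma))
c(alpha = lm_alpha, beta = lm_beta, sigma = lm_sigma)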
plot(y ~ x, pch = 20)
abline(lm_1, col = 2, lty = 2, lwd = 3)
abline(mean(posterior$alpha), mean(posterior$beta), col = 6, lwd = 2)
plot(y ~ x, pch = 20)
for (i in 1:500) {
abline(posterior$alpha[i], posterior$beta[i], col = "gray", lty = 1)
}
abline(mean(posterior$alpha), mean(posterior$beta), col = 6, lwd = 2)
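# Note: stan_model2 below is the same regression, but with deliberately strong
# (informative) priors on alpha and beta, so we can see how much such priors
# pull the posterior away from the flat-prior fit above.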
write("// Stan model for simple linear regression
data {
int < lower = 1 > N; // Sample size
vector[N] x; // Predictor
vector[N] y; // Outcome
}
parameters {
real alpha; // Intercept
real beta; // Slope (regression coefficients)
real < lower = 0 > sigma; // Error SD
}
model {
alpha ~ normal(10, 0.1);
beta ~ normal(1, 0.1);
y ~ normal(alpha + x * beta , sigma);
}
generated quantities {}",
"stan_model2.stan")
stan_model2 <- "stan_model2.stan"
fit2 <- stan(stan_model2, data = stan_data, warmup = 500, iter = 1000, chains = 4, cores = 2, thin = 1)
posterior2 <- extract(fit2)
plot(y ~ x, pch = 20)
abline(lm_alpha, lm_beta, col = 4, lty = 2, lwd = 2)
abline(mean(posterior2$alpha), mean(posterior2$beta), col = 3, lwd = 2)
abline(mean(posterior$alpha), mean(posterior$beta), col = 36, lwd = 3)
#Convergence diagnostics
#We can inspect the trace plots directly from the extracted posterior
plot(posterior$alpha, type = "l")
plot(posterior$beta, type = "l")
plot(posterior$sigma, type = "l")
#To see what poor convergence looks like, let's rerun the model for only 50 iterations and check the trace plots
fit_bad <- stan(stan_model1, data = stan_data, warmup = 25, iter = 50, chains = 4, cores = 2, thin = 1)
## Warning: There were 8 divergent transitions after warmup. Increasing adapt_delta above 0.8 may help. See
## http://mc-stan.org/misc/warnings.html#divergent-transitions-after-warmup
## Warning: There were 4 chains where the estimated Bayesian Fraction of Missing Information was low. See
## http://mc-stan.org/misc/warnings.html#bfmi-low
## Warning: Examine the pairs() plot to diagnose sampling problems
## Warning: The largest R-hat is 3.31, indicating chains have not mixed.
## Running the chains for more iterations may help. See
## http://mc-stan.org/misc/warnings.html#r-hat
## Warning: Bulk Effective Samples Size (ESS) is too low, indicating posterior means and medians may be unreliable.
## Running the chains for more iterations may help. See
## http://mc-stan.org/misc/warnings.html#bulk-ess
## Warning: Tail Effective Samples Size (ESS) is too low, indicating posterior variances and tail quantiles may be unreliable.
## Running the chains for more iterations may help. See
## http://mc-stan.org/misc/warnings.html#tail-ess
posterior_bad <- extract(fit_bad)
plot(posterior_bad$alpha, type = "l")
plot(posterior_bad$beta, type = "l")
plot(posterior_bad$sigma, type = "l")
#Plotting the posterior densities alongside the non-Bayesian lm() estimates (dashed lines) to check that our model behaves as expected
par(mfrow = c(1,3))
plot(density(posterior$alpha), main = "Alpha")
abline(v = lm_alpha, col = 4, lty = 2)
plot(density(posterior$beta), main = "Beta")
abline(v = lm_beta, col = 4, lty = 2)
plot(density(posterior$sigma), main = "Sigma")
abline(v = lm_sigma, col = 4, lty = 2)
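# Reset the plotting layout for the single-panel plots that follow (optional)
par(mfrow = c(1, 1))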
#Calculating the probability that a parameter is above or below some value of interest
sum(posterior$beta>0)/length(posterior$beta)
## [1] 0
sum(posterior$beta>0.2)/length(posterior$beta)
## [1] 0
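The same trick works in the other direction; for example, the posterior probability that the slope is more negative than -0.05 (an illustrative threshold, not part of the original analysis):
sum(posterior$beta < -0.05)/length(posterior$beta)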
traceplot(fit)
#Densities and histograms
stan_dens(fit)
stan_hist(fit)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
#Showing the mean parameter estimates and credible intervals
plot(fit, show_density = FALSE, ci_level = 0.5, outer_level = 0.95, fill_color = "salmon")
## ci_level: 0.5 (50% intervals)
## outer_level: 0.95 (95% intervals)
#Posterior predictive checks
write("// Stan model for simple linear regression
data {
int < lower = 1 > N; // Sample size
vector[N] x; // Predictor
vector[N] y; // Outcome
}
parameters {
real alpha; // Intercept
real beta; // Slope (regression coefficients)
real < lower = 0 > sigma; // Error SD
}
model {
y ~ normal(x * beta + alpha, sigma);
}
generated quantities {
real y_rep[N];
for (n in 1:N) {
y_rep[n] = normal_rng(x[n] * beta + alpha, sigma);
}
}",
"stan_model2_GQ.stan")
stan_model2_GQ <- "stan_model2_GQ.stan"
fit3 <- stan(stan_model2_GQ, data = stan_data, iter = 1000, chains = 4, cores = 2, thin = 1)
y_rep <- as.matrix(fit3, pars = "y_rep")
dim(y_rep)
## [1] 2000 39
#Comparing the density of y with the densities of y_rep from 200 posterior draws
ppc_dens_overlay(y, y_rep[1:200, ])
#Comparing the observed mean of y with the distribution of means across the y_rep draws
ppc_stat(y = y, yrep = y_rep, stat = "mean")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
#Average prediction per data point vs the observed value for each data point
ppc_scatter_avg(y = y, yrep = y_rep)
available_ppc()
## bayesplot PPC module:
## ppc_bars
## ppc_bars_grouped
## ppc_boxplot
## ppc_data
## ppc_dens
## ppc_dens_overlay
## ppc_ecdf_overlay
## ppc_error_binned
## ppc_error_hist
## ppc_error_hist_grouped
## ppc_error_scatter
## ppc_error_scatter_avg
## ppc_error_scatter_avg_vs_x
## ppc_freqpoly
## ppc_freqpoly_grouped
## ppc_hist
## ppc_intervals
## ppc_intervals_data
## ppc_intervals_grouped
## ppc_loo_intervals
## ppc_loo_pit
## ppc_loo_pit_overlay
## ppc_loo_pit_qq
## ppc_loo_ribbon
## ppc_ribbon
## ppc_ribbon_data
## ppc_ribbon_grouped
## ppc_rootogram
## ppc_scatter
## ppc_scatter_avg
## ppc_scatter_avg_grouped
## ppc_stat
## ppc_stat_2d
## ppc_stat_freqpoly_grouped
## ppc_stat_grouped
## ppc_violin_grouped
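As one more example from the list above, ppc_intervals() shows posterior predictive intervals for each observation against the predictor (a sketch using the objects already defined):
ppc_intervals(y = y, yrep = y_rep, x = x)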
color_scheme_view(c("blue", "gray", "green", "pink", "purple",
"red","teal","yellow"))
color_scheme_view("mix-blue-red")
Note that the echo = FALSE parameter was added to the code chunk to prevent printing of the R code that generated the plot.