After our Intro to Stan lecture I think it would be valuable to have you go through a similar exercise. Let’s test a second research question.
Research question: Is sea ice extent declining in the Southern Hemisphere over time? Is the same pattern happening in the Antarctic as in the Arctic? Fit a Stan model to find out!
Make sure you follow the steps we used in class.
What do your Stan model results indicate so far?
library(readr)
seaice <- read.csv("C:/Users/Monica/Desktop/2020 Assignnment/505/seaice.csv",stringsAsFactors =FALSE)
year<-seaice$锘縴ear
head(seaice)
## 锘縴ear extent_north extent_south
## 1 1979 12.328 11.700
## 2 1980 12.337 11.230
## 3 1981 12.127 11.435
## 4 1982 12.447 11.640
## 5 1983 12.332 11.389
## 6 1984 11.910 11.454
ggplot(aes(x=锘縴ear, y=extent_south), data=seaice) + geom_point() + theme_bw()
lm1 <- lm(extent_north ~ year, data = seaice)
summary(lm1)
##
## Call:
## lm(formula = extent_north ~ year, data = seaice)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.49925 -0.17713 0.04898 0.16923 0.32829
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 120.503036 6.267203 19.23 <2e-16 ***
## year -0.054574 0.003137 -17.40 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.2205 on 37 degrees of freedom
## Multiple R-squared: 0.8911, Adjusted R-squared: 0.8881
## F-statistic: 302.7 on 1 and 37 DF, p-value: < 2.2e-16
x<- I(year - 1978)
y<- seaice$extent_south
N<- length(seaice$year)
lm1 <-lm(y ~ x)
summary(lm1)
##
## Call:
## lm(formula = y ~ x)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.23372 -0.18142 0.01587 0.18465 0.88814
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 11.421555 0.125490 91.015 <2e-16 ***
## x 0.012953 0.005468 2.369 0.0232 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.3843 on 37 degrees of freedom
## Multiple R-squared: 0.1317, Adjusted R-squared: 0.1082
## F-statistic: 5.611 on 1 and 37 DF, p-value: 0.02318
lm_alpha <- summary(lm1)$coeff[1] #intersect
lm_beta <- summary(lm1)$coeff[2] #slope
lm_sigma <- sigma(lm1) #residual error
stan_data <- list(N = N, x = x, y = y)
#write the code
write("// Stan model for simple linear regression
data {
int < lower = 1 > N; // Sample size
vector[N] x; // Predictor
vector[N] y; // Outcome
}
parameters {
real alpha; // Intercept
real beta; // Slope (regression coefficients)
real < lower = 0 > sigma; // Error SD
}
model {
y ~ normal(alpha + x * beta , sigma);
}
generated quantities {
} // The posterior predictive distribution",
"stan_model1.stan")
stan_model1 <- "stan_model1.stan"
fit <- stan(file = stan_model1, data = stan_data, warmup = 500, iter = 1000, chains = 4, cores = 4, thin = 1)
## Error in new_CppObject_xp(fields$.module, fields$.pointer, ...) :
## Exception: model41084db51c9_stan_model1_namespace::model41084db51c9_stan_model1: N is 0, but must be greater than or equal to 1 (in 'model41084db51c9_stan_model1' at line 4)
## failed to create the sampler; sampling not done
fit
## Stan model 'stan_model1' does not contain samples.
posterior <- extract(fit)
## Stan model 'stan_model1' does not contain samples.
str(posterior)
## NULL
#plot(y ~ x, pch = 20)
#abline(lm1, col = 2, lty = 2, lw = 3)
#abline(mean(posterior$alpha), mean(posterior$beta), col = 5, lw = 2)
#plot(y ~ x, pch = 20)
#for (i in 1:500) {
#abline(posterior$alpha[i], posterior$beta[i], col = "gray", lty = 1)}
#abline(mean(posterior$alpha), mean(posterior$beta), col = 6, lw = 2)