library(foreign)
data=read.dta("C:/Users/BINH THANG-TRAN/Dropbox/PhD/Research/PhD thesis/Methods/morkov/Utility_study/Graphs/data_5DEC2019.dta")

1 Variables

require(ggplot2)
## Loading required package: ggplot2
require(GGally)
## Loading required package: GGally
## Registered S3 method overwritten by 'GGally':
##   method from   
##   +.gg   ggplot2
require(VGAM)
## Loading required package: VGAM
## Loading required package: stats4
## Loading required package: splines
library(ggplot2)
library(grid)
library(gridExtra)

2 EQ

2.1 1. ggplot

# add a normal distribution line in histogram
hist(data$score2sh, freq=FALSE, col="gray", xlab="Petal Length", main="title here ")
curve(dnorm(x, mean=mean(data$score2sh), sd=sd(data$score2sh)), add=TRUE, col="red") #line

# add a normal distribution line in histogram
hist(data$c2, freq=FALSE, col="gray", xlab="Petal Length", main="title here ")
curve(dnorm(x, mean=mean(data$c2), sd=sd(data$c2)), add=TRUE, col="red") #line

2.2 tobit regression

summary(m <- vglm(score2sh ~ factor(a3_gioi)+ factor(a7honnhan1) + factor(age_code)+ factor(a41_edu1)+  factor(job) + factor(a6_kinhte)+ factor(a8benh_code) + factor(timetoK) + factor(a11_stage)+  factor(tr_status), tobit(Lower = -0.566, Upper = 1), data = data))
## 
## Call:
## vglm(formula = score2sh ~ factor(a3_gioi) + factor(a7honnhan1) + 
##     factor(age_code) + factor(a41_edu1) + factor(job) + factor(a6_kinhte) + 
##     factor(a8benh_code) + factor(timetoK) + factor(a11_stage) + 
##     factor(tr_status), family = tobit(Lower = -0.566, Upper = 1), 
##     data = data)
## 
## Pearson residuals:
##                 Min      1Q   Median       3Q    Max
## mu          -3.9495 -0.5044  0.06878 0.572947  3.574
## loglink(sd) -0.9642 -0.7010 -0.49292 0.007119 10.383
## 
## Coefficients: 
##                            Estimate Std. Error z value Pr(>|z|)    
## (Intercept):1              0.692933   0.181448   3.819 0.000134 ***
## (Intercept):2             -1.502176   0.052802 -28.449  < 2e-16 ***
## factor(a3_gioi)2           0.025408   0.034496   0.737 0.461389    
## factor(a7honnhan1)Married -0.053661   0.056594  -0.948 0.343046    
## factor(age_code)2          0.253854   0.098222   2.584 0.009752 ** 
## factor(age_code)3          0.327966   0.093170   3.520 0.000431 ***
## factor(age_code)4          0.355364   0.094874   3.746 0.000180 ***
## factor(age_code)5          0.204475   0.093471   2.188 0.028700 *  
## factor(a41_edu1)2          0.077719   0.042065   1.848 0.064663 .  
## factor(a41_edu1)3          0.273611   0.074105   3.692 0.000222 ***
## factor(a41_edu1)4          0.134380   0.050682   2.651 0.008015 ** 
## factor(a41_edu1)5          0.213319   0.076055   2.805 0.005035 ** 
## factor(job)2              -0.019298   0.057824  -0.334 0.738586    
## factor(job)3              -0.061382   0.040944  -1.499 0.133830    
## factor(job)4              -0.067976   0.056469  -1.204 0.228674    
## factor(a6_kinhte)2        -0.074705   0.144871  -0.516 0.606090    
## factor(a6_kinhte)3        -0.038010   0.144893  -0.262 0.793064    
## factor(a8benh_code)C20    -0.047294   0.035686  -1.325 0.185077    
## factor(timetoK)1          -0.003268   0.048562  -0.067 0.946352    
## factor(timetoK)2           0.065508   0.067747   0.967 0.333566    
## factor(a11_stage)2        -0.303298   0.076537  -3.963 7.41e-05 ***
## factor(a11_stage)3        -0.305307   0.067528  -4.521 6.15e-06 ***
## factor(a11_stage)4        -0.456580   0.075180  -6.073 1.25e-09 ***
## factor(a11_stage)NA       -0.369006   0.174929  -2.109 0.034904 *  
## factor(tr_status)2        -0.054429   0.050821  -1.071 0.284170    
## factor(tr_status)3        -0.113894   0.048215  -2.362 0.018166 *  
## factor(tr_status)4         0.039229   0.065526   0.599 0.549385    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Names of linear predictors: mu, loglink(sd)
## 
## Log-likelihood: 12.8071 on 367 degrees of freedom
## 
## Number of Fisher scoring iterations: 8 
## 
## No Hauck-Donner effect found in any of the estimates

2.3 coef of model

ctable <- coef(summary(m))
pvals <- 2 * pt(abs(ctable[, "z value"]), df.residual(m), lower.tail = FALSE)
cbind(ctable, pvals)
##                               Estimate Std. Error      z value      Pr(>|z|)
## (Intercept):1              0.692932565 0.18144794   3.81890563  1.340450e-04
## (Intercept):2             -1.502176412 0.05280209 -28.44918447 4.987603e-178
## factor(a3_gioi)2           0.025408286 0.03449580   0.73656179  4.613889e-01
## factor(a7honnhan1)Married -0.053660906 0.05659450  -0.94816470  3.430456e-01
## factor(age_code)2          0.253854430 0.09822216   2.58449248  9.752243e-03
## factor(age_code)3          0.327966306 0.09317027   3.52007463  4.314254e-04
## factor(age_code)4          0.355364000 0.09487391   3.74564526  1.799307e-04
## factor(age_code)5          0.204475311 0.09347097   2.18758084  2.870015e-02
## factor(a41_edu1)2          0.077719213 0.04206532   1.84758416  6.466252e-02
## factor(a41_edu1)3          0.273610682 0.07410506   3.69219948  2.223230e-04
## factor(a41_edu1)4          0.134379855 0.05068150   2.65145753  8.014519e-03
## factor(a41_edu1)5          0.213318702 0.07605495   2.80479721  5.034825e-03
## factor(job)2              -0.019297602 0.05782450  -0.33372710  7.385855e-01
## factor(job)3              -0.061381853 0.04094395  -1.49916798  1.338301e-01
## factor(job)4              -0.067976248 0.05646894  -1.20378125  2.286741e-01
## factor(a6_kinhte)2        -0.074704751 0.14487137  -0.51566264  6.060900e-01
## factor(a6_kinhte)3        -0.038010419 0.14489331  -0.26233384  7.930641e-01
## factor(a8benh_code)C20    -0.047294397 0.03568627  -1.32528265  1.850774e-01
## factor(timetoK)1          -0.003267649 0.04856151  -0.06728886  9.463517e-01
## factor(timetoK)2           0.065507985 0.06774656   0.96695659  3.335657e-01
## factor(a11_stage)2        -0.303297682 0.07653725  -3.96274619  7.409254e-05
## factor(a11_stage)3        -0.305306829 0.06752785  -4.52119846  6.149050e-06
## factor(a11_stage)4        -0.456580246 0.07518032  -6.07313513  1.254368e-09
## factor(a11_stage)NA       -0.369006408 0.17492870  -2.10946753  3.490425e-02
## factor(tr_status)2        -0.054428904 0.05082074  -1.07099791  2.841704e-01
## factor(tr_status)3        -0.113893750 0.04821463  -2.36222398  1.816566e-02
## factor(tr_status)4         0.039229439 0.06552643   0.59868120  5.493855e-01
##                                  pvals
## (Intercept):1             1.573363e-04
## (Intercept):2             7.452985e-95
## factor(a3_gioi)2          4.618594e-01
## factor(a7honnhan1)Married 3.436695e-01
## factor(age_code)2         1.013767e-02
## factor(age_code)3         4.856961e-04
## factor(age_code)4         2.088602e-04
## factor(age_code)5         2.933046e-02
## factor(a41_edu1)2         6.546708e-02
## factor(a41_edu1)3         2.560797e-04
## factor(a41_edu1)4         8.361924e-03
## factor(a41_edu1)5         5.302765e-03
## factor(job)2              7.387761e-01
## factor(job)3              1.346897e-01
## factor(job)4              2.294500e-01
## factor(a6_kinhte)2        6.064005e-01
## factor(a6_kinhte)3        7.932113e-01
## factor(a8benh_code)C20    1.859019e-01
## factor(timetoK)1          9.463884e-01
## factor(timetoK)2          3.342025e-01
## factor(a11_stage)2        8.904617e-05
## factor(a11_stage)3        8.309508e-06
## factor(a11_stage)4        3.135761e-09
## factor(a11_stage)NA       3.558095e-02
## factor(tr_status)2        2.848742e-01
## factor(tr_status)3        1.868696e-02
## factor(tr_status)4        5.497548e-01

2.4 check model

m2 <- vglm(score2sh ~ factor(a3_gioi)+ factor(a7honnhan1) + factor(age_code)+ factor(a41_edu1)+  factor(job) + factor(a6_kinhte)+ factor(a8benh_code) + factor(timetoK) +  factor(tr_status), tobit(Lower = -0.566, Upper = 1), data = data)

(p <- pchisq(2 * (logLik(m) - logLik(m2)), df = 2, lower.tail = FALSE))
## [1] 1.600392e-08

The LRT with two degrees of freedom is associated with a p-value of 1.600392e-08, indicating that the effect of CRC stage is statistically significant.

Below we calculate the upper and lower 95% confidence intervals for the coefficients.

b <- coef(m)
se <- sqrt(diag(vcov(m)))

cbind(LL = b - qnorm(0.975) * se, UL = b + qnorm(0.975) * se)
##                                     LL          UL
## (Intercept):1              0.337301135  1.04856400
## (Intercept):2             -1.605666607 -1.39868622
## factor(a3_gioi)2          -0.042202234  0.09301881
## factor(a7honnhan1)Married -0.164584086  0.05726227
## factor(age_code)2          0.061342537  0.44636632
## factor(age_code)3          0.145355931  0.51057668
## factor(age_code)4          0.169414559  0.54131344
## factor(age_code)5          0.021275569  0.38767505
## factor(a41_edu1)2         -0.004727293  0.16016572
## factor(a41_edu1)3          0.128367424  0.41885394
## factor(a41_edu1)4          0.035045933  0.23371378
## factor(a41_edu1)5          0.064253745  0.36238366
## factor(job)2              -0.132631537  0.09403633
## factor(job)3              -0.141630513  0.01886681
## factor(job)4              -0.178653332  0.04270084
## factor(a6_kinhte)2        -0.358647410  0.20923791
## factor(a6_kinhte)3        -0.321996096  0.24597526
## factor(a8benh_code)C20    -0.117238205  0.02264941
## factor(timetoK)1          -0.098446456  0.09191116
## factor(timetoK)2          -0.067272837  0.19828881
## factor(a11_stage)2        -0.453307929 -0.15328744
## factor(a11_stage)3        -0.437658990 -0.17295467
## factor(a11_stage)4        -0.603930967 -0.30922953
## factor(a11_stage)NA       -0.711860356 -0.02615246
## factor(tr_status)2        -0.154035720  0.04517791
## factor(tr_status)3        -0.208392684 -0.01939482
## factor(tr_status)4        -0.089199995  0.16765887

2.5 Model fits

data$yhat <- fitted(m)[,1]
data$rr <- resid(m, type = "response")
data$rp <- resid(m, type = "pearson")[,1]

par(mfcol = c(2, 3))

with(data, {
  plot(yhat, rr, main = "Fitted vs Residuals")
  qqnorm(rr)
  plot(yhat, rp, main = "Fitted vs Pearson Residuals")
  qqnorm(rp)
  plot(score2sh, rp, main = "Actual vs Pearson Residuals")
  plot(score2sh, yhat, main = "Actual vs Fitted")
})

The graph in the bottom right was the predicted, or fitted, values plotted against the actual. This can be particularly useful when comparing competing models. We can calculate the correlation between these two as well as the squared correlation, to get a sense of how accurate our model predicts the data and how much of the variance in the outcome is accounted for by the model.

# correlation
(r <- with(data, cor(score2sh, yhat)))
## [1] 0.5823048
# variance accounted for
r^2
## [1] 0.3390789

End: The correlation between the predicted and observed values of apt is 0.5823048. If we square this value, we get the multiple squared correlation, this indicates predicted values share 34% of their variance with “score2sh”

3 other parts

3.1 correlation between score and v

(r <- with(data, cor(score2sh, c2)))
## [1] 0.4322855

3.2 graphs

grob1 = grobTree(textGrob(paste("Pearson Correlation : ", round(cor(data$score2sh, data$c2), 4) ), x = 0.63, y = 0.97, hjust = 0, gp = gpar(col = "red", fontsize = 11, fontface = "bold")))


g1 = ggplot(data, aes(x=c2, y=score2sh)) + geom_point() + ggtitle("EQ-5D-5L vs VAS") + geom_smooth(method=lm, se=FALSE) + scale_x_continuous(name = "VAS", limits = c(0, 100), breaks = seq(0, 100, 10)) + scale_y_continuous(name = "EQ-5D-5L", limits = c(-1, 1), breaks = seq(-1, 1, 0.5)) + annotation_custom(grob1) + theme(plot.title = element_text(hjust = 0.5))

g1

#Binh Thang (R)

4 MODEL 2: c2 -vas

4.1 2.1. ggplot

# add a normal distribution line in histogram
hist(data$c2, freq=FALSE, col="gray", xlab="Petal Length", main="title here ")
curve(dnorm(x, mean=mean(data$c2), sd=sd(data$c2)), add=TRUE, col="red") #line

# add a normal distribution line in histogram
hist(data$c2, freq=FALSE, col="gray", xlab="Petal Length", main="title here ")
curve(dnorm(x, mean=mean(data$c2), sd=sd(data$c2)), add=TRUE, col="red") #line

4.2 tobit regression

summary(m <- vglm(c2 ~ factor(a3_gioi)+ factor(a7honnhan1) + factor(age_code)+ factor(a41_edu1)+  factor(job) + factor(a6_kinhte)+ factor(a8benh_code) + factor(timetoK) + factor(a11_stage)+  factor(tr_status), tobit(Lower = 0, Upper = 100), data = data))
## 
## Call:
## vglm(formula = c2 ~ factor(a3_gioi) + factor(a7honnhan1) + factor(age_code) + 
##     factor(a41_edu1) + factor(job) + factor(a6_kinhte) + factor(a8benh_code) + 
##     factor(timetoK) + factor(a11_stage) + factor(tr_status), 
##     family = tobit(Lower = 0, Upper = 100), data = data)
## 
## Pearson residuals:
##                 Min      1Q   Median     3Q   Max
## mu          -3.7737 -0.5794 -0.03367 0.6447 3.187
## loglink(sd) -0.7169 -0.6403 -0.44150 0.0582 9.376
## 
## Coefficients: 
##                            Estimate Std. Error z value Pr(>|z|)    
## (Intercept):1              57.50393    9.41986   6.105 1.03e-09 ***
## (Intercept):2               2.46230    0.05064  48.624  < 2e-16 ***
## factor(a3_gioi)2           -1.15646    1.80775  -0.640 0.522350    
## factor(a7honnhan1)Married   1.90209    2.96707   0.641 0.521479    
## factor(age_code)2           0.06091    5.14903   0.012 0.990562    
## factor(age_code)3           4.95729    4.88637   1.015 0.310338    
## factor(age_code)4           5.11058    4.96943   1.028 0.303760    
## factor(age_code)5          -3.45804    4.90356  -0.705 0.480679    
## factor(a41_edu1)2           1.90411    2.21163   0.861 0.389264    
## factor(a41_edu1)3           8.51597    3.85093   2.211 0.027008 *  
## factor(a41_edu1)4          -1.55091    2.65608  -0.584 0.559282    
## factor(a41_edu1)5           6.73020    3.98095   1.691 0.090913 .  
## factor(job)2                5.31721    3.03068   1.754 0.079351 .  
## factor(job)3                3.93718    2.14712   1.834 0.066698 .  
## factor(job)4                2.54339    2.96002   0.859 0.390205    
## factor(a6_kinhte)2         17.09298    7.52509   2.271 0.023119 *  
## factor(a6_kinhte)3         17.43589    7.53071   2.315 0.020596 *  
## factor(a8benh_code)C20     -2.37479    1.87069  -1.269 0.204272    
## factor(timetoK)1           -2.44672    2.54226  -0.962 0.335841    
## factor(timetoK)2           -7.69443    3.54829  -2.168 0.030121 *  
## factor(a11_stage)2        -11.12852    3.97064  -2.803 0.005068 ** 
## factor(a11_stage)3        -11.53994    3.49377  -3.303 0.000957 ***
## factor(a11_stage)4        -17.52035    3.89832  -4.494 6.98e-06 ***
## factor(a11_stage)NA        -7.60723    9.18515  -0.828 0.407552    
## factor(tr_status)2          2.94708    2.67204   1.103 0.270056    
## factor(tr_status)3         -8.67330    2.53003  -3.428 0.000608 ***
## factor(tr_status)4         -4.27483    3.43543  -1.244 0.213375    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Names of linear predictors: mu, loglink(sd)
## 
## Log-likelihood: -762.1313 on 367 degrees of freedom
## 
## Number of Fisher scoring iterations: 5 
## 
## No Hauck-Donner effect found in any of the estimates

4.3 coef of model

ctable <- coef(summary(m))
pvals <- 2 * pt(abs(ctable[, "z value"]), df.residual(m), lower.tail = FALSE)
cbind(ctable, pvals)
##                               Estimate Std. Error    z value     Pr(>|z|)
## (Intercept):1              57.50392708 9.41986006  6.1045415 1.030962e-09
## (Intercept):2               2.46230477 0.05064015 48.6235639 0.000000e+00
## factor(a3_gioi)2           -1.15646418 1.80774677 -0.6397269 5.223502e-01
## factor(a7honnhan1)Married   1.90209307 2.96706961  0.6410679 5.214786e-01
## factor(age_code)2           0.06091049 5.14903234  0.0118295 9.905616e-01
## factor(age_code)3           4.95728559 4.88637410  1.0145121 3.103385e-01
## factor(age_code)4           5.11057517 4.96942616  1.0284035 3.037601e-01
## factor(age_code)5          -3.45804309 4.90355520 -0.7052114 4.806787e-01
## factor(a41_edu1)2           1.90411487 2.21163491  0.8609535 3.892636e-01
## factor(a41_edu1)3           8.51597062 3.85093327  2.2114044 2.700785e-02
## factor(a41_edu1)4          -1.55090572 2.65607672 -0.5839085 5.592819e-01
## factor(a41_edu1)5           6.73020051 3.98094672  1.6906030 9.091265e-02
## factor(job)2                5.31720710 3.03067677  1.7544620 7.935138e-02
## factor(job)3                3.93717937 2.14711818  1.8337041 6.669793e-02
## factor(job)4                2.54338756 2.96002162  0.8592463 3.902047e-01
## factor(a6_kinhte)2         17.09298391 7.52509481  2.2714643 2.311889e-02
## factor(a6_kinhte)3         17.43589104 7.53070805  2.3153057 2.059621e-02
## factor(a8benh_code)C20     -2.37479126 1.87068994 -1.2694735 2.042722e-01
## factor(timetoK)1           -2.44671564 2.54226500 -0.9624157 3.358409e-01
## factor(timetoK)2           -7.69442536 3.54828702 -2.1684901 3.012142e-02
## factor(a11_stage)2        -11.12852086 3.97063611 -2.8027048 5.067603e-03
## factor(a11_stage)3        -11.53994496 3.49376511 -3.3030111 9.565260e-04
## factor(a11_stage)4        -17.52034992 3.89831839 -4.4943353 6.978759e-06
## factor(a11_stage)NA        -7.60722979 9.18514752 -0.8282099 4.075517e-01
## factor(tr_status)2          2.94708004 2.67203999  1.1029326 2.700564e-01
## factor(tr_status)3         -8.67329725 2.53003280 -3.4281363 6.077403e-04
## factor(tr_status)4         -4.27483238 3.43542603 -1.2443384 2.133751e-01
##                                   pvals
## (Intercept):1              2.624130e-09
## (Intercept):2             4.946868e-162
## factor(a3_gioi)2           5.227493e-01
## factor(a7honnhan1)Married  5.218787e-01
## factor(age_code)2          9.905681e-01
## factor(age_code)3          3.110068e-01
## factor(age_code)4          3.044374e-01
## factor(age_code)5          4.811260e-01
## factor(a41_edu1)2          3.898258e-01
## factor(a41_edu1)3          2.762359e-02
## factor(a41_edu1)4          5.596406e-01
## factor(a41_edu1)5          9.176171e-02
## factor(job)2               8.018591e-02
## factor(job)3               6.750757e-02
## factor(job)4               3.907655e-01
## factor(a6_kinhte)2         2.369732e-02
## factor(a6_kinhte)3         2.114708e-02
## factor(a8benh_code)C20     2.050767e-01
## factor(timetoK)1           3.364745e-01
## factor(timetoK)2           3.076328e-02
## factor(a11_stage)2         5.336550e-03
## factor(a11_stage)3         1.050562e-03
## factor(a11_stage)4         9.368097e-06
## factor(a11_stage)NA        4.080899e-01
## factor(tr_status)2         2.707791e-01
## factor(tr_status)3         6.767025e-04
## factor(tr_status)4         2.141692e-01

4.4 check model

m2 <- vglm(c2 ~ factor(a3_gioi)+ factor(a7honnhan1) + factor(age_code)+ factor(a41_edu1)+  factor(job) + factor(a6_kinhte)+ factor(a8benh_code) + factor(timetoK) +  factor(tr_status), tobit(Lower = 0, Upper = 100), data = data)

(p <- pchisq(2 * (logLik(m) - logLik(m2)), df = 2, lower.tail = FALSE))
## [1] 5.002571e-05

The LRT with two degrees of freedom is associated with a p-value of 5.002571e-05, indicating that the effect of CRC stage is statistically significant.

Below we calculate the upper and lower 95% confidence intervals for the coefficients.

b <- coef(m)
se <- sqrt(diag(vcov(m)))

cbind(LL = b - qnorm(0.975) * se, UL = b + qnorm(0.975) * se)
##                                    LL         UL
## (Intercept):1              39.0413406 75.9665135
## (Intercept):2               2.3630519  2.5615576
## factor(a3_gioi)2           -4.6995827  2.3866544
## factor(a7honnhan1)Married  -3.9132565  7.7174426
## factor(age_code)2         -10.0310074 10.1528284
## factor(age_code)3          -4.6198317 14.5344028
## factor(age_code)4          -4.6293211 14.8504715
## factor(age_code)5         -13.0688347  6.1527485
## factor(a41_edu1)2          -2.4306099  6.2388396
## factor(a41_edu1)3           0.9682801 16.0636611
## factor(a41_edu1)4          -6.7567204  3.6549090
## factor(a41_edu1)5          -1.0723117 14.5327127
## factor(job)2               -0.6228102 11.2572244
## factor(job)3               -0.2710949  8.1454537
## factor(job)4               -3.2581482  8.3449233
## factor(a6_kinhte)2          2.3440691 31.8418987
## factor(a6_kinhte)3          2.6759745 32.1958076
## factor(a8benh_code)C20     -6.0412762  1.2916937
## factor(timetoK)1           -7.4294635  2.5360322
## factor(timetoK)2          -14.6489401 -0.7399106
## factor(a11_stage)2        -18.9108246 -3.3462171
## factor(a11_stage)3        -18.3875987 -4.6922912
## factor(a11_stage)4        -25.1609136 -9.8797863
## factor(a11_stage)NA       -25.6097881 10.3953285
## factor(tr_status)2         -2.2900221  8.1841822
## factor(tr_status)3        -13.6320704 -3.7145241
## factor(tr_status)4        -11.0081437  2.4584789

4.5 Model fits

data$yhat1 <- fitted(m)[,1]
data$rr <- resid(m, type = "response")
data$rp <- resid(m, type = "pearson")[,1]

par(mfcol = c(2, 3))

with(data, {
  plot(yhat1, rr, main = "Fitted vs Residuals")
  qqnorm(rr)
  plot(yhat1, rp, main = "Fitted vs Pearson Residuals")
  qqnorm(rp)
  plot(c2, rp, main = "Actual vs Pearson Residuals")
  plot(c2, yhat1, main = "Actual vs Fitted")
})

The graph in the bottom right was the predicted, or fitted, values plotted against the actual. This can be particularly useful when comparing competing models. We can calculate the correlation between these two as well as the squared correlation, to get a sense of how accurate our model predicts the data and how much of the variance in the outcome is accounted for by the model.

# correlation
(r <- with(data, cor(c2, yhat1)))
## [1] 0.5669197
# variance accounted for
r^2
## [1] 0.321398

End: The correlation between the predicted and observed values of apt is 0.5669197 If we square this value, we get the multiple squared correlation, this indicates predicted values share 32% of their variance with “c2- vas”

LS0tDQp0aXRsZTogIlRPQklUIFJFR1JFU1NJT04gTU9ERUwiDQphdXRob3I6ICJCaW5oIFRoYW5nIFRyYW4iDQpkYXRlOiAiMTIvMDYvMjAxOSINCm91dHB1dDoNCiAgaHRtbF9kb2N1bWVudDoNCiAgICBjb2RlX2Rvd25sb2FkOiB5ZXMNCiAgICBjb2RlX2ZvbGRpbmc6IGhpZGUNCiAgICBudW1iZXJfc2VjdGlvbnM6IHllcw0KICAgIHRoZW1lOiBqb3VybmFsDQogICAgdG9jOiB5ZXMNCiAgICB0b2NfZmxvYXQ6IHllcw0KICB3b3JkX2RvY3VtZW50Og0KICAgIHRvYzogeWVzDQotLS0NCg0KDQpgYGB7cn0NCmxpYnJhcnkoZm9yZWlnbikNCmBgYA0KDQoNCg0KYGBge3J9DQpkYXRhPXJlYWQuZHRhKCJDOi9Vc2Vycy9CSU5IIFRIQU5HLVRSQU4vRHJvcGJveC9QaEQvUmVzZWFyY2gvUGhEIHRoZXNpcy9NZXRob2RzL21vcmtvdi9VdGlsaXR5X3N0dWR5L0dyYXBocy9kYXRhXzVERUMyMDE5LmR0YSIpDQpgYGANCg0KDQojIFZhcmlhYmxlcw0KDQpgYGB7cn0NCnJlcXVpcmUoZ2dwbG90MikNCnJlcXVpcmUoR0dhbGx5KQ0KcmVxdWlyZShWR0FNKQ0KbGlicmFyeShnZ3Bsb3QyKQ0KbGlicmFyeShncmlkKQ0KbGlicmFyeShncmlkRXh0cmEpDQpgYGANCg0KIyBFUQ0KDQojIyAxLiBnZ3Bsb3QNCg0KDQpgYGB7cn0NCiMgYWRkIGEgbm9ybWFsIGRpc3RyaWJ1dGlvbiBsaW5lIGluIGhpc3RvZ3JhbQ0KaGlzdChkYXRhJHNjb3JlMnNoLCBmcmVxPUZBTFNFLCBjb2w9ImdyYXkiLCB4bGFiPSJQZXRhbCBMZW5ndGgiLCBtYWluPSJ0aXRsZSBoZXJlICIpDQpjdXJ2ZShkbm9ybSh4LCBtZWFuPW1lYW4oZGF0YSRzY29yZTJzaCksIHNkPXNkKGRhdGEkc2NvcmUyc2gpKSwgYWRkPVRSVUUsIGNvbD0icmVkIikgI2xpbmUNCmBgYA0KDQpgYGB7cn0NCiMgYWRkIGEgbm9ybWFsIGRpc3RyaWJ1dGlvbiBsaW5lIGluIGhpc3RvZ3JhbQ0KaGlzdChkYXRhJGMyLCBmcmVxPUZBTFNFLCBjb2w9ImdyYXkiLCB4bGFiPSJQZXRhbCBMZW5ndGgiLCBtYWluPSJ0aXRsZSBoZXJlICIpDQpjdXJ2ZShkbm9ybSh4LCBtZWFuPW1lYW4oZGF0YSRjMiksIHNkPXNkKGRhdGEkYzIpKSwgYWRkPVRSVUUsIGNvbD0icmVkIikgI2xpbmUNCmBgYA0KDQoNCg0KIyMgdG9iaXQgcmVncmVzc2lvbg0KDQpgYGB7cn0NCnN1bW1hcnkobSA8LSB2Z2xtKHNjb3JlMnNoIH4gZmFjdG9yKGEzX2dpb2kpKyBmYWN0b3IoYTdob25uaGFuMSkgKyBmYWN0b3IoYWdlX2NvZGUpKyBmYWN0b3IoYTQxX2VkdTEpKyAgZmFjdG9yKGpvYikgKyBmYWN0b3IoYTZfa2luaHRlKSsgZmFjdG9yKGE4YmVuaF9jb2RlKSArIGZhY3Rvcih0aW1ldG9LKSArIGZhY3RvcihhMTFfc3RhZ2UpKyAgZmFjdG9yKHRyX3N0YXR1cyksIHRvYml0KExvd2VyID0gLTAuNTY2LCBVcHBlciA9IDEpLCBkYXRhID0gZGF0YSkpDQpgYGANCg0KIyMgY29lZiBvZiBtb2RlbA0KDQpgYGB7cn0NCmN0YWJsZSA8LSBjb2VmKHN1bW1hcnkobSkpDQpwdmFscyA8LSAyICogcHQoYWJzKGN0YWJsZVssICJ6IHZhbHVlIl0pLCBkZi5yZXNpZHVhbChtKSwgbG93ZXIudGFpbCA9IEZBTFNFKQ0KY2JpbmQoY3RhYmxlLCBwdmFscykNCmBgYA0KDQojIyBjaGVjayBtb2RlbA0KDQpgYGB7cn0NCm0yIDwtIHZnbG0oc2NvcmUyc2ggfiBmYWN0b3IoYTNfZ2lvaSkrIGZhY3RvcihhN2hvbm5oYW4xKSArIGZhY3RvcihhZ2VfY29kZSkrIGZhY3RvcihhNDFfZWR1MSkrICBmYWN0b3Ioam9iKSArIGZhY3RvcihhNl9raW5odGUpKyBmYWN0b3IoYThiZW5oX2NvZGUpICsgZmFjdG9yKHRpbWV0b0spICsgIGZhY3Rvcih0cl9zdGF0dXMpLCB0b2JpdChMb3dlciA9IC0wLjU2NiwgVXBwZXIgPSAxKSwgZGF0YSA9IGRhdGEpDQoNCihwIDwtIHBjaGlzcSgyICogKGxvZ0xpayhtKSAtIGxvZ0xpayhtMikpLCBkZiA9IDIsIGxvd2VyLnRhaWwgPSBGQUxTRSkpDQpgYGANCg0KVGhlIExSVCB3aXRoIHR3byBkZWdyZWVzIG9mIGZyZWVkb20gaXMgYXNzb2NpYXRlZCB3aXRoIGEgcC12YWx1ZSBvZiAxLjYwMDM5MmUtMDgsIGluZGljYXRpbmcgdGhhdCAgdGhlIGVmZmVjdCBvZiBDUkMgc3RhZ2UgaXMgc3RhdGlzdGljYWxseSBzaWduaWZpY2FudC4NCg0KQmVsb3cgd2UgY2FsY3VsYXRlIHRoZSB1cHBlciBhbmQgbG93ZXIgOTUlIGNvbmZpZGVuY2UgaW50ZXJ2YWxzIGZvciB0aGUgY29lZmZpY2llbnRzLg0KDQpgYGB7cn0NCmIgPC0gY29lZihtKQ0Kc2UgPC0gc3FydChkaWFnKHZjb3YobSkpKQ0KDQpjYmluZChMTCA9IGIgLSBxbm9ybSgwLjk3NSkgKiBzZSwgVUwgPSBiICsgcW5vcm0oMC45NzUpICogc2UpDQpgYGANCg0KDQojIyBNb2RlbCBmaXRzDQoNCmBgYHtyfQ0KZGF0YSR5aGF0IDwtIGZpdHRlZChtKVssMV0NCmRhdGEkcnIgPC0gcmVzaWQobSwgdHlwZSA9ICJyZXNwb25zZSIpDQpkYXRhJHJwIDwtIHJlc2lkKG0sIHR5cGUgPSAicGVhcnNvbiIpWywxXQ0KDQpwYXIobWZjb2wgPSBjKDIsIDMpKQ0KDQp3aXRoKGRhdGEsIHsNCiAgcGxvdCh5aGF0LCByciwgbWFpbiA9ICJGaXR0ZWQgdnMgUmVzaWR1YWxzIikNCiAgcXFub3JtKHJyKQ0KICBwbG90KHloYXQsIHJwLCBtYWluID0gIkZpdHRlZCB2cyBQZWFyc29uIFJlc2lkdWFscyIpDQogIHFxbm9ybShycCkNCiAgcGxvdChzY29yZTJzaCwgcnAsIG1haW4gPSAiQWN0dWFsIHZzIFBlYXJzb24gUmVzaWR1YWxzIikNCiAgcGxvdChzY29yZTJzaCwgeWhhdCwgbWFpbiA9ICJBY3R1YWwgdnMgRml0dGVkIikNCn0pDQpgYGANCg0KDQpUaGUgZ3JhcGggaW4gdGhlIGJvdHRvbSByaWdodCB3YXMgdGhlIHByZWRpY3RlZCwgb3IgZml0dGVkLCB2YWx1ZXMgcGxvdHRlZCBhZ2FpbnN0IHRoZSBhY3R1YWwuIFRoaXMgY2FuIGJlIHBhcnRpY3VsYXJseSB1c2VmdWwgd2hlbiBjb21wYXJpbmcgY29tcGV0aW5nIG1vZGVscy4gV2UgY2FuIGNhbGN1bGF0ZSB0aGUgY29ycmVsYXRpb24gYmV0d2VlbiB0aGVzZSB0d28gYXMgd2VsbCBhcyB0aGUgc3F1YXJlZCBjb3JyZWxhdGlvbiwgdG8gZ2V0IGEgc2Vuc2Ugb2YgaG93IGFjY3VyYXRlIG91ciBtb2RlbCBwcmVkaWN0cyB0aGUgZGF0YSBhbmQgaG93IG11Y2ggb2YgdGhlIHZhcmlhbmNlIGluIHRoZSBvdXRjb21lIGlzIGFjY291bnRlZCBmb3IgYnkgdGhlIG1vZGVsLg0KDQpgYGB7cn0NCiMgY29ycmVsYXRpb24NCihyIDwtIHdpdGgoZGF0YSwgY29yKHNjb3JlMnNoLCB5aGF0KSkpDQpgYGANCg0KYGBge3J9DQojIHZhcmlhbmNlIGFjY291bnRlZCBmb3INCnJeMg0KYGBgDQoNCkVuZDogDQpUaGUgY29ycmVsYXRpb24gYmV0d2VlbiB0aGUgcHJlZGljdGVkIGFuZCBvYnNlcnZlZCB2YWx1ZXMgb2YgYXB0IGlzIDAuNTgyMzA0OC4gSWYgd2Ugc3F1YXJlIHRoaXMgdmFsdWUsIHdlIGdldCB0aGUgbXVsdGlwbGUgc3F1YXJlZCBjb3JyZWxhdGlvbiwgdGhpcyBpbmRpY2F0ZXMgcHJlZGljdGVkIHZhbHVlcyBzaGFyZSAzNCUgb2YgdGhlaXIgdmFyaWFuY2Ugd2l0aCAic2NvcmUyc2giDQoNCiMgb3RoZXIgcGFydHMNCg0KIyMgY29ycmVsYXRpb24gYmV0d2VlbiBzY29yZSBhbmQgdg0KDQpgYGB7cn0NCihyIDwtIHdpdGgoZGF0YSwgY29yKHNjb3JlMnNoLCBjMikpKQ0KYGBgDQoNCiMjIGdyYXBocw0KDQpgYGB7cn0NCg0KZ3JvYjEgPSBncm9iVHJlZSh0ZXh0R3JvYihwYXN0ZSgiUGVhcnNvbiBDb3JyZWxhdGlvbiA6ICIsIHJvdW5kKGNvcihkYXRhJHNjb3JlMnNoLCBkYXRhJGMyKSwgNCkgKSwgeCA9IDAuNjMsIHkgPSAwLjk3LCBoanVzdCA9IDAsIGdwID0gZ3Bhcihjb2wgPSAicmVkIiwgZm9udHNpemUgPSAxMSwgZm9udGZhY2UgPSAiYm9sZCIpKSkNCg0KDQpnMSA9IGdncGxvdChkYXRhLCBhZXMoeD1jMiwgeT1zY29yZTJzaCkpICsgZ2VvbV9wb2ludCgpICsgZ2d0aXRsZSgiRVEtNUQtNUwgdnMgVkFTIikgKyBnZW9tX3Ntb290aChtZXRob2Q9bG0sIHNlPUZBTFNFKSArIHNjYWxlX3hfY29udGludW91cyhuYW1lID0gIlZBUyIsIGxpbWl0cyA9IGMoMCwgMTAwKSwgYnJlYWtzID0gc2VxKDAsIDEwMCwgMTApKSArIHNjYWxlX3lfY29udGludW91cyhuYW1lID0gIkVRLTVELTVMIiwgbGltaXRzID0gYygtMSwgMSksIGJyZWFrcyA9IHNlcSgtMSwgMSwgMC41KSkgKyBhbm5vdGF0aW9uX2N1c3RvbShncm9iMSkgKyB0aGVtZShwbG90LnRpdGxlID0gZWxlbWVudF90ZXh0KGhqdXN0ID0gMC41KSkNCg0KZzENCmBgYA0KDQoNCiNCaW5oIFRoYW5nIChSKQ0KDQoNCg0KIyBNT0RFTCAyOiBjMiAtdmFzDQoNCg0KIyMgMi4xLiBnZ3Bsb3QNCg0KDQpgYGB7cn0NCiMgYWRkIGEgbm9ybWFsIGRpc3RyaWJ1dGlvbiBsaW5lIGluIGhpc3RvZ3JhbQ0KaGlzdChkYXRhJGMyLCBmcmVxPUZBTFNFLCBjb2w9ImdyYXkiLCB4bGFiPSJQZXRhbCBMZW5ndGgiLCBtYWluPSJ0aXRsZSBoZXJlICIpDQpjdXJ2ZShkbm9ybSh4LCBtZWFuPW1lYW4oZGF0YSRjMiksIHNkPXNkKGRhdGEkYzIpKSwgYWRkPVRSVUUsIGNvbD0icmVkIikgI2xpbmUNCmBgYA0KDQpgYGB7cn0NCiMgYWRkIGEgbm9ybWFsIGRpc3RyaWJ1dGlvbiBsaW5lIGluIGhpc3RvZ3JhbQ0KaGlzdChkYXRhJGMyLCBmcmVxPUZBTFNFLCBjb2w9ImdyYXkiLCB4bGFiPSJQZXRhbCBMZW5ndGgiLCBtYWluPSJ0aXRsZSBoZXJlICIpDQpjdXJ2ZShkbm9ybSh4LCBtZWFuPW1lYW4oZGF0YSRjMiksIHNkPXNkKGRhdGEkYzIpKSwgYWRkPVRSVUUsIGNvbD0icmVkIikgI2xpbmUNCmBgYA0KDQoNCg0KIyMgdG9iaXQgcmVncmVzc2lvbg0KDQpgYGB7cn0NCnN1bW1hcnkobSA8LSB2Z2xtKGMyIH4gZmFjdG9yKGEzX2dpb2kpKyBmYWN0b3IoYTdob25uaGFuMSkgKyBmYWN0b3IoYWdlX2NvZGUpKyBmYWN0b3IoYTQxX2VkdTEpKyAgZmFjdG9yKGpvYikgKyBmYWN0b3IoYTZfa2luaHRlKSsgZmFjdG9yKGE4YmVuaF9jb2RlKSArIGZhY3Rvcih0aW1ldG9LKSArIGZhY3RvcihhMTFfc3RhZ2UpKyAgZmFjdG9yKHRyX3N0YXR1cyksIHRvYml0KExvd2VyID0gMCwgVXBwZXIgPSAxMDApLCBkYXRhID0gZGF0YSkpDQpgYGANCg0KIyMgY29lZiBvZiBtb2RlbA0KDQpgYGB7cn0NCmN0YWJsZSA8LSBjb2VmKHN1bW1hcnkobSkpDQpwdmFscyA8LSAyICogcHQoYWJzKGN0YWJsZVssICJ6IHZhbHVlIl0pLCBkZi5yZXNpZHVhbChtKSwgbG93ZXIudGFpbCA9IEZBTFNFKQ0KY2JpbmQoY3RhYmxlLCBwdmFscykNCmBgYA0KDQojIyBjaGVjayBtb2RlbA0KDQpgYGB7cn0NCm0yIDwtIHZnbG0oYzIgfiBmYWN0b3IoYTNfZ2lvaSkrIGZhY3RvcihhN2hvbm5oYW4xKSArIGZhY3RvcihhZ2VfY29kZSkrIGZhY3RvcihhNDFfZWR1MSkrICBmYWN0b3Ioam9iKSArIGZhY3RvcihhNl9raW5odGUpKyBmYWN0b3IoYThiZW5oX2NvZGUpICsgZmFjdG9yKHRpbWV0b0spICsgIGZhY3Rvcih0cl9zdGF0dXMpLCB0b2JpdChMb3dlciA9IDAsIFVwcGVyID0gMTAwKSwgZGF0YSA9IGRhdGEpDQoNCihwIDwtIHBjaGlzcSgyICogKGxvZ0xpayhtKSAtIGxvZ0xpayhtMikpLCBkZiA9IDIsIGxvd2VyLnRhaWwgPSBGQUxTRSkpDQpgYGANCg0KVGhlIExSVCB3aXRoIHR3byBkZWdyZWVzIG9mIGZyZWVkb20gaXMgYXNzb2NpYXRlZCB3aXRoIGEgcC12YWx1ZSBvZiA1LjAwMjU3MWUtMDUsIGluZGljYXRpbmcgdGhhdCAgdGhlIGVmZmVjdCBvZiBDUkMgc3RhZ2UgaXMgc3RhdGlzdGljYWxseSBzaWduaWZpY2FudC4NCg0KQmVsb3cgd2UgY2FsY3VsYXRlIHRoZSB1cHBlciBhbmQgbG93ZXIgOTUlIGNvbmZpZGVuY2UgaW50ZXJ2YWxzIGZvciB0aGUgY29lZmZpY2llbnRzLg0KDQpgYGB7cn0NCmIgPC0gY29lZihtKQ0Kc2UgPC0gc3FydChkaWFnKHZjb3YobSkpKQ0KDQpjYmluZChMTCA9IGIgLSBxbm9ybSgwLjk3NSkgKiBzZSwgVUwgPSBiICsgcW5vcm0oMC45NzUpICogc2UpDQpgYGANCg0KDQojIyBNb2RlbCBmaXRzDQoNCmBgYHtyfQ0KZGF0YSR5aGF0MSA8LSBmaXR0ZWQobSlbLDFdDQpkYXRhJHJyIDwtIHJlc2lkKG0sIHR5cGUgPSAicmVzcG9uc2UiKQ0KZGF0YSRycCA8LSByZXNpZChtLCB0eXBlID0gInBlYXJzb24iKVssMV0NCg0KcGFyKG1mY29sID0gYygyLCAzKSkNCg0Kd2l0aChkYXRhLCB7DQogIHBsb3QoeWhhdDEsIHJyLCBtYWluID0gIkZpdHRlZCB2cyBSZXNpZHVhbHMiKQ0KICBxcW5vcm0ocnIpDQogIHBsb3QoeWhhdDEsIHJwLCBtYWluID0gIkZpdHRlZCB2cyBQZWFyc29uIFJlc2lkdWFscyIpDQogIHFxbm9ybShycCkNCiAgcGxvdChjMiwgcnAsIG1haW4gPSAiQWN0dWFsIHZzIFBlYXJzb24gUmVzaWR1YWxzIikNCiAgcGxvdChjMiwgeWhhdDEsIG1haW4gPSAiQWN0dWFsIHZzIEZpdHRlZCIpDQp9KQ0KYGBgDQoNCg0KVGhlIGdyYXBoIGluIHRoZSBib3R0b20gcmlnaHQgd2FzIHRoZSBwcmVkaWN0ZWQsIG9yIGZpdHRlZCwgdmFsdWVzIHBsb3R0ZWQgYWdhaW5zdCB0aGUgYWN0dWFsLiBUaGlzIGNhbiBiZSBwYXJ0aWN1bGFybHkgdXNlZnVsIHdoZW4gY29tcGFyaW5nIGNvbXBldGluZyBtb2RlbHMuIFdlIGNhbiBjYWxjdWxhdGUgdGhlIGNvcnJlbGF0aW9uIGJldHdlZW4gdGhlc2UgdHdvIGFzIHdlbGwgYXMgdGhlIHNxdWFyZWQgY29ycmVsYXRpb24sIHRvIGdldCBhIHNlbnNlIG9mIGhvdyBhY2N1cmF0ZSBvdXIgbW9kZWwgcHJlZGljdHMgdGhlIGRhdGEgYW5kIGhvdyBtdWNoIG9mIHRoZSB2YXJpYW5jZSBpbiB0aGUgb3V0Y29tZSBpcyBhY2NvdW50ZWQgZm9yIGJ5IHRoZSBtb2RlbC4NCg0KYGBge3J9DQojIGNvcnJlbGF0aW9uDQoociA8LSB3aXRoKGRhdGEsIGNvcihjMiwgeWhhdDEpKSkNCmBgYA0KDQpgYGB7cn0NCiMgdmFyaWFuY2UgYWNjb3VudGVkIGZvcg0Kcl4yDQpgYGANCg0KRW5kOiANClRoZSBjb3JyZWxhdGlvbiBiZXR3ZWVuIHRoZSBwcmVkaWN0ZWQgYW5kIG9ic2VydmVkIHZhbHVlcyBvZiBhcHQgaXMgMC41NjY5MTk3IElmIHdlIHNxdWFyZSB0aGlzIHZhbHVlLCB3ZSBnZXQgdGhlIG11bHRpcGxlIHNxdWFyZWQgY29ycmVsYXRpb24sIHRoaXMgaW5kaWNhdGVzIHByZWRpY3RlZCB2YWx1ZXMgc2hhcmUgMzIlIG9mIHRoZWlyIHZhcmlhbmNlIHdpdGggImMyLSB2YXMiDQoNCg0KDQoNCg==