mydata=read.csv("C:/Users/lfult/Google Drive/Courses & Syllabi/Boston College/Health Analytics/midtmreform.csv")
str(mydata)
## 'data.frame': 2170 obs. of 5 variables:
## $ Ctry : Factor w/ 217 levels "ABW","ADO","AFG",..: 3 5 55 9 2 4 10 7 8 1 ...
## $ Yr : int 2000 2000 2000 2000 2000 2000 2000 2000 2000 2000 ...
## $ LifeExp: num 55.1 74.3 70.2 NA NA ...
## $ Age65 : num 2.24 7.07 4.34 NA NA ...
## $ Cost : num NA 74 61.3 NA 1242.2 ...
mydata$Yr=as.factor(mydata$Yr)
library(psych)
## Warning: package 'psych' was built under R version 3.3.3
describe(mydata[,3:5])
## vars n mean sd median trimmed mad min max
## LifeExp 1 2006 69.92 9.18 72.49 70.81 8.50 38.69 83.98
## Age65 2 1938 7.68 5.24 5.57 7.04 3.87 0.70 25.71
## Cost 3 1891 958.59 1683.49 272.62 519.22 350.58 3.19 9719.99
## range skew kurtosis se
## LifeExp 45.29 -0.79 -0.18 0.21
## Age65 25.01 0.90 -0.40 0.12
## Cost 9716.79 2.67 7.21 38.71
summary(mydata[,3:5])
## LifeExp Age65 Cost
## Min. :38.69 Min. : 0.6969 Min. : 3.195
## 1st Qu.:64.30 1st Qu.: 3.3686 1st Qu.: 66.518
## Median :72.49 Median : 5.5673 Median : 272.619
## Mean :69.92 Mean : 7.6826 Mean : 958.586
## 3rd Qu.:76.59 3rd Qu.:11.7890 3rd Qu.: 851.406
## Max. :83.98 Max. :25.7054 Max. :9719.988
## NA's :164 NA's :232 NA's :279
par(mfrow=c(2,2))
for (i in 3:5){hist(mydata[,i])}
boxplot(scale(mydata[,3:5]), main="Boxplots on Scaled Data", notch=TRUE, col=seq(1:3), horizontal=TRUE)
library(mice)
## Warning: package 'mice' was built under R version 3.3.3
library(VIM)
## Warning: package 'VIM' was built under R version 3.3.3
## Loading required package: colorspace
## Loading required package: grid
## Loading required package: data.table
## VIM is ready to use.
## Since version 4.0.0 the GUI is in its own package VIMGUI.
##
## Please use the package to use the new (and old) GUI.
## Suggestions and bug-reports can be submitted at: https://github.com/alexkowa/VIM/issues
##
## Attaching package: 'VIM'
## The following object is masked from 'package:datasets':
##
## sleep
par(mfrow=c(1,1))
par(ask=TRUE)
aggr(mydata[,3:5], prop = T, numbers = T) #plot missing map
## Warning in plot.aggr(res, ...): not enough horizontal space to display
## frequencies
marginplot(mydata[,3:4])
marginplot(mydata[,c(3,5)])
marginplot(mydata[,4:5])
library(ResourceSelection)
## Warning: package 'ResourceSelection' was built under R version 3.3.3
## ResourceSelection 0.3-2 2017-02-28
kdepairs(na.omit(mydata[,3:5]))
system.time(mi.mydata <- mice(mydata, seed = 1234, printFlag = FALSE))
## user system elapsed
## 19.42 0.25 19.68
mice:::print.mids(mi.mydata)
## Multiply imputed data set
## Call:
## mice(data = mydata, printFlag = FALSE, seed = 1234)
## Number of multiple imputations: 5
## Missing cells per column:
## Ctry Yr LifeExp Age65 Cost
## 0 0 164 232 279
## Imputation methods:
## Ctry Yr LifeExp Age65 Cost
## "" "" "pmm" "pmm" "pmm"
## VisitSequence:
## LifeExp Age65 Cost
## 3 4 5
## PredictorMatrix:
## Ctry Yr LifeExp Age65 Cost
## Ctry 0 0 0 0 0
## Yr 0 0 0 0 0
## LifeExp 1 1 0 1 1
## Age65 1 1 1 0 1
## Cost 1 1 1 1 0
## Random generator seed value: 1234
mi.mydata$imp$LifeExp[,5]=apply(mi.mydata$imp$LifeExp,1,mean)
mi.mydata$imp$Age65[,5]=apply(mi.mydata$imp$Age65,1,mean)
mi.mydata$imp$Cost[,5]=apply(mi.mydata$imp$Cost,1,mean)
mi.mydata$imp$LifeExp
## 1 2 3 4 5
## 4 71.95617 71.01951 71.89883 71.06244 71.67383
## 5 71.69483 72.49144 72.21722 70.40732 71.73102
## 28 71.89883 72.21707 71.40817 70.20244 71.61377
## 37 71.67061 72.25429 72.30129 71.89883 72.05502
## 51 72.97237 72.97237 73.53417 73.71185 73.26521
## 56 71.95617 72.35659 74.59949 71.37815 72.44931
## 75 72.34232 71.88320 73.39729 70.53332 71.96507
## 94 72.34232 71.24939 72.13132 70.40244 71.52237
## 131 70.62395 71.78739 71.96585 71.77976 71.31237
## 138 71.01951 72.16341 73.30105 71.24634 71.98951
## 146 72.10232 72.13132 72.34232 72.86778 72.47758
## 169 68.49761 68.45122 69.02202 69.79756 69.30546
## 172 72.86778 71.71141 71.67246 72.97561 72.29182
## 181 71.72241 72.23185 72.21493 70.77546 71.83154
## 201 71.71141 70.86585 73.39729 70.30529 71.69858
## 202 73.79724 73.72061 72.97561 71.74705 72.89447
## 221 74.32022 73.92573 72.90402 72.88244 73.50649
## 222 72.74980 72.79917 74.90071 72.23185 73.39878
## 245 74.37620 73.53417 74.53659 71.48615 73.65121
## 254 74.00244 73.94500 72.81088 72.70956 73.67372
## 273 74.40976 74.05766 74.01602 73.09695 73.93046
## 292 73.51220 74.20732 72.69146 72.97802 73.50229
## 311 73.80322 73.09695 73.09695 73.09756 72.63314
## 342 68.66651 68.66651 69.65807 65.68488 68.12672
## 348 72.13173 72.47073 72.55885 69.98205 72.13833
## 355 75.14390 74.19439 74.68783 72.98005 74.23676
## 363 73.83959 73.91463 74.53659 72.66341 73.76353
## 367 71.67061 72.16341 75.23144 72.54832 72.88297
## 389 72.65400 72.79917 72.65400 74.84461 73.47514
## 398 75.39971 74.01707 74.59239 73.50076 74.37327
## 418 73.92607 73.46173 75.10198 72.56585 73.82513
## 419 74.09093 74.92685 74.28676 74.20244 74.30402
## 438 74.56995 73.63078 74.44639 70.78590 73.54266
## 439 73.15122 73.30976 74.35641 72.57073 73.39397
## 462 74.28029 73.41220 74.61815 73.46173 73.96847
## 471 75.58780 73.31990 74.01610 73.47624 74.11757
## 490 74.20732 74.97917 75.18666 73.63980 74.46741
## 509 74.00620 73.70161 73.55234 72.96732 73.61449
## 528 75.56585 73.84498 74.18205 70.78590 73.67624
## 559 68.45122 67.86129 69.59422 65.30044 67.93982
## 565 72.41483 72.89837 73.46173 71.24939 72.51941
## 572 73.98537 74.70488 75.16627 73.56427 74.36660
## 580 74.48385 73.22927 73.63078 74.68598 74.11133
## 584 70.90000 73.42102 74.34283 73.54827 73.36232
## 615 74.22634 75.16341 74.42456 74.05946 74.47376
## 635 73.36632 74.77480 73.89239 73.52102 73.92244
## 636 74.42390 74.42202 75.40732 73.80322 74.59666
## 655 74.65310 74.35641 73.98537 74.07171 74.41513
## 656 73.71185 73.84498 74.42390 71.65768 73.43012
## 679 74.34283 74.85854 73.22927 73.56427 74.17738
## 688 75.39971 74.82439 75.23144 74.26073 74.81182
## 707 75.43878 75.51198 75.08624 74.18780 75.00658
## 726 75.39971 74.96829 74.17763 73.19356 74.56052
## 745 74.47110 75.23144 74.30195 73.25039 74.24960
## 776 69.48732 68.42563 70.77546 65.87015 69.06349
## 782 72.80105 72.56585 72.90402 71.69483 72.63816
## 789 74.23351 74.84529 74.91446 74.41220 74.68705
## 797 74.67573 74.82439 74.89198 73.91463 74.57514
## 801 72.57073 73.44576 75.97020 73.42102 73.62180
## 832 73.92573 74.83659 75.04529 73.88537 74.65189
## 852 74.31463 75.20576 74.80327 72.64034 74.51036
## 853 75.76210 74.45210 75.30878 74.40976 74.87289
## 872 75.43878 74.53659 74.86098 74.40976 74.87441
## 873 74.12244 75.16341 75.16627 73.71185 74.67908
## 896 74.51476 74.56895 74.42390 74.20488 74.52089
## 905 75.09802 74.07441 75.39971 74.18780 74.78932
## 924 74.90976 75.15680 75.91702 74.66783 75.10519
## 943 74.68249 74.89198 74.63571 74.42202 74.56197
## 962 74.34283 74.85854 74.05215 73.77073 74.24038
## 993 69.78229 69.46390 72.52622 65.31159 69.25020
## 999 72.57073 74.45154 73.19356 72.10232 73.16378
## 1006 74.53124 74.04390 75.23366 74.67573 74.58733
## 1014 74.56895 74.68249 74.68783 74.00244 74.47876
## 1018 72.67420 72.73224 75.29983 73.09695 73.31165
## 1049 74.90071 75.70561 76.14100 74.65310 75.28914
## 1069 75.38932 75.12361 75.90454 73.51220 74.97664
## 1070 75.51198 76.32683 75.84751 73.74641 75.28803
## 1089 74.90071 74.35185 73.99512 74.07190 74.46540
## 1090 74.58327 73.90000 75.08624 73.56341 74.18073
## 1113 75.00905 74.45210 74.51390 73.99315 74.69603
## 1122 74.46968 75.18666 73.99512 74.32295 74.65980
## 1136 77.62283 75.94634 76.97073 76.10976 76.39841
## 1141 75.30878 77.42268 76.97073 74.89198 76.25669
## 1160 75.02980 75.16627 74.84529 73.77073 74.84827
## 1179 74.70976 74.34390 75.75954 74.51920 74.90763
## 1210 68.22200 69.48732 72.09937 65.96368 69.69881
## 1216 73.34507 73.47624 74.07000 71.91832 73.64968
## 1223 75.53415 75.33395 75.48824 74.28676 75.37447
## 1231 75.45254 74.42456 75.14702 74.05766 74.83360
## 1235 73.36907 73.78551 75.91220 73.15122 73.83266
## 1257 74.42390 74.70488 74.33659 75.69539 74.97327
## 1266 75.20217 74.67571 75.33395 75.06341 75.03759
## 1286 75.12598 75.42927 74.51390 74.52676 74.95961
## 1287 75.56585 75.84751 75.43300 74.86098 75.51610
## 1306 75.04685 74.91271 75.53415 74.79934 75.48690
## 1307 75.37149 75.24390 76.14100 74.12244 75.20049
## 1330 74.66863 76.01866 75.53415 74.41220 75.36039
## 1339 74.14390 75.84751 74.91446 75.23144 75.51200
## 1358 75.43300 76.42812 76.32763 74.05215 75.93272
## 1377 75.23366 74.90071 74.23620 74.51920 74.91897
## 1396 74.75441 75.18666 75.11220 74.96829 75.06626
## 1427 69.56500 70.11078 70.20244 67.37476 69.24054
## 1433 73.30976 73.67161 74.19439 71.71141 73.46671
## 1440 77.14146 76.63659 76.47029 74.84461 76.19407
## 1448 75.91220 75.22644 75.00741 74.86098 75.39311
## 1452 73.71185 73.36632 75.62180 73.85485 74.08365
## 1474 75.42927 74.35185 73.79166 75.64851 75.07792
## 1483 76.68927 75.97020 76.42812 74.81815 75.88993
## 1503 74.04390 75.84751 75.39132 74.60244 75.02377
## 1504 75.39132 76.47029 76.02678 74.80327 75.87619
## 1523 77.23902 75.87741 75.65737 73.99315 75.77360
## 1524 74.51390 75.22644 75.31939 74.52676 74.81123
## 1547 75.87249 76.20049 75.91220 74.33924 75.76384
## 1556 75.31939 74.56751 73.05366 75.20576 74.92439
## 1570 76.60261 76.58956 75.98610 75.62912 76.45611
## 1575 76.69512 76.18071 76.69512 76.97073 76.90267
## 1594 75.58515 75.15680 75.33395 74.35185 75.36799
## 1613 76.33751 75.33395 74.58502 75.14702 75.51192
## 1644 70.27110 69.78205 73.14859 68.59076 70.69118
## 1650 74.07171 74.42390 74.63680 73.90000 74.26079
## 1657 75.49641 75.96293 75.96293 73.99512 75.48883
## 1665 74.58502 75.69512 76.14100 74.90976 75.39054
## 1669 73.26683 74.79934 77.14561 74.32295 74.72513
## 1683 83.58780 83.48049 83.48049 83.58780 83.62341
## 1700 75.49641 76.71463 77.08510 75.79641 76.24402
## 1720 75.65737 77.62283 75.87741 74.84461 75.89744
## 1721 76.49359 75.87249 76.54120 75.20217 76.31652
## 1740 75.91702 76.47029 75.46602 75.14390 75.69485
## 1741 74.56895 75.04529 75.02980 73.86341 74.64344
## 1764 75.46602 76.24859 74.04390 75.12361 75.70099
## 1773 76.16829 76.12702 75.30976 75.02980 75.73233
## 1787 76.86363 77.00000 76.77561 77.47317 76.95753
## 1792 76.18566 76.43324 75.41220 75.95102 75.95216
## 1811 76.24859 77.23902 76.05280 75.30878 76.19387
## 1813 71.05610 72.21302 72.31317 72.61220 71.65439
## 1830 75.48824 75.97020 75.64851 74.46968 75.47106
## 1861 70.59939 69.18349 71.93173 67.63420 69.90654
## 1867 73.56800 72.72439 75.23144 72.51876 73.70274
## 1874 76.49478 76.71463 75.41220 75.81624 76.01340
## 1882 76.05280 75.64851 76.16829 74.65383 75.69727
## 1886 74.20732 74.34390 75.98610 75.58780 74.93901
## 1900 83.48049 83.42195 83.58780 83.32322 83.44708
## 1908 75.90454 77.14146 75.03127 76.46234 76.23834
## 1917 76.16829 76.45610 75.98610 76.20049 76.26707
## 1937 76.68927 75.18705 75.30878 74.51390 75.56531
## 1938 76.39461 76.12017 76.67556 75.32866 76.24282
## 1957 76.49478 76.49359 75.87249 74.04390 75.71165
## 1958 74.86098 76.20049 76.24859 74.42202 75.45520
## 1981 76.71463 77.14561 76.41220 75.32456 76.44672
## 1990 76.46098 75.49641 76.41220 75.42927 76.25440
## 2009 76.35410 76.86363 76.54168 75.70507 76.38412
## 2028 76.12756 76.18071 76.02678 73.99512 75.76475
## 2030 70.90000 73.72061 73.21951 73.18205 72.54432
## 2047 76.54168 76.42812 76.24634 74.99037 75.99703
## 2078 70.80632 70.31463 71.67061 66.96341 69.93202
## 2084 73.25478 73.96585 74.79934 72.03176 73.58303
## 2091 77.03695 77.88537 78.44920 75.48824 77.10734
## 2099 76.65207 76.18566 75.70507 74.65383 75.86473
## 2103 74.51920 74.91446 77.51451 74.68783 75.32303
## 2117 83.83171 83.42195 83.83171 83.98049 83.70927
## 2125 76.10976 76.42812 74.58502 77.12683 76.24507
## 2134 76.97561 76.67695 76.92439 75.07354 76.21254
## 2154 76.33751 76.41220 76.42812 75.84751 76.19765
## 2155 76.52439 76.40763 77.00000 75.34239 76.28040
newdata=complete(mi.mydata, action=5)
aggr(newdata[,3:5], prop = T, numbers = T) #plot missing map
par(ask=FALSE)
plot(newdata$LifeExp~newdata$Yr, notch=TRUE, xlab="2000 Base Year, then 2006-2014", ylab="Life Expectancy", main="Life Expectancy, US = Red Point, Regression=Blue Line")
points(seq(1:10), newdata$LifeExp[newdata$Ctry=="USA"], pch=9, col="red")
lines(seq(1:10), newdata$LifeExp[newdata$Ctry=="USA"])
abline(lm(newdata$LifeExp~as.numeric(newdata$Yr)), col="blue")
par(ask=FALSE)
plot(newdata$Cost~newdata$Yr, notch=TRUE, xlab="2000 Base Year, then 2006-2014", ylab="Expenditures", main="Expenditures, US = Red Point, Regression=Blue Line")
points(seq(1:10), newdata$Cost[newdata$Ctry=="USA"], pch=9, col="red")
lines(seq(1:10), newdata$Cost[newdata$Ctry=="USA"])
abline(lm(newdata$Cost~as.numeric(newdata$Yr)), col="blue")
par(ask=FALSE)
plot(newdata$Age65~newdata$Yr, notch=TRUE, xlab="2000 Base Year, then 2006-2014", ylab="%Age>65", main="%>65 Yrs, US = Red Point, Regression=Blue Line")
points(seq(1:10), newdata$Age65[newdata$Ctry=="USA"], pch=9, col="red")
lines(seq(1:10), newdata$Age65[newdata$Ctry=="USA"])
abline(lm(newdata$Age65~as.numeric(newdata$Yr)), col="blue")
##Correlation Analysis
cor(newdata[,3:5]) #run correlations
## LifeExp Age65 Cost
## LifeExp 1.0000000 0.6781462 0.5298468
## Age65 0.6781462 1.0000000 0.6323416
## Cost 0.5298468 0.6323416 1.0000000
gr <- colorRampPalette(c("#B52127", "white", "#2171B5")) #set a color gradient
cor.plot(newdata[,3:5], number=TRUE,stars=TRUE, upper=FALSE,gr=gr) #plot the correlations
##95% CI for LifeExp / Cost
USdata=subset(newdata,newdata$Ctry=="USA")
ratio=USdata$LifeExp/USdata$Cost
plot(ratio~USdata$Yr, main="Plot of Life Expectancy / Year, 2000: 2016-2014")
t.test(ratio)
##
## One Sample t-test
##
## data: ratio
## t = 14.795, df = 9, p-value = 1.271e-07
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
## 0.008635158 0.011752385
## sample estimates:
## mean of x
## 0.01019377
\(H_o: \mu1=\mu2\)
\(H_a: \mu1 \ne mu2\)
\(\alpha=.05\)
Assume both are normally distributed (which does not hold, by the way).
Use a two-sample t-test
USA=newdata$LifeExp[newdata$Ctry=="USA"]/newdata$Cost[newdata$Ctry=="USA"]
SWE=newdata$LifeExp[newdata$Ctry=="SWE"]/newdata$Cost[newdata$Ctry=="SWE"]
#Assume normal distributions of both. This assumption does not actually hold, but just assume it.
t.test(USA, SWE)
##
## Welch Two Sample t-test
##
## data: USA and SWE
## t = -3.1652, df = 10.775, p-value = 0.009215
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -0.012297791 -0.002194529
## sample estimates:
## mean of x mean of y
## 0.01019377 0.01743993
#p=.009215 < .05, Reject the null. The means are different.
#In fact, .01019377 is the mean of USA, and .01743993 is the mean of SWE.
#This indicates that Sweden's life expectancy for per capita dollars spent is higher than the US. Why would this be?
#NOTE: we could have run a matched test based on year.
t.test(USA, SWE, paired=TRUE) #p=.001 < alpha implies REJECT
##
## Paired t-test
##
## data: USA and SWE
## t = -4.7763, df = 9, p-value = 0.001006
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -0.010678073 -0.003814247
## sample estimates:
## mean of the differences
## -0.00724616
#Also, normality really doesn't hold here. A better, non-parametric test would be the Wilcoxon Rank Sum Test (or Mann-Whitney).
wilcox.test(USA,SWE)
##
## Wilcoxon rank sum test
##
## data: USA and SWE
## W = 4, p-value = 0.0001299
## alternative hypothesis: true location shift is not equal to 0
#Here, the p-value is also tiny, .00129. Reject the null. Note that it is nearly the same as the t-test in this case.
wilcox.test(USA,SWE, paired=TRUE) #p=.001953 < alpha
##
## Wilcoxon signed rank test
##
## data: USA and SWE
## V = 0, p-value = 0.001953
## alternative hypothesis: true location shift is not equal to 0
par(mfrow=c(1,2))
plot(LifeExp~Cost, data=newdata) #Needs Power Function
plot(LifeExp~Age65, data=newdata) #Needs Power Function
mylm=lm(LifeExp~Cost+Age65, data=newdata)
anova(mylm)
## Analysis of Variance Table
##
## Response: LifeExp
## Df Sum Sq Mean Sq F value Pr(>F)
## Cost 1 48541 48541 1162.96 < 2.2e-16 ***
## Age65 1 33916 33916 812.56 < 2.2e-16 ***
## Residuals 2167 90449 42
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
summary(mylm)
##
## Call:
## lm(formula = LifeExp ~ Cost + Age65, data = newdata)
##
## Residuals:
## Min 1Q Median 3Q Max
## -25.1272 -3.6552 0.6376 4.5629 14.1724
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 6.125e+01 2.677e-01 228.844 <2e-16 ***
## Cost 9.481e-04 1.130e-04 8.393 <2e-16 ***
## Age65 1.019e+00 3.575e-02 28.505 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 6.461 on 2167 degrees of freedom
## Multiple R-squared: 0.4769, Adjusted R-squared: 0.4764
## F-statistic: 987.8 on 2 and 2167 DF, p-value: < 2.2e-16
plot(mylm) #these plots are horrific...assumptions do not hold.
#Model is statistically significant, p<.05. Effect size is good, R^2=.4769. All variables in model
#Life Expectancy =61.25+.0009481 x Cost + 1.019 x Age%>65
mylm2=lm(log(LifeExp)~log(Cost), data=newdata)
anova(mylm2)
## Analysis of Variance Table
##
## Response: log(LifeExp)
## Df Sum Sq Mean Sq F value Pr(>F)
## log(Cost) 1 23.362 23.3622 2889.1 < 2.2e-16 ***
## Residuals 2168 17.531 0.0081
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
summary(mylm2)
##
## Call:
## lm(formula = log(LifeExp) ~ log(Cost), data = newdata)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.41871 -0.02816 0.01620 0.05701 0.22074
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 3.883220 0.006971 557.03 <2e-16 ***
## log(Cost) 0.062424 0.001161 53.75 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.08992 on 2168 degrees of freedom
## Multiple R-squared: 0.5713, Adjusted R-squared: 0.5711
## F-statistic: 2889 on 1 and 2168 DF, p-value: < 2.2e-16
plot(mylm2) #Still ugly...Should do some tranformations
#Again, statistically significant... p<.05... R^2=.5713
#Model: Log(LifeExp)=3.883220 + .062424 x Log(Cost)
library(car)
## Warning: package 'car' was built under R version 3.3.3
##
## Attaching package: 'car'
## The following object is masked from 'package:psych':
##
## logit
attach(newdata)
myt=powerTransform(cbind(LifeExp,Cost,Age65)~1)
myt
## Estimated transformation parameters
## LifeExp Cost Age65
## 4.92190768 0.05880928 0.23486885
newLife=LifeExp^myt$lambda[1]
newCost=Cost^myt$lambda[2]
newAge=Age65^myt$lambda[3]
kdepairs(cbind(newLife,newCost,newAge))
newlm=lm(newLife~newCost+newAge)
anova(newlm)
## Analysis of Variance Table
##
## Response: newLife
## Df Sum Sq Mean Sq F value Pr(>F)
## newCost 1 7.1400e+20 7.1400e+20 5723.49 < 2.2e-16 ***
## newAge 1 4.2081e+19 4.2081e+19 337.33 < 2.2e-16 ***
## Residuals 2167 2.7033e+20 1.2475e+17
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
summary(newlm)
##
## Call:
## lm(formula = newLife ~ newCost + newAge)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.121e+09 -2.165e+08 2.473e+07 2.295e+08 9.726e+08
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -4.274e+09 7.938e+07 -53.83 <2e-16 ***
## newCost 3.083e+09 8.164e+07 37.76 <2e-16 ***
## newAge 8.476e+08 4.615e+07 18.37 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 353200000 on 2167 degrees of freedom
## Multiple R-squared: 0.7366, Adjusted R-squared: 0.7364
## F-statistic: 3030 on 2 and 2167 DF, p-value: < 2.2e-16
plot(newlm)