# Load libraries for data manipulation, clustering, and plotting
library(tidyverse)
## Warning: package 'tidyverse' was built under R version 3.6.1
## -- Attaching packages --------------------------------------- tidyverse 1.2.1 --
## v ggplot2 3.2.0 v purrr 0.3.2
## v tibble 2.1.1 v dplyr 0.8.0.1
## v tidyr 0.8.3 v stringr 1.4.0
## v readr 1.3.1 v forcats 0.4.0
## Warning: package 'readr' was built under R version 3.6.1
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
library(dplyr)
library(cluster)
## Warning: package 'cluster' was built under R version 3.6.1
library(factoextra)
## Warning: package 'factoextra' was built under R version 3.6.1
## Welcome! Related Books: `Practical Guide To Cluster Analysis in R` at https://goo.gl/13EFCZ
library(car)
## Warning: package 'car' was built under R version 3.6.1
## Loading required package: carData
##
## Attaching package: 'car'
## The following object is masked from 'package:dplyr':
##
## recode
## The following object is masked from 'package:purrr':
##
## some
library(Rcmdr)
## Warning: package 'Rcmdr' was built under R version 3.6.1
## Loading required package: splines
## Loading required package: RcmdrMisc
## Loading required package: sandwich
## Loading required package: effects
## Registered S3 methods overwritten by 'lme4':
## method from
## cooks.distance.influence.merMod car
## influence.merMod car
## dfbeta.influence.merMod car
## dfbetas.influence.merMod car
## lattice theme set by effectsTheme()
## See ?effectsTheme for details.
## The Commander GUI is launched only in interactive sessions
##
## Attaching package: 'Rcmdr'
## The following object is masked from 'package:car':
##
## Confint
## The following object is masked from 'package:base':
##
## errorCondition
library(ggplot2)
library(QuantPsyc)
## Warning: package 'QuantPsyc' was built under R version 3.6.1
## Loading required package: boot
##
## Attaching package: 'boot'
## The following object is masked from 'package:car':
##
## logit
## Loading required package: MASS
##
## Attaching package: 'MASS'
## The following object is masked from 'package:dplyr':
##
## select
##
## Attaching package: 'QuantPsyc'
## The following object is masked from 'package:base':
##
## norm
# Read the NCHS leading-causes-of-death CSV (first row holds the column names)
nchsdf <- read.csv(
  "C:/Users/Owner/Desktop/Lenin Files/Data Sciences/Assignments/DSC520 Final Project/NCHS_Leading_Causes_of_Death__United_States.csv",
  header = TRUE
)
# Keep the four analysis columns and discard the "All causes" total rows
nchsdf1 <- nchsdf[
  which(nchsdf$Cause_Name != "All causes"),
  c("Year", "Cause_Name", "State", "Deaths"),
  drop = FALSE
]
# Discard the rows whose State is "United States" (national totals)
nchsdfmod <- nchsdf1[which(nchsdf1$State != "United States"), , drop = FALSE]
# Preview the first rows of the filtered dataset
head(nchsdfmod)
## Year Cause_Name State Deaths
## 1 2012 Kidney disease Vermont 21
## 2 2016 Kidney disease Vermont 30
## 3 2013 Kidney disease Vermont 30
## 4 2000 Suicide District of Columbia 23
## 5 2014 Kidney disease Arizona 325
## 6 2009 Suicide District of Columbia 29
# Show the column types and factor level counts of the filtered data
str(nchsdfmod)
## 'data.frame': 9180 obs. of 4 variables:
## $ Year : int 2012 2016 2013 2000 2014 2009 2011 2015 2014 2013 ...
## $ Cause_Name: Factor w/ 11 levels "All causes","Alzheimer's disease",..: 8 8 8 10 8 10 8 8 8 8 ...
## $ State : Factor w/ 52 levels "Alabama","Alaska",..: 47 47 47 9 3 9 42 47 47 3 ...
## $ Deaths : int 21 30 30 23 325 29 49 39 37 374 ...
# Per-column summary statistics of the filtered data
summary(nchsdfmod)
## Year Cause_Name State
## Min. :1999 Alzheimer's disease : 918 Alabama : 180
## 1st Qu.:2003 Cancer : 918 Alaska : 180
## Median :2008 CLRD : 918 Arizona : 180
## Mean :2008 Diabetes : 918 Arkansas : 180
## 3rd Qu.:2012 Heart disease : 918 California: 180
## Max. :2016 Influenza and pneumonia: 918 Colorado : 180
## (Other) :3672 (Other) :8100
## Deaths
## Min. : 21
## 1st Qu.: 526
## Median : 1402
## Mean : 3725
## 3rd Qu.: 3228
## Max. :71930
##
The dataset covers the 10 leading causes of death in the United States: heart disease, stroke, cancer, diabetes, influenza/pneumonia, suicide, Alzheimer's disease, CLRD (chronic lower respiratory diseases), unintentional injuries, and kidney disease.
# Total deaths per cause over all states and years.
# aggregate() on a bare vector names the summed column "x".
nchshist_agr <- aggregate(
  x = nchsdfmod[["Deaths"]],
  by = list(Disease = nchsdfmod[["Cause_Name"]]),
  FUN = sum
)
# One point per cause: total deaths on the x-axis, cause on the y-axis.
# NOTE(review): every colour group holds a single aggregated point, so the lm
# smoother presumably has nothing to fit - confirm whether the layer is needed.
nchshist_ind <- ggplot(
  nchshist_agr,
  aes(x = x, y = Disease, colour = Disease)
) +
  geom_point() +
  geom_smooth(method = "lm", aes(fill = Disease)) +
  labs(x = "No of.Deaths", y = "Disease", colour = "Disease")
nchshist_ind

The scatter plot of total deaths by cause indicates that the leading cause of death in the United States since 1999 is heart disease (27 million+ deaths), followed by cancer (20 million+ deaths). These two causes are far ahead of the third cause, stroke, which accounts for 5 million+ deaths. Suicide, at fewer than 2.5 million deaths, is the cause with the fewest deaths.
# Total deaths per state over all causes and years.
# aggregate() on a bare vector names the summed column "x".
nchshist_agrs <- aggregate(
  x = nchsdfmod[["Deaths"]],
  by = list(State = nchsdfmod[["State"]]),
  FUN = sum
)
# One point per state: total deaths on the x-axis, state on the y-axis.
# NOTE(review): fill = State splits the smoother into one group per state, each
# with a single point, so the lm layer presumably draws nothing - confirm.
nchshist_inds <- ggplot(nchshist_agrs, aes(x = x, y = State)) +
  geom_point() +
  geom_smooth(method = "lm", aes(fill = State)) +
  labs(x = "No of Deaths", y = "State")
nchshist_inds

The largest number of deaths occurred in California, followed by Florida; very few deaths occurred in Alaska and Wyoming.
# Total deaths per year over all states and causes, to see the trend over time.
# aggregate() on a bare vector names the summed column "x".
nchshist_agry <- aggregate(
  x = nchsdfmod[["Deaths"]],
  by = list(Year = nchsdfmod[["Year"]]),
  FUN = sum
)
# Yearly totals with a straight-line (lm) trend overlaid.
# NOTE(review): fill = Year is a continuous mapping on a single smooth ribbon
# and labs(colour = ) labels an aesthetic that no layer maps - presumably
# neither has a visible effect; confirm.
nchshist_indy <- ggplot(nchshist_agry, aes(x = Year, y = x)) +
  geom_point() +
  geom_smooth(method = "lm", aes(fill = Year)) +
  labs(x = "Year", y = "No of.Deaths", colour = "Year")
nchshist_indy

The total number of deaths gradually decreased from 1999 to 2008, reaching its lowest point in 2008, and then increased from 2008 to 2016 in a roughly linear pattern. However, there is a big jump in the number of deaths from 2014 (3.855 million+) to 2015 (4.5 million+).
# Aggregate by state and cause (collapsing years) to see which cause produces
# the most deaths in each state.
# The data appears to hold one row per State/Cause/Year (9180 rows = 51 x 10 x
# 18), so grouping by Year as well made max() return the input unchanged.
# Grouping by State and Cause only gives the peak annual death count for each
# state/cause pair; the result column is still named "x".
nchshist_max <- aggregate(
  nchsdfmod$Deaths,
  by = list(State = nchsdfmod$State, Cause = nchsdfmod$Cause_Name),
  FUN = max
)
# One point per state/cause pair, coloured by cause.
# The lm smoother was removed: State is a categorical axis, so a linear fit of
# state position on deaths has no interpretation.
nchshist_maxy <- ggplot(nchshist_max, aes(x = x, y = State, colour = Cause)) +
  geom_point() +
  labs(x = "Deaths", y = "State", colour = "Cause")
nchshist_maxy

Most states have heart disease as the leading cause of death, followed by cancer, with the exception of Washington, where cancer is the leading cause of death.
# Develop regression models using different predictors
# Model 1: Deaths regressed on Cause_Name only
nchsdfMult1 <- lm(Deaths ~ Cause_Name, data = nchsdfmod)
# Print the coefficient table and fit statistics for model 1
summary(nchsdfMult1)
##
## Call:
## lm(formula = Deaths ~ Cause_Name, data = nchsdfmod)
##
## Residuals:
## Min 1Q Median 3Q Max
## -12046 -1252 -451 455 59321
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1496.09 192.75 7.762 9.27e-15 ***
## Cause_NameCancer 9663.53 272.59 35.451 < 2e-16 ***
## Cause_NameCLRD 1156.12 272.59 4.241 2.24e-05 ***
## Cause_NameDiabetes -62.13 272.59 -0.228 0.81972
## Cause_NameHeart disease 11113.04 272.59 40.768 < 2e-16 ***
## Cause_NameInfluenza and pneumonia -364.32 272.59 -1.337 0.18142
## Cause_NameKidney disease -615.94 272.59 -2.260 0.02387 *
## Cause_NameStroke 1314.52 272.59 4.822 1.44e-06 ***
## Cause_NameSuicide -788.20 272.59 -2.892 0.00384 **
## Cause_NameUnintentional injuries 876.33 272.59 3.215 0.00131 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 5840 on 9170 degrees of freedom
## Multiple R-squared: 0.3357, Adjusted R-squared: 0.335
## F-statistic: 514.9 on 9 and 9170 DF, p-value: < 2.2e-16
# Model 2: Deaths regressed on State and Cause_Name
nchsdfMult2 <- lm(Deaths ~ State + Cause_Name, data = nchsdfmod)
# Print the coefficient table and fit statistics for model 2
summary(nchsdfMult2)
##
## Call:
## lm(formula = Deaths ~ State + Cause_Name, data = nchsdfmod)
##
## Residuals:
## Min 1Q Median 3Q Max
## -14189 -1220 424 1273 44227
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1382.38 359.67 3.843 0.000122 ***
## StateAlaska -3350.66 468.96 -7.145 9.70e-13 ***
## StateArizona -91.40 468.96 -0.195 0.845476
## StateArkansas -1352.41 468.96 -2.884 0.003938 **
## StateCalifornia 15207.77 468.96 32.429 < 2e-16 ***
## StateColorado -1320.30 468.96 -2.815 0.004882 **
## StateConnecticut -1410.09 468.96 -3.007 0.002647 **
## StateDelaware -3039.79 468.96 -6.482 9.52e-11 ***
## StateDistrict of Columbia -3243.87 468.96 -6.917 4.92e-12 ***
## StateFlorida 9616.17 468.96 20.505 < 2e-16 ***
## StateGeorgia 1517.08 468.96 3.235 0.001221 **
## StateHawaii -2898.48 468.96 -6.181 6.66e-10 ***
## StateIdaho -2773.34 468.96 -5.914 3.46e-09 ***
## StateIllinois 4317.96 468.96 9.208 < 2e-16 ***
## StateIndiana 774.27 468.96 1.651 0.098764 .
## StateIowa -1421.54 468.96 -3.031 0.002442 **
## StateKansas -1742.09 468.96 -3.715 0.000205 ***
## StateKentucky -346.28 468.96 -0.738 0.460285
## StateLouisiana -426.76 468.96 -0.910 0.362837
## StateMaine -2633.23 468.96 -5.615 2.02e-08 ***
## StateMaryland -365.05 468.96 -0.778 0.436338
## StateMassachusetts 420.19 468.96 0.896 0.370274
## StateMichigan 3273.25 468.96 6.980 3.16e-12 ***
## StateMinnesota -778.74 468.96 -1.661 0.096832 .
## StateMississippi -1339.82 468.96 -2.857 0.004286 **
## StateMissouri 769.35 468.96 1.641 0.100925
## StateMontana -2951.82 468.96 -6.294 3.23e-10 ***
## StateNebraska -2466.61 468.96 -5.260 1.48e-07 ***
## StateNevada -2146.41 468.96 -4.577 4.78e-06 ***
## StateNew Hampshire -2810.11 468.96 -5.992 2.15e-09 ***
## StateNew Jersey 1809.85 468.96 3.859 0.000114 ***
## StateNew Mexico -2458.39 468.96 -5.242 1.62e-07 ***
## StateNew York 8319.75 468.96 17.741 < 2e-16 ***
## StateNorth Carolina 2211.76 468.96 4.716 2.44e-06 ***
## StateNorth Dakota -3146.93 468.96 -6.710 2.06e-11 ***
## StateOhio 4798.40 468.96 10.232 < 2e-16 ***
## StateOklahoma -752.50 468.96 -1.605 0.108612
## StateOregon -1248.88 468.96 -2.663 0.007756 **
## StatePennsylvania 6142.75 468.96 13.099 < 2e-16 ***
## StateRhode Island -2868.52 468.96 -6.117 9.94e-10 ***
## StateSouth Carolina -574.99 468.96 -1.226 0.220189
## StateSouth Dakota -3048.23 468.96 -6.500 8.45e-11 ***
## StateTennessee 960.83 468.96 2.049 0.040504 *
## StateTexas 8738.95 468.96 18.635 < 2e-16 ***
## StateUtah -2605.06 468.96 -5.555 2.85e-08 ***
## StateVermont -3204.16 468.96 -6.832 8.88e-12 ***
## StateVirginia 834.14 468.96 1.779 0.075322 .
## StateWashington 111.60 468.96 0.238 0.811907
## StateWest Virginia -1970.78 468.96 -4.202 2.67e-05 ***
## StateWisconsin 44.01 468.96 0.094 0.925232
## StateWyoming -3281.68 468.96 -6.998 2.78e-12 ***
## Cause_NameCancer 9663.53 207.66 46.536 < 2e-16 ***
## Cause_NameCLRD 1156.12 207.66 5.567 2.66e-08 ***
## Cause_NameDiabetes -62.13 207.66 -0.299 0.764809
## Cause_NameHeart disease 11113.04 207.66 53.516 < 2e-16 ***
## Cause_NameInfluenza and pneumonia -364.32 207.66 -1.754 0.079395 .
## Cause_NameKidney disease -615.94 207.66 -2.966 0.003024 **
## Cause_NameStroke 1314.52 207.66 6.330 2.56e-10 ***
## Cause_NameSuicide -788.20 207.66 -3.796 0.000148 ***
## Cause_NameUnintentional injuries 876.33 207.66 4.220 2.47e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 4449 on 9120 degrees of freedom
## Multiple R-squared: 0.6166, Adjusted R-squared: 0.6141
## F-statistic: 248.6 on 59 and 9120 DF, p-value: < 2.2e-16
# Model 3: Deaths regressed on State, Year and Cause_Name
nchsdfMult3 <- lm(Deaths ~ State + Year + Cause_Name, data = nchsdfmod)
# Print the coefficient table and fit statistics for model 3
summary(nchsdfMult3)
##
## Call:
## lm(formula = Deaths ~ State + Year + Cause_Name, data = nchsdfmod)
##
## Residuals:
## Min 1Q Median 3Q Max
## -14132 -1218 431 1255 44292
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -13905.157 17971.033 -0.774 0.439096
## StateAlaska -3350.661 468.966 -7.145 9.71e-13
## StateArizona -91.400 468.966 -0.195 0.845478
## StateArkansas -1352.406 468.966 -2.884 0.003938
## StateCalifornia 15207.767 468.966 32.428 < 2e-16
## StateColorado -1320.300 468.966 -2.815 0.004883
## StateConnecticut -1410.089 468.966 -3.007 0.002647
## StateDelaware -3039.789 468.966 -6.482 9.53e-11
## StateDistrict of Columbia -3243.872 468.966 -6.917 4.92e-12
## StateFlorida 9616.172 468.966 20.505 < 2e-16
## StateGeorgia 1517.083 468.966 3.235 0.001221
## StateHawaii -2898.483 468.966 -6.181 6.66e-10
## StateIdaho -2773.344 468.966 -5.914 3.46e-09
## StateIllinois 4317.961 468.966 9.207 < 2e-16
## StateIndiana 774.272 468.966 1.651 0.098769
## StateIowa -1421.539 468.966 -3.031 0.002443
## StateKansas -1742.089 468.966 -3.715 0.000205
## StateKentucky -346.283 468.966 -0.738 0.460292
## StateLouisiana -426.761 468.966 -0.910 0.362844
## StateMaine -2633.233 468.966 -5.615 2.02e-08
## StateMaryland -365.050 468.966 -0.778 0.436345
## StateMassachusetts 420.189 468.966 0.896 0.370282
## StateMichigan 3273.250 468.966 6.980 3.16e-12
## StateMinnesota -778.744 468.966 -1.661 0.096837
## StateMississippi -1339.817 468.966 -2.857 0.004287
## StateMissouri 769.350 468.966 1.641 0.100931
## StateMontana -2951.817 468.966 -6.294 3.23e-10
## StateNebraska -2466.606 468.966 -5.260 1.48e-07
## StateNevada -2146.406 468.966 -4.577 4.78e-06
## StateNew Hampshire -2810.106 468.966 -5.992 2.15e-09
## StateNew Jersey 1809.850 468.966 3.859 0.000115
## StateNew Mexico -2458.389 468.966 -5.242 1.62e-07
## StateNew York 8319.750 468.966 17.741 < 2e-16
## StateNorth Carolina 2211.761 468.966 4.716 2.44e-06
## StateNorth Dakota -3146.928 468.966 -6.710 2.06e-11
## StateOhio 4798.400 468.966 10.232 < 2e-16
## StateOklahoma -752.500 468.966 -1.605 0.108618
## StateOregon -1248.883 468.966 -2.663 0.007757
## StatePennsylvania 6142.750 468.966 13.099 < 2e-16
## StateRhode Island -2868.522 468.966 -6.117 9.95e-10
## StateSouth Carolina -574.994 468.966 -1.226 0.220196
## StateSouth Dakota -3048.233 468.966 -6.500 8.46e-11
## StateTennessee 960.833 468.966 2.049 0.040507
## StateTexas 8738.950 468.966 18.635 < 2e-16
## StateUtah -2605.056 468.966 -5.555 2.86e-08
## StateVermont -3204.156 468.966 -6.832 8.89e-12
## StateVirginia 834.139 468.966 1.779 0.075326
## StateWashington 111.600 468.966 0.238 0.811909
## StateWest Virginia -1970.783 468.966 -4.202 2.67e-05
## StateWisconsin 44.011 468.966 0.094 0.925233
## StateWyoming -3281.683 468.966 -6.998 2.79e-12
## Year 7.615 8.950 0.851 0.394877
## Cause_NameCancer 9663.534 207.662 46.535 < 2e-16
## Cause_NameCLRD 1156.115 207.662 5.567 2.66e-08
## Cause_NameDiabetes -62.127 207.662 -0.299 0.764812
## Cause_NameHeart disease 11113.040 207.662 53.515 < 2e-16
## Cause_NameInfluenza and pneumonia -364.317 207.662 -1.754 0.079399
## Cause_NameKidney disease -615.939 207.662 -2.966 0.003024
## Cause_NameStroke 1314.519 207.662 6.330 2.57e-10
## Cause_NameSuicide -788.202 207.662 -3.796 0.000148
## Cause_NameUnintentional injuries 876.331 207.662 4.220 2.47e-05
##
## (Intercept)
## StateAlaska ***
## StateArizona
## StateArkansas **
## StateCalifornia ***
## StateColorado **
## StateConnecticut **
## StateDelaware ***
## StateDistrict of Columbia ***
## StateFlorida ***
## StateGeorgia **
## StateHawaii ***
## StateIdaho ***
## StateIllinois ***
## StateIndiana .
## StateIowa **
## StateKansas ***
## StateKentucky
## StateLouisiana
## StateMaine ***
## StateMaryland
## StateMassachusetts
## StateMichigan ***
## StateMinnesota .
## StateMississippi **
## StateMissouri
## StateMontana ***
## StateNebraska ***
## StateNevada ***
## StateNew Hampshire ***
## StateNew Jersey ***
## StateNew Mexico ***
## StateNew York ***
## StateNorth Carolina ***
## StateNorth Dakota ***
## StateOhio ***
## StateOklahoma
## StateOregon **
## StatePennsylvania ***
## StateRhode Island ***
## StateSouth Carolina
## StateSouth Dakota ***
## StateTennessee *
## StateTexas ***
## StateUtah ***
## StateVermont ***
## StateVirginia .
## StateWashington
## StateWest Virginia ***
## StateWisconsin
## StateWyoming ***
## Year
## Cause_NameCancer ***
## Cause_NameCLRD ***
## Cause_NameDiabetes
## Cause_NameHeart disease ***
## Cause_NameInfluenza and pneumonia .
## Cause_NameKidney disease **
## Cause_NameStroke ***
## Cause_NameSuicide ***
## Cause_NameUnintentional injuries ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 4449 on 9119 degrees of freedom
## Multiple R-squared: 0.6166, Adjusted R-squared: 0.6141
## F-statistic: 244.4 on 60 and 9119 DF, p-value: < 2.2e-16
# Casewise diagnostics from the cause-only model (nchsdfMult1), stored as new
# columns: raw residuals, standardized residuals, Cook's distance and
# covariance ratio
nchsdfmod[["residuals"]] <- resid(nchsdfMult1)
nchsdfmod[["standardized.residuals"]] <- rstandard(nchsdfMult1)
nchsdfmod[["cooks.distance"]] <- cooks.distance(nchsdfMult1)
nchsdfmod[["covariance.ratios"]] <- covratio(nchsdfMult1)
# Export the data with the four diagnostic columns as a tab-separated file
# (written before large.residual is added, so that flag is not in the file)
write.table(
  x = nchsdfmod,
  file = "Data with Diagnostics",
  sep = "\t",
  row.names = FALSE
)
# Flag the cases whose standardized residual lies outside [-2, 2]
nchsdfmod[["large.residual"]] <- abs(nchsdfmod[["standardized.residuals"]]) > 2
# Count the flagged cases
sum(nchsdfmod[["large.residual"]])
## [1] 295
# Show Cook's distance and the covariance ratio for the flagged cases only
nchsdfmod[
  nchsdfmod[["large.residual"]],
  c("cooks.distance", "covariance.ratios")
]
## cooks.distance covariance.ratios
## 5126 0.0005893342 0.9962923
## 5176 0.0006340175 0.9958470
## 5191 0.0004806790 0.9973760
## 5254 0.0005301365 0.9968826
## 6065 0.0004416524 0.9977655
## 6362 0.0004786415 0.9973963
## 6635 0.0005223386 0.9969604
## 7001 0.0006337906 0.9958492
## 7183 0.0007088573 0.9951014
## 7231 0.0007025835 0.9951639
## 7314 0.0007470852 0.9947208
## 7359 0.0007566022 0.9946261
## 7370 0.0007348129 0.9948430
## 7526 0.0004535177 0.9976471
## 7538 0.0004533653 0.9976486
## 7558 0.0074844742 0.9296513
## 7567 0.0004440426 0.9977416
## 7574 0.0066499430 0.9374976
## 7584 0.0075198058 0.9293204
## 7587 0.0076740254 0.9278773
## 7594 0.0071469225 0.9328178
## 7606 0.0075852619 0.9287077
## 7612 0.0034963206 0.9676895
## 7613 0.0004477449 0.9977047
## 7619 0.0035082864 0.9675733
## 7624 0.0069373369 0.9347888
## 7628 0.0018758714 0.9835411
## 7641 0.0018328819 0.9839648
## 7643 0.0026985263 0.9754648
## 7652 0.0025025850 0.9773830
## 7657 0.0004510066 0.9976721
## 7660 0.0034503487 0.9681360
## 7661 0.0028898162 0.9735954
## 7664 0.0004590208 0.9975921
## 7670 0.0034578156 0.9680635
## 7678 0.0069260163 0.9348954
## 7692 0.0032576470 0.9700097
## 7695 0.0004534415 0.9976478
## 7699 0.0018795927 0.9835044
## 7701 0.0072798769 0.9315694
## 7703 0.0065654491 0.9382954
## 7716 0.0032884574 0.9697100
## 7719 0.0024537785 0.9778613
## 7722 0.0027885788 0.9745843
## 7723 0.0025653551 0.9767681
## 7726 0.0069993143 0.9342056
## 7738 0.0004429123 0.9977529
## 7742 0.0004606322 0.9975761
## 7772 0.0031913087 0.9706555
## 7774 0.0019336511 0.9829719
## 7798 0.0065666089 0.9382844
## 7800 0.0023772809 0.9786114
## 7835 0.0064333311 0.9395440
## 7836 0.0060642456 0.9430400
## 7837 0.0030816964 0.9717234
## 7841 0.0004529083 0.9976531
## 7842 0.0004369083 0.9978128
## 7843 0.0071198724 0.9330720
## 7849 0.0019563753 0.9827481
## 7861 0.0004624771 0.9975576
## 7867 0.0023304089 0.9790713
## 7870 0.0018915567 0.9833866
## 7881 0.0029818100 0.9726975
## 7892 0.0067824874 0.9362475
## 7896 0.0021957777 0.9803933
## 7898 0.0027157463 0.9752963
## 7912 0.0018856476 0.9834448
## 7925 0.0005602094 0.9965827
## 7930 0.0061551440 0.9421779
## 7931 0.0026300965 0.9761343
## 7940 0.0018523888 0.9837725
## 7942 0.0019105904 0.9831990
## 7943 0.0059130586 0.9444754
## 7950 0.0009615856 0.9925876
## 7971 0.0029401428 0.9731041
## 7974 0.0004924771 0.9972583
## 7976 0.0020907604 0.9814257
## 7986 0.0004595576 0.9975868
## 7996 0.0028372574 0.9741087
## 8003 0.0018732377 0.9835671
## 8004 0.0069500071 0.9346696
## 8011 0.0009844665 0.9923603
## 8022 0.0005879856 0.9963057
## 8028 0.0031084697 0.9714625
## 8052 0.0060770702 0.9429183
## 8053 0.0005697351 0.9964877
## 8060 0.0057926117 0.9456204
## 8064 0.0004641716 0.9975407
## 8068 0.0028148116 0.9743280
## 8072 0.0009839053 0.9923659
## 8075 0.0004933510 0.9972496
## 8077 0.0026786740 0.9756589
## 8078 0.0018940475 0.9833620
## 8081 0.0019297195 0.9830106
## 8082 0.0026678499 0.9757648
## 8099 0.0009638060 0.9925656
## 8100 0.0004581779 0.9976005
## 8104 0.0024372325 0.9780235
## 8107 0.0027776302 0.9746913
## 8109 0.0018628759 0.9836692
## 8110 0.0004787572 0.9973952
## 8119 0.0022396724 0.9799621
## 8122 0.0005445252 0.9967391
## 8124 0.0030146267 0.9723774
## 8127 0.0005688813 0.9964962
## 8131 0.0018305848 0.9839875
## 8144 0.0020789142 0.9815422
## 8149 0.0004653855 0.9975286
## 8167 0.0019051202 0.9832529
## 8168 0.0006590794 0.9955972
## 8178 0.0059624133 0.9440066
## 8186 0.0027395745 0.9750633
## 8197 0.0005283243 0.9969007
## 8201 0.0006487399 0.9957003
## 8213 0.0010084118 0.9921225
## 8218 0.0005749569 0.9964356
## 8220 0.0028130072 0.9743456
## 8221 0.0011245797 0.9909694
## 8224 0.0018099753 0.9841906
## 8227 0.0074055883 0.9303904
## 8228 0.0012023150 0.9901985
## 8251 0.0059138841 0.9444676
## 8256 0.0006119112 0.9960673
## 8260 0.0010769766 0.9914418
## 8263 0.0019910297 0.9824069
## 8264 0.0006521164 0.9956666
## 8267 0.0031694130 0.9708688
## 8268 0.0012087754 0.9901344
## 8273 0.0010068216 0.9921383
## 8276 0.0004904147 0.9972788
## 8281 0.0029789763 0.9727251
## 8282 0.0027728246 0.9747383
## 8285 0.0005335185 0.9968489
## 8293 0.0017126739 0.9851504
## 8300 0.0011579420 0.9906385
## 8308 0.0026329429 0.9761064
## 8330 0.0058537753 0.9450388
## 8332 0.0020131242 0.9821895
## 8355 0.0021049358 0.9812863
## 8360 0.0010250697 0.9919571
## 8361 0.0032452009 0.9701309
## 8362 0.0025534992 0.9768842
## 8366 0.0004881985 0.9973010
## 8368 0.0005430655 0.9967536
## 8373 0.0056459465 0.9470163
## 8374 0.0016606539 0.9856639
## 8386 0.0010052327 0.9921540
## 8391 0.0006364946 0.9958223
## 8412 0.0025059872 0.9773496
## 8415 0.0077107436 0.9275341
## 8418 0.0004859873 0.9973230
## 8419 0.0020761416 0.9815695
## 8427 0.0020816081 0.9815157
## 8433 0.0010146710 0.9920603
## 8435 0.0031619639 0.9709413
## 8436 0.0056215063 0.9472491
## 8437 0.0006256183 0.9959307
## 8438 0.0005526972 0.9966576
## 8442 0.0007012069 0.9951776
## 8454 0.0004596343 0.9975860
## 8462 0.0011953769 0.9902672
## 8465 0.0016504624 0.9857645
## 8476 0.0006628966 0.9955592
## 8479 0.0031130598 0.9714178
## 8480 0.0005372442 0.9968117
## 8485 0.0010203780 0.9920036
## 8489 0.0011490705 0.9907264
## 8495 0.0024970389 0.9774373
## 8502 0.0006205272 0.9959814
## 8503 0.0006110701 0.9960756
## 8508 0.0032157116 0.9704179
## 8509 0.0006517047 0.9956707
## 8514 0.0021331857 0.9810085
## 8524 0.0005346762 0.9968373
## 8534 0.0004614004 0.9975684
## 8557 0.0005998006 0.9961880
## 8558 0.0005280359 0.9969035
## 8569 0.0006264240 0.9959226
## 8570 0.0010385173 0.9918235
## 8572 0.0023888094 0.9784983
## 8592 0.0025136955 0.9772741
## 8598 0.0007650386 0.9945421
## 8599 0.0021264966 0.9810743
## 8605 0.0006129739 0.9960567
## 8606 0.0005484991 0.9966995
## 8610 0.0021701989 0.9806447
## 8611 0.0016939191 0.9853355
## 8615 0.0005881152 0.9963045
## 8621 0.0015958278 0.9863041
## 8647 0.0032556050 0.9700296
## 8648 0.0012407068 0.9898179
## 8666 0.0005517723 0.9966668
## 8674 0.0005806762 0.9963786
## 8682 0.0010903466 0.9913091
## 8690 0.0005580513 0.9966042
## 8691 0.0010677791 0.9915330
## 8692 0.0014927713 0.9873227
## 8696 0.0006336991 0.9958501
## 8697 0.0015690679 0.9865685
## 8706 0.0022392501 0.9799663
## 8716 0.0006109380 0.9960770
## 8736 0.0004644800 0.9975376
## 8738 0.0005870316 0.9963153
## 8741 0.0005665793 0.9965192
## 8752 0.0087426210 0.9179337
## 8755 0.0004752405 0.9974303
## 8776 0.0011170933 0.9910437
## 8778 0.0036287297 0.9664045
## 8779 0.0033543418 0.9690691
## 8782 0.0022145119 0.9802093
## 8787 0.0005158556 0.9970250
## 8791 0.0004673173 0.9975093
## 8797 0.0013129400 0.9891023
## 8813 0.0013714027 0.9885234
## 8814 0.0011180503 0.9910342
## 8817 0.0006078464 0.9961078
## 8827 0.0005900698 0.9962850
## 8829 0.0087576831 0.9177942
## 8856 0.0011258408 0.9909569
## 8864 0.0005936389 0.9962494
## 8869 0.0021926758 0.9804238
## 8871 0.0006199034 0.9959876
## 8883 0.0006406986 0.9957804
## 8903 0.0006930819 0.9952585
## 8904 0.0011556913 0.9906608
## 8931 0.0005959078 0.9962268
## 8932 0.0006635416 0.9955528
## 8936 0.0038211116 0.9645402
## 8939 0.0006285752 0.9959012
## 8940 0.0087854984 0.9175367
## 8944 0.0011741323 0.9904779
## 8955 0.0006152794 0.9960337
## 8956 0.0014297855 0.9879456
## 8966 0.0004610930 0.9975715
## 8971 0.0006459177 0.9957284
## 8973 0.0006120883 0.9960655
## 8993 0.0038483661 0.9642763
## 9017 0.0006339694 0.9958474
## 9028 0.0040411770 0.9624115
## 9039 0.0007470341 0.9947213
## 9053 0.0024282317 0.9781117
## 9055 0.0004407864 0.9977741
## 9056 0.0007806560 0.9943867
## 9084 0.0007864653 0.9943289
## 9089 0.0101295934 0.9051705
## 9094 0.0043147510 0.9597712
## 9097 0.0042938577 0.9599726
## 9099 0.0024359961 0.9780356
## 9120 0.0017824499 0.9844621
## 9129 0.0101054789 0.9053910
## 9130 0.0043628326 0.9593078
## 9134 0.0008602698 0.9935947
## 9144 0.0099039806 0.9072357
## 9146 0.0018169074 0.9841223
## 9157 0.0008681591 0.9935162
## 9159 0.0046269308 0.9567662
## 9174 0.0045883119 0.9571375
## 9177 0.0045558907 0.9574493
## 9180 0.0009477100 0.9927255
## 9182 0.0099724696 0.9066083
## 9194 0.0027235837 0.9752197
## 9195 0.0005016513 0.9971667
## 9200 0.0004776618 0.9974061
## 9212 0.0020758156 0.9815727
## 9216 0.0048249384 0.9548647
## 9217 0.0049628606 0.9535421
## 9225 0.0010616486 0.9915939
## 9231 0.0010437699 0.9917714
## 9237 0.0010814435 0.9913974
## 9241 0.0050884226 0.9523396
## 9242 0.0022044202 0.9803084
## 9246 0.0030449580 0.9720816
## 9248 0.0112638482 0.8948518
## 9253 0.0005748277 0.9964369
## 9255 0.0029952081 0.9725668
## 9260 0.0023039683 0.9793308
## 9262 0.0011287834 0.9909277
## 9267 0.0029602571 0.9729078
## 9269 0.0011842686 0.9903774
## 9277 0.0006318533 0.9958685
## 9278 0.0030382484 0.9721470
## 9281 0.0058271114 0.9452923
## 9283 0.0025272572 0.9771412
## 9291 0.0012604466 0.9896223
## 9296 0.0006533500 0.9956543
## 9301 0.0013818909 0.9884196
## 9303 0.0012948510 0.9892814
## 9306 0.0062146592 0.9416138
## 9307 0.0027101550 0.9753510
## 9315 0.0062064815 0.9416913
## 9317 0.0013560746 0.9886752
## 9318 0.0006931761 0.9952576
## 9330 0.0007282781 0.9949081
## 9332 0.0064429474 0.9394530
## 9347 0.0068848321 0.9352832
The R-squared statistic for the first model is 0.3357, which means that cause of death accounts for about 33.6% of the variation in the number of deaths.
The R-squared value of 0.6166 in both the second and third models indicates that adding Year did not explain any additional variability. Adding State accounts for around 28% of the variability in the number of deaths (0.6166 - 0.3357 = 0.2809).
The difference between R-squared and adjusted R-squared is 0.0025 (0.6166 - 0.6141), i.e. about 0.25 percentage points. This shrinkage means that if the model were derived from the population rather than a sample, it would account for approximately 0.25% less variance in the outcome, which suggests the model generalizes very well.
Since Year has no significant effect on the outcome, I used nchsdfMult1 and nchsdfMult2 for the analysis, in which the categorical variables Cause_Name and State are the predictor variables. The coefficient (beta value) for heart disease is 11113.04, the largest of any cause; because Alzheimer's disease is the reference category, this means the expected number of deaths from heart disease is about 11,113 higher than from Alzheimer's disease. The same holds for cancer, whose coefficient is 9663.53.
Diagnostic information is also stored in the table. With 9180 observations, we would expect about 95% of cases to have standardized residuals within about +/-2, which means about 459 cases (5%) would have standardized residuals outside these limits. Only 295 cases (3.2%) fall outside the limits, so the sample appears to conform to what we would expect of a fairly accurate model.
Based on the graph above, the relationship is non-linear. Both state and cause of death appear to be roughly equally important in predicting the number of deaths.