Heart_BP-Cholesterol

Author

Nupoor K

install.packages("car")
install.packages("DescTools")
install.packages("MASS")

Exercise 1: Heart BP Cholesterol

# Read the heart BP / cholesterol CSV into a data frame and preview the
# first six rows.
heartbpchol <- read.csv(
  file = "/Users/nupoor/Documents/MSDA/DA- Algo/Week 2 exercise/heartbpchol.csv"
)
head(x = heartbpchol, n = 6L)
  Cholesterol BP_Status
1         221   Optimal
2         188      High
3         292      High
4         319    Normal
5         205    Normal
6         247      High
# Show the column types and dimensions of the loaded data frame.
str(object = heartbpchol)
'data.frame':   541 obs. of  2 variables:
 $ Cholesterol: int  221 188 292 319 205 247 202 150 228 280 ...
 $ BP_Status  : chr  "Optimal" "High" "High" "Normal" ...
# Load required packages
library(DescTools) 
library(MASS)
library(car)
Loading required package: carData

Attaching package: 'car'
The following object is masked from 'package:DescTools':

    Recode
# ANOVA analysis
# BP_Status is read in as a character column. Encode it as a factor *before*
# fitting so that the model frame stored inside the aov object carries a
# factor. Functions that later reuse that model frame (such as the Levene
# test on this fit) then do not have to coerce the grouping variable and
# emit a "group coerced to factor" warning. The fitted model itself is
# unchanged, because aov() would apply the same factor coding internally.
heartbpchol$BP_Status <- as.factor(heartbpchol$BP_Status)

# One-way ANOVA: does mean Cholesterol differ across BP_Status groups?
aov.heartbpchol <- aov(Cholesterol ~ BP_Status, data = heartbpchol)
summary(aov.heartbpchol)
             Df  Sum Sq Mean Sq F value  Pr(>F)   
BP_Status     2   25211   12605   6.671 0.00137 **
Residuals   538 1016631    1890                   
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Make sure the grouping column is stored as a factor.
heartbpchol[["BP_Status"]] <- as.factor(heartbpchol[["BP_Status"]])

# Same one-way ANOVA, obtained from the ANOVA table of a linear model fit.
anova(
  object = lm(formula = Cholesterol ~ BP_Status, data = heartbpchol)
)
Analysis of Variance Table

Response: Cholesterol
           Df  Sum Sq Mean Sq F value   Pr(>F)   
BP_Status   2   25211 12605.4  6.6708 0.001375 **
Residuals 538 1016631  1889.6                    
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Re-print the aov summary for side-by-side comparison with the lm-based table.
summary(object = aov.heartbpchol)
             Df  Sum Sq Mean Sq F value  Pr(>F)   
BP_Status     2   25211   12605   6.671 0.00137 **
Residuals   538 1016631    1890                   
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Number of observations in each BP_Status group.
table(heartbpchol[["BP_Status"]])

   High  Normal Optimal 
    229     245      67 
# Check the equal-variance assumption of the ANOVA with Levene's test,
# applied directly to the fitted aov object.
LeveneTest(y = aov.heartbpchol)
Warning in LeveneTest.default(y = y, group = group, ...): group coerced to
factor.
Levene's Test for Homogeneity of Variance (center = median)
       Df F value Pr(>F)
group   2  0.1825 0.8332
      538               
# Fit Cholesterol on BP_Status as a linear model and keep the fit for later
# use, then print its ANOVA table.
lm.res_heartbpchol <- lm(
  formula = Cholesterol ~ BP_Status,
  data = heartbpchol
)
anova(object = lm.res_heartbpchol)
Analysis of Variance Table

Response: Cholesterol
           Df  Sum Sq Mean Sq F value   Pr(>F)   
BP_Status   2   25211 12605.4  6.6708 0.001375 **
Residuals 538 1016631  1889.6                    
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# R-squared of the linear model: share of Cholesterol variance explained
# by BP_Status.
summary(lm.res_heartbpchol)[["r.squared"]]
[1] 0.02419833
# Post-hoc pairwise comparisons between BP_Status groups.
# Scheffe's method at the default 95% family-wise confidence level.
ScheffeTest(x = aov.heartbpchol, conf.level = 0.95)

  Posthoc multiple comparisons of means: Scheffe Test 
    95% family-wise confidence level

$BP_Status
                     diff    lwr.ci    upr.ci   pval    
Normal-High    -11.543481 -21.35092 -1.736038 0.0159 *  
Optimal-High   -18.646679 -33.46702 -3.826341 0.0089 ** 
Optimal-Normal  -7.103198 -21.81359  7.607194 0.4958    

---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Tukey's honest significant differences at the default 95% family-wise level.
TukeyHSD(x = aov.heartbpchol, conf.level = 0.95)
  Tukey multiple comparisons of means
    95% family-wise confidence level

Fit: aov(formula = Cholesterol ~ BP_Status, data = heartbpchol)

$BP_Status
                     diff       lwr       upr     p adj
Normal-High    -11.543481 -20.93394 -2.153023 0.0111929
Optimal-High   -18.646679 -32.83690 -4.456460 0.0059898
Optimal-Normal  -7.103198 -21.18815  6.981749 0.4624869
# Diagnostic plots for the ANOVA fit.
# First draw the default set of diagnostic panels on a 2 x 2 grid.
graphics::par(mfrow = c(2, 2))
plot(x = aov.heartbpchol)

# Then switch back to a single panel and redraw only plot number 2
# on its own.
graphics::par(mfrow = c(1, 1))
plot(x = aov.heartbpchol, which = 2)