MATH 1324 Introduction to Statistics Assignment 3

Which segment of people is vulnerable to heart attacks?

Eswar Phani Paruchuri (s3798488) | Madisetty Sujay Kamal (s3794983) | Kuntachikanahalli Srinivasa Reddy Harshitha Reddy (s3797186)

Last updated: 27 October, 2019

Introduction

Problem Statement

Data

Data Cont.

Descriptive Statistics and Visualisation

# Read the data set from the CSV file into `Heart_Incidences`:
Heart_Incidences <- read_csv(
  file = "C:/Users/Dick Smith/Desktop/Intro 2 Stat/Assignment 3 Data Sets/heart.csv"
)

# Print summary statistics for every column of the data:
summary(object = Heart_Incidences)
##       age             sex              chol        thalach     
##  Min.   :29.00   Min.   :0.0000   Min.   :126   Min.   : 71.0  
##  1st Qu.:48.00   1st Qu.:0.0000   1st Qu.:211   1st Qu.:132.0  
##  Median :56.00   Median :1.0000   Median :240   Median :152.0  
##  Mean   :54.43   Mean   :0.6956   Mean   :246   Mean   :149.1  
##  3rd Qu.:61.00   3rd Qu.:1.0000   3rd Qu.:275   3rd Qu.:166.0  
##  Max.   :77.00   Max.   :1.0000   Max.   :564   Max.   :202.0  
##      target      
##  Min.   :0.0000  
##  1st Qu.:0.0000  
##  Median :1.0000  
##  Mean   :0.5132  
##  3rd Qu.:1.0000  
##  Max.   :1.0000
# Number of rows and columns in the data:
dim(Heart_Incidences)
## [1] 1025    5
# Class and leading values of each column:
str(object = Heart_Incidences)
## Classes 'spec_tbl_df', 'tbl_df', 'tbl' and 'data.frame': 1025 obs. of  5 variables:
##  $ age    : num  52 53 70 61 62 58 58 55 46 54 ...
##  $ sex    : num  1 1 1 1 0 0 1 1 1 1 ...
##  $ chol   : num  212 203 174 203 294 248 318 289 249 286 ...
##  $ thalach: num  168 155 125 161 106 122 140 145 144 116 ...
##  $ target : num  0 0 0 0 0 1 0 0 0 0 ...
##  - attr(*, "spec")=
##   .. cols(
##   ..   age = col_double(),
##   ..   sex = col_double(),
##   ..   chol = col_double(),
##   ..   thalach = col_double(),
##   ..   target = col_double()
##   .. )
  # Sex from number to factor (0 = "Female", 1 = "Male"):
  Heart_Incidences[["sex"]] <- factor(
    Heart_Incidences[["sex"]],
    levels = c(0, 1),
    labels = c("Female", "Male")
  )

# Target from number to factor (0 = "NO", 1 = "YES"):
Heart_Incidences[["target"]] <- factor(
  Heart_Incidences[["target"]],
  levels = c(0, 1),
  labels = c("NO", "YES")
)

# Total count of missing values across the whole data set:
sum(is.na(Heart_Incidences))
## [1] 0

Descriptive Statistics Cont.

# Share of each sex level among the observations used for the analysis:
prop.table(table(Heart_Incidences[["sex"]]))
## 
##    Female      Male 
## 0.3043902 0.6956098
# Share of observations in each heart-disease target group (NO / YES):
prop.table(table(Heart_Incidences[["target"]]))
## 
##        NO       YES 
## 0.4868293 0.5131707
# Boxplot of cholesterol for each heart-disease target group.
# Columns are named bare inside aes() so ggplot2 looks them up in `data`;
# writing `Heart_Incidences$col` inside aes() is discouraged by ggplot2 and
# leaks the full expression into the legend title.
plot_sex_bp <- ggplot(data = Heart_Incidences, aes(x = target, y = chol)) +
  geom_boxplot(aes(fill = target))
plot_sex_bp +
  ggtitle("Cholestrol variation with Heart Diesease") +
  xlab("Heart Disease(Yes or No)") +
  ylab("cholestrol")

# Boxplot of maximum heart rate (thalach) for each heart-disease target group.
plot_sex_bp <- ggplot(data = Heart_Incidences, aes(x = target, y = thalach)) +
  geom_boxplot(aes(fill = target))
plot_sex_bp +
  ggtitle("Max Heart Rate  variation with Heart Disease") +
  xlab("Heart Disease(Yes or No)") +
  ylab("Max Heart Rate")

# Code used to remove outliers from the boxplots:
# boxplot(..., plot = FALSE)$out returns the cholesterol values that lie
# beyond the whiskers. Rows are dropped with a logical mask rather than
# `-which(...)`: if no value is flagged, which() returns integer(0) and
# indexing with -integer(0) would select zero rows, wiping the data set.
outliers <- boxplot(Heart_Incidences$chol, plot = FALSE)$out
Heart_Incidences <- Heart_Incidences[!(Heart_Incidences$chol %in% outliers), ]

# Boxplot of cholesterol for each heart-disease target group, drawn again
# after the cholesterol outliers were removed. Columns are named bare inside
# aes() so ggplot2 resolves them in `data` (using `$` in aes() is discouraged).
plot_sex_bp <- ggplot(data = Heart_Incidences, aes(x = target, y = chol)) +
  geom_boxplot(aes(fill = target))
plot_sex_bp +
  ggtitle("Cholestrol variation with Heart Diesease") +
  xlab("Heart Disease(Yes or No)") +
  ylab("cholestrol")

# Remove the maximum-heart-rate (thalach) values that boxplot() flags as
# outliers. A logical mask is used instead of `-which(...)`, because
# -integer(0) (no outliers found) would drop every row instead of none.
outliers <- boxplot(Heart_Incidences$thalach, plot = FALSE)$out
Heart_Incidences <- Heart_Incidences[!(Heart_Incidences$thalach %in% outliers), ]

# Boxplot of maximum heart rate (thalach) for each heart-disease target
# group, drawn again after the thalach outliers were removed. Columns are
# named bare inside aes() so ggplot2 resolves them in `data` (using `$` in
# aes() is discouraged).
plot_sex_bp <- ggplot(data = Heart_Incidences, aes(x = target, y = thalach)) +
  geom_boxplot(aes(fill = target))
plot_sex_bp +
  ggtitle("Max Heart Rate  variation with Heart Disease") +
  xlab("Heart Disease(Yes or No)") +
  ylab("Max Heart Rate")

#knitr::kable(table)

Hypothesis Testing

# Cholesterol variance within each target group (checked before choosing
# between the pooled and the Welch t-test):
var(Heart_Incidences$chol[Heart_Incidences$target == "YES"])
## [1] 1921.851
var(Heart_Incidences$chol[Heart_Incidences$target == "NO"])
## [1] 2152.996
# Two-sided Welch two-sample t-test of cholesterol between the NO and YES
# groups (var.equal = FALSE: variances are not pooled).
# `paired` is deliberately not passed: the formula interface always compares
# independent groups, and R >= 4.4.0 stops with
# "cannot use 'paired' in formula method" if the argument is supplied.
t.test(Heart_Incidences$chol ~ Heart_Incidences$target,
       var.equal = FALSE,
       alternative = "two.sided")
## 
##  Welch Two Sample t-test
## 
## data:  Heart_Incidences$chol by Heart_Incidences$target
## t = 4.1978, df = 990.04, p-value = 2.938e-05
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##   6.373539 17.563614
## sample estimates:
##  mean in group NO mean in group YES 
##          249.1639          237.1954
# Maximum-heart-rate (thalach) variance within each target group (checked
# before choosing between the pooled and the Welch t-test):
var(Heart_Incidences$thalach[Heart_Incidences$target == "YES"])
## [1] 371.0126
var(Heart_Incidences$thalach[Heart_Incidences$target == "NO"])
## [1] 479.885
# Two-sided Welch two-sample t-test of maximum heart rate between the NO and
# YES groups (var.equal = FALSE: variances are not pooled).
# `paired` is deliberately not passed: the formula interface always compares
# independent groups, and R >= 4.4.0 stops with
# "cannot use 'paired' in formula method" if the argument is supplied.
t.test(Heart_Incidences$thalach ~ Heart_Incidences$target,
       var.equal = FALSE,
       alternative = "two.sided")
## 
##  Welch Two Sample t-test
## 
## data:  Heart_Incidences$thalach by Heart_Incidences$target
## t = -14.641, df = 969.73, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -21.65517 -16.53632
## sample estimates:
##  mean in group NO mean in group YES 
##          139.5000          158.5957

Hypothesis Testing Result for Cholesterol Level: (ask feedback)

Hypothesis Testing Result for Max Heart Rate: (ask feedback)

Discussion

References

- External Reference on Heart Rates: https://www.health.harvard.edu/heart-health/what-your-heart-rate-is-telling-you