Source: https://www.kaggle.com/fedesoriano/heart-failure-prediction
# Read the heart-failure CSV from its GitHub-hosted copy into a tibble.
heart_failure_predictions <- read_csv(
  file = "https://raw.githubusercontent.com/baruab/msdsrepo/main/DATA-607/heart_failure_prediction.csv"
)

# Preview the first six rows.
head(heart_failure_predictions, n = 6)
## # A tibble: 6 x 12
## Age Sex ChestPainType RestingBP Cholesterol FastingBS RestingECG MaxHR
## <dbl> <chr> <chr> <dbl> <dbl> <dbl> <chr> <dbl>
## 1 40 M ATA 140 289 0 Normal 172
## 2 49 F NAP 160 180 0 Normal 156
## 3 37 M ATA 130 283 0 ST 98
## 4 48 F ASY 138 214 0 Normal 108
## 5 54 M NAP 150 195 0 Normal 122
## 6 39 M NAP 120 339 0 Normal 170
## # ... with 4 more variables: ExerciseAngina <chr>, Oldpeak <dbl>,
## # ST_Slope <chr>, HeartDisease <dbl>
# Column-wise overview: name, type and leading values of every variable.
heart_failure_predictions %>% glimpse()
## Rows: 918
## Columns: 12
## $ Age <dbl> 40, 49, 37, 48, 54, 39, 45, 54, 37, 48, 37, 58, 39, 49,~
## $ Sex <chr> "M", "F", "M", "F", "M", "M", "F", "M", "M", "F", "F", ~
## $ ChestPainType <chr> "ATA", "NAP", "ATA", "ASY", "NAP", "NAP", "ATA", "ATA",~
## $ RestingBP <dbl> 140, 160, 130, 138, 150, 120, 130, 110, 140, 120, 130, ~
## $ Cholesterol <dbl> 289, 180, 283, 214, 195, 339, 237, 208, 207, 284, 211, ~
## $ FastingBS <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0~
## $ RestingECG <chr> "Normal", "Normal", "ST", "Normal", "Normal", "Normal",~
## $ MaxHR <dbl> 172, 156, 98, 108, 122, 170, 170, 142, 130, 120, 142, 9~
## $ ExerciseAngina <chr> "N", "N", "N", "Y", "N", "N", "N", "N", "Y", "N", "N", ~
## $ Oldpeak <dbl> 0.0, 1.0, 0.0, 1.5, 0.0, 0.0, 0.0, 0.0, 1.5, 0.0, 0.0, ~
## $ ST_Slope <chr> "Up", "Flat", "Up", "Flat", "Up", "Up", "Up", "Up", "Fl~
## $ HeartDisease <dbl> 0, 1, 0, 1, 0, 0, 0, 0, 1, 0, 0, 1, 0, 1, 0, 0, 1, 0, 1~
# Give the abbreviated CSV headers descriptive names.
# Columns are matched by their original name (new = old) rather than by
# position, so the mapping stays correct if the column order of the CSV ever
# changes, and rename() stops with an error if an expected column is absent.
# Age, Sex, Cholesterol and ST_Slope already have their final names.
# NOTE: the "Excercise_Angina" spelling is kept on purpose because the plotting
# code further down refers to the column by exactly that name.
heart_failure_predictions <- dplyr::rename(
  heart_failure_predictions,
  Chest_Pain_Type        = ChestPainType,
  Resting_Blood_Pressure = RestingBP,
  Fasting_Blood_Sugar    = FastingBS,
  Resting_ECG            = RestingECG,
  Max_Heart_Rate         = MaxHR,
  Excercise_Angina       = ExerciseAngina,
  Old_Peak               = Oldpeak,
  Heart_Disease          = HeartDisease
)

# List the distinct chest-pain codes present in the data.
unique(heart_failure_predictions$Chest_Pain_Type)
## [1] "ATA" "NAP" "ASY" "TA"
# Lookup table: chest-pain code -> human-readable label.
pain_types <- c(
  "ATA" = "Atypical Angina",
  "NAP" = "Non-Anginal Pain",
  "ASY" = "Asymptomatic",
  "TA"  = "Typical Angina"
)

# Fail loudly if the column holds a value that is not a known code. Indexing
# the lookup with an unknown value yields NA, so without this guard a new code
# (or re-running this step on already-recoded data) would silently blank out
# the column. Genuinely missing values are allowed through and stay NA.
stopifnot(
  all(heart_failure_predictions$Chest_Pain_Type %in% c(names(pain_types), NA))
)

# Replace each code with its label; unname() drops the code names that the
# lookup result would otherwise carry into the column.
heart_failure_predictions$Chest_Pain_Type <-
  unname(pain_types[heart_failure_predictions$Chest_Pain_Type])

# Preview the recoded data.
head(heart_failure_predictions)
## # A tibble: 6 x 12
## Age Sex Chest_Pain_Type Resting_Blood_Pressure Cholesterol Fasting_Blood_S~
## <dbl> <chr> <chr> <dbl> <dbl> <dbl>
## 1 40 M Atypical Angina 140 289 0
## 2 49 F Non-Anginal Pain 160 180 0
## 3 37 M Atypical Angina 130 283 0
## 4 48 F Asymptomatic 138 214 0
## 5 54 M Non-Anginal Pain 150 195 0
## 6 39 M Non-Anginal Pain 120 339 0
## # ... with 6 more variables: Resting_ECG <chr>, Max_Heart_Rate <dbl>,
## # Excercise_Angina <chr>, Old_Peak <dbl>, ST_Slope <chr>, Heart_Disease <dbl>
# Count plots for age, resting blood pressure and maximum heart rate:
# one bar per distinct value, bar height = number of rows with that value.
g1 <- heart_failure_predictions %>%
  ggplot(aes(x = Age)) +
  geom_bar()
g2 <- heart_failure_predictions %>%
  ggplot(aes(x = Resting_Blood_Pressure)) +
  geom_bar()
g3 <- heart_failure_predictions %>%
  ggplot(aes(x = Max_Heart_Rate)) +
  geom_bar()

# Stack the three panels in three rows.
grid.arrange(grobs = list(g1, g2, g3), nrow = 3)
# Keep only the rows whose resting ECG is "ST" (ST abnormality) and whose
# cholesterol value is above 200.
ST_cases <- filter(
  heart_failure_predictions,
  Resting_ECG == "ST",
  Cholesterol > 200
)

# Resting BP by age within the ST subset, coloured by heart-disease status.
# Heart_Disease is a numeric 0/1 column; wrapping it in factor() yields a
# two-level discrete fill instead of a continuous colour gradient and gives
# position = "dodge" actual groups to place side by side.
ST_cases %>%
  ggplot() +
  geom_col(
    aes(x = Age, y = Resting_Blood_Pressure, fill = factor(Heart_Disease)),
    position = "dodge"
  ) +
  theme(axis.text.x = element_text(angle = 0)) +
  labs(
    x = "Age",
    y = "Resting BP",
    fill = "Heart disease",
    title = "Statistics on heart disease by Age",
    subtitle = "Resting BP by age for ST cases with cholesterol > 200"
  )
# Resting blood pressure by age for the full data set, coloured by
# heart-disease status.
# Heart_Disease is a numeric 0/1 column; wrapping it in factor() yields a
# two-level discrete fill instead of a continuous colour gradient and gives
# position = "dodge" actual groups to place side by side.
heart_failure_predictions %>%
  ggplot() +
  geom_col(
    aes(x = Age, y = Resting_Blood_Pressure, fill = factor(Heart_Disease)),
    position = "dodge"
  ) +
  theme(axis.text.x = element_text(angle = 0)) +
  labs(
    x = "Age",
    y = "Resting Blood Pressure",
    fill = "Heart disease",
    title = "Statistics on heart disease by Age",
    subtitle = "Resting blood pressure by age, all patients"
  )
# Scatter plots with Age on the y axis.
# s1 / s2: resting BP and max heart rate, coloured by exercise angina.
# s3 / s4: the same two x variables, coloured by sex.
s1 <- heart_failure_predictions %>%
  ggplot(aes(x = Resting_Blood_Pressure, y = Age, colour = Excercise_Angina)) +
  geom_point()
s2 <- heart_failure_predictions %>%
  ggplot(aes(x = Max_Heart_Rate, y = Age, colour = Excercise_Angina)) +
  geom_point()
s3 <- heart_failure_predictions %>%
  ggplot(aes(x = Resting_Blood_Pressure, y = Age, colour = Sex)) +
  geom_point()
s4 <- heart_failure_predictions %>%
  ggplot(aes(x = Max_Heart_Rate, y = Age, colour = Sex)) +
  geom_point()

# Stack the four panels in four rows.
grid.arrange(grobs = list(s1, s2, s3, s4), nrow = 4)