Heart Failure Prediction Dataset

11 clinical features for predicting heart disease events.

Source: https://www.kaggle.com/fedesoriano/heart-failure-prediction


# Load the heart-failure CSV directly from GitHub. col_names is left at its
# default, so the first row of the file supplies the column names.
heart_failure_predictions <- read_csv(
  file = "https://raw.githubusercontent.com/baruab/msdsrepo/main/DATA-607/heart_failure_prediction.csv"
)

# Preview the first six rows of the loaded tibble.
head(heart_failure_predictions, n = 6L)
## # A tibble: 6 x 12
##     Age Sex   ChestPainType RestingBP Cholesterol FastingBS RestingECG MaxHR
##   <dbl> <chr> <chr>             <dbl>       <dbl>     <dbl> <chr>      <dbl>
## 1    40 M     ATA                 140         289         0 Normal       172
## 2    49 F     NAP                 160         180         0 Normal       156
## 3    37 M     ATA                 130         283         0 ST            98
## 4    48 F     ASY                 138         214         0 Normal       108
## 5    54 M     NAP                 150         195         0 Normal       122
## 6    39 M     NAP                 120         339         0 Normal       170
## # ... with 4 more variables: ExerciseAngina <chr>, Oldpeak <dbl>,
## #   ST_Slope <chr>, HeartDisease <dbl>


The glimpse() function provides a snapshot of the data in R. It reports the number of rows and columns and lists each column with its data type and first few values.
# Compact overview: row/column counts plus each column's type and leading values.
heart_failure_predictions %>% glimpse()
## Rows: 918
## Columns: 12
## $ Age            <dbl> 40, 49, 37, 48, 54, 39, 45, 54, 37, 48, 37, 58, 39, 49,~
## $ Sex            <chr> "M", "F", "M", "F", "M", "M", "F", "M", "M", "F", "F", ~
## $ ChestPainType  <chr> "ATA", "NAP", "ATA", "ASY", "NAP", "NAP", "ATA", "ATA",~
## $ RestingBP      <dbl> 140, 160, 130, 138, 150, 120, 130, 110, 140, 120, 130, ~
## $ Cholesterol    <dbl> 289, 180, 283, 214, 195, 339, 237, 208, 207, 284, 211, ~
## $ FastingBS      <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0~
## $ RestingECG     <chr> "Normal", "Normal", "ST", "Normal", "Normal", "Normal",~
## $ MaxHR          <dbl> 172, 156, 98, 108, 122, 170, 170, 142, 130, 120, 142, 9~
## $ ExerciseAngina <chr> "N", "N", "N", "Y", "N", "N", "N", "N", "Y", "N", "N", ~
## $ Oldpeak        <dbl> 0.0, 1.0, 0.0, 1.5, 0.0, 0.0, 0.0, 0.0, 1.5, 0.0, 0.0, ~
## $ ST_Slope       <chr> "Up", "Flat", "Up", "Flat", "Up", "Up", "Up", "Up", "Fl~
## $ HeartDisease   <dbl> 0, 1, 0, 1, 0, 0, 0, 0, 1, 0, 0, 1, 0, 1, 0, 0, 1, 0, 1~


Give descriptive names to the columns

# Replace the abbreviated source headers with descriptive snake-style names.
# The vector is positional: entry i renames column i of the tibble.
# NOTE: "Excercise_Angina" keeps this exact spelling because the plotting code
# further down refers to the column by this name.
names(heart_failure_predictions) <- c(
  "Age",
  "Sex",
  "Chest_Pain_Type",
  "Resting_Blood_Pressure",
  "Cholesterol",
  "Fasting_Blood_Sugar",
  "Resting_ECG",
  "Max_Heart_Rate",
  "Excercise_Angina",
  "Old_Peak",
  "ST_Slope",
  "Heart_Disease"
)


List the unique chest pain types
# Distinct chest-pain codes present in the data, in order of first appearance.
unique(heart_failure_predictions[["Chest_Pain_Type"]])
## [1] "ATA" "NAP" "ASY" "TA"


Converting the chest_pain acronyms to descriptive names

# Lookup table from the dataset's chest-pain codes to readable labels.
pain_types <- c(
  "ATA" = "Atypical Angina",
  "NAP" = "Non-Anginal Pain",
  "ASY" = "Asymptomatic",
  "TA" = "Typical Angina"
)

# Recode by indexing the lookup table with the code column. Indexing a named
# vector returns a result that still carries the codes as names, so unname()
# strips them before the values are stored; the column stays a plain character
# vector. Any code that is not in the lookup table becomes NA.
heart_failure_predictions$Chest_Pain_Type <- unname(
  pain_types[heart_failure_predictions$Chest_Pain_Type]
)

# Confirm the recoded column.
head(heart_failure_predictions)
## # A tibble: 6 x 12
##     Age Sex   Chest_Pain_Type  Resting_Blood_Pressure Cholesterol Fasting_Blood_S~
##   <dbl> <chr> <chr>                             <dbl>       <dbl>            <dbl>
## 1    40 M     Atypical Angina                     140         289                0
## 2    49 F     Non-Anginal Pain                    160         180                0
## 3    37 M     Atypical Angina                     130         283                0
## 4    48 F     Asymptomatic                        138         214                0
## 5    54 M     Non-Anginal Pain                    150         195                0
## 6    39 M     Non-Anginal Pain                    120         339                0
## # ... with 6 more variables: Resting_ECG <chr>, Max_Heart_Rate <dbl>,
## #   Excercise_Angina <chr>, Old_Peak <dbl>, ST_Slope <chr>, Heart_Disease <dbl>


Barplots for Age, Resting BP and Max Heart Rate
# One count-based bar chart per numeric variable: geom_bar() with its default
# stat tallies how many rows share each x value.
g1 <- heart_failure_predictions %>%
  ggplot(aes(x = Age)) +
  geom_bar()

g2 <- heart_failure_predictions %>%
  ggplot(aes(x = Resting_Blood_Pressure)) +
  geom_bar()

g3 <- heart_failure_predictions %>%
  ggplot(aes(x = Max_Heart_Rate)) +
  geom_bar()

# Stack the three charts vertically on one page.
grid.arrange(g1, g2, g3, nrow = 3)


Resting ST-T wave abnormalities have been associated with an increased risk of adverse cardiac events.
# Keep only rows whose resting ECG is coded "ST" AND whose cholesterol is
# above 200; both conditions must hold for a row to be retained.
ST_cases <- heart_failure_predictions %>%
  filter(Resting_ECG == "ST", Cholesterol > 200)
For cases with resting ST-T wave abnormalities (and cholesterol above 200)
# Resting BP by age for the ST_cases subset, split by heart-disease status.
# Heart_Disease is stored as numeric 0/1. Mapping it to fill directly gives a
# continuous colour scale and no grouping, so position = "dodge" has nothing
# to separate. Wrapping it in factor() makes the fill discrete, so the two
# groups are drawn side by side with a two-entry legend.
ST_cases %>%
  ggplot(
    aes(x = Age, y = Resting_Blood_Pressure, fill = factor(Heart_Disease))
  ) +
  # geom_col() draws bar heights from the y values (same as stat = "identity").
  geom_col(position = "dodge") +
  theme(axis.text.x = element_text(angle = 0)) +
  labs(
    x = "Age",
    y = "Resting BP",
    fill = "Heart_Disease",
    title = "Statistics on heart disease by Age",
    subtitle = "Resting BP by Age and heart disease status, resting ECG ST cases"
  )


For All cases (no filter)
# Resting blood pressure by age for every row, split by heart-disease status.
# Heart_Disease is stored as numeric 0/1. Mapping it to fill directly gives a
# continuous colour scale and no grouping, so position = "dodge" has nothing
# to separate. Wrapping it in factor() makes the fill discrete, so the two
# groups are drawn side by side with a two-entry legend.
heart_failure_predictions %>%
  ggplot(
    aes(x = Age, y = Resting_Blood_Pressure, fill = factor(Heart_Disease))
  ) +
  # geom_col() draws bar heights from the y values (same as stat = "identity").
  geom_col(position = "dodge") +
  theme(axis.text.x = element_text(angle = 0)) +
  labs(
    x = "Age",
    y = "Resting Blood Pressure",
    fill = "Heart_Disease",
    title = "Statistics on heart disease by Age",
    subtitle = "Resting Blood Pressure by Age and heart disease status, all cases"
  )


For all cases: Age against Resting BP and Max Heart Rate, colored by Exercise Angina and by Sex
# Four scatter plots with Age on the y axis. The x axis is either resting
# blood pressure or maximum heart rate, and points are coloured either by
# exercise-induced angina (s1, s2) or by sex (s3, s4).

s1 <- heart_failure_predictions %>%
  ggplot(aes(x = Resting_Blood_Pressure, y = Age, color = Excercise_Angina)) +
  geom_point()

s2 <- heart_failure_predictions %>%
  ggplot(aes(x = Max_Heart_Rate, y = Age, color = Excercise_Angina)) +
  geom_point()

s3 <- heart_failure_predictions %>%
  ggplot(aes(x = Resting_Blood_Pressure, y = Age, color = Sex)) +
  geom_point()

s4 <- heart_failure_predictions %>%
  ggplot(aes(x = Max_Heart_Rate, y = Age, color = Sex)) +
  geom_point()

# Arrange the four plots in a single column.
grid.arrange(s1, s2, s3, s4, nrow = 4)

Conclusion

This document summarises a few capabilities of the R Tidyverse for manipulating data and interpreting it through different types of visualisation. The Tidyverse can accommodate more continuous and categorical variables to see whether a relationship, or possibly causation, exists in a particular situation.