# Load the heart-attack prediction dataset into `data` and print per-column
# summary statistics.
# NOTE(review): the path is absolute and machine-specific, so the script only
# runs where this exact file exists -- confirm whether it should be relative.
data <- read.csv(
  file = "C:/Users/sudha/OneDrive/Desktop/dv/heart attack prediction.csv"
)
summary(object = data)
## Patient.ID Age Sex Cholesterol
## Length:8763 Min. :18.00 Length:8763 Min. :120.0
## Class :character 1st Qu.:35.00 Class :character 1st Qu.:192.0
## Mode :character Median :54.00 Mode :character Median :259.0
## Mean :53.71 Mean :259.9
## 3rd Qu.:72.00 3rd Qu.:330.0
## Max. :90.00 Max. :400.0
## Blood.Pressure Heart.Rate Diabetes Family.History
## Length:8763 Min. : 40.00 Min. :0.0000 Min. :0.000
## Class :character 1st Qu.: 57.00 1st Qu.:0.0000 1st Qu.:0.000
## Mode :character Median : 75.00 Median :1.0000 Median :0.000
## Mean : 75.02 Mean :0.6523 Mean :0.493
## 3rd Qu.: 93.00 3rd Qu.:1.0000 3rd Qu.:1.000
## Max. :110.00 Max. :1.0000 Max. :1.000
## Smoking Obesity Alcohol.Consumption Exercise.Hours.Per.Week
## Min. :0.0000 Min. :0.0000 Min. :0.0000 Min. : 0.002442
## 1st Qu.:1.0000 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.: 4.981579
## Median :1.0000 Median :1.0000 Median :1.0000 Median :10.069559
## Mean :0.8968 Mean :0.5014 Mean :0.5981 Mean :10.014284
## 3rd Qu.:1.0000 3rd Qu.:1.0000 3rd Qu.:1.0000 3rd Qu.:15.050018
## Max. :1.0000 Max. :1.0000 Max. :1.0000 Max. :19.998709
## Diet Previous.Heart.Problems Medication.Use Stress.Level
## Length:8763 Min. :0.0000 Min. :0.0000 Min. : 1.00
## Class :character 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.: 3.00
## Mode :character Median :0.0000 Median :0.0000 Median : 5.00
## Mean :0.4958 Mean :0.4983 Mean : 5.47
## 3rd Qu.:1.0000 3rd Qu.:1.0000 3rd Qu.: 8.00
## Max. :1.0000 Max. :1.0000 Max. :10.00
## Sedentary.Hours.Per.Day Income BMI Triglycerides
## Min. : 0.001263 Min. : 20062 Min. :18.00 Min. : 30.0
## 1st Qu.: 2.998794 1st Qu.: 88310 1st Qu.:23.42 1st Qu.:225.5
## Median : 5.933622 Median :157866 Median :28.77 Median :417.0
## Mean : 5.993690 Mean :158263 Mean :28.89 Mean :417.7
## 3rd Qu.: 9.019124 3rd Qu.:227749 3rd Qu.:34.32 3rd Qu.:612.0
## Max. :11.999313 Max. :299954 Max. :40.00 Max. :800.0
## Physical.Activity.Days.Per.Week Sleep.Hours.Per.Day Country
## Min. :0.00 Min. : 4.000 Length:8763
## 1st Qu.:2.00 1st Qu.: 5.000 Class :character
## Median :3.00 Median : 7.000 Mode :character
## Mean :3.49 Mean : 7.024
## 3rd Qu.:5.00 3rd Qu.: 9.000
## Max. :7.00 Max. :10.000
## Continent Hemisphere Heart.Attack.Risk
## Length:8763 Length:8763 Min. :0.0000
## Class :character Class :character 1st Qu.:0.0000
## Mode :character Mode :character Median :0.0000
## Mean :0.3582
## 3rd Qu.:1.0000
## Max. :1.0000
# Print the compact structure of the data frame (one line per column).
str(object = data)
## 'data.frame': 8763 obs. of 26 variables:
## $ Patient.ID : chr "BMW7812" "CZE1114" "BNI9906" "JLN3497" ...
## $ Age : int 67 21 21 84 66 54 90 84 20 43 ...
## $ Sex : chr "Male" "Male" "Female" "Male" ...
## $ Cholesterol : int 208 389 324 383 318 297 358 220 145 248 ...
## $ Blood.Pressure : chr "158/88" "165/93" "174/99" "163/100" ...
## $ Heart.Rate : int 72 98 72 73 93 48 84 107 68 55 ...
## $ Diabetes : int 0 1 1 1 1 1 0 0 1 0 ...
## $ Family.History : int 0 1 0 1 1 1 0 0 0 1 ...
## $ Smoking : int 1 1 0 1 1 1 1 1 1 1 ...
## $ Obesity : int 0 1 0 0 1 0 0 1 1 1 ...
## $ Alcohol.Consumption : int 0 1 0 1 0 1 1 1 0 1 ...
## $ Exercise.Hours.Per.Week : num 4.17 1.81 2.08 9.83 5.8 ...
## $ Diet : chr "Average" "Unhealthy" "Healthy" "Average" ...
## $ Previous.Heart.Problems : int 0 1 1 1 1 1 0 0 0 0 ...
## $ Medication.Use : int 0 0 1 0 0 1 0 1 0 0 ...
## $ Stress.Level : int 9 1 9 9 6 2 7 4 5 4 ...
## $ Sedentary.Hours.Per.Day : num 6.62 4.96 9.46 7.65 1.51 ...
## $ Income : int 261404 285768 235282 125640 160555 241339 190450 122093 25086 209703 ...
## $ BMI : num 31.3 27.2 28.2 36.5 21.8 ...
## $ Triglycerides : int 286 235 587 378 231 795 284 370 790 232 ...
## $ Physical.Activity.Days.Per.Week: int 0 1 4 3 1 5 4 6 7 7 ...
## $ Sleep.Hours.Per.Day : int 6 7 4 4 5 10 10 7 4 7 ...
## $ Country : chr "Argentina" "Canada" "France" "Canada" ...
## $ Continent : chr "South America" "North America" "Europe" "North America" ...
## $ Hemisphere : chr "Southern Hemisphere" "Northern Hemisphere" "Northern Hemisphere" "Northern Hemisphere" ...
## $ Heart.Attack.Risk : int 0 0 0 0 0 1 1 1 0 0 ...
library(ggplot2)
# Step 1: data and a title only -- no aesthetics and no geom layer.
ggplot(data = data) +
  ggtitle("Heart attack risk prediction Data plot")

# Step 2: map heart rate (x), weekly exercise hours (y) and BMI (colour);
# still no geom layer, so no points are drawn.
ggplot(
  data = data,
  mapping = aes(x = Heart.Rate, y = Exercise.Hours.Per.Week, colour = BMI)
) +
  ggtitle("Heart attack risk prediction Data Plot")

# Step 3: same mapping as step 2, now with a point layer.
ggplot(
  data = data,
  mapping = aes(x = Heart.Rate, y = Exercise.Hours.Per.Week, colour = BMI)
) +
  geom_point() +
  ggtitle("Heart attack risk prediction Data Plot")

# Scatter plot of weekly exercise hours against heart rate, with point size
# mapped to stress level. The title names this dataset, matching the other
# heart-attack plots in this script (it previously carried an unrelated
# "MTCars" title).
ggplot(
  data,
  aes(x = Heart.Rate, y = Exercise.Hours.Per.Week, size = Stress.Level)
) +
  geom_point() +
  labs(title = "Heart attack risk prediction Data Plot")

# Convert heart rate to a factor in place.
# NOTE(review): none of the plots below this line use Heart.Rate, so this
# conversion looks unnecessary -- confirm before relying on it being numeric
# or categorical later.
data[["Heart.Rate"]] <- factor(data[["Heart.Rate"]])

# Stress level plotted against diabetes status (treated as a category).
ggplot(data = data, mapping = aes(x = factor(Diabetes), y = Stress.Level)) +
  geom_point()

# Histogram of weekly exercise hours using 5-hour-wide bins.
ggplot(data = data, mapping = aes(x = Exercise.Hours.Per.Week)) +
  geom_histogram(binwidth = 5, colour = "black", fill = "lightblue") +
  ggtitle("Histogram of Exercise.Hours.Per.Week") +
  xlab("Exercise.Hours.Per.Week") +
  ylab("Count")

# Bar chart counting rows per sleep-hours value; bars are filled by the
# (numeric) sleep-hours value itself.
ggplot(
  data,
  aes(x = as.factor(Sleep.Hours.Per.Day), fill = Sleep.Hours.Per.Day)
) +
  geom_bar(stat = "count")

# Pie chart of stress-level frequencies.
# Count the rows at each stress level.
Stress.Level <- table(data[["Stress.Level"]])
# Percentage share of each level, rounded to a whole number.
share <- round(prop.table(Stress.Level) * 100)
# Slice labels of the form "<level> <percent>%".
data.labels <- paste0(names(Stress.Level), " ", share, "%")
pie(
  x = Stress.Level,
  labels = data.labels,
  clockwise = TRUE,
  col = heat.colors(length(data.labels)),
  main = "Frequency of Stress.Level"
)

# Box plot of daily sleep hours for each stress level; the plot object is
# stored in `bx` and then displayed.
bx <- ggplot(data, aes(x = factor(Stress.Level), y = Sleep.Hours.Per.Day)) +
  geom_boxplot(fill = "blue") +
  labs(
    title = "Distribution of Sleep.Hours.Per.Day",
    x = "Stress.Level",
    y = "Sleep.Hours.Per.Day"
  )
bx

# Jittered BMI values per stress level, coloured by stress level, with one
# facet column per stress level.
ggplot(
  data = data,
  mapping = aes(x = as.factor(Stress.Level), y = BMI, colour = Stress.Level)
) +
  geom_jitter() +
  facet_grid(cols = vars(Stress.Level))
