# Load the heart attack prediction dataset from its CSV file, then print
# per-column summary statistics for the resulting data frame.
data <- read.csv(
  file = "C:/Users/sudha/OneDrive/Desktop/dv/heart attack prediction.csv"
)
summary(object = data)
##   Patient.ID             Age            Sex             Cholesterol   
##  Length:8763        Min.   :18.00   Length:8763        Min.   :120.0  
##  Class :character   1st Qu.:35.00   Class :character   1st Qu.:192.0  
##  Mode  :character   Median :54.00   Mode  :character   Median :259.0  
##                     Mean   :53.71                      Mean   :259.9  
##                     3rd Qu.:72.00                      3rd Qu.:330.0  
##                     Max.   :90.00                      Max.   :400.0  
##  Blood.Pressure       Heart.Rate        Diabetes      Family.History 
##  Length:8763        Min.   : 40.00   Min.   :0.0000   Min.   :0.000  
##  Class :character   1st Qu.: 57.00   1st Qu.:0.0000   1st Qu.:0.000  
##  Mode  :character   Median : 75.00   Median :1.0000   Median :0.000  
##                     Mean   : 75.02   Mean   :0.6523   Mean   :0.493  
##                     3rd Qu.: 93.00   3rd Qu.:1.0000   3rd Qu.:1.000  
##                     Max.   :110.00   Max.   :1.0000   Max.   :1.000  
##     Smoking          Obesity       Alcohol.Consumption Exercise.Hours.Per.Week
##  Min.   :0.0000   Min.   :0.0000   Min.   :0.0000      Min.   : 0.002442      
##  1st Qu.:1.0000   1st Qu.:0.0000   1st Qu.:0.0000      1st Qu.: 4.981579      
##  Median :1.0000   Median :1.0000   Median :1.0000      Median :10.069559      
##  Mean   :0.8968   Mean   :0.5014   Mean   :0.5981      Mean   :10.014284      
##  3rd Qu.:1.0000   3rd Qu.:1.0000   3rd Qu.:1.0000      3rd Qu.:15.050018      
##  Max.   :1.0000   Max.   :1.0000   Max.   :1.0000      Max.   :19.998709      
##      Diet           Previous.Heart.Problems Medication.Use    Stress.Level  
##  Length:8763        Min.   :0.0000          Min.   :0.0000   Min.   : 1.00  
##  Class :character   1st Qu.:0.0000          1st Qu.:0.0000   1st Qu.: 3.00  
##  Mode  :character   Median :0.0000          Median :0.0000   Median : 5.00  
##                     Mean   :0.4958          Mean   :0.4983   Mean   : 5.47  
##                     3rd Qu.:1.0000          3rd Qu.:1.0000   3rd Qu.: 8.00  
##                     Max.   :1.0000          Max.   :1.0000   Max.   :10.00  
##  Sedentary.Hours.Per.Day     Income            BMI        Triglycerides  
##  Min.   : 0.001263       Min.   : 20062   Min.   :18.00   Min.   : 30.0  
##  1st Qu.: 2.998794       1st Qu.: 88310   1st Qu.:23.42   1st Qu.:225.5  
##  Median : 5.933622       Median :157866   Median :28.77   Median :417.0  
##  Mean   : 5.993690       Mean   :158263   Mean   :28.89   Mean   :417.7  
##  3rd Qu.: 9.019124       3rd Qu.:227749   3rd Qu.:34.32   3rd Qu.:612.0  
##  Max.   :11.999313       Max.   :299954   Max.   :40.00   Max.   :800.0  
##  Physical.Activity.Days.Per.Week Sleep.Hours.Per.Day   Country         
##  Min.   :0.00                    Min.   : 4.000      Length:8763       
##  1st Qu.:2.00                    1st Qu.: 5.000      Class :character  
##  Median :3.00                    Median : 7.000      Mode  :character  
##  Mean   :3.49                    Mean   : 7.024                        
##  3rd Qu.:5.00                    3rd Qu.: 9.000                        
##  Max.   :7.00                    Max.   :10.000                        
##   Continent          Hemisphere        Heart.Attack.Risk
##  Length:8763        Length:8763        Min.   :0.0000   
##  Class :character   Class :character   1st Qu.:0.0000   
##  Mode  :character   Mode  :character   Median :0.0000   
##                                        Mean   :0.3582   
##                                        3rd Qu.:1.0000   
##                                        Max.   :1.0000
# Show the compact structure of the data frame: column names, storage
# types and the first few values of each column.
str(object = data)
## 'data.frame':    8763 obs. of  26 variables:
##  $ Patient.ID                     : chr  "BMW7812" "CZE1114" "BNI9906" "JLN3497" ...
##  $ Age                            : int  67 21 21 84 66 54 90 84 20 43 ...
##  $ Sex                            : chr  "Male" "Male" "Female" "Male" ...
##  $ Cholesterol                    : int  208 389 324 383 318 297 358 220 145 248 ...
##  $ Blood.Pressure                 : chr  "158/88" "165/93" "174/99" "163/100" ...
##  $ Heart.Rate                     : int  72 98 72 73 93 48 84 107 68 55 ...
##  $ Diabetes                       : int  0 1 1 1 1 1 0 0 1 0 ...
##  $ Family.History                 : int  0 1 0 1 1 1 0 0 0 1 ...
##  $ Smoking                        : int  1 1 0 1 1 1 1 1 1 1 ...
##  $ Obesity                        : int  0 1 0 0 1 0 0 1 1 1 ...
##  $ Alcohol.Consumption            : int  0 1 0 1 0 1 1 1 0 1 ...
##  $ Exercise.Hours.Per.Week        : num  4.17 1.81 2.08 9.83 5.8 ...
##  $ Diet                           : chr  "Average" "Unhealthy" "Healthy" "Average" ...
##  $ Previous.Heart.Problems        : int  0 1 1 1 1 1 0 0 0 0 ...
##  $ Medication.Use                 : int  0 0 1 0 0 1 0 1 0 0 ...
##  $ Stress.Level                   : int  9 1 9 9 6 2 7 4 5 4 ...
##  $ Sedentary.Hours.Per.Day        : num  6.62 4.96 9.46 7.65 1.51 ...
##  $ Income                         : int  261404 285768 235282 125640 160555 241339 190450 122093 25086 209703 ...
##  $ BMI                            : num  31.3 27.2 28.2 36.5 21.8 ...
##  $ Triglycerides                  : int  286 235 587 378 231 795 284 370 790 232 ...
##  $ Physical.Activity.Days.Per.Week: int  0 1 4 3 1 5 4 6 7 7 ...
##  $ Sleep.Hours.Per.Day            : int  6 7 4 4 5 10 10 7 4 7 ...
##  $ Country                        : chr  "Argentina" "Canada" "France" "Canada" ...
##  $ Continent                      : chr  "South America" "North America" "Europe" "North America" ...
##  $ Hemisphere                     : chr  "Southern Hemisphere" "Northern Hemisphere" "Northern Hemisphere" "Northern Hemisphere" ...
##  $ Heart.Attack.Risk              : int  0 0 0 0 0 1 1 1 0 0 ...
library(ggplot2)
# Step 1: a titled plot with no aesthetics and no layers, so only an
# empty canvas is drawn.
ggplot(data = data) +
  ggtitle("Heart attack risk prediction Data plot")

# Step 2: declare the x, y and colour aesthetics. No geom is added yet,
# so the axes appear but no observations are drawn.
ggplot(
  data,
  mapping = aes(
    x = Heart.Rate,
    y = Exercise.Hours.Per.Week,
    colour = BMI
  )
) +
  ggtitle("Heart attack risk prediction Data Plot")

# Step 3: scatter plot of weekly exercise hours against heart rate, with
# each point coloured by BMI.
ggplot(
  data,
  mapping = aes(
    x = Heart.Rate,
    y = Exercise.Hours.Per.Week,
    colour = BMI
  )
) +
  geom_point() +
  ggtitle("Heart attack risk prediction Data Plot")

# Scatter plot of weekly exercise hours against heart rate, with point
# size mapped to the stress level.
# The title previously read "MTCars Data Plot", a leftover from another
# example; it now matches the other plots of this dataset.
ggplot(
  data,
  aes(x = Heart.Rate, y = Exercise.Hours.Per.Week, size = Stress.Level)
) +
  geom_point() +
  labs(title = "Heart attack risk prediction Data Plot")

# NOTE(review): this turns Heart.Rate into a factor for the rest of the
# script, but none of the plots below use Heart.Rate. It looks like a
# leftover from another example -- confirm whether it is still needed.
data$Heart.Rate <- factor(data$Heart.Rate)

# Stress level by diabetes status. Both variables are integer-coded with
# only a few distinct values (see the summary output above), so plain
# points would stack on top of each other. geom_count() draws one point
# per (x, y) combination and sizes it by the number of observations there.
ggplot(data, aes(x = factor(Diabetes), y = Stress.Level)) +
  geom_count()

# Histogram of weekly exercise hours using 5-hour-wide bins, drawn as
# light blue bars with black outlines.
ggplot(data, mapping = aes(x = Exercise.Hours.Per.Week)) +
  geom_histogram(binwidth = 5, colour = "black", fill = "lightblue") +
  ggtitle("Histogram of Exercise.Hours.Per.Week") +
  xlab("Exercise.Hours.Per.Week") +
  ylab("Count")

# Bar chart of the number of observations for each value of
# Sleep.Hours.Per.Day.
# Both x and fill use the factor version of the column, so the legend is
# a discrete key with one entry per bar rather than a continuous colour
# bar. Explicit labels replace the raw "factor(...)" expressions.
ggplot(
  data,
  aes(
    x = factor(Sleep.Hours.Per.Day),
    fill = factor(Sleep.Hours.Per.Day)
  )
) +
  geom_bar(stat = "count") +
  labs(x = "Sleep.Hours.Per.Day", fill = "Sleep.Hours.Per.Day")

# Pie chart of how often each stress level occurs.
# Frequency table of the Stress.Level column.
Stress.Level <- table(data$Stress.Level)
# Whole-number percentage of all observations falling in each level.
share <- round(Stress.Level / sum(Stress.Level) * 100)
# Slice labels of the form "<level> <share>%".
data.labels <- paste0(names(Stress.Level), " ", share, "%")
pie(
  Stress.Level,
  labels = data.labels,
  clockwise = TRUE,
  col = heat.colors(length(data.labels)),
  main = "Frequency of Stress.Level"
)

# Box plot of daily sleep hours for each stress level. The plot object is
# stored in `bx` and then displayed.
bx <- ggplot(
  data = data,
  mapping = aes(x = factor(Stress.Level), y = Sleep.Hours.Per.Day)
) +
  geom_boxplot(fill = "blue") +
  labs(
    title = "Distribution of Sleep.Hours.Per.Day",
    x = "Stress.Level",
    y = "Sleep.Hours.Per.Day"
  )
bx

# Jittered BMI values, one panel per stress level.
# The facet variable is the same as the x variable, so with a shared
# x scale every panel would reserve room for all levels while holding
# data for only one. scales = "free_x" lets each panel show just its own
# level. Colour uses the factor as well, matching the discrete x axis.
ggplot(
  data,
  aes(
    x = factor(Stress.Level),
    y = BMI,
    colour = factor(Stress.Level)
  )
) +
  geom_jitter() +
  facet_grid(. ~ Stress.Level, scales = "free_x") +
  labs(x = "Stress.Level", colour = "Stress.Level")