# Install MixAll only when it is missing. requireNamespace() tests whether the
# package can be loaded without attaching it (require() would attach it as a
# side effect and only warn on failure); attaching is done by the library()
# call further down in this script.
if (!requireNamespace("MixAll", quietly = TRUE)) {
  install.packages("MixAll")
}
## Loading required package: MixAll
## Loading required package: rtkore
## Loading required package: Rcpp
## 
## Attaching package: 'rtkore'
## The following object is masked from 'package:Rcpp':
## 
##     LdFlags
# Attach the two packages used by the rest of the script.
library(MixAll)
library(ggplot2)
# Load the three HeartDisease tables with a single data() call (presumably
# shipped with MixAll -- confirm), then preview the first six rows of the
# categorical table.
data(HeartDisease.cat, HeartDisease.cont, HeartDisease.target)
head(HeartDisease.cat, n = 6L)
##   sex cp fbs restecg exang slope ca thal
## 1   1  1   1       2     0     3  0    6
## 2   1  4   0       2     1     2  3    3
## 3   1  4   0       2     1     2  2    7
## 4   1  3   0       0     0     3  0    3
## 5   0  2   0       2     0     1  0    3
## 6   1  2   0       0     0     1  0    3
# Build one analysis table by placing the categorical, continuous and target
# columns side by side; rows are paired by position and column names are kept
# exactly as they appear in the inputs.
heart_data <- data.frame(
  HeartDisease.cat,
  HeartDisease.cont,
  HeartDisease.target,
  check.names = FALSE
)

# Heart disease remains one of the most prevalent and lethal health challenges across the globe. Its onset and progression are influenced by a myriad of factors, ranging from genetic predispositions to lifestyle choices. While some of these factors can be controlled, others are inherent, such as age and gender. To devise effective prevention and treatment strategies, it's imperative to understand the interplay of these factors and their collective impact on heart health. In our exploration, we delve into the Cleveland Heart Disease dataset to uncover patterns and relationships that might shed light on the intricacies of heart disease susceptibility.

# Visualization 1: histogram of age (30 bins) with one bar per level of `num`
# drawn side by side. No faceting is applied.
p1 <- ggplot(
  data = heart_data,
  mapping = aes(x = age, fill = as.factor(num))
)
p1 <- p1 + geom_histogram(bins = 30, position = "dodge")
p1 <- p1 +
  ggtitle("Distribution of Age vs. Presence of Heart Disease") +
  xlab("Age") +
  ylab("Count") +
  labs(fill = "Presence of Heart Disease")
p1 <- p1 + theme_minimal()
print(p1)

# Our initial focus was on the age distribution of individuals with and without heart disease. The histogram revealed that while heart disease can occur at any age, there is a discernible increase in cases from the late 50s onwards. This could suggest that as the body ages, it becomes more susceptible to heart ailments due to reduced physiological resilience or the accumulation of other risk factors over time.

# Visualization 2: scatter plot of resting blood pressure (x) against
# cholesterol (y); semi-transparent points are coloured by the level of `num`.
p2 <- ggplot(
  data = heart_data,
  mapping = aes(x = trestbps, y = chol, color = as.factor(num))
)
p2 <- p2 + geom_point(alpha = 0.5)
p2 <- p2 +
  ggtitle("Resting Blood Pressure vs. Cholesterol by Heart Disease Presence") +
  xlab("Resting Blood Pressure") +
  ylab("Cholesterol") +
  labs(color = "Presence of Heart Disease")
p2 <- p2 + theme_minimal()
print(p2)

# Transitioning from demographic to clinical measurements, we probed the relationship between resting blood pressure and cholesterol, two well-known indicators of heart health. The scatter plot indicated that individuals with both high cholesterol and elevated blood pressure seem particularly at risk, underscoring the peril of multiple simultaneous risk factors.

# Visualization 3: bar chart counting rows per chest pain type (`cp`), with
# the bars for each level of `num` placed side by side.
p3 <- ggplot(
  data = heart_data,
  mapping = aes(x = as.factor(cp), fill = as.factor(num))
)
p3 <- p3 + geom_bar(position = "dodge")
p3 <- p3 +
  ggtitle("Distribution of Chest Pain Type by Heart Disease Presence") +
  xlab("Chest Pain Type") +
  ylab("Count") +
  labs(fill = "Presence of Heart Disease")
p3 <- p3 + theme_minimal()
print(p3)

# Chest pain, a common symptom associated with heart ailments, was our next area of exploration. The bar chart demonstrated that the asymptomatic category of the chest pain variable (type 4) was more prevalent among those diagnosed with heart disease. This emphasizes the importance of regular check-ups, as the absence of pain does not necessarily denote good heart health.

# Visualization 4: scatter plot of age (x) against maximum heart rate achieved
# (`thalach`, y); semi-transparent points are coloured by the level of `num`.
p4 <- ggplot(
  data = heart_data,
  mapping = aes(x = age, y = thalach, color = as.factor(num))
)
p4 <- p4 + geom_point(alpha = 0.5)
p4 <- p4 +
  ggtitle("Maximum Heart Rate Achieved vs. Age by Heart Disease Presence") +
  xlab("Age") +
  ylab("Max Heart Rate Achieved") +
  labs(color = "Presence of Heart Disease")
p4 <- p4 + theme_minimal()
print(p4)

# Age and maximum heart rate achieved during stress tests are often utilized to gauge cardiovascular fitness. The scatter plot showcased that younger individuals typically achieve higher heart rates. However, middle-aged individuals with reduced maximum heart rates might be at a heightened risk, suggesting potential cardiovascular inefficiencies.

# Visualization 5: boxplots of `oldpeak` for each `slope` value, with a
# separately coloured box per level of `num` within every slope group.
p5 <- ggplot(
  data = heart_data,
  mapping = aes(x = as.factor(slope), y = oldpeak, color = as.factor(num))
)
p5 <- p5 + geom_boxplot()
p5 <- p5 +
  ggtitle("Oldpeak Distribution Across Slope Types by Heart Disease Presence") +
  xlab("Slope Type") +
  ylab("Oldpeak") +
  labs(color = "Presence of Heart Disease")
p5 <- p5 + theme_minimal()
print(p5)

# The final visualization focused on exercise-induced metrics, notably the ST depression (oldpeak) triggered by exercise relative to rest. The boxplot highlighted that a downsloping ST segment (slope type 3) often corresponds with higher oldpeak values, indicative of potential cardiac stress during physical exertion.

# Through this comprehensive exploration, we unearthed several key insights that accentuate the multifaceted nature of heart disease risk. While certain factors like age are beyond our control, others, especially clinical measurements, can be monitored and managed with timely medical intervention. Regular health check-ups, even in the absence of overt symptoms, are crucial. Understanding these patterns and relationships is not just an academic exercise; it's a step towards personalized medical care, where interventions are tailored based on individual risk profiles. As we continue to collect and analyze data, our hope is to further refine our understanding, paving the way for more effective prevention and treatment strategies for heart disease.