# Simulated data load (swap in your real file here if needed).
# Eight observations; smoker status alternates "yes"/"no" row by row.
df <- data.frame(
  chol = c(210, 190, 250, 230, 220, 200, 215, 245),
  heart_rate = c(72, 75, 80, 78, 74, 76, 79, 81),
  current_smoker = rep(c("yes", "no"), times = 4)
)

# Column-wise descriptive summary of the data frame.
summary(df)
## chol heart_rate current_smoker
## Min. :190.0 Min. :72.00 Length:8
## 1st Qu.:207.5 1st Qu.:74.75 Class :character
## Median :217.5 Median :77.00 Mode :character
## Mean :220.0 Mean :76.88
## 3rd Qu.:233.8 3rd Qu.:79.25
## Max. :250.0 Max. :81.00
# Two-sided Welch t-test comparing mean cholesterol of smokers ("yes", x)
# against non-smokers ("no", y). The options below are t.test()'s defaults,
# written out so the test being run is explicit.
t.test(
  df$chol[df$current_smoker == "yes"],
  df$chol[df$current_smoker == "no"],
  alternative = "two.sided",
  var.equal = FALSE,
  conf.level = 0.95
)
##
## Welch Two Sample t-test
##
## data: df$chol[df$current_smoker == "yes"] and df$chol[df$current_smoker == "no"]
## t = 0.47936, df = 5.3769, p-value = 0.6505
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -31.88557 46.88557
## sample estimates:
## mean of x mean of y
## 223.75 216.25
# One-sided Welch t-test: is mean heart rate greater in smokers ("yes", x)
# than in non-smokers ("no", y)? var.equal and conf.level are t.test()'s
# defaults, written out so the test being run is explicit.
t.test(
  df$heart_rate[df$current_smoker == "yes"],
  df$heart_rate[df$current_smoker == "no"],
  alternative = "greater",
  var.equal = FALSE,
  conf.level = 0.95
)
##
## Welch Two Sample t-test
##
## data: df$heart_rate[df$current_smoker == "yes"] and df$heart_rate[df$current_smoker == "no"]
## t = -0.53401, df = 5.3075, p-value = 0.6925
## alternative hypothesis: true difference in means is greater than 0
## 95 percent confidence interval:
## -5.90705 Inf
## sample estimates:
## mean of x mean of y
## 76.25 77.50
# Boxplot of cholesterol by smoking status, one filled box per group.
# Calls are namespace-qualified so this chunk does not depend on
# library(ggplot2) having been attached earlier in the session.
ggplot2::ggplot(
  df,
  ggplot2::aes(x = current_smoker, y = chol, fill = current_smoker)
) +
  ggplot2::geom_boxplot() +
  ggplot2::labs(
    title = "Colesterol según hábito de fumar",
    x = "Fumador",
    y = "Colesterol"
  )
# Boxplot of heart rate by smoking status, one filled box per group.
# Calls are namespace-qualified so this chunk does not depend on
# library(ggplot2) having been attached earlier in the session.
ggplot2::ggplot(
  df,
  ggplot2::aes(x = current_smoker, y = heart_rate, fill = current_smoker)
) +
  ggplot2::geom_boxplot() +
  ggplot2::labs(
    title = "Frecuencia cardíaca según hábito de fumar",
    x = "Fumador",
    y = "Frecuencia cardíaca"
  )
import seaborn as sns
import matplotlib.pyplot as plt
# Example data in Python: the "tips" example dataset loaded through seaborn.
tips = sns.load_dataset("tips")
# Histogram of the total_bill column, titled on the axes it was drawn on.
sns.histplot(x=tips["total_bill"])
plt.gca().set_title("Distribución de Total Bill")
plt.show()
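Con base en los resultados estadísticos (p = 0.6505 para colesterol y p = 0.6925 para frecuencia cardíaca, ambos mayores que 0.05) y el análisis visual, se concluye que no hay evidencia estadísticamente significativa de diferencias entre los grupos fumadores y no fumadores en cuanto a colesterol y frecuencia cardíaca. Las diferencias observadas en las medias muestrales (223.75 vs. 216.25 en colesterol; 76.25 vs. 77.50 en frecuencia cardíaca) son pequeñas y, con solo cuatro observaciones por grupo, compatibles con la variación aleatoria.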