Análisis exploratorio
# Pairwise correlations between all columns of phd, drawn as a full
# (both triangles) correlation plot.
# NOTE(review): cor() requires numeric columns, so this has to run before
# `male` is recoded to text further down.
phd %>%
  cor() %>%
  corrplot(type = "full")

male
# Swap the coded values of `male` for readable labels: 1 becomes "Male",
# any other non-missing value becomes "Female".
# NOTE(review): not idempotent -- running this a second time compares the
# text labels against 1 and turns every row into "Female".
phd$male <- ifelse(phd$male == 1, "Male", "Female")

# Check which labels are now present.
unique(phd[["male"]])
## [1] "Male" "Female"
# For each level of `male`: how many rows have TenYearCHD == 1 and what
# percentage of that level's rows they represent.
phd %>%
  count(male, TenYearCHD, name = "count") %>%
  group_by(male) %>%
  mutate(perc = 100 * count / sum(count)) %>%
  filter(TenYearCHD == 1)
## # A tibble: 2 × 4
## # Groups: male [2]
## male TenYearCHD count perc
## <chr> <int> <int> <dbl>
## 1 Female 1 250 12.3
## 2 Male 1 307 18.9
# Bar chart of the percentage of rows per TenYearCHD value, computed within
# each level of `male` and shown as one facet per level.
phd %>%
  count(male, TenYearCHD, name = "count") %>%
  group_by(male) %>%
  mutate(perc = 100 * count / sum(count)) %>%
  ggplot(mapping = aes(x = factor(TenYearCHD), y = perc)) +
  geom_col(mapping = aes(fill = TenYearCHD)) +
  facet_wrap(vars(male)) +
  scale_x_discrete(labels = c("No TenYearCHD", "TenYearCHD")) +
  labs(y = "% Outcome by male") +
  theme_classic() +
  # Must come after theme_classic(), which would otherwise reset these.
  theme(axis.title.x = element_blank(), legend.position = "none")

age
# Jittered scatter of TenYearCHD against age, overlaid with a binomial GLM
# fit (no confidence band).
phd %>%
  ggplot(mapping = aes(x = age, y = TenYearCHD)) +
  geom_jitter(alpha = 0.5) +
  geom_smooth(
    method = "glm",
    method.args = list(family = binomial),
    se = FALSE
  ) +
  theme_bw()
## `geom_smooth()` using formula = 'y ~ x'

# Percentage of rows with TenYearCHD == 1 at each distinct age, with a
# smoothed trend line.
# The rate is computed in a single summarise step so that ages without any
# TenYearCHD == 1 row are kept as 0%. Counting per (age, TenYearCHD) and
# then filtering on TenYearCHD == 1 drops those ages entirely, which biases
# the smoother upwards.
phd %>%
  group_by(age) %>%
  summarize(perc = 100 * sum(TenYearCHD == 1, na.rm = TRUE) / n()) %>%
  ggplot(aes(age, perc)) +
  geom_point() +
  geom_smooth() +
  theme_bw() +
  ylab("% TenYearCHD")

# Bucket age into four bands. Missing ages are handled first: case_when()
# treats an NA condition as "no match", so without this guard they would
# fall through to the catch-all branch and be labelled "61+".
phd <- phd %>%
  mutate(
    AgeGroup = case_when(
      is.na(age) ~ NA_character_,
      age < 40 ~ "18_39",
      age < 50 ~ "40_49",
      age < 61 ~ "50_60",
      TRUE ~ "61+"
    )
  )

# Percentage of rows with TenYearCHD == 1 within each age band. Plotted by
# AgeGroup rather than by single year of age, so the new column is actually
# used. Bands without any TenYearCHD == 1 row are kept as 0%.
phd %>%
  group_by(AgeGroup) %>%
  summarize(perc = 100 * sum(TenYearCHD == 1, na.rm = TRUE) / n()) %>%
  ggplot(aes(AgeGroup, perc)) +
  geom_col() +
  theme_bw() +
  ylab("% TenYearCHD")

education
# Distinct values found in the education column.
unique(phd[["education"]])
## [1] 4 2 1 3
# Stacked bars of the raw TenYearCHD values per education level, so each
# bar's height is the sum of TenYearCHD over that level's rows.
phd %>%
  ggplot(mapping = aes(x = education, y = TenYearCHD)) +
  geom_bar(stat = "identity")

# Bar chart of the percentage of rows per TenYearCHD value, computed within
# each education level and shown as one facet per level.
phd %>%
  count(education, TenYearCHD, name = "count") %>%
  group_by(education) %>%
  mutate(perc = 100 * count / sum(count)) %>%
  ggplot(mapping = aes(x = factor(TenYearCHD), y = perc)) +
  geom_col(mapping = aes(fill = TenYearCHD)) +
  facet_wrap(vars(education)) +
  scale_x_discrete(labels = c("No TenYearCHD", "TenYearCHD")) +
  labs(y = "% Outcome by education") +
  theme_classic() +
  # Must come after theme_classic(), which would otherwise reset these.
  theme(axis.title.x = element_blank(), legend.position = "none")

currentSmoker
# Stacked bars of the raw TenYearCHD values per currentSmoker value, so each
# bar's height is the sum of TenYearCHD over that value's rows.
phd %>%
  ggplot(mapping = aes(x = currentSmoker, y = TenYearCHD)) +
  geom_bar(stat = "identity")

# Bar chart of the percentage of rows per TenYearCHD value, computed within
# each currentSmoker value and shown as one facet per value.
phd %>%
  count(currentSmoker, TenYearCHD, name = "count") %>%
  group_by(currentSmoker) %>%
  mutate(perc = 100 * count / sum(count)) %>%
  ggplot(mapping = aes(x = factor(TenYearCHD), y = perc)) +
  geom_col(mapping = aes(fill = TenYearCHD)) +
  facet_wrap(vars(currentSmoker)) +
  scale_x_discrete(labels = c("No TenYearCHD", "TenYearCHD")) +
  labs(y = "% Outcome by currentSmoker") +
  theme_classic() +
  # Must come after theme_classic(), which would otherwise reset these.
  theme(axis.title.x = element_blank(), legend.position = "none")

cigsPerDay
# Jittered scatter of TenYearCHD against cigsPerDay, overlaid with a
# binomial GLM fit (no confidence band).
phd %>%
  ggplot(mapping = aes(x = cigsPerDay, y = TenYearCHD)) +
  geom_jitter(alpha = 0.5) +
  geom_smooth(
    method = "glm",
    method.args = list(family = binomial),
    se = FALSE
  ) +
  theme_bw()

BPMeds
# Distinct values found in the BPMeds column.
unique(phd[["BPMeds"]])
## [1] 0 1
# Bar chart of the percentage of rows per TenYearCHD value, computed within
# each BPMeds value and shown as one facet per value.
phd %>%
  count(BPMeds, TenYearCHD, name = "count") %>%
  group_by(BPMeds) %>%
  mutate(perc = 100 * count / sum(count)) %>%
  ggplot(mapping = aes(x = factor(TenYearCHD), y = perc)) +
  geom_col(mapping = aes(fill = TenYearCHD)) +
  facet_wrap(vars(BPMeds)) +
  scale_x_discrete(labels = c("No TenYearCHD", "TenYearCHD")) +
  labs(y = "% Outcome by BPMeds") +
  theme_classic() +
  # Must come after theme_classic(), which would otherwise reset these.
  theme(axis.title.x = element_blank(), legend.position = "none")

prevalentStroke
# Bar chart of the percentage of rows per TenYearCHD value, computed within
# each prevalentStroke value and shown as one facet per value.
phd %>%
  count(prevalentStroke, TenYearCHD, name = "count") %>%
  group_by(prevalentStroke) %>%
  mutate(perc = 100 * count / sum(count)) %>%
  ggplot(mapping = aes(x = factor(TenYearCHD), y = perc)) +
  geom_col(mapping = aes(fill = TenYearCHD)) +
  facet_wrap(vars(prevalentStroke)) +
  scale_x_discrete(labels = c("No TenYearCHD", "TenYearCHD")) +
  labs(y = "% Outcome by prevalentStroke") +
  theme_classic() +
  # Must come after theme_classic(), which would otherwise reset these.
  theme(axis.title.x = element_blank(), legend.position = "none")

prevalentHyp
# Bar chart of the percentage of rows per TenYearCHD value, computed within
# each prevalentHyp value and shown as one facet per value.
phd %>%
  count(prevalentHyp, TenYearCHD, name = "count") %>%
  group_by(prevalentHyp) %>%
  mutate(perc = 100 * count / sum(count)) %>%
  ggplot(mapping = aes(x = factor(TenYearCHD), y = perc)) +
  geom_col(mapping = aes(fill = TenYearCHD)) +
  facet_wrap(vars(prevalentHyp)) +
  scale_x_discrete(labels = c("No TenYearCHD", "TenYearCHD")) +
  labs(y = "% Outcome by prevalentHyp") +
  theme_classic() +
  # Must come after theme_classic(), which would otherwise reset these.
  theme(axis.title.x = element_blank(), legend.position = "none")

diabetes
# Bar chart of the percentage of rows per TenYearCHD value, computed within
# each diabetes value and shown as one facet per value.
phd %>%
  count(diabetes, TenYearCHD, name = "count") %>%
  group_by(diabetes) %>%
  mutate(perc = 100 * count / sum(count)) %>%
  ggplot(mapping = aes(x = factor(TenYearCHD), y = perc)) +
  geom_col(mapping = aes(fill = TenYearCHD)) +
  facet_wrap(vars(diabetes)) +
  scale_x_discrete(labels = c("No TenYearCHD", "TenYearCHD")) +
  labs(y = "% Outcome by diabetes") +
  theme_classic() +
  # Must come after theme_classic(), which would otherwise reset these.
  theme(axis.title.x = element_blank(), legend.position = "none")

totChol
# Percentage of rows with TenYearCHD == 1 at each distinct totChol value,
# with a smoothed trend line.
# The rate is computed in a single summarise step so that values without any
# TenYearCHD == 1 row are kept as 0%. Counting per (totChol, TenYearCHD) and
# then filtering on TenYearCHD == 1 drops them, biasing the smoother upwards.
# NOTE(review): grouping is by exact value, so groups may be small and the
# percentages noisy -- consider binning totChol.
phd %>%
  group_by(totChol) %>%
  summarize(perc = 100 * sum(TenYearCHD == 1, na.rm = TRUE) / n()) %>%
  ggplot(aes(totChol, perc)) +
  geom_point() +
  geom_smooth() +
  theme_bw() +
  ylab("% TenYearCHD")

sysBP
# Percentage of rows with TenYearCHD == 1 at each distinct sysBP value,
# with a smoothed trend line.
# The rate is computed in a single summarise step so that values without any
# TenYearCHD == 1 row are kept as 0%. Counting per (sysBP, TenYearCHD) and
# then filtering on TenYearCHD == 1 drops them, biasing the smoother upwards.
# NOTE(review): grouping is by exact value, so groups may be small and the
# percentages noisy -- consider binning sysBP.
phd %>%
  group_by(sysBP) %>%
  summarize(perc = 100 * sum(TenYearCHD == 1, na.rm = TRUE) / n()) %>%
  ggplot(aes(sysBP, perc)) +
  geom_point() +
  geom_smooth() +
  theme_bw() +
  ylab("% TenYearCHD")

diaBP
# Percentage of rows with TenYearCHD == 1 at each distinct diaBP value,
# with a smoothed trend line.
# The rate is computed in a single summarise step so that values without any
# TenYearCHD == 1 row are kept as 0%. Counting per (diaBP, TenYearCHD) and
# then filtering on TenYearCHD == 1 drops them, biasing the smoother upwards.
# NOTE(review): grouping is by exact value, so groups may be small and the
# percentages noisy -- consider binning diaBP.
phd %>%
  group_by(diaBP) %>%
  summarize(perc = 100 * sum(TenYearCHD == 1, na.rm = TRUE) / n()) %>%
  ggplot(aes(diaBP, perc)) +
  geom_point() +
  geom_smooth() +
  theme_bw() +
  ylab("% TenYearCHD")

BMI
# Percentage of rows with TenYearCHD == 1 at each distinct BMI value,
# with a smoothed trend line.
# The rate is computed in a single summarise step so that values without any
# TenYearCHD == 1 row are kept as 0%. Counting per (BMI, TenYearCHD) and
# then filtering on TenYearCHD == 1 drops them, biasing the smoother upwards.
# NOTE(review): grouping is by exact value, so groups may be small and the
# percentages noisy -- consider binning BMI.
phd %>%
  group_by(BMI) %>%
  summarize(perc = 100 * sum(TenYearCHD == 1, na.rm = TRUE) / n()) %>%
  ggplot(aes(BMI, perc)) +
  geom_point() +
  geom_smooth() +
  theme_bw() +
  ylab("% TenYearCHD")

heartRate
# Percentage of rows with TenYearCHD == 1 at each distinct heartRate value,
# with a smoothed trend line.
# The rate is computed in a single summarise step so that values without any
# TenYearCHD == 1 row are kept as 0%. Counting per (heartRate, TenYearCHD)
# and then filtering on TenYearCHD == 1 drops them, biasing the smoother
# upwards.
# NOTE(review): grouping is by exact value, so groups may be small and the
# percentages noisy -- consider binning heartRate.
phd %>%
  group_by(heartRate) %>%
  summarize(perc = 100 * sum(TenYearCHD == 1, na.rm = TRUE) / n()) %>%
  ggplot(aes(heartRate, perc)) +
  geom_point() +
  geom_smooth() +
  theme_bw() +
  ylab("% TenYearCHD")

glucose
# Percentage of rows with TenYearCHD == 1 at each distinct glucose value,
# with a smoothed trend line.
# The rate is computed in a single summarise step so that values without any
# TenYearCHD == 1 row are kept as 0%. Counting per (glucose, TenYearCHD) and
# then filtering on TenYearCHD == 1 drops them, biasing the smoother upwards.
# NOTE(review): grouping is by exact value, so groups may be small and the
# percentages noisy -- consider binning glucose.
phd %>%
  group_by(glucose) %>%
  summarize(perc = 100 * sum(TenYearCHD == 1, na.rm = TRUE) / n()) %>%
  ggplot(aes(glucose, perc)) +
  geom_point() +
  geom_smooth() +
  theme_bw() +
  ylab("% TenYearCHD")