library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ forcats 1.0.0 ✔ readr 2.1.5
## ✔ ggplot2 3.5.1 ✔ stringr 1.5.1
## ✔ lubridate 1.9.3 ✔ tibble 3.2.1
## ✔ purrr 1.0.2 ✔ tidyr 1.3.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(naniar)
library(readr)
library(ggplot2)
library(broom)
library(GGally)
## Registered S3 method overwritten by 'GGally':
## method from
## +.gg ggplot2
library(tidyr)
library(car)
## Loading required package: carData
##
## Attaching package: 'car'
##
## The following object is masked from 'package:purrr':
##
## some
##
## The following object is masked from 'package:dplyr':
##
## recode
library(Hmisc)
##
## Attaching package: 'Hmisc'
##
## The following objects are masked from 'package:dplyr':
##
## src, summarize
##
## The following objects are masked from 'package:base':
##
## format.pval, units
library(mctest)
library(gmodels)
library(skimr)
##
## Attaching package: 'skimr'
##
## The following object is masked from 'package:naniar':
##
## n_complete
library(corrplot)
## corrplot 0.95 loaded
library(readxl)
library(openxlsx)
my_data <- read_excel("/Users/anna/Downloads/DataExam3Fall2024.xls")
View(my_data)
my_data$alb <- na_if(my_data$alb, 99)
my_data$calc <- na_if(my_data$calc, 99)
my_data$uric <- na_if(my_data$uric, 99)
my_data$height <- na_if(my_data$height, 999)
my_data$weight <- na_if(my_data$weight, 999)
pct_complete(my_data)
## [1] 99.40898
summary(my_data)
## id age height weight pill
## Min. : 3.0 Min. :19.00 Min. :57.00 Min. : 94.0 Min. :1.0
## 1st Qu.: 563.2 1st Qu.:25.00 1st Qu.:63.00 1st Qu.:118.0 1st Qu.:1.0
## Median :1742.5 Median :32.00 Median :64.50 Median :130.0 Median :1.5
## Mean :1599.0 Mean :33.82 Mean :64.51 Mean :131.7 Mean :1.5
## 3rd Qu.:2454.5 3rd Qu.:42.00 3rd Qu.:66.00 3rd Qu.:140.0 3rd Qu.:2.0
## Max. :3519.0 Max. :55.00 Max. :71.00 Max. :215.0 Max. :2.0
## NA's :2 NA's :2
## chol alb calc uric
## Min. : 50.0 Min. :3.200 Min. : 8.600 Min. :2.200
## 1st Qu.:200.0 1st Qu.:3.900 1st Qu.: 9.600 1st Qu.:4.000
## Median :235.0 Median :4.100 Median :10.000 Median :4.600
## Mean :237.1 Mean :4.111 Mean : 9.962 Mean :4.771
## 3rd Qu.:260.0 3rd Qu.:4.400 3rd Qu.:10.300 3rd Qu.:5.300
## Max. :600.0 Max. :5.000 Max. :11.100 Max. :9.900
## NA's :2 NA's :3 NA's :1
dim(my_data)
## [1] 188 9
my_data_clear <- na.omit(my_data)
View(my_data_clear)
dim(my_data_clear)
## [1] 181 9
my_data_clear$age <- as.numeric(my_data_clear$age)
my_data_clear$age_groups <- ifelse(my_data_clear$age < 30, 1,
ifelse(my_data_clear$age >= 40 , 3 , 2))
my_data_clear$age_groups <- as.factor(my_data_clear$age_groups)
View(my_data_clear)
my_data_clear$pill <- as.factor(my_data_clear$pill)
table(my_data_clear$pill)
##
## 1 2
## 89 92
#Pie chart for Pill taking distribution
pie(table(my_data_clear$pill),
main = "Pill Distribution",
col = c("pink", "lightgreen"))
table(my_data_clear$age_groups)
##
## 1 2 3
## 73 52 56
#Pie chart for Age distribution
pie(table(my_data_clear$age_groups),
main = "Age Distribution",
col = c("blue", "lightgreen", "red"))
ggplot(my_data_clear, aes(y = height)) +
geom_boxplot(fill = "skyblue", color = "black") +
ylim(55,75)+
labs(title = "Boxplot of Heights",
y = "Height",
x = "") +
theme_minimal()
ggplot(my_data_clear, aes(x = height)) +
geom_histogram(binwidth = 2, fill = "lightgreen", color = "black", alpha = 0.9) +
labs(title = "Distribution of Height", x = "Height", y = "Frequency") +
theme_minimal() +
theme(plot.title = element_text(hjust = 0.5, size = 16, face = "bold"),
axis.title = element_text(size = 12))
#summary
summary(my_data_clear$height)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 57.00 63.00 64.00 64.49 66.00 71.00
stats <- boxplot.stats(my_data_clear$height)
# Outliers
outliers <- stats$out
# Print outliers
print(paste("Outliers:", paste(outliers, collapse = ", ")))
## [1] "Outliers: 57, 58, 71, 71"
sd_height <- sd(my_data_clear$height)
print(paste("Standard Deviation:", round(sd_height, 2)))
## [1] "Standard Deviation: 2.49"
ggplot(my_data_clear, aes(y = weight)) +
geom_boxplot(fill = "skyblue", color = "black") +
labs(title = "Boxplot of Weights",
y = "Weight",
x = "") +
theme_minimal()
ggplot(my_data_clear, aes(x = weight)) +
geom_histogram(binwidth = 10, fill = "lightgreen", color = "black", alpha = 0.9) +
labs(title = "Distribution of Weight", x = "Weight", y = "Frequency") +
theme_minimal() +
theme(plot.title = element_text(hjust = 0.5, size = 16, face = "bold"),
axis.title = element_text(size = 12))
#summary
summary(my_data_clear$weight)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 94.0 118.0 129.0 131.2 140.0 215.0
stats2 <- boxplot.stats(my_data_clear$weight)
# Outliers
outliers2 <- stats2$out
# Print outliers
print(paste("Outliers:", paste(outliers2, collapse = ", ")))
## [1] "Outliers: 175, 180, 203, 195, 215, 180, 190"
sd_weight <- sd(my_data_clear$weight)
print(paste("Standard Deviation:", round(sd_weight, 2)))
## [1] "Standard Deviation: 20.49"
ggplot(my_data_clear, aes(y = chol)) +
geom_boxplot(fill = "skyblue", color = "black") +
labs(title = "Boxplot of Cholesterol",
y = "Cholesterol",
x = "") +
theme_minimal()
ggplot(my_data_clear, aes(x = chol)) +
geom_histogram(binwidth = 20, fill = "lightgreen", color = "black", alpha = 0.9) +
labs(title = "Distribution of Cholesterol", x = "Cholesterol", y = "Frequency") +
theme_minimal() +
theme(plot.title = element_text(hjust = 0.5, size = 16, face = "bold"),
axis.title = element_text(size = 12))
#summary
summary(my_data_clear$chol)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 50.0 200.0 235.0 234.8 260.0 390.0
stats3 <- boxplot.stats(my_data_clear$chol)
# Outliers
outliers3 <- stats3$out
# Print outliers
print(paste("Outliers:", paste(outliers3, collapse = ", ")))
## [1] "Outliers: 50, 390"
sd_chol <- sd(my_data_clear$chol)
print(paste("Standard Deviation:", round(sd_chol, 2)))
## [1] "Standard Deviation: 44.81"
ggplot(my_data_clear, aes(y = alb)) +
geom_boxplot(fill = "skyblue", color = "black") +
labs(title = "Boxplot of Albumin",
y = "Albumin",
x = "") +
theme_minimal()
ggplot(my_data_clear, aes(x = alb)) +
geom_histogram(binwidth = 0.2, fill = "lightgreen", color = "black", alpha = 0.9) +
labs(title = "Distribution of Albumin", x = "Albumin", y = "Frequency") +
theme_minimal() +
theme(plot.title = element_text(hjust = 0.5, size = 16, face = "bold"),
axis.title = element_text(size = 12))
#summary
summary(my_data_clear$alb)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 3.200 3.900 4.100 4.119 4.400 5.000
stats4 <- boxplot.stats(my_data_clear$alb)
# Outliers
outliers4 <- stats4$out
# Print outliers
print(paste("Outliers:", paste(outliers4, collapse = ", ")))
## [1] "Outliers: "
sd_alb <- sd(my_data_clear$alb)
print(paste("Standard Deviation:", round(sd_alb, 2)))
## [1] "Standard Deviation: 0.36"
ggplot(my_data_clear, aes(y = calc)) +
geom_boxplot(fill = "skyblue", color = "black") +
labs(title = "Boxplot of Calcium",
y = "Calcium",
x = "") +
theme_minimal()
ggplot(my_data_clear, aes(x = calc)) +
geom_histogram(binwidth = 0.2, fill = "lightgreen", color = "black", alpha = 0.9) +
labs(title = "Distribution of Calcium", x = "Calcium", y = "Frequency") +
theme_minimal() +
theme(plot.title = element_text(hjust = 0.5, size = 16, face = "bold"),
axis.title = element_text(size = 12))
#summary
summary(my_data_clear$calc)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 8.800 9.600 9.900 9.966 10.300 11.100
stats5 <- boxplot.stats(my_data_clear$calc)
# Outliers
outliers5 <- stats5$out
# Print outliers
print(paste("Outliers:", paste(outliers5, collapse = ", ")))
## [1] "Outliers: "
sd_calc <- sd(my_data_clear$calc)
print(paste("Standard Deviation:", round(sd_calc, 2)))
## [1] "Standard Deviation: 0.47"
ggplot(my_data_clear, aes(y = uric)) +
geom_boxplot(fill = "skyblue", color = "black") +
labs(title = "Boxplot of Uric acid",
y = "Uric acid",
x = "") +
theme_minimal()
ggplot(my_data_clear, aes(x = uric)) +
geom_histogram(binwidth = 0.5, fill = "lightgreen", color = "black", alpha = 0.9) +
labs(title = "Distribution of Uric acid", x = "Uric acid", y = "Frequency") +
theme_minimal() +
theme(plot.title = element_text(hjust = 0.5, size = 16, face = "bold"),
axis.title = element_text(size = 12))
#summary
summary(my_data_clear$uric)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 2.200 4.000 4.600 4.746 5.300 9.900
stats6 <- boxplot.stats(my_data_clear$uric)
# Outliers
outliers6 <- stats6$out
# Print outliers
print(paste("Outliers:", paste(outliers6, collapse = ", ")))
## [1] "Outliers: 8.3, 7.5, 8.4, 9.9, 8.5, 7.8"
sd_uric <- sd(my_data_clear$uric)
print(paste("Standard Deviation:", round(sd_uric, 2)))
## [1] "Standard Deviation: 1.13"
my_data_clear$bmi <- round(my_data_clear$weight * 703 / (my_data_clear$height^2), 1)
my_data_clear$bmi_groups <- ifelse(my_data_clear$bmi < 18.5, "Underweight",
ifelse(my_data_clear$bmi >= 30, "Overweight" , "Normal"))
my_data_clear$bmi_groups <- as.factor(my_data_clear$bmi_groups)
table(my_data_clear$bmi_groups)
##
## Normal Overweight Underweight
## 166 6 9
ggplot(my_data_clear, aes(y = bmi)) +
geom_boxplot(fill = "skyblue", color = "black") +
labs(title = "Boxplot of BMI",
y = "BMI",
x = "") +
theme_minimal()
ggplot(my_data_clear, aes(x = bmi)) +
geom_histogram(binwidth = 2, fill = "lightgreen", color = "black", alpha = 0.9) +
labs(title = "Distribution of BMI", x = "BMI", y = "Frequency") +
theme_minimal() +
theme(plot.title = element_text(hjust = 0.5, size = 16, face = "bold"),
axis.title = element_text(size = 12))
#summary
summary(my_data_clear$bmi)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 17.70 20.00 21.60 22.15 23.20 34.50
stats7 <- boxplot.stats(my_data_clear$bmi)
# Outliers
outliers7 <- stats7$out
# Print outliers
print(paste("Outliers:", paste(outliers7, collapse = ", ")))
## [1] "Outliers: 28.3, 30.9, 30.9, 28.3, 28.3, 34.5, 32.7, 28.3, 28.2, 31.6, 33.2"
sd_bmi <- sd(my_data_clear$bmi)
print(paste("Standard Deviation:", round(sd_bmi, 2)))
## [1] "Standard Deviation: 3.05"
222222222222222222________##################
reg1 <- lm(chol ~ calc, data=my_data_clear)
summary(reg1)
##
## Call:
## lm(formula = chol ~ calc, data = my_data_clear)
##
## Residuals:
## Min 1Q Median 3Q Max
## -175.797 -28.656 -2.515 22.414 161.738
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -10.818 68.320 -0.158 0.874368
## calc 24.647 6.848 3.599 0.000413 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 43.39 on 179 degrees of freedom
## Multiple R-squared: 0.06749, Adjusted R-squared: 0.06228
## F-statistic: 12.95 on 1 and 179 DF, p-value: 0.0004129
reg2 <- lm(chol ~ uric, data=my_data_clear)
summary(reg2)
##
## Call:
## lm(formula = chol ~ uric, data = my_data_clear)
##
## Residuals:
## Min 1Q Median 3Q Max
## -164.337 -33.039 1.034 28.726 146.343
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 179.153 13.866 12.920 < 2e-16 ***
## uric 11.728 2.843 4.125 5.68e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 42.94 on 179 degrees of freedom
## Multiple R-squared: 0.0868, Adjusted R-squared: 0.0817
## F-statistic: 17.01 on 1 and 179 DF, p-value: 5.676e-05
reg3 <- lm(chol ~ alb, data=my_data_clear)
summary(reg3)
##
## Call:
## lm(formula = chol ~ alb, data = my_data_clear)
##
## Residuals:
## Min 1Q Median 3Q Max
## -181.822 -34.326 0.055 25.674 150.674
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 196.174 38.511 5.094 8.84e-07 ***
## alb 9.381 9.315 1.007 0.315
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 44.81 on 179 degrees of freedom
## Multiple R-squared: 0.005634, Adjusted R-squared: 7.886e-05
## F-statistic: 1.014 on 1 and 179 DF, p-value: 0.3153
reg4 <- lm(chol ~ bmi, data=my_data_clear)
summary(reg4)
##
## Call:
## lm(formula = chol ~ bmi, data = my_data_clear)
##
## Residuals:
## Min 1Q Median 3Q Max
## -186.199 -32.990 1.155 25.727 154.229
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 187.422 24.267 7.723 7.76e-13 ***
## bmi 2.139 1.085 1.971 0.0502 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 44.45 on 179 degrees of freedom
## Multiple R-squared: 0.02125, Adjusted R-squared: 0.01578
## F-statistic: 3.886 on 1 and 179 DF, p-value: 0.05024
my_data_clear$age_groups <- as.factor(my_data_clear$age_groups)
reg5 <- lm(chol ~ age_groups, data=my_data_clear)
summary(reg5)
##
## Call:
## lm(formula = chol ~ age_groups, data = my_data_clear)
##
## Residuals:
## Min 1Q Median 3Q Max
## -170.384 -26.589 -0.384 28.411 128.411
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 220.384 4.823 45.698 < 2e-16 ***
## age_groups2 5.847 7.477 0.782 0.435
## age_groups3 41.206 7.319 5.630 6.92e-08 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 41.2 on 178 degrees of freedom
## Multiple R-squared: 0.1638, Adjusted R-squared: 0.1544
## F-statistic: 17.43 on 2 and 178 DF, p-value: 1.223e-07
reg6 <- lm(chol ~ pill, data=my_data_clear)
summary(reg6)
##
## Call:
## lm(formula = chol ~ pill, data = my_data_clear)
##
## Residuals:
## Min 1Q Median 3Q Max
## -187.641 -32.641 -1.888 25.359 152.359
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 231.888 4.753 48.788 <2e-16 ***
## pill2 5.754 6.667 0.863 0.389
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 44.84 on 179 degrees of freedom
## Multiple R-squared: 0.004144, Adjusted R-squared: -0.001419
## F-statistic: 0.7449 on 1 and 179 DF, p-value: 0.3893
reg_f <- lm(chol ~ calc + uric + age_groups, data=my_data_clear)
summary(reg_f)
##
## Call:
## lm(formula = chol ~ calc + uric + age_groups, data = my_data_clear)
##
## Residuals:
## Min 1Q Median 3Q Max
## -151.299 -24.637 -0.645 23.571 132.992
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -14.629 62.786 -0.233 0.81604
## calc 20.369 6.329 3.218 0.00154 **
## uric 6.795 2.761 2.461 0.01480 *
## age_groups2 9.922 7.201 1.378 0.16997
## age_groups3 36.685 7.169 5.117 8.06e-07 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 39.35 on 176 degrees of freedom
## Multiple R-squared: 0.246, Adjusted R-squared: 0.2289
## F-statistic: 14.36 on 4 and 176 DF, p-value: 3.649e-10
vif(reg_f)
## GVIF Df GVIF^(1/(2*Df))
## calc 1.038918 1 1.019273
## uric 1.122662 1 1.059557
## age_groups 1.106847 2 1.025704
confint(reg_f)
## 2.5 % 97.5 %
## (Intercept) -138.539947 109.28259
## calc 7.877583 32.86033
## uric 1.346797 12.24336
## age_groups2 -4.288745 24.13314
## age_groups3 22.536945 50.83236
plot(reg_f, 1)
plot(reg_f, 2)
ggplot(my_data_clear, aes(x = calc, y = chol, color = as.factor(pill))) +
geom_point(alpha = 0.6) +
geom_smooth(method = "lm", se = FALSE, formula = y ~ x) +
labs(
x = "Calcium Intake (calc)",
y = "Cholesterol Levels (chol)",
color = "Pill Usage"
) +
theme_minimal()
int_model <- lm(chol ~ calc * pill + uric + age_groups , data=my_data_clear)
summary(int_model)
##
## Call:
## lm(formula = chol ~ calc * pill + uric + age_groups, data = my_data_clear)
##
## Residuals:
## Min 1Q Median 3Q Max
## -156.414 -26.868 -0.779 23.566 128.207
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -82.585 86.514 -0.955 0.34111
## calc 26.886 8.669 3.101 0.00225 **
## pill2 135.262 125.962 1.074 0.28438
## uric 6.788 2.760 2.459 0.01491 *
## age_groups2 9.211 7.234 1.273 0.20458
## age_groups3 37.019 7.195 5.145 7.15e-07 ***
## calc:pill2 -12.993 12.630 -1.029 0.30503
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 39.34 on 174 degrees of freedom
## Multiple R-squared: 0.2548, Adjusted R-squared: 0.2291
## F-statistic: 9.914 on 6 and 174 DF, p-value: 2.104e-09
confint(int_model)
## 2.5 % 97.5 %
## (Intercept) -253.337273 88.16683
## calc 9.776345 43.99648
## pill2 -113.348108 383.87267
## uric 1.339882 12.23640
## age_groups2 -5.065637 23.48797
## age_groups3 22.819554 51.21941
## calc:pill2 -37.921780 11.93491
my_data_clear$chol_level <- ifelse(my_data_clear$chol< 240, 0, 1)
my_data_clear$chol_level <- as.factor(my_data_clear$chol_level)
table(my_data_clear$chol_level)
##
## 0 1
## 96 85
#logistic regression model
reg_n <- glm(chol_level ~ calc + uric + pill + height + weight + alb + age_groups, data=my_data_clear, family = binomial)
new_individual <- data.frame(
age_groups = factor(2, levels = c(1, 2, 3)), #as the individual is aged 33
height = 67,
weight = 160,
pill = factor(1, levels = c(1, 2)), #not a pill user
alb = 4,
calc = 9.3,
uric = 4.8
)
pred <- predict(reg_n, newdata = new_individual, type = "response", se.fit = TRUE)
#Predicted probability
pred_prob <- pred$fit
# Standard error
se_fit <- pred$se.fit
z_value <- qnorm(0.975)
# Calculate the confidence interval
lower_bound <- pred_prob - z_value * se_fit
upper_bound <- pred_prob + z_value * se_fit
# Print the results
cat("Predicted probability: ", round(pred_prob, 4), "\n")
## Predicted probability: 0.1313
cat("95% Confidence Interval: (", round(lower_bound, 4), ", ", round(upper_bound, 4), ")\n")
## 95% Confidence Interval: ( 7e-04 , 0.2618 )
reg_n_1 <- glm(chol_level ~ calc + pill + height + weight + alb + age_groups, data=my_data_clear, family = binomial)
summary(reg_n)
##
## Call:
## glm(formula = chol_level ~ calc + uric + pill + height + weight +
## alb + age_groups, family = binomial, data = my_data_clear)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -8.624339 5.550406 -1.554 0.120227
## calc 1.957410 0.470958 4.156 3.24e-05 ***
## uric 0.339832 0.181049 1.877 0.060516 .
## pill2 0.264791 0.365375 0.725 0.468630
## height -0.209260 0.085092 -2.459 0.013924 *
## weight 0.008943 0.011271 0.793 0.427529
## alb -0.265945 0.591903 -0.449 0.653211
## age_groups2 0.552956 0.435053 1.271 0.203726
## age_groups3 1.605175 0.438548 3.660 0.000252 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 250.25 on 180 degrees of freedom
## Residual deviance: 197.58 on 172 degrees of freedom
## AIC: 215.58
##
## Number of Fisher Scoring iterations: 4
summary(reg_n_1)
##
## Call:
## glm(formula = chol_level ~ calc + pill + height + weight + alb +
## age_groups, family = binomial, data = my_data_clear)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -8.27185 5.42697 -1.524 0.1275
## calc 1.98570 0.46234 4.295 1.75e-05 ***
## pill2 0.25489 0.36151 0.705 0.4808
## height -0.21166 0.08396 -2.521 0.0117 *
## weight 0.01444 0.01089 1.327 0.1846
## alb -0.16456 0.58386 -0.282 0.7781
## age_groups2 0.45235 0.42633 1.061 0.2887
## age_groups3 1.69318 0.43199 3.919 8.87e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 250.25 on 180 degrees of freedom
## Residual deviance: 201.38 on 173 degrees of freedom
## AIC: 217.38
##
## Number of Fisher Scoring iterations: 4
stepwise_model <- step(reg_n, direction = "backward")
## Start: AIC=215.58
## chol_level ~ calc + uric + pill + height + weight + alb + age_groups
##
## Df Deviance AIC
## - alb 1 197.79 213.79
## - pill 1 198.11 214.11
## - weight 1 198.22 214.22
## <none> 197.59 215.59
## - uric 1 201.38 217.38
## - height 1 204.02 220.02
## - age_groups 2 212.13 226.13
## - calc 1 217.71 233.71
##
## Step: AIC=213.79
## chol_level ~ calc + uric + pill + height + weight + age_groups
##
## Df Deviance AIC
## - pill 1 198.54 212.54
## - weight 1 198.78 212.78
## <none> 197.79 213.79
## - uric 1 201.46 215.46
## - height 1 204.69 218.69
## - age_groups 2 212.45 224.45
## - calc 1 220.51 234.51
##
## Step: AIC=212.54
## chol_level ~ calc + uric + height + weight + age_groups
##
## Df Deviance AIC
## - weight 1 199.53 211.53
## <none> 198.54 212.54
## - uric 1 202.12 214.12
## - height 1 204.90 216.90
## - age_groups 2 213.24 223.24
## - calc 1 220.72 232.72
##
## Step: AIC=211.53
## chol_level ~ calc + uric + height + age_groups
##
## Df Deviance AIC
## <none> 199.53 211.53
## - uric 1 204.37 214.37
## - height 1 204.93 214.93
## - age_groups 2 216.32 224.32
## - calc 1 221.35 231.35
summary(stepwise_model)
##
## Call:
## glm(formula = chol_level ~ calc + uric + height + age_groups,
## family = binomial, data = my_data_clear)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -9.88823 5.40847 -1.828 0.0675 .
## calc 1.81215 0.42167 4.298 1.73e-05 ***
## uric 0.36604 0.17260 2.121 0.0339 *
## height -0.16646 0.07388 -2.253 0.0243 *
## age_groups2 0.58193 0.42922 1.356 0.1752
## age_groups3 1.68292 0.43018 3.912 9.15e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 250.25 on 180 degrees of freedom
## Residual deviance: 199.53 on 175 degrees of freedom
## AIC: 211.53
##
## Number of Fisher Scoring iterations: 4
sessionInfo()
## R version 4.4.1 (2024-06-14)
## Platform: aarch64-apple-darwin20
## Running under: macOS 15.0.1
##
## Matrix products: default
## BLAS: /Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/lib/libRblas.0.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/lib/libRlapack.dylib; LAPACK version 3.12.0
##
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
##
## time zone: Asia/Nicosia
## tzcode source: internal
##
## attached base packages:
## [1] stats graphics grDevices utils datasets methods base
##
## other attached packages:
## [1] openxlsx_4.2.7.1 readxl_1.4.3 corrplot_0.95 skimr_2.1.5
## [5] gmodels_2.19.1 mctest_1.3.1 Hmisc_5.2-1 car_3.1-3
## [9] carData_3.0-5 GGally_2.2.1 broom_1.0.7 naniar_1.1.0
## [13] lubridate_1.9.3 forcats_1.0.0 stringr_1.5.1 purrr_1.0.2
## [17] readr_2.1.5 tidyr_1.3.1 tibble_3.2.1 ggplot2_3.5.1
## [21] tidyverse_2.0.0 dplyr_1.1.4
##
## loaded via a namespace (and not attached):
## [1] tidyselect_1.2.1 farver_2.1.2 fastmap_1.2.0 digest_0.6.37
## [5] rpart_4.1.23 timechange_0.3.0 lifecycle_1.0.4 cluster_2.1.6
## [9] gdata_3.0.1 magrittr_2.0.3 compiler_4.4.1 rlang_1.1.4
## [13] sass_0.4.9 tools_4.4.1 utf8_1.2.4 yaml_2.3.10
## [17] data.table_1.16.2 knitr_1.48 labeling_0.4.3 htmlwidgets_1.6.4
## [21] plyr_1.8.9 repr_1.1.7 RColorBrewer_1.1-3 abind_1.4-8
## [25] withr_3.0.1 foreign_0.8-86 nnet_7.3-19 grid_4.4.1
## [29] fansi_1.0.6 colorspace_2.1-1 scales_1.3.0 gtools_3.9.5
## [33] MASS_7.3-61 cli_3.6.3 rmarkdown_2.28 generics_0.1.3
## [37] rstudioapi_0.16.0 tzdb_0.4.0 cachem_1.1.0 splines_4.4.1
## [41] cellranger_1.1.0 base64enc_0.1-3 vctrs_0.6.5 Matrix_1.7-0
## [45] jsonlite_1.8.8 hms_1.1.3 visdat_0.6.0 Formula_1.2-5
## [49] htmlTable_2.4.3 jquerylib_0.1.4 glue_1.7.0 ggstats_0.7.0
## [53] stringi_1.8.4 gtable_0.3.5 munsell_0.5.1 pillar_1.9.0
## [57] htmltools_0.5.8.1 R6_2.5.1 lattice_0.22-6 evaluate_1.0.0
## [61] highr_0.11 backports_1.5.0 bslib_0.8.0 Rcpp_1.0.13
## [65] zip_2.3.1 nlme_3.1-164 gridExtra_2.3 checkmate_2.3.2
## [69] mgcv_1.9-1 xfun_0.47 pkgconfig_2.0.3