Biostatistics

R Markdown

library(dplyr)

## 
## Attaching package: 'dplyr'

## The following objects are masked from 'package:stats':
## 
##     filter, lag

## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union

library(tidyverse)

## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ forcats   1.0.0     ✔ readr     2.1.5
## ✔ ggplot2   3.5.1     ✔ stringr   1.5.1
## ✔ lubridate 1.9.3     ✔ tibble    3.2.1
## ✔ purrr     1.0.2     ✔ tidyr     1.3.1

## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors

library(naniar)
library(readr)
library(ggplot2)
library(broom)
library(GGally)

## Registered S3 method overwritten by 'GGally':
##   method from   
##   +.gg   ggplot2

library(tidyr)
library(car)

## Loading required package: carData
## 
## Attaching package: 'car'
## 
## The following object is masked from 'package:purrr':
## 
##     some
## 
## The following object is masked from 'package:dplyr':
## 
##     recode

library(Hmisc)

## 
## Attaching package: 'Hmisc'
## 
## The following objects are masked from 'package:dplyr':
## 
##     src, summarize
## 
## The following objects are masked from 'package:base':
## 
##     format.pval, units

library(mctest)
library(gmodels)
library(skimr)

## 
## Attaching package: 'skimr'
## 
## The following object is masked from 'package:naniar':
## 
##     n_complete

library(corrplot)

## corrplot 0.95 loaded

library(readxl)
library(openxlsx)

my_data <- read_excel("/Users/anna/Downloads/DataExam3Fall2024.xls")
View(my_data)

my_data$alb <- na_if(my_data$alb, 99)
my_data$calc <- na_if(my_data$calc, 99)
my_data$uric <- na_if(my_data$uric, 99)
my_data$height <- na_if(my_data$height, 999)
my_data$weight <- na_if(my_data$weight, 999)

pct_complete(my_data)

## [1] 99.40898

summary(my_data)

##        id              age            height          weight           pill    
##  Min.   :   3.0   Min.   :19.00   Min.   :57.00   Min.   : 94.0   Min.   :1.0  
##  1st Qu.: 563.2   1st Qu.:25.00   1st Qu.:63.00   1st Qu.:118.0   1st Qu.:1.0  
##  Median :1742.5   Median :32.00   Median :64.50   Median :130.0   Median :1.5  
##  Mean   :1599.0   Mean   :33.82   Mean   :64.51   Mean   :131.7   Mean   :1.5  
##  3rd Qu.:2454.5   3rd Qu.:42.00   3rd Qu.:66.00   3rd Qu.:140.0   3rd Qu.:2.0  
##  Max.   :3519.0   Max.   :55.00   Max.   :71.00   Max.   :215.0   Max.   :2.0  
##                                   NA's   :2       NA's   :2                    
##       chol            alb             calc             uric      
##  Min.   : 50.0   Min.   :3.200   Min.   : 8.600   Min.   :2.200  
##  1st Qu.:200.0   1st Qu.:3.900   1st Qu.: 9.600   1st Qu.:4.000  
##  Median :235.0   Median :4.100   Median :10.000   Median :4.600  
##  Mean   :237.1   Mean   :4.111   Mean   : 9.962   Mean   :4.771  
##  3rd Qu.:260.0   3rd Qu.:4.400   3rd Qu.:10.300   3rd Qu.:5.300  
##  Max.   :600.0   Max.   :5.000   Max.   :11.100   Max.   :9.900  
##                  NA's   :2       NA's   :3        NA's   :1

dim(my_data)

## [1] 188   9

my_data_clear <- na.omit(my_data)
View(my_data_clear)
dim(my_data_clear)

## [1] 181   9

my_data_clear$age <- as.numeric(my_data_clear$age)
my_data_clear$age_groups <- ifelse(my_data_clear$age < 30, 1, 
                        ifelse(my_data_clear$age >= 40 , 3 , 2))
my_data_clear$age_groups <- as.factor(my_data_clear$age_groups)

View(my_data_clear)

my_data_clear$pill <- as.factor(my_data_clear$pill)
table(my_data_clear$pill)

## 
##  1  2 
## 89 92

#Pie chart for Pill taking distribution 
pie(table(my_data_clear$pill),
    main = "Pill Distribution",
    col = c("pink", "lightgreen"))

table(my_data_clear$age_groups)

## 
##  1  2  3 
## 73 52 56

#Pie chart for Age  distribution 
pie(table(my_data_clear$age_groups),
    main = "Age Distribution",
    col = c("blue", "lightgreen", "red"))

ggplot(my_data_clear, aes(y = height)) +
  geom_boxplot(fill = "skyblue", color = "black") +
  ylim(55,75)+
  labs(title = "Boxplot of Heights", 
       y = "Height", 
       x = "") +
  theme_minimal()

ggplot(my_data_clear, aes(x = height)) +
  geom_histogram(binwidth = 2, fill = "lightgreen", color = "black", alpha = 0.9) +  

  labs(title = "Distribution of Height", x = "Height", y = "Frequency") + 
  theme_minimal() +  
  theme(plot.title = element_text(hjust = 0.5, size = 16, face = "bold"),  
        axis.title = element_text(size = 12))

#summary 
summary(my_data_clear$height)

##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   57.00   63.00   64.00   64.49   66.00   71.00

stats <- boxplot.stats(my_data_clear$height)
# Outliers
outliers <- stats$out
# Print outliers
print(paste("Outliers:", paste(outliers, collapse = ", ")))

## [1] "Outliers: 57, 58, 71, 71"

sd_height <- sd(my_data_clear$height)
print(paste("Standard Deviation:", round(sd_height, 2)))

## [1] "Standard Deviation: 2.49"

ggplot(my_data_clear, aes(y = weight)) +
  geom_boxplot(fill = "skyblue", color = "black") +
  labs(title = "Boxplot of Weights", 
       y = "Weight", 
       x = "") +
  theme_minimal()

ggplot(my_data_clear, aes(x = weight)) +
  geom_histogram(binwidth = 10, fill = "lightgreen", color = "black", alpha = 0.9) +  

  labs(title = "Distribution of Weight", x = "Weight", y = "Frequency") + 
  theme_minimal() +  
  theme(plot.title = element_text(hjust = 0.5, size = 16, face = "bold"),  
        axis.title = element_text(size = 12))

#summary 
summary(my_data_clear$weight)

##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    94.0   118.0   129.0   131.2   140.0   215.0

stats2 <- boxplot.stats(my_data_clear$weight)
# Outliers
outliers2 <- stats2$out
# Print outliers
print(paste("Outliers:", paste(outliers2, collapse = ", ")))

## [1] "Outliers: 175, 180, 203, 195, 215, 180, 190"

sd_weight <- sd(my_data_clear$weight)
print(paste("Standard Deviation:", round(sd_weight, 2)))

## [1] "Standard Deviation: 20.49"

ggplot(my_data_clear, aes(y = chol)) +
  geom_boxplot(fill = "skyblue", color = "black") +
  labs(title = "Boxplot of Cholesterol", 
       y = "Cholesterol", 
       x = "") +
  theme_minimal()

ggplot(my_data_clear, aes(x = chol)) +
  geom_histogram(binwidth = 20, fill = "lightgreen", color = "black", alpha = 0.9) +  

  labs(title = "Distribution of Cholesterol", x = "Cholesterol", y = "Frequency") + 
  theme_minimal() +  
  theme(plot.title = element_text(hjust = 0.5, size = 16, face = "bold"),  
        axis.title = element_text(size = 12))

#summary 
summary(my_data_clear$chol)

##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    50.0   200.0   235.0   234.8   260.0   390.0

stats3 <- boxplot.stats(my_data_clear$chol)
# Outliers
outliers3 <- stats3$out
# Print outliers
print(paste("Outliers:", paste(outliers3, collapse = ", ")))

## [1] "Outliers: 50, 390"

sd_chol <- sd(my_data_clear$chol)
print(paste("Standard Deviation:", round(sd_chol, 2)))

## [1] "Standard Deviation: 44.81"

ggplot(my_data_clear, aes(y = alb)) +
  geom_boxplot(fill = "skyblue", color = "black") +
  labs(title = "Boxplot of Albumin", 
       y = "Albumin", 
       x = "") +
  theme_minimal()

ggplot(my_data_clear, aes(x = alb)) +
  geom_histogram(binwidth = 0.2, fill = "lightgreen", color = "black", alpha = 0.9) +  

  labs(title = "Distribution of Albumin", x = "Albumin", y = "Frequency") + 
  theme_minimal() +  
  theme(plot.title = element_text(hjust = 0.5, size = 16, face = "bold"),  
        axis.title = element_text(size = 12))

#summary 
summary(my_data_clear$alb)

##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   3.200   3.900   4.100   4.119   4.400   5.000

stats4 <- boxplot.stats(my_data_clear$alb)
# Outliers
outliers4 <- stats4$out
# Print outliers
print(paste("Outliers:", paste(outliers4, collapse = ", ")))

## [1] "Outliers: "

sd_alb <- sd(my_data_clear$alb)
print(paste("Standard Deviation:", round(sd_alb, 2)))

## [1] "Standard Deviation: 0.36"

ggplot(my_data_clear, aes(y = calc)) +
  geom_boxplot(fill = "skyblue", color = "black") +
  labs(title = "Boxplot of Calcium", 
       y = "Calcium", 
       x = "") +
  theme_minimal()

ggplot(my_data_clear, aes(x = calc)) +
  geom_histogram(binwidth = 0.2, fill = "lightgreen", color = "black", alpha = 0.9) +  

  labs(title = "Distribution of Calcium", x = "Calcium", y = "Frequency") + 
  theme_minimal() +  
  theme(plot.title = element_text(hjust = 0.5, size = 16, face = "bold"),  
        axis.title = element_text(size = 12))

#summary 
summary(my_data_clear$calc)

##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   8.800   9.600   9.900   9.966  10.300  11.100

stats5 <- boxplot.stats(my_data_clear$calc)
# Outliers
outliers5 <- stats5$out
# Print outliers
print(paste("Outliers:", paste(outliers5, collapse = ", ")))

## [1] "Outliers: "

sd_calc <- sd(my_data_clear$calc)
print(paste("Standard Deviation:", round(sd_calc, 2)))

## [1] "Standard Deviation: 0.47"

ggplot(my_data_clear, aes(y = uric)) +
  geom_boxplot(fill = "skyblue", color = "black") +
  labs(title = "Boxplot of Uric acid", 
       y = "Uric acid", 
       x = "") +
  theme_minimal()

ggplot(my_data_clear, aes(x = uric)) +
  geom_histogram(binwidth = 0.5, fill = "lightgreen", color = "black", alpha = 0.9) +  

  labs(title = "Distribution of Uric acid", x = "Uric acid", y = "Frequency") + 
  theme_minimal() +  
  theme(plot.title = element_text(hjust = 0.5, size = 16, face = "bold"),  
        axis.title = element_text(size = 12))

#summary 
summary(my_data_clear$uric)

##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   2.200   4.000   4.600   4.746   5.300   9.900

stats6 <- boxplot.stats(my_data_clear$uric)
# Outliers
outliers6 <- stats6$out
# Print outliers
print(paste("Outliers:", paste(outliers6, collapse = ", ")))

## [1] "Outliers: 8.3, 7.5, 8.4, 9.9, 8.5, 7.8"

sd_uric <- sd(my_data_clear$uric)
print(paste("Standard Deviation:", round(sd_uric, 2)))

## [1] "Standard Deviation: 1.13"

my_data_clear$bmi <- round(my_data_clear$weight * 703 / (my_data_clear$height^2), 1)

my_data_clear$bmi_groups <- ifelse(my_data_clear$bmi < 18.5, "Underweight", 
                        ifelse(my_data_clear$bmi >= 30, "Overweight" , "Normal"))

my_data_clear$bmi_groups <- as.factor(my_data_clear$bmi_groups)

table(my_data_clear$bmi_groups)

## 
##      Normal  Overweight Underweight 
##         166           6           9

ggplot(my_data_clear, aes(y = bmi)) +
  geom_boxplot(fill = "skyblue", color = "black") +
  labs(title = "Boxplot of BMI", 
       y = "BMI", 
       x = "") +
  theme_minimal()

ggplot(my_data_clear, aes(x = bmi)) +
  geom_histogram(binwidth = 2, fill = "lightgreen", color = "black", alpha = 0.9) +  

  labs(title = "Distribution of BMI", x = "BMI", y = "Frequency") + 
  theme_minimal() +  
  theme(plot.title = element_text(hjust = 0.5, size = 16, face = "bold"),  
        axis.title = element_text(size = 12))

#summary 
summary(my_data_clear$bmi)

##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   17.70   20.00   21.60   22.15   23.20   34.50

stats7 <- boxplot.stats(my_data_clear$bmi)
# Outliers
outliers7 <- stats7$out
# Print outliers
print(paste("Outliers:", paste(outliers7, collapse = ", ")))

## [1] "Outliers: 28.3, 30.9, 30.9, 28.3, 28.3, 34.5, 32.7, 28.3, 28.2, 31.6, 33.2"

sd_bmi <- sd(my_data_clear$bmi)
print(paste("Standard Deviation:", round(sd_bmi, 2)))

## [1] "Standard Deviation: 3.05"

222222222222222222________##################

reg1 <- lm(chol ~ calc, data=my_data_clear)
summary(reg1)

## 
## Call:
## lm(formula = chol ~ calc, data = my_data_clear)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -175.797  -28.656   -2.515   22.414  161.738 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  -10.818     68.320  -0.158 0.874368    
## calc          24.647      6.848   3.599 0.000413 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 43.39 on 179 degrees of freedom
## Multiple R-squared:  0.06749,    Adjusted R-squared:  0.06228 
## F-statistic: 12.95 on 1 and 179 DF,  p-value: 0.0004129

reg2 <- lm(chol ~ uric, data=my_data_clear)
summary(reg2)

## 
## Call:
## lm(formula = chol ~ uric, data = my_data_clear)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -164.337  -33.039    1.034   28.726  146.343 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  179.153     13.866  12.920  < 2e-16 ***
## uric          11.728      2.843   4.125 5.68e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 42.94 on 179 degrees of freedom
## Multiple R-squared:  0.0868, Adjusted R-squared:  0.0817 
## F-statistic: 17.01 on 1 and 179 DF,  p-value: 5.676e-05

reg3 <- lm(chol ~ alb, data=my_data_clear)
summary(reg3)

## 
## Call:
## lm(formula = chol ~ alb, data = my_data_clear)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -181.822  -34.326    0.055   25.674  150.674 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  196.174     38.511   5.094 8.84e-07 ***
## alb            9.381      9.315   1.007    0.315    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 44.81 on 179 degrees of freedom
## Multiple R-squared:  0.005634,   Adjusted R-squared:  7.886e-05 
## F-statistic: 1.014 on 1 and 179 DF,  p-value: 0.3153

reg4 <- lm(chol ~ bmi, data=my_data_clear)
summary(reg4)

## 
## Call:
## lm(formula = chol ~ bmi, data = my_data_clear)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -186.199  -32.990    1.155   25.727  154.229 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  187.422     24.267   7.723 7.76e-13 ***
## bmi            2.139      1.085   1.971   0.0502 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 44.45 on 179 degrees of freedom
## Multiple R-squared:  0.02125,    Adjusted R-squared:  0.01578 
## F-statistic: 3.886 on 1 and 179 DF,  p-value: 0.05024

my_data_clear$age_groups <- as.factor(my_data_clear$age_groups)

reg5 <- lm(chol ~ age_groups, data=my_data_clear)
summary(reg5)

## 
## Call:
## lm(formula = chol ~ age_groups, data = my_data_clear)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -170.384  -26.589   -0.384   28.411  128.411 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  220.384      4.823  45.698  < 2e-16 ***
## age_groups2    5.847      7.477   0.782    0.435    
## age_groups3   41.206      7.319   5.630 6.92e-08 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 41.2 on 178 degrees of freedom
## Multiple R-squared:  0.1638, Adjusted R-squared:  0.1544 
## F-statistic: 17.43 on 2 and 178 DF,  p-value: 1.223e-07

reg6 <- lm(chol ~ pill, data=my_data_clear)
summary(reg6)

## 
## Call:
## lm(formula = chol ~ pill, data = my_data_clear)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -187.641  -32.641   -1.888   25.359  152.359 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  231.888      4.753  48.788   <2e-16 ***
## pill2          5.754      6.667   0.863    0.389    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 44.84 on 179 degrees of freedom
## Multiple R-squared:  0.004144,   Adjusted R-squared:  -0.001419 
## F-statistic: 0.7449 on 1 and 179 DF,  p-value: 0.3893

reg_f <- lm(chol ~ calc + uric + age_groups, data=my_data_clear)
summary(reg_f)

## 
## Call:
## lm(formula = chol ~ calc + uric + age_groups, data = my_data_clear)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -151.299  -24.637   -0.645   23.571  132.992 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  -14.629     62.786  -0.233  0.81604    
## calc          20.369      6.329   3.218  0.00154 ** 
## uric           6.795      2.761   2.461  0.01480 *  
## age_groups2    9.922      7.201   1.378  0.16997    
## age_groups3   36.685      7.169   5.117 8.06e-07 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 39.35 on 176 degrees of freedom
## Multiple R-squared:  0.246,  Adjusted R-squared:  0.2289 
## F-statistic: 14.36 on 4 and 176 DF,  p-value: 3.649e-10

vif(reg_f)

##                GVIF Df GVIF^(1/(2*Df))
## calc       1.038918  1        1.019273
## uric       1.122662  1        1.059557
## age_groups 1.106847  2        1.025704

confint(reg_f)

##                   2.5 %    97.5 %
## (Intercept) -138.539947 109.28259
## calc           7.877583  32.86033
## uric           1.346797  12.24336
## age_groups2   -4.288745  24.13314
## age_groups3   22.536945  50.83236

plot(reg_f, 1)

plot(reg_f, 2)

ggplot(my_data_clear, aes(x = calc, y = chol, color = as.factor(pill))) +
  geom_point(alpha = 0.6) +  
  geom_smooth(method = "lm", se = FALSE, formula = y ~ x) + 
  labs(
    x = "Calcium Intake (calc)",
    y = "Cholesterol Levels (chol)",
    color = "Pill Usage"
  ) +
  theme_minimal()

int_model <- lm(chol ~ calc * pill + uric + age_groups , data=my_data_clear)
summary(int_model)

## 
## Call:
## lm(formula = chol ~ calc * pill + uric + age_groups, data = my_data_clear)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -156.414  -26.868   -0.779   23.566  128.207 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  -82.585     86.514  -0.955  0.34111    
## calc          26.886      8.669   3.101  0.00225 ** 
## pill2        135.262    125.962   1.074  0.28438    
## uric           6.788      2.760   2.459  0.01491 *  
## age_groups2    9.211      7.234   1.273  0.20458    
## age_groups3   37.019      7.195   5.145 7.15e-07 ***
## calc:pill2   -12.993     12.630  -1.029  0.30503    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 39.34 on 174 degrees of freedom
## Multiple R-squared:  0.2548, Adjusted R-squared:  0.2291 
## F-statistic: 9.914 on 6 and 174 DF,  p-value: 2.104e-09

confint(int_model)

##                   2.5 %    97.5 %
## (Intercept) -253.337273  88.16683
## calc           9.776345  43.99648
## pill2       -113.348108 383.87267
## uric           1.339882  12.23640
## age_groups2   -5.065637  23.48797
## age_groups3   22.819554  51.21941
## calc:pill2   -37.921780  11.93491

my_data_clear$chol_level <- ifelse(my_data_clear$chol< 240, 0, 1)
my_data_clear$chol_level <- as.factor(my_data_clear$chol_level)

table(my_data_clear$chol_level)

## 
##  0  1 
## 96 85

#logistic regression model
reg_n <- glm(chol_level ~ calc + uric + pill + height + weight + alb + age_groups, data=my_data_clear, family = binomial)

new_individual <- data.frame(
  age_groups = factor(2, levels = c(1, 2, 3)),  #as the individual is aged 33
  height = 67,
  weight = 160,
  pill = factor(1, levels = c(1, 2)), #not a pill user
  alb = 4,
  calc = 9.3,
  uric = 4.8
)

pred <- predict(reg_n, newdata = new_individual, type = "response", se.fit = TRUE)

#Predicted probability
pred_prob <- pred$fit

# Standard error
se_fit <- pred$se.fit

z_value <- qnorm(0.975)

# Calculate the confidence interval
lower_bound <- pred_prob - z_value * se_fit
upper_bound <- pred_prob + z_value * se_fit

# Print the results
cat("Predicted probability: ", round(pred_prob, 4), "\n")

## Predicted probability:  0.1313

cat("95% Confidence Interval: (", round(lower_bound, 4), ", ", round(upper_bound, 4), ")\n")

## 95% Confidence Interval: ( 7e-04 ,  0.2618 )

reg_n_1 <- glm(chol_level ~ calc + pill + height + weight + alb + age_groups, data=my_data_clear, family = binomial)
summary(reg_n)

## 
## Call:
## glm(formula = chol_level ~ calc + uric + pill + height + weight + 
##     alb + age_groups, family = binomial, data = my_data_clear)
## 
## Coefficients:
##              Estimate Std. Error z value Pr(>|z|)    
## (Intercept) -8.624339   5.550406  -1.554 0.120227    
## calc         1.957410   0.470958   4.156 3.24e-05 ***
## uric         0.339832   0.181049   1.877 0.060516 .  
## pill2        0.264791   0.365375   0.725 0.468630    
## height      -0.209260   0.085092  -2.459 0.013924 *  
## weight       0.008943   0.011271   0.793 0.427529    
## alb         -0.265945   0.591903  -0.449 0.653211    
## age_groups2  0.552956   0.435053   1.271 0.203726    
## age_groups3  1.605175   0.438548   3.660 0.000252 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 250.25  on 180  degrees of freedom
## Residual deviance: 197.58  on 172  degrees of freedom
## AIC: 215.58
## 
## Number of Fisher Scoring iterations: 4

summary(reg_n_1)

## 
## Call:
## glm(formula = chol_level ~ calc + pill + height + weight + alb + 
##     age_groups, family = binomial, data = my_data_clear)
## 
## Coefficients:
##             Estimate Std. Error z value Pr(>|z|)    
## (Intercept) -8.27185    5.42697  -1.524   0.1275    
## calc         1.98570    0.46234   4.295 1.75e-05 ***
## pill2        0.25489    0.36151   0.705   0.4808    
## height      -0.21166    0.08396  -2.521   0.0117 *  
## weight       0.01444    0.01089   1.327   0.1846    
## alb         -0.16456    0.58386  -0.282   0.7781    
## age_groups2  0.45235    0.42633   1.061   0.2887    
## age_groups3  1.69318    0.43199   3.919 8.87e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 250.25  on 180  degrees of freedom
## Residual deviance: 201.38  on 173  degrees of freedom
## AIC: 217.38
## 
## Number of Fisher Scoring iterations: 4

stepwise_model <- step(reg_n, direction = "backward")

## Start:  AIC=215.58
## chol_level ~ calc + uric + pill + height + weight + alb + age_groups
## 
##              Df Deviance    AIC
## - alb         1   197.79 213.79
## - pill        1   198.11 214.11
## - weight      1   198.22 214.22
## <none>            197.59 215.59
## - uric        1   201.38 217.38
## - height      1   204.02 220.02
## - age_groups  2   212.13 226.13
## - calc        1   217.71 233.71
## 
## Step:  AIC=213.79
## chol_level ~ calc + uric + pill + height + weight + age_groups
## 
##              Df Deviance    AIC
## - pill        1   198.54 212.54
## - weight      1   198.78 212.78
## <none>            197.79 213.79
## - uric        1   201.46 215.46
## - height      1   204.69 218.69
## - age_groups  2   212.45 224.45
## - calc        1   220.51 234.51
## 
## Step:  AIC=212.54
## chol_level ~ calc + uric + height + weight + age_groups
## 
##              Df Deviance    AIC
## - weight      1   199.53 211.53
## <none>            198.54 212.54
## - uric        1   202.12 214.12
## - height      1   204.90 216.90
## - age_groups  2   213.24 223.24
## - calc        1   220.72 232.72
## 
## Step:  AIC=211.53
## chol_level ~ calc + uric + height + age_groups
## 
##              Df Deviance    AIC
## <none>            199.53 211.53
## - uric        1   204.37 214.37
## - height      1   204.93 214.93
## - age_groups  2   216.32 224.32
## - calc        1   221.35 231.35

summary(stepwise_model)

## 
## Call:
## glm(formula = chol_level ~ calc + uric + height + age_groups, 
##     family = binomial, data = my_data_clear)
## 
## Coefficients:
##             Estimate Std. Error z value Pr(>|z|)    
## (Intercept) -9.88823    5.40847  -1.828   0.0675 .  
## calc         1.81215    0.42167   4.298 1.73e-05 ***
## uric         0.36604    0.17260   2.121   0.0339 *  
## height      -0.16646    0.07388  -2.253   0.0243 *  
## age_groups2  0.58193    0.42922   1.356   0.1752    
## age_groups3  1.68292    0.43018   3.912 9.15e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 250.25  on 180  degrees of freedom
## Residual deviance: 199.53  on 175  degrees of freedom
## AIC: 211.53
## 
## Number of Fisher Scoring iterations: 4

sessionInfo()

## R version 4.4.1 (2024-06-14)
## Platform: aarch64-apple-darwin20
## Running under: macOS 15.0.1
## 
## Matrix products: default
## BLAS:   /Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/lib/libRblas.0.dylib 
## LAPACK: /Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/lib/libRlapack.dylib;  LAPACK version 3.12.0
## 
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
## 
## time zone: Asia/Nicosia
## tzcode source: internal
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## other attached packages:
##  [1] openxlsx_4.2.7.1 readxl_1.4.3     corrplot_0.95    skimr_2.1.5     
##  [5] gmodels_2.19.1   mctest_1.3.1     Hmisc_5.2-1      car_3.1-3       
##  [9] carData_3.0-5    GGally_2.2.1     broom_1.0.7      naniar_1.1.0    
## [13] lubridate_1.9.3  forcats_1.0.0    stringr_1.5.1    purrr_1.0.2     
## [17] readr_2.1.5      tidyr_1.3.1      tibble_3.2.1     ggplot2_3.5.1   
## [21] tidyverse_2.0.0  dplyr_1.1.4     
## 
## loaded via a namespace (and not attached):
##  [1] tidyselect_1.2.1   farver_2.1.2       fastmap_1.2.0      digest_0.6.37     
##  [5] rpart_4.1.23       timechange_0.3.0   lifecycle_1.0.4    cluster_2.1.6     
##  [9] gdata_3.0.1        magrittr_2.0.3     compiler_4.4.1     rlang_1.1.4       
## [13] sass_0.4.9         tools_4.4.1        utf8_1.2.4         yaml_2.3.10       
## [17] data.table_1.16.2  knitr_1.48         labeling_0.4.3     htmlwidgets_1.6.4 
## [21] plyr_1.8.9         repr_1.1.7         RColorBrewer_1.1-3 abind_1.4-8       
## [25] withr_3.0.1        foreign_0.8-86     nnet_7.3-19        grid_4.4.1        
## [29] fansi_1.0.6        colorspace_2.1-1   scales_1.3.0       gtools_3.9.5      
## [33] MASS_7.3-61        cli_3.6.3          rmarkdown_2.28     generics_0.1.3    
## [37] rstudioapi_0.16.0  tzdb_0.4.0         cachem_1.1.0       splines_4.4.1     
## [41] cellranger_1.1.0   base64enc_0.1-3    vctrs_0.6.5        Matrix_1.7-0      
## [45] jsonlite_1.8.8     hms_1.1.3          visdat_0.6.0       Formula_1.2-5     
## [49] htmlTable_2.4.3    jquerylib_0.1.4    glue_1.7.0         ggstats_0.7.0     
## [53] stringi_1.8.4      gtable_0.3.5       munsell_0.5.1      pillar_1.9.0      
## [57] htmltools_0.5.8.1  R6_2.5.1           lattice_0.22-6     evaluate_1.0.0    
## [61] highr_0.11         backports_1.5.0    bslib_0.8.0        Rcpp_1.0.13       
## [65] zip_2.3.1          nlme_3.1-164       gridExtra_2.3      checkmate_2.3.2   
## [69] mgcv_1.9-1         xfun_0.47          pkgconfig_2.0.3

Biostatistics_Exam

2024-12-19

R Markdown