COD_week7_1_MGK_BTE3207

Minsik Kim

2023-10-09

Before begin..

Let’s load the SBP dataset.

dataset_sbp <- read.csv(file = "/Users/minsikkim/Dropbox (Personal)/Inha/5_Lectures/Advanced biostatistics/scripts/BTE3207_Advanced_Biostatistics/dataset/sbp_dataset_korea_2013-2014.csv")

head(dataset_sbp)
##   SEX BTH_G SBP DBP FBS DIS  BMI
## 1   1     1 116  78  94   4 16.6
## 2   1     1 100  60  79   4 22.3
## 3   1     1 100  60  87   4 21.9
## 4   1     1 111  70  72   4 20.2
## 5   1     1 120  80  98   4 20.0
## 6   1     1 115  79  95   4 23.1

Making a new variable hypertension

dataset_sbp$hypertension <- ifelse(dataset_sbp$SBP > 130 | dataset_sbp$DBP > 80,
                                   T,
                                   F)

norm functions

rnorm() - random sampling

rnorm(5)
## [1]  0.96893326  0.79296110  0.49994485 -0.05475673  1.29812150

pnorm() - probability of given z-score (z-score table results)

pnorm(2)
## [1] 0.9772499

qnorm() - gets z-score of given probability

qnorm(0.97724985)
## [1] 2

pnorm() and qnorm() are reverse!

pnorm(qnorm(0))
## [1] 0

dnorm() - probability density function, the height of normal curve.

dnorm(0)
## [1] 0.3989423
dnorm(1)
## [1] 0.2419707
dnorm(-1)
## [1] 0.2419707

Ploting of dnorm result

plot(dnorm(seq(-4, 4, by = 0.5))~seq(-4, 4, by = 0.5), 
     ylab = "dnorm() result",
     xlab = "dnorm() input",)

Ploting of normal curve

#Create a sequence of 100 equally spaced numbers between -4 and 4
x <- seq(-4, 4, length=100)

#create a vector of values that shows the height of the probability distribution
#for each value in x
y <- dnorm(x)

#plot x and y as a scatterplot with connected lines (type = "l") and add
#an x-axis with custom labels
plot(x,y, type = "l", lwd = 2, axes = FALSE, xlab = "", ylab = "") +
        axis(1, at = -3:3, labels = c("-3s", "-2s", "-1s", "mean", "1s", "2s", "3s")) +
        abline(v = 2, col = "red") +
        abline(v = -2, col = "red")

## numeric(0)

Statistical tests

t-test

getting confidence interval of some data

result <- t.test(dataset_sbp$SBP) # Extract the confidence interval 

result$conf.int
## [1] 121.8432 121.9003
## attr(,"conf.level")
## [1] 0.95

comparing groups with t-test

result <- t.test(SBP ~ SEX, data = dataset_sbp) # Extract the confidence interval 

result
## 
##  Welch Two Sample t-test
## 
## data:  SBP by SEX
## t = 170.86, df = 977496, p-value < 2.2e-16
## alternative hypothesis: true difference in means between group 1 and group 2 is not equal to 0
## 95 percent confidence interval:
##  4.860551 4.973355
## sample estimates:
## mean in group 1 mean in group 2 
##         124.280         119.363

z.test()

one-sample z-test

install.packages("BSDA")
## 
## The downloaded binary packages are in
##  /var/folders/qj/y615pkf53ms_hcxg1f9zjkyc0000gp/T//RtmpZUdYnj/downloaded_packages
library(BSDA)


result <- z.test(dataset_sbp$hypertension, 
       sigma.x = mean(dataset_sbp$hypertension) * (1-mean(dataset_sbp$hypertension)))


result
## 
##  One-sample z-Test
## 
## data:  dataset_sbp$hypertension
## z = 1463.7, p-value < 2.2e-16
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
##  0.3163788 0.3172272
## sample estimates:
## mean of x 
##  0.316803

Hypertention table

table(dataset_sbp$SEX, dataset_sbp$hypertension)
##    
##      FALSE   TRUE
##   1 322199 188028
##   2 360998 128775

two-sample z-test

#Maknig subsets of data
dataset_male <- dataset_sbp %>% subset(., .$SEX == 1)
dataset_female <-  dataset_sbp %>% subset(., .$SEX == 2)

result <- z.test(x = dataset_male$hypertension,
                 y = dataset_female$hypertension,
                 sigma.x = mean(dataset_male$hypertension) * (1-mean(dataset_male$hypertension)),
                 sigma.y = mean(dataset_female$hypertension) * (1-mean(dataset_female$hypertension))
                 )


result
## 
##  Two-sample z-Test
## 
## data:  dataset_male$hypertension and dataset_female$hypertension
## z = 246.95, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  0.1047524 0.1064284
## sample estimates:
## mean of x mean of y 
## 0.3685183 0.2629279

Chi-square test

Chi-square test for two groups

result <- chisq.test(x = dataset_sbp$hypertension,
           y = dataset_sbp$SEX)

result
## 
##  Pearson's Chi-squared test with Yates' continuity correction
## 
## data:  dataset_sbp$hypertension and dataset_sbp$SEX
## X-squared = 12872, df = 1, p-value < 2.2e-16

p-value is the same as z-score test

Chi-square test for multiple groups

table(dataset_sbp$SEX, dataset_sbp$DIS)
##    
##          1      2      3      4
##   1  27979  79892  23900 378456
##   2  25419  82934  19214 362206
result <- chisq.test(x = dataset_sbp$DIS,
           y = dataset_sbp$SEX)

result
## 
##  Pearson's Chi-squared test
## 
## data:  dataset_sbp$DIS and dataset_sbp$SEX
## X-squared = 627.3, df = 3, p-value < 2.2e-16

Exact test

#Author DataFlair

data_frame <- read.csv("https://goo.gl/j6lRXD")

#Reading CSV
data <- table(data_frame$treatment, data_frame$improvement)


fisher.test(data)
## 
##  Fisher's Exact Test for Count Data
## 
## data:  data
## p-value = 0.02889
## alternative hypothesis: true odds ratio is not equal to 1
## 95 percent confidence interval:
##  0.1582959 0.9212947
## sample estimates:
## odds ratio 
##  0.3878601
#Author DataFlair

fisher.test(table(dataset_sbp$SEX,
                  dataset_sbp$hypertension))
## 
##  Fisher's Exact Test for Count Data
## 
## data:  table(dataset_sbp$SEX, dataset_sbp$hypertension)
## p-value < 2.2e-16
## alternative hypothesis: true odds ratio is not equal to 1
## 95 percent confidence interval:
##  0.6060831 0.6165366
## sample estimates:
## odds ratio 
##   0.611277

As sample number is high, Fisher’s exact test results are having the same result as Chi-square / z-test.

Confidence interval calculations, statistical tests

epitools pacakge automates all the calculations and tests!

install.packages("epitools")
## 
## The downloaded binary packages are in
##  /var/folders/qj/y615pkf53ms_hcxg1f9zjkyc0000gp/T//RtmpZUdYnj/downloaded_packages
library(epitools)


#Confidence interval of RR
riskratio.wald(data)
## $data
##              
##               improved not-improved Total
##   not-treated       26           29    55
##   treated           35           15    50
##   Total             61           44   105
## 
## $measure
##              risk ratio with 95% C.I.
##                estimate     lower    upper
##   not-treated 1.0000000        NA       NA
##   treated     0.5689655 0.3479293 0.930424
## 
## $p.value
##              two-sided
##               midp.exact fisher.exact chi.square
##   not-treated         NA           NA         NA
##   treated     0.02013718   0.02888541 0.01840777
## 
## $correction
## [1] FALSE
## 
## attr(,"method")
## [1] "Unconditional MLE & normal approximation (Wald) CI"
#Confidence interval of RR
oddsratio.wald(data)
## $data
##              
##               improved not-improved Total
##   not-treated       26           29    55
##   treated           35           15    50
##   Total             61           44   105
## 
## $measure
##              odds ratio with 95% C.I.
##                estimate     lower     upper
##   not-treated 1.0000000        NA        NA
##   treated     0.3842365 0.1719968 0.8583743
## 
## $p.value
##              two-sided
##               midp.exact fisher.exact chi.square
##   not-treated         NA           NA         NA
##   treated     0.02013718   0.02888541 0.01840777
## 
## $correction
## [1] FALSE
## 
## attr(,"method")
## [1] "Unconditional MLE & normal approximation (Wald) CI"

Bibliography

## Computing. R Foundation for Statistical Computing, Vienna, Austria. <https://www.R-project.org/>. We have invested a lot of time and effort in creating R, please cite it when using it for data analysis. See also 'citation("pkgname")' for citing R packages.
## Grolemund G, Hayes A, Henry L, Hester J, Kuhn M, Pedersen TL, Miller E, Bache SM, Müller K, Ooms J, Robinson D, Seidel DP, Spinu V, Takahashi K, Vaughan D, Wilke C, Woo K, Yutani H (2019). "Welcome to the tidyverse." Journal of Open Source Software_, *4*(43), 1686. doi:10.21105/joss.01686 <https://doi.org/10.21105/joss.01686>.
## R. version 0.5.0. Buffalo, New York. http://github.com/trinker/pacman
## J, reikoch, Beasley W, O'Connor B, Warnes GR, Quinn M, Kamvar ZN (2023). yaml: Methods to Convert R Data to YAML and Back_. R package version 2.3.7, <https://CRAN.R-project.org/package=yaml>. ATTENTION: This citation information has been auto-generated from the package DESCRIPTION file and may need manual editing, see 'help("citation")'.