DAY 3-BAI TAP-FINAL

1.Đọc dữ liệu vào R

# Read birthwt.csv from its local path into the data frame `bw`.
bw <- read.csv(file = "D:\\R\\DU LIEU THUC HANH TS ThACH GUI\\birthwt.csv")

## 2. Phân tích mô tả

### 2.1 Mô tả đặc điểm tuổi mẹ, cân nặng mẹ và cân nặng con

library(table1)

## 
## Attaching package: 'table1'

## The following objects are masked from 'package:base':
## 
##     units, units<-

# Descriptive table (mean/SD, median/range) of age, lwt and bwt for all rows.
table1(
  x = ~ age + lwt + bwt,
  data = bw
)

	Overall (N=189)
age
Mean (SD)	23.2 (5.30)
Median [Min, Max]	23.0 [14.0, 45.0]
lwt
Mean (SD)	130 (30.6)
Median [Min, Max]	121 [80.0, 250]
bwt
Mean (SD)	2940 (729)
Median [Min, Max]	2980 [709, 4990]

library(lessR)

## 
## lessR 4.4.3                         feedback: gerbing@pdx.edu 
## --------------------------------------------------------------
## > d <- Read("")  Read data file, many formats available, e.g., Excel
##   d is default data frame, data= in analysis routines optional
## 
## Many examples of reading, writing, and manipulating data, 
## graphics, testing means and proportions, regression, factor analysis,
## customization, forecasting, and aggregation from pivot tables
##   Enter: browseVignettes("lessR")
## 
## View lessR updates, now including time series forecasting
##   Enter: news(package="lessR")
## 
## Interactive data analysis
##   Enter: interact()

## 
## Attaching package: 'lessR'

## The following object is masked from 'package:table1':
## 
##     label

## The following object is masked from 'package:base':
## 
##     sort_by

# Preview the first six rows of `bw` to check the columns that were read.
head(bw, n = 6L)

##   id low age lwt race smoke ptl ht ui ftv  bwt
## 1 85   0  19 182    2     0   0  0  1   0 2523
## 2 86   0  33 155    3     0   0  0  0   3 2551
## 3 87   0  20 105    1     1   0  0  0   1 2557
## 4 88   0  21 108    1     1   0  0  1   2 2594
## 5 89   0  18 107    1     1   0  0  1   0 2600
## 6 91   0  21 124    3     0   0  0  0   0 2622

# Keep an independent copy of `bw`, then summarise the same three numeric
# variables from that copy.
bw2 <- bw
table1(x = ~ age + lwt + bwt, data = bw2)

	Overall (N=189)
age
Mean (SD)	23.2 (5.30)
Median [Min, Max]	23.0 [14.0, 45.0]
lwt
Mean (SD)	130 (30.6)
Median [Min, Max]	121 [80.0, 250]
bwt
Mean (SD)	2940 (729)
Median [Min, Max]	2980 [709, 4990]

###2.2 Mô tả đặc điểm tuổi mẹ

# Add text-labelled versions of race, smoke and low to `bw`. The labels are
# stored as character vectors (not factors), so the level order listed here is
# not retained by the new columns.
bw[["race.c"]] <- as.character(
  factor(bw[["race"]], levels = c(1, 2, 3), labels = c("White", "Black", "Other"))
)
bw[["smoke.c"]] <- as.character(
  factor(bw[["smoke"]], levels = c(0, 1), labels = c("No", "Yes"))
)
bw[["low.c"]] <- as.character(
  factor(bw[["low"]], levels = c(0, 1), labels = c("No LBW", "Low birthweight"))
)
# Descriptive table of the maternal and infant variables, split by low.c.
table1(x = ~ age + lwt + smoke.c + race.c + bwt | low.c, data = bw)

	Low birthweight (N=59)	No LBW (N=130)	Overall (N=189)
age
Mean (SD)	22.3 (4.51)	23.7 (5.58)	23.2 (5.30)
Median [Min, Max]	22.0 [14.0, 34.0]	23.0 [14.0, 45.0]	23.0 [14.0, 45.0]
lwt
Mean (SD)	122 (26.6)	133 (31.7)	130 (30.6)
Median [Min, Max]	120 [80.0, 200]	124 [85.0, 250]	121 [80.0, 250]
smoke.c
No	29 (49.2%)	86 (66.2%)	115 (60.8%)
Yes	30 (50.8%)	44 (33.8%)	74 (39.2%)
race.c
Black	11 (18.6%)	15 (11.5%)	26 (13.8%)
Other	25 (42.4%)	42 (32.3%)	67 (35.4%)
White	23 (39.0%)	73 (56.2%)	96 (50.8%)
bwt
Mean (SD)	2100 (391)	3330 (478)	2940 (729)
Median [Min, Max]	2210 [709, 2500]	3270 [2520, 4990]	2980 [709, 4990]

###2.2 GPT

# Recode the numeric codes in `bw2` in place as labelled factors; keeping them
# as factors preserves the level order given in `levels`/`labels`.
bw2[["low"]] <- factor(
  bw2[["low"]],
  levels = c(0, 1),
  labels = c("Normal weight", "Low birth weight")
)
bw2[["smoke"]] <- factor(
  bw2[["smoke"]],
  levels = c(0, 1),
  labels = c("Non-smoker", "Smoker")
)
bw2[["race"]] <- factor(
  bw2[["race"]],
  levels = c(1, 2, 3),
  labels = c("White", "Black", "Other")
)
# Descriptive table of the same variables, split by the recoded `low` factor.
table1(x = ~ age + lwt + smoke + race + bwt | low, data = bw2)

	Normal weight (N=130)	Low birth weight (N=59)	Overall (N=189)
age
Mean (SD)	23.7 (5.58)	22.3 (4.51)	23.2 (5.30)
Median [Min, Max]	23.0 [14.0, 45.0]	22.0 [14.0, 34.0]	23.0 [14.0, 45.0]
lwt
Mean (SD)	133 (31.7)	122 (26.6)	130 (30.6)
Median [Min, Max]	124 [85.0, 250]	120 [80.0, 200]	121 [80.0, 250]
smoke
Non-smoker	86 (66.2%)	29 (49.2%)	115 (60.8%)
Smoker	44 (33.8%)	30 (50.8%)	74 (39.2%)
race
White	73 (56.2%)	23 (39.0%)	96 (50.8%)
Black	15 (11.5%)	11 (18.6%)	26 (13.8%)
Other	42 (32.3%)	25 (42.4%)	67 (35.4%)
bwt
Mean (SD)	3330 (478)	2100 (391)	2940 (729)
Median [Min, Max]	3270 [2520, 4990]	2210 [709, 2500]	2980 [709, 4990]

## 3. So sánh mật độ xương giữa nam và nữ

### 3.1 Đọc dữ liệu vào R - đưa file lên bằng file.choose (Bone data)

# Read "Bone data.csv" from its local path into the data frame `df`.
df <- read.csv(file = "D:\\R\\DU LIEU THUC HANH TS ThACH GUI\\Bone data.csv")

###3.2Vẽ histogram đánh giá phân bố mật độ xương- vẽ chart

library(lessR)
# Histogram of fnbmd from `df`, drawn with blue bars and custom axis labels.
Histogram(
  fnbmd,
  data = df,
  fill = "blue",
  xlab = "Mật độ xương (g/cm2)",
  ylab = "Frequency"
)

## >>> Suggestions 
## bin_width: set the width of each bin 
## bin_start: set the start of the first bin 
## bin_end: set the end of the last bin 
## Histogram(fnbmd, density=TRUE)  # smoothed curve + histogram 
## Plot(fnbmd)  # Violin/Box/Scatterplot (VBS) plot 
## 
## --- fnbmd --- 
##  
##        n    miss      mean        sd       min       mdn       max 
##      2122      40     0.829     0.155     0.280     0.820     1.510 
##  
## 
##   
## --- Outliers ---     from the box plot: 33 
##  
## Small      Large 
## -----      ----- 
##  0.3      1.5 
##  0.3      1.5 
##  0.4      1.4 
##  0.4      1.4 
##  0.4      1.4 
##  0.4      1.4 
##  0.4      1.4 
##  0.4      1.4 
##  0.4      1.3 
##  0.4      1.3 
##  0.4      1.3 
##           1.3 
##           1.3 
##           1.3 
##           1.3 
##           1.2 
##           1.2 
##           1.2 
## 
## + 15 more outliers 
## 
## 
## Bin Width: 0.1 
## Number of Bins: 14 
##  
##        Bin  Midpnt  Count    Prop  Cumul.c  Cumul.p 
## --------------------------------------------------- 
##  0.2 > 0.3    0.25      1    0.00        1     0.00 
##  0.3 > 0.4    0.35      9    0.00       10     0.00 
##  0.4 > 0.5    0.45     15    0.01       25     0.01 
##  0.5 > 0.6    0.55    103    0.05      128     0.06 
##  0.6 > 0.7    0.65    306    0.14      434     0.20 
##  0.7 > 0.8    0.75    522    0.24      956     0.44 
##  0.8 > 0.9    0.85    534    0.25     1490     0.69 
##  0.9 > 1.0    0.95    371    0.17     1861     0.86 
##  1.0 > 1.1    1.05    183    0.08     2044     0.95 
##  1.1 > 1.2    1.15     48    0.02     2092     0.97 
##  1.2 > 1.3    1.25     21    0.01     2113     0.98 
##  1.3 > 1.4    1.35      6    0.00     2119     0.98 
##  1.4 > 1.5    1.45      2    0.00     2121     0.98 
##  1.5 > 1.6    1.55      1    0.00     2122     0.98 
##

###3.3 So sánh mật độ xương cổ xương đùi giữa nam và nữ

library(table1)
# Summarise fnbmd separately for each sex, alongside an overall column.
table1(
  x = ~ fnbmd | sex,
  data = df
)

	Female (N=1317)	Male (N=845)	Overall (N=2162)
fnbmd
Mean (SD)	0.778 (0.132)	0.910 (0.153)	0.829 (0.155)
Median [Min, Max]	0.770 [0.280, 1.31]	0.900 [0.340, 1.51]	0.820 [0.280, 1.51]
Missing	17 (1.3%)	23 (2.7%)	40 (1.9%)

# Compare mean fnbmd between the sexes. The t.test() defaults are written out:
# two-sided alternative, unequal variances (Welch), 95% confidence interval.
t.test(
  fnbmd ~ sex,
  data = df,
  alternative = "two.sided",
  var.equal = FALSE,
  conf.level = 0.95
)

## 
##  Welch Two Sample t-test
## 
## data:  fnbmd by sex
## t = -20.407, df = 1561, p-value < 0.00000000000000022
## alternative hypothesis: true difference in means between group Female and group Male is not equal to 0
## 95 percent confidence interval:
##  -0.1448770 -0.1194686
## sample estimates:
## mean in group Female   mean in group Male 
##            0.7775231            0.9096959

library(lessR)
# lessR's t-test report for fnbmd by sex: group descriptives, assumption
# checks, inference with and without equal variances, and Cohen's d.
ttest(fnbmd ~ sex, data = df)

## 
## Compare fnbmd across sex with levels Male and Female 
## Grouping Variable:  sex
## Response Variable:  fnbmd
## 
## 
## ------ Describe ------
## 
## fnbmd for sex Male:  n.miss = 23,  n = 822,  mean = 0.910,  sd = 0.153
## fnbmd for sex Female:  n.miss = 17,  n = 1300,  mean = 0.778,  sd = 0.132
## 
## Mean Difference of fnbmd:  0.132
## 
## Weighted Average Standard Deviation:   0.141 
## 
## 
## ------ Assumptions ------
## 
## Note: These hypothesis tests can perform poorly, and the 
##       t-test is typically robust to violations of assumptions. 
##       Use as heuristic guides instead of interpreting literally. 
## 
## Null hypothesis, for each group, is a normal distribution of fnbmd.
## Group Male: Sample mean assumed normal because n > 30, so no test needed.
## Group Female: Sample mean assumed normal because n > 30, so no test needed.
## 
## Null hypothesis is equal variances of fnbmd, homogeneous.
## Variance Ratio test:  F = 0.023/0.018 = 1.336,  df = 821;1299,  p-value = 0.000
## Levene's test, Brown-Forsythe:  t = 3.449,  df = 2120,  p-value = 0.001
## 
## 
## ------ Infer ------
## 
## --- Assume equal population variances of fnbmd for each sex 
## 
## t-cutoff for 95% range of variation: tcut =  1.961 
## Standard Error of Mean Difference: SE =  0.006 
## 
## Hypothesis Test of 0 Mean Diff:  t-value = 21.080,  df = 2120,  p-value = 0.000
## 
## Margin of Error for 95% Confidence Level:  0.012
## 95% Confidence Interval for Mean Difference:  0.120 to 0.144
## 
## 
## --- Do not assume equal population variances of fnbmd for each sex 
## 
## t-cutoff: tcut =  1.961 
## Standard Error of Mean Difference: SE =  0.006 
## 
## Hypothesis Test of 0 Mean Diff:  t = 20.407,  df = 1560.981, p-value = 0.000
## 
## Margin of Error for 95% Confidence Level:  0.013
## 95% Confidence Interval for Mean Difference:  0.119 to 0.145
## 
## 
## ------ Effect Size ------
## 
## --- Assume equal population variances of fnbmd for each sex 
## 
## Standardized Mean Difference of fnbmd, Cohen's d:  0.939
## 
## 
## ------ Practical Importance ------
## 
## Minimum Mean Difference of practical importance: mmd
## Minimum Standardized Mean Difference of practical importance: msmd
## Neither value specified, so no analysis
## 
## 
## ------ Graphics Smoothing Parameter ------
## 
## Density bandwidth for sex Male: 0.044
## Density bandwidth for sex Female: 0.034

###3.4 GPT- prompt 12

# Per-sex descriptive table for fnbmd, including the overall column.
table1(x = ~ fnbmd | sex, data = df)

	Female (N=1317)	Male (N=845)	Overall (N=2162)
fnbmd
Mean (SD)	0.778 (0.132)	0.910 (0.153)	0.829 (0.155)
Median [Min, Max]	0.770 [0.280, 1.31]	0.900 [0.340, 1.51]	0.820 [0.280, 1.51]
Missing	17 (1.3%)	23 (2.7%)	40 (1.9%)

# Store the Welch two-sample test of fnbmd by sex so the object can be reused,
# then display it.
t_test_result <- t.test(
  fnbmd ~ sex,
  data = df,
  var.equal = FALSE
)
print(x = t_test_result)

## 
##  Welch Two Sample t-test
## 
## data:  fnbmd by sex
## t = -20.407, df = 1561, p-value < 0.00000000000000022
## alternative hypothesis: true difference in means between group Female and group Male is not equal to 0
## 95 percent confidence interval:
##  -0.1448770 -0.1194686
## sample estimates:
## mean in group Female   mean in group Male 
##            0.7775231            0.9096959

###3.4 GPT- prompt 3

# lessR t-test report for fnbmd by sex. The former `descriptives = TRUE` was
# dropped: it is not a documented ttest() argument, and the Describe section
# is printed by default.
ttest(fnbmd ~ sex, data = df)

## 
## Compare fnbmd across sex with levels Male and Female 
## Grouping Variable:  sex
## Response Variable:  fnbmd
## 
## 
## ------ Describe ------
## 
## fnbmd for sex Male:  n.miss = 23,  n = 822,  mean = 0.910,  sd = 0.153
## fnbmd for sex Female:  n.miss = 17,  n = 1300,  mean = 0.778,  sd = 0.132
## 
## Mean Difference of fnbmd:  0.132
## 
## Weighted Average Standard Deviation:   0.141 
## 
## 
## ------ Assumptions ------
## 
## Note: These hypothesis tests can perform poorly, and the 
##       t-test is typically robust to violations of assumptions. 
##       Use as heuristic guides instead of interpreting literally. 
## 
## Null hypothesis, for each group, is a normal distribution of fnbmd.
## Group Male: Sample mean assumed normal because n > 30, so no test needed.
## Group Female: Sample mean assumed normal because n > 30, so no test needed.
## 
## Null hypothesis is equal variances of fnbmd, homogeneous.
## Variance Ratio test:  F = 0.023/0.018 = 1.336,  df = 821;1299,  p-value = 0.000
## Levene's test, Brown-Forsythe:  t = 3.449,  df = 2120,  p-value = 0.001
## 
## 
## ------ Infer ------
## 
## --- Assume equal population variances of fnbmd for each sex 
## 
## t-cutoff for 95% range of variation: tcut =  1.961 
## Standard Error of Mean Difference: SE =  0.006 
## 
## Hypothesis Test of 0 Mean Diff:  t-value = 21.080,  df = 2120,  p-value = 0.000
## 
## Margin of Error for 95% Confidence Level:  0.012
## 95% Confidence Interval for Mean Difference:  0.120 to 0.144
## 
## 
## --- Do not assume equal population variances of fnbmd for each sex 
## 
## t-cutoff: tcut =  1.961 
## Standard Error of Mean Difference: SE =  0.006 
## 
## Hypothesis Test of 0 Mean Diff:  t = 20.407,  df = 1560.981, p-value = 0.000
## 
## Margin of Error for 95% Confidence Level:  0.013
## 95% Confidence Interval for Mean Difference:  0.119 to 0.145
## 
## 
## ------ Effect Size ------
## 
## --- Assume equal population variances of fnbmd for each sex 
## 
## Standardized Mean Difference of fnbmd, Cohen's d:  0.939
## 
## 
## ------ Practical Importance ------
## 
## Minimum Mean Difference of practical importance: mmd
## Minimum Standardized Mean Difference of practical importance: msmd
## Neither value specified, so no analysis
## 
## 
## ------ Graphics Smoothing Parameter ------
## 
## Density bandwidth for sex Male: 0.044
## Density bandwidth for sex Female: 0.034

## 4. Đánh giá ảnh hưởng của cafe lên RER

### 4.1 Nhập nhanh dữ liệu RER

# RER readings entered by hand: nine values for placebo, nine for coffee.
placebo <- c(105, 119, 100, 97, 96, 101, 94, 95, 98)
coffee <- c(96, 99, 94, 89, 96, 93, 88, 105, 88)

###4.2 Đánh giá ảnh hưởng của cafe lên RER bằng kiểm định t

# Two-sample t-test of placebo against coffee, with the defaults written out:
# unpaired samples, unequal variances (Welch), two-sided alternative.
t.test(
  x = placebo,
  y = coffee,
  alternative = "two.sided",
  paired = FALSE,
  var.equal = FALSE
)

## 
##  Welch Two Sample t-test
## 
## data:  placebo and coffee
## t = 1.9948, df = 14.624, p-value = 0.06505
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -0.4490961 13.1157627
## sample estimates:
## mean of x mean of y 
## 100.55556  94.22222

###4.3 Đánh giá ảnh hưởng của cafe lên RER bằng bootstrap test

library(simpleboot)

## Simple Bootstrap Routines (1.1-8)

library(boot)
# Fix the RNG seed so the resampling, and therefore every interval below, is
# the same on each run. The seed value itself is arbitrary.
set.seed(123)
# Bootstrap the difference in means between placebo and coffee (500 resamples).
b <- two.boot(placebo, coffee, mean, R = 500)
# Request the four interval types explicitly. The default type = "all" also
# asks for the studentized interval, which needs bootstrap variances that this
# two.boot() call does not compute, so boot.ci() would emit a warning.
boot.ci(b, type = c("norm", "basic", "perc", "bca"))

## BOOTSTRAP CONFIDENCE INTERVAL CALCULATIONS
## Based on 500 bootstrap replicates
## 
## CALL : 
## boot.ci(boot.out = b)
## 
## Intervals : 
## Level      Normal              Basic         
## 95%   ( 0.525, 12.016 )   (-0.163, 11.607 )  
## 
## Level     Percentile            BCa          
## 95%   ( 1.059, 12.830 )   ( 1.170, 12.936 )  
## Calculations and Intervals on Original Scale

# Plot the distribution of the bootstrap replicates, asking for about 50 bins.
hist(x = b, breaks = 50)

###4.4 GPT

DAY 3-BAI TAP-FINAL

NGUYEN VAN PHUC

2025-05-12

1.Đọc dữ liệu vào R