# Read the birth-weight CSV into the data frame `bw`.
# NOTE(review): absolute, machine-specific path; the script only runs where
# this exact file location exists.
bw <- read.csv(
  file = "D:/Learning/CME/R Statistic 2025/DỮ LIỆU THỰC HÀNH (TS Thạch gửi)/birthwt.csv"
)
library(table1)
##
## Attaching package: 'table1'
## The following objects are masked from 'package:base':
##
## units, units<-
# Overall descriptive table (mean/SD, median/min/max) for age, lwt and bwt.
table1(~ age + lwt + bwt, data = bw)
| | Overall (N=189) |
|---|---|
| age | |
| Mean (SD) | 23.2 (5.30) |
| Median [Min, Max] | 23.0 [14.0, 45.0] |
| lwt | |
| Mean (SD) | 130 (30.6) |
| Median [Min, Max] | 121 [80.0, 250] |
| bwt | |
| Mean (SD) | 2940 (729) |
| Median [Min, Max] | 2980 [709, 4990] |
# Descriptive table stratified by low. low is wrapped in as.factor() because
# table1 expects the terms to the right of '|' to be factors and warns
# otherwise; the column headers stay "0" and "1".
# NOTE: smoke and race are still passed as numeric codes here, so they are
# summarised with mean/SD and median rather than as category counts.
table1(~ age + lwt + smoke + race + bwt | as.factor(low), data = bw)
## Warning in table1.formula(~age + lwt + smoke + race + bwt | low, data = bw):
## Terms to the right of '|' in formula 'x' define table columns and are expected
## to be factors with meaningful labels.
| | 0 (N=130) | 1 (N=59) | Overall (N=189) |
|---|---|---|---|
| age | |||
| Mean (SD) | 23.7 (5.58) | 22.3 (4.51) | 23.2 (5.30) |
| Median [Min, Max] | 23.0 [14.0, 45.0] | 22.0 [14.0, 34.0] | 23.0 [14.0, 45.0] |
| lwt | |||
| Mean (SD) | 133 (31.7) | 122 (26.6) | 130 (30.6) |
| Median [Min, Max] | 124 [85.0, 250] | 120 [80.0, 200] | 121 [80.0, 250] |
| smoke | |||
| Mean (SD) | 0.338 (0.475) | 0.508 (0.504) | 0.392 (0.489) |
| Median [Min, Max] | 0 [0, 1.00] | 1.00 [0, 1.00] | 0 [0, 1.00] |
| race | |||
| Mean (SD) | 1.76 (0.913) | 2.03 (0.909) | 1.85 (0.918) |
| Median [Min, Max] | 1.00 [1.00, 3.00] | 2.00 [1.00, 3.00] | 1.00 [1.00, 3.00] |
| bwt | |||
| Mean (SD) | 3330 (478) | 2100 (391) | 2940 (729) |
| Median [Min, Max] | 3270 [2520, 4990] | 2210 [709, 2500] | 2980 [709, 4990] |
# Descriptive table stratified by low, with smoke and race converted to factors
# so they are reported as counts and percentages. low is converted as well:
# table1 expects the stratifying term after '|' to be a factor and warns
# otherwise. The column headers stay "0" and "1".
table1(
  ~ age + lwt + as.factor(smoke) + as.factor(race) + bwt | as.factor(low),
  data = bw
)
## Warning in table1.formula(~age + lwt + as.factor(smoke) + as.factor(race) + :
## Terms to the right of '|' in formula 'x' define table columns and are expected
## to be factors with meaningful labels.
| | 0 (N=130) | 1 (N=59) | Overall (N=189) |
|---|---|---|---|
| age | |||
| Mean (SD) | 23.7 (5.58) | 22.3 (4.51) | 23.2 (5.30) |
| Median [Min, Max] | 23.0 [14.0, 45.0] | 22.0 [14.0, 34.0] | 23.0 [14.0, 45.0] |
| lwt | |||
| Mean (SD) | 133 (31.7) | 122 (26.6) | 130 (30.6) |
| Median [Min, Max] | 124 [85.0, 250] | 120 [80.0, 200] | 121 [80.0, 250] |
| as.factor(smoke) | |||
| 0 | 86 (66.2%) | 29 (49.2%) | 115 (60.8%) |
| 1 | 44 (33.8%) | 30 (50.8%) | 74 (39.2%) |
| as.factor(race) | |||
| 1 | 73 (56.2%) | 23 (39.0%) | 96 (50.8%) |
| 2 | 15 (11.5%) | 11 (18.6%) | 26 (13.8%) |
| 3 | 42 (32.3%) | 25 (42.4%) | 67 (35.4%) |
| bwt | |||
| Mean (SD) | 3330 (478) | 2100 (391) | 2940 (729) |
| Median [Min, Max] | 3270 [2520, 4990] | 2210 [709, 2500] | 2980 [709, 4990] |
# Read the bone data CSV into the data frame `df`.
# NOTE(review): absolute, machine-specific path; the script only runs where
# this exact file location exists.
df <- read.csv(
  file = "D:/Learning/CME/R Statistic 2025/DỮ LIỆU THỰC HÀNH (TS Thạch gửi)/Bone data.csv"
)
library(lessR)
##
## lessR 4.4.3 feedback: gerbing@pdx.edu
## --------------------------------------------------------------
## > d <- Read("") Read data file, many formats available, e.g., Excel
## d is default data frame, data= in analysis routines optional
##
## Many examples of reading, writing, and manipulating data,
## graphics, testing means and proportions, regression, factor analysis,
## customization, forecasting, and aggregation from pivot tables
## Enter: browseVignettes("lessR")
##
## View lessR updates, now including time series forecasting
## Enter: news(package="lessR")
##
## Interactive data analysis
## Enter: interact()
##
## Attaching package: 'lessR'
## The following object is masked from 'package:table1':
##
## label
## The following object is masked from 'package:base':
##
## sort_by
# lessR histogram of fnbmd from df; the call also prints summary statistics,
# box-plot outliers and the bin frequency table.
Histogram(fnbmd, data = df)
## >>> Suggestions
## bin_width: set the width of each bin
## bin_start: set the start of the first bin
## bin_end: set the end of the last bin
## Histogram(fnbmd, density=TRUE) # smoothed curve + histogram
## Plot(fnbmd) # Violin/Box/Scatterplot (VBS) plot
##
## --- fnbmd ---
##
## n miss mean sd min mdn max
## 2122 40 0.829 0.155 0.280 0.820 1.510
##
##
##
## --- Outliers --- from the box plot: 33
##
## Small Large
## ----- -----
## 0.3 1.5
## 0.3 1.5
## 0.4 1.4
## 0.4 1.4
## 0.4 1.4
## 0.4 1.4
## 0.4 1.4
## 0.4 1.4
## 0.4 1.3
## 0.4 1.3
## 0.4 1.3
## 1.3
## 1.3
## 1.3
## 1.3
## 1.2
## 1.2
## 1.2
##
## + 15 more outliers
##
##
## Bin Width: 0.1
## Number of Bins: 14
##
## Bin Midpnt Count Prop Cumul.c Cumul.p
## ---------------------------------------------------
## 0.2 > 0.3 0.25 1 0.00 1 0.00
## 0.3 > 0.4 0.35 9 0.00 10 0.00
## 0.4 > 0.5 0.45 15 0.01 25 0.01
## 0.5 > 0.6 0.55 103 0.05 128 0.06
## 0.6 > 0.7 0.65 306 0.14 434 0.20
## 0.7 > 0.8 0.75 522 0.24 956 0.44
## 0.8 > 0.9 0.85 534 0.25 1490 0.69
## 0.9 > 1.0 0.95 371 0.17 1861 0.86
## 1.0 > 1.1 1.05 183 0.08 2044 0.95
## 1.1 > 1.2 1.15 48 0.02 2092 0.97
## 1.2 > 1.3 1.25 21 0.01 2113 0.98
## 1.3 > 1.4 1.35 6 0.00 2119 0.98
## 1.4 > 1.5 1.45 2 0.00 2121 0.98
## 1.5 > 1.6 1.55 1 0.00 2122 0.98
##
# lessR two-group t-test of fnbmd across the levels of sex. The printed output
# includes both the equal-variance and the unequal-variance inference.
ttest(fnbmd ~ sex, data = df)
##
## Compare fnbmd across sex with levels Male and Female
## Grouping Variable: sex
## Response Variable: fnbmd
##
##
## ------ Describe ------
##
## fnbmd for sex Male: n.miss = 23, n = 822, mean = 0.910, sd = 0.153
## fnbmd for sex Female: n.miss = 17, n = 1300, mean = 0.778, sd = 0.132
##
## Mean Difference of fnbmd: 0.132
##
## Weighted Average Standard Deviation: 0.141
##
##
## ------ Assumptions ------
##
## Note: These hypothesis tests can perform poorly, and the
## t-test is typically robust to violations of assumptions.
## Use as heuristic guides instead of interpreting literally.
##
## Null hypothesis, for each group, is a normal distribution of fnbmd.
## Group Male: Sample mean assumed normal because n > 30, so no test needed.
## Group Female: Sample mean assumed normal because n > 30, so no test needed.
##
## Null hypothesis is equal variances of fnbmd, homogeneous.
## Variance Ratio test: F = 0.023/0.018 = 1.336, df = 821;1299, p-value = 0.000
## Levene's test, Brown-Forsythe: t = 3.449, df = 2120, p-value = 0.001
##
##
## ------ Infer ------
##
## --- Assume equal population variances of fnbmd for each sex
##
## t-cutoff for 95% range of variation: tcut = 1.961
## Standard Error of Mean Difference: SE = 0.006
##
## Hypothesis Test of 0 Mean Diff: t-value = 21.080, df = 2120, p-value = 0.000
##
## Margin of Error for 95% Confidence Level: 0.012
## 95% Confidence Interval for Mean Difference: 0.120 to 0.144
##
##
## --- Do not assume equal population variances of fnbmd for each sex
##
## t-cutoff: tcut = 1.961
## Standard Error of Mean Difference: SE = 0.006
##
## Hypothesis Test of 0 Mean Diff: t = 20.407, df = 1560.981, p-value = 0.000
##
## Margin of Error for 95% Confidence Level: 0.013
## 95% Confidence Interval for Mean Difference: 0.119 to 0.145
##
##
## ------ Effect Size ------
##
## --- Assume equal population variances of fnbmd for each sex
##
## Standardized Mean Difference of fnbmd, Cohen's d: 0.939
##
##
## ------ Practical Importance ------
##
## Minimum Mean Difference of practical importance: mmd
## Minimum Standardized Mean Difference of practical importance: msmd
## Neither value specified, so no analysis
##
##
## ------ Graphics Smoothing Parameter ------
##
## Density bandwidth for sex Male: 0.044
## Density bandwidth for sex Female: 0.034
# Compare two groups. The male-vs-female comparison is the ttest() on sex
# above; here the data are restricted to men and fnbmd is compared across the
# levels of smoking.
men <- df[which(df$sex == "Male"), , drop = FALSE]
ttest(fnbmd ~ smoking, data = men)
##
## Compare fnbmd across smoking with levels 0 and 1
## Grouping Variable: smoking
## Response Variable: fnbmd
##
##
## ------ Describe ------
##
## fnbmd for smoking 0: n.miss = 8, n = 312, mean = 0.932, sd = 0.151
## fnbmd for smoking 1: n.miss = 15, n = 510, mean = 0.896, sd = 0.153
##
## Mean Difference of fnbmd: 0.036
##
## Weighted Average Standard Deviation: 0.152
##
##
## ------ Assumptions ------
##
## Note: These hypothesis tests can perform poorly, and the
## t-test is typically robust to violations of assumptions.
## Use as heuristic guides instead of interpreting literally.
##
## Null hypothesis, for each group, is a normal distribution of fnbmd.
## Group 0: Sample mean assumed normal because n > 30, so no test needed.
## Group 1: Sample mean assumed normal because n > 30, so no test needed.
##
## Null hypothesis is equal variances of fnbmd, homogeneous.
## Variance Ratio test: F = 0.023/0.023 = 1.028, df = 509;311, p-value = 0.795
## Levene's test, Brown-Forsythe: t = -1.122, df = 820, p-value = 0.262
##
##
## ------ Infer ------
##
## --- Assume equal population variances of fnbmd for each smoking
##
## t-cutoff for 95% range of variation: tcut = 1.963
## Standard Error of Mean Difference: SE = 0.011
##
## Hypothesis Test of 0 Mean Diff: t-value = 3.316, df = 820, p-value = 0.001
##
## Margin of Error for 95% Confidence Level: 0.021
## 95% Confidence Interval for Mean Difference: 0.015 to 0.058
##
##
## --- Do not assume equal population variances of fnbmd for each smoking
##
## t-cutoff: tcut = 1.964
## Standard Error of Mean Difference: SE = 0.011
##
## Hypothesis Test of 0 Mean Diff: t = 3.327, df = 664.491, p-value = 0.001
##
## Margin of Error for 95% Confidence Level: 0.021
## 95% Confidence Interval for Mean Difference: 0.015 to 0.058
##
##
## ------ Effect Size ------
##
## --- Assume equal population variances of fnbmd for each smoking
##
## Standardized Mean Difference of fnbmd, Cohen's d: 0.238
##
##
## ------ Practical Importance ------
##
## Minimum Mean Difference of practical importance: mmd
## Minimum Standardized Mean Difference of practical importance: msmd
## Neither value specified, so no analysis
##
##
## ------ Graphics Smoothing Parameter ------
##
## Density bandwidth for smoking 0: 0.043
## Density bandwidth for smoking 1: 0.050
# Describe bone density (fnbmd) and smoking again, stratified by sex.
table1(
  ~ fnbmd + as.factor(smoking) | sex,
  data = df
)
| | Female (N=1317) | Male (N=845) | Overall (N=2162) |
|---|---|---|---|
| fnbmd | |||
| Mean (SD) | 0.778 (0.132) | 0.910 (0.153) | 0.829 (0.155) |
| Median [Min, Max] | 0.770 [0.280, 1.31] | 0.900 [0.340, 1.51] | 0.820 [0.280, 1.51] |
| Missing | 17 (1.3%) | 23 (2.7%) | 40 (1.9%) |
| as.factor(smoking) | |||
| 0 | 923 (70.1%) | 320 (37.9%) | 1243 (57.5%) |
| 1 | 394 (29.9%) | 525 (62.1%) | 919 (42.5%) |
# Measurements for the two groups to be compared (nine values each).
placebo <- c(105, 119, 100, 97, 96, 101, 94, 95, 98)
coffee <- c(96, 99, 94, 89, 96, 93, 88, 105, 88)
library(boot); library(simpleboot)
## Simple Bootstrap Routines (1.1-8)
# Bootstrap the difference in means between the two samples with two.boot().
# The RNG seed is fixed so the resampling (and every number derived from b)
# is reproducible from run to run.
set.seed(123)
b <- two.boot(placebo, coffee, mean, R = 500)
# Bootstrap confidence intervals for the difference between the two samples.
# The BCa (bias-corrected and accelerated) interval is the one usually
# reported; it is reassuring when the normal, basic and percentile intervals
# point the same way. The interval types are listed explicitly because the
# default type = "all" also requests the studentized interval, which needs
# bootstrap variances that this call does not supply.
boot.ci(b, type = c("norm", "basic", "perc", "bca"))
## BOOTSTRAP CONFIDENCE INTERVAL CALCULATIONS
## Based on 500 bootstrap replicates
##
## CALL :
## boot.ci(boot.out = b)
##
## Intervals :
## Level Normal Basic
## 95% ( 0.523, 12.042 ) ( 0.444, 11.444 )
##
## Level Percentile BCa
## 95% ( 1.222, 12.222 ) ( 1.287, 12.271 )
## Calculations and Intervals on Original Scale
# Histogram of the bootstrap replicates; the x-axis is the difference between
# the two groups. Note that the argument is `breaks` (with an "s").
hist(b, breaks = 50)
# Median of the bootstrap replicates; b$t holds the resampled differences.
median(x = b$t)
## [1] 6.166667
# First quartile, median and third quartile of the bootstrap replicates.
quantile(b$t, probs = c(0.25, 0.5, 0.75), na.rm = TRUE)
## 25% 50% 75%
## 4.305556 6.166667 8.444444
# Mean of the bootstrap replicates.
mean(x = b$t)
## [1] 6.384222
# Counts and percentages of fx (treated as a factor), stratified by sex.
table1(
  ~ as.factor(fx) | sex,
  data = df
)
| | Female (N=1317) | Male (N=845) | Overall (N=2162) |
|---|---|---|---|
| as.factor(fx) | |||
| 0 | 916 (69.6%) | 701 (83.0%) | 1617 (74.8%) |
| 1 | 401 (30.4%) | 144 (17.0%) | 545 (25.2%) |
# Pearson chi-squared test of association between fx and sex. The printed
# result is the version with Yates' continuity correction.
chisq.test(x = df$fx, y = df$sex)
##
## Pearson's Chi-squared test with Yates' continuity correction
##
## data: df$fx and df$sex
## X-squared = 48.363, df = 1, p-value = 0.000000000003542