# Load the obesity dataset into `ob`.
# The default path is the original author's local copy; set the
# OBESITY_DATA_CSV environment variable to read the file from another
# location without editing this script.
ob_path <- Sys.getenv(
  "OBESITY_DATA_CSV",
  unset = "C:\\Users\\DELL\\Downloads\\tap huan khoa hoc\\Obesity data.csv"
)
# Fail early and name the resolved path, rather than surfacing a bare
# connection error from read.csv().
if (!file.exists(ob_path)) {
  stop("Obesity data file not found: ", ob_path, call. = FALSE)
}
ob <- read.csv(ob_path)
# Preview the first six records to confirm the columns were parsed as expected.
head(ob, n = 6L)
## id gender height weight bmi age WBBMC wbbmd fat lean pcfat hypertension
## 1 1 F 150 49 21.8 53 1312 0.88 17802 28600 37.3 0
## 2 2 M 165 52 19.1 65 1309 0.84 8381 40229 16.8 1
## 3 3 F 157 57 23.1 64 1230 0.84 19221 36057 34.0 1
## 4 4 F 156 53 21.8 56 1171 0.80 17472 33094 33.8 1
## 5 5 M 160 51 19.9 54 1681 0.98 7336 40621 14.8 0
## 6 6 F 153 47 20.1 52 1358 0.91 14904 30068 32.2 1
## diabetes
## 1 1
## 2 0
## 3 0
## 4 0
## 5 0
## 6 0
# Min / quartiles / mean / max for the four continuous measures of interest.
continuous_vars <- c("age", "weight", "height", "pcfat")
summary(ob[, continuous_vars])
## age weight height pcfat
## Min. :13.00 Min. :34.00 Min. :136.0 Min. : 9.2
## 1st Qu.:35.00 1st Qu.:49.00 1st Qu.:151.0 1st Qu.:27.0
## Median :48.00 Median :54.00 Median :155.0 Median :32.4
## Mean :47.15 Mean :55.14 Mean :156.7 Mean :31.6
## 3rd Qu.:58.00 3rd Qu.:61.00 3rd Qu.:162.0 3rd Qu.:36.8
## Max. :88.00 Max. :95.00 Max. :185.0 Max. :48.4
# Frequency counts for the two indicator columns (shown as 0/1 in the preview).
table(ob[["hypertension"]])
table(ob[["diabetes"]])
# Quartiles (25th, 50th and 75th percentiles) of each continuous measure.
quartile_probs <- c(0.25, 0.5, 0.75)
quantile(ob[["age"]], probs = quartile_probs)
## 25% 50% 75%
## 35 48 58
quantile(ob[["weight"]], probs = quartile_probs)
## 25% 50% 75%
## 49 54 61
quantile(ob[["height"]], probs = quartile_probs)
## 25% 50% 75%
## 151 155 162
quantile(ob[["pcfat"]], probs = quartile_probs)
## 25% 50% 75%
## 27.0 32.4 36.8
# Note that the `echo = FALSE` parameter was added to the code chunk to
# prevent printing of the R code that generated the plot.
## Mô tả đặc điểm tuổi, cân nặng theo giới tính
# Mean of each continuous measure within each gender group.
# `. ~ gender` aggregates every non-grouping column of the subset, so the
# column order of the subset fixes the column order of the result.
continuous_by_gender <- ob[c("gender", "age", "weight", "height", "pcfat")]
aggregate(. ~ gender, data = continuous_by_gender, FUN = mean)
## gender age weight height pcfat
## 1 F 48.57309 52.31090 153.2912 34.67241
## 2 M 43.70141 62.02254 165.0592 24.15607
# Mean of each indicator column within each gender group (for 0/1 values this
# is the proportion of ones).
indicators_by_gender <- ob[c("gender", "hypertension", "diabetes")]
aggregate(. ~ gender, data = indicators_by_gender, FUN = mean)
## gender hypertension diabetes
## 1 F 0.5011601 0.11832947
## 2 M 0.5211268 0.09295775
# Welch two-sample t-test of one outcome column between the gender groups.
# `outcome` is the column name as a string. The formula `<outcome> ~ gender`
# is built with reformulate(), so the printed "data:" label still reads
# "<outcome> by gender".
welch_by_gender <- function(outcome) {
  t.test(reformulate("gender", response = outcome), data = ob)
}
welch_by_gender("age")
##
## Welch Two Sample t-test
##
## data: age by gender
## t = 4.2608, df = 587.49, p-value = 2.372e-05
## alternative hypothesis: true difference in means between group F and group M is not equal to 0
## 95 percent confidence interval:
## 2.626083 7.117272
## sample estimates:
## mean in group F mean in group M
## 48.57309 43.70141
welch_by_gender("weight")
##
## Welch Two Sample t-test
##
## data: weight by gender
## t = -16.952, df = 551.85, p-value < 2.2e-16
## alternative hypothesis: true difference in means between group F and group M is not equal to 0
## 95 percent confidence interval:
## -10.836942 -8.586319
## sample estimates:
## mean in group F mean in group M
## 52.31090 62.02254
welch_by_gender("height")
##
## Welch Two Sample t-test
##
## data: height by gender
## t = -29.125, df = 562.31, p-value < 2.2e-16
## alternative hypothesis: true difference in means between group F and group M is not equal to 0
## 95 percent confidence interval:
## -12.56161 -10.97433
## sample estimates:
## mean in group F mean in group M
## 153.2912 165.0592
welch_by_gender("pcfat")
##
## Welch Two Sample t-test
##
## data: pcfat by gender
## t = 29.768, df = 602.01, p-value < 2.2e-16
## alternative hypothesis: true difference in means between group F and group M is not equal to 0
## 95 percent confidence interval:
## 9.822548 11.210140
## sample estimates:
## mean in group F mean in group M
## 34.67241 24.15607
# Pearson chi-squared test on the gender x hypertension contingency table.
# The recorded output shows Yates' continuity correction was applied.
# NOTE: the "data:" line of the output echoes the argument expression, so the
# call is kept exactly as written to match the recorded output.
chisq.test(table(ob$gender, ob$hypertension))
##
## Pearson's Chi-squared test with Yates' continuity correction
##
## data: table(ob$gender, ob$hypertension)
## X-squared = 0.32515, df = 1, p-value = 0.5685
# Same test on the gender x diabetes contingency table.
chisq.test(table(ob$gender, ob$diabetes))
##
## Pearson's Chi-squared test with Yates' continuity correction
##
## data: table(ob$gender, ob$diabetes)
## X-squared = 1.394, df = 1, p-value = 0.2377
# Two small independent samples (7 and 9 observations respectively).
Nhom_A <- c(14, 4, 10, 6, 3, 11, 12)
Nhom_B <- c(16, 17, 13, 12, 7, 16, 11, 8, 7)
# Shapiro-Wilk normality check on each sample.
shapiro.test(Nhom_A)
##
## Shapiro-Wilk normality test
##
## data: Nhom_A
## W = 0.92541, p-value = 0.5126
shapiro.test(Nhom_B)
##
## Shapiro-Wilk normality test
##
## data: Nhom_B
## W = 0.89641, p-value = 0.2319
# Descriptive summary of each sample.
summary(Nhom_A)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 3.000 5.000 10.000 8.571 11.500 14.000
summary(Nhom_B)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 7.00 8.00 12.00 11.89 16.00 17.00
# Welch two-sample t-test for a difference in means (A minus B).
t.test(Nhom_A, Nhom_B)
##
## Welch Two Sample t-test
##
## data: Nhom_A and Nhom_B
## t = -1.6, df = 12.554, p-value = 0.1345
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -7.813114 1.178194
## sample estimates:
## mean of x mean of y
## 8.571429 11.888889

# Bootstrap the difference `stat_fun(A) - stat_fun(B)`.
# Each replicate resamples Nhom_A first and Nhom_B second, with replacement
# and at the original sample sizes, then applies `stat_fun` to both resamples.
# Returns a numeric vector of length `n_boot`.
bootstrap_stat_diff <- function(stat_fun, n_boot = 1000) {
  vapply(
    seq_len(n_boot),
    function(i) {
      resampled_a <- sample(Nhom_A, replace = TRUE)
      resampled_b <- sample(Nhom_B, replace = TRUE)
      stat_fun(resampled_a) - stat_fun(resampled_b)
    },
    numeric(1)
  )
}

# Percentile points reported for each bootstrap distribution: the 2.5% and
# 97.5% bounds give a 95% percentile interval, 50% is the bootstrap median.
interval_probs <- c(0.025, 0.5, 0.975)

# Difference in means; the seed is reset so the run is reproducible.
set.seed(123)
bootstrap_diff_mean <- bootstrap_stat_diff(mean)
quantile(bootstrap_diff_mean, interval_probs)
## 2.5% 50% 97.5%
## -7.0952381 -3.3174603 0.4924603
hist(
  bootstrap_diff_mean,
  main = "Bootstrap Difference in Means",
  xlab = "Mean Difference"
)

# Difference in medians, restarted from the same seed as the means above.
set.seed(123)
bootstrap_diff_median <- bootstrap_stat_diff(median)
quantile(bootstrap_diff_median, interval_probs)
## 2.5% 50% 97.5%
## -10 -2 4
hist(
  bootstrap_diff_median,
  main = "Bootstrap Difference in Medians",
  xlab = "Median Difference"
)