# Load the obesity data set from a CSV file into the data frame `ob`.
# NOTE(review): the path is absolute and machine-specific (a Windows desktop
# folder), so this line only works where that exact location exists —
# consider a project-relative path.
ob <- read.csv("C:/Users/Admin/Desktop/TH phan tich du lieu/lethuytienDAU - day1/Obesity data.csv")
# Print per-column summary statistics for the whole data set (output below).
summary(ob)
## id gender height weight
## Min. : 1.0 Length:1217 Min. :136.0 Min. :34.00
## 1st Qu.: 309.0 Class :character 1st Qu.:151.0 1st Qu.:49.00
## Median : 615.0 Mode :character Median :155.0 Median :54.00
## Mean : 614.5 Mean :156.7 Mean :55.14
## 3rd Qu.: 921.0 3rd Qu.:162.0 3rd Qu.:61.00
## Max. :1227.0 Max. :185.0 Max. :95.00
## bmi age WBBMC wbbmd fat
## Min. :14.5 Min. :13.00 Min. : 695 Min. :0.650 Min. : 4277
## 1st Qu.:20.2 1st Qu.:35.00 1st Qu.:1481 1st Qu.:0.930 1st Qu.:13768
## Median :22.2 Median :48.00 Median :1707 Median :1.010 Median :16955
## Mean :22.4 Mean :47.15 Mean :1725 Mean :1.009 Mean :17288
## 3rd Qu.:24.3 3rd Qu.:58.00 3rd Qu.:1945 3rd Qu.:1.090 3rd Qu.:20325
## Max. :37.1 Max. :88.00 Max. :3040 Max. :1.350 Max. :40825
## lean pcfat hypertension diabetes
## Min. :19136 Min. : 9.2 Min. :0.000 Min. :0.0000
## 1st Qu.:30325 1st Qu.:27.0 1st Qu.:0.000 1st Qu.:0.0000
## Median :33577 Median :32.4 Median :1.000 Median :0.0000
## Mean :35463 Mean :31.6 Mean :0.507 Mean :0.1109
## 3rd Qu.:39761 3rd Qu.:36.8 3rd Qu.:1.000 3rd Qu.:0.0000
## Max. :63059 Max. :48.4 Max. :1.000 Max. :1.0000
# Frequency counts of the 0/1 hypertension column.
table(ob$hypertension)
##
## 0 1
## 600 617
# The same distribution expressed as percentages of all rows.
prop.table(table(ob$hypertension)) * 100
##
## 0 1
## 49.30156 50.69844
# Frequency counts of the 0/1 diabetes column.
table(ob$diabetes)
##
## 0 1
## 1082 135
# The same distribution expressed as percentages of all rows.
prop.table(table(ob$diabetes)) * 100
##
## 0 1
## 88.90715 11.09285
# Split the data by the `gender` column: `nam` keeps the rows coded "M" and
# `nu` keeps the rows coded "F". which() drops rows where the comparison is
# NA, so the selection matches what subset() would return.
nam <- ob[which(ob$gender == "M"), , drop = FALSE]
nu <- ob[which(ob$gender == "F"), , drop = FALSE]
# Per-column summary statistics for the "M" subset (output below).
summary(nam)
## id gender height weight
## Min. : 2.0 Length:355 Min. :146.0 Min. :38.00
## 1st Qu.: 288.5 Class :character 1st Qu.:160.0 1st Qu.:55.00
## Median : 531.0 Mode :character Median :165.0 Median :62.00
## Mean : 558.0 Mean :165.1 Mean :62.02
## 3rd Qu.: 844.5 3rd Qu.:169.0 3rd Qu.:68.00
## Max. :1219.0 Max. :185.0 Max. :95.00
## bmi age WBBMC wbbmd fat
## Min. :14.50 Min. :13.0 Min. :1194 Min. :0.78 Min. : 4277
## 1st Qu.:20.80 1st Qu.:24.0 1st Qu.:1805 1st Qu.:0.99 1st Qu.:11428
## Median :22.50 Median :44.0 Median :2026 Median :1.06 Median :15149
## Mean :22.73 Mean :43.7 Mean :2030 Mean :1.06 Mean :14978
## 3rd Qu.:24.85 3rd Qu.:56.0 3rd Qu.:2252 3rd Qu.:1.13 3rd Qu.:18182
## Max. :34.70 Max. :88.0 Max. :3040 Max. :1.34 Max. :29944
## lean pcfat hypertension diabetes
## Min. :28587 Min. : 9.20 Min. :0.0000 Min. :0.00000
## 1st Qu.:40324 1st Qu.:20.35 1st Qu.:0.0000 1st Qu.:0.00000
## Median :43391 Median :24.60 Median :1.0000 Median :0.00000
## Mean :43762 Mean :24.16 Mean :0.5211 Mean :0.09296
## 3rd Qu.:47563 3rd Qu.:28.00 3rd Qu.:1.0000 3rd Qu.:0.00000
## Max. :63059 Max. :39.00 Max. :1.0000 Max. :1.00000
# Per-column summary statistics for `nu`, the gender == "F" subset (output below).
summary(nu)
## id gender height weight
## Min. : 1.0 Length:862 Min. :136.0 Min. :34.00
## 1st Qu.: 318.0 Class :character 1st Qu.:150.0 1st Qu.:47.00
## Median : 646.5 Mode :character Median :153.0 Median :51.00
## Mean : 637.8 Mean :153.3 Mean :52.31
## 3rd Qu.: 967.8 3rd Qu.:157.0 3rd Qu.:57.00
## Max. :1227.0 Max. :170.0 Max. :95.00
## bmi age WBBMC wbbmd
## Min. :15.20 Min. :14.00 Min. : 695 Min. :0.6500
## 1st Qu.:20.10 1st Qu.:39.00 1st Qu.:1408 1st Qu.:0.9100
## Median :22.10 Median :49.00 Median :1614 Median :0.9900
## Mean :22.26 Mean :48.57 Mean :1599 Mean :0.9876
## 3rd Qu.:24.10 3rd Qu.:59.00 3rd Qu.:1802 3rd Qu.:1.0700
## Max. :37.10 Max. :85.00 Max. :2663 Max. :1.3500
## fat lean pcfat hypertension
## Min. : 6222 Min. :19136 Min. :14.60 Min. :0.0000
## 1st Qu.:14795 1st Qu.:29336 1st Qu.:31.50 1st Qu.:0.0000
## Median :17695 Median :31504 Median :34.70 Median :1.0000
## Mean :18240 Mean :32045 Mean :34.67 Mean :0.5012
## 3rd Qu.:21120 3rd Qu.:34464 3rd Qu.:38.30 3rd Qu.:1.0000
## Max. :40825 Max. :53350 Max. :48.40 Max. :1.0000
## diabetes
## Min. :0.0000
## 1st Qu.:0.0000
## Median :0.0000
## Mean :0.1183
## 3rd Qu.:0.0000
## Max. :1.0000
# Percentage of rows with hypertension 0 vs 1 within the "M" subset.
prop.table(table(nam$hypertension))*100
##
## 0 1
## 47.88732 52.11268
# Percentage of rows with hypertension 0 vs 1 within the "F" subset.
prop.table(table(nu$hypertension))*100
##
## 0 1
## 49.88399 50.11601
# Compare mean age between the "M" and "F" subsets with a two-sample t-test.
# Called with defaults, so R runs the Welch (unequal-variance) version, as the
# recorded output below shows.
t.test(nam$age, nu$age)
##
## Welch Two Sample t-test
##
## data: nam$age and nu$age
## t = -4.2608, df = 587.49, p-value = 2.372e-05
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -7.117272 -2.626083
## sample estimates:
## mean of x mean of y
## 43.70141 48.57309
# Compare weight between the "M" and "F" subsets with the Wilcoxon rank-sum
# test (two-sided, with continuity correction — see the recorded output below).
wilcox.test(nam$weight, nu$weight)
##
## Wilcoxon rank sum test with continuity correction
##
## data: nam$weight and nu$weight
## W = 241840, p-value < 2.2e-16
## alternative hypothesis: true location shift is not equal to 0
# Cross-tabulate gender (rows) against hypertension status (columns).
bang_so_sanh <- table(ob$gender, ob$hypertension)
print(bang_so_sanh)
##
## 0 1
## F 430 432
## M 170 185
# Test for association between gender and hypertension on the 2x2 table;
# chisq.test() applies Yates' continuity correction here (see output below).
chisq.test(bang_so_sanh)
##
## Pearson's Chi-squared test with Yates' continuity correction
##
## data: bang_so_sanh
## X-squared = 0.32515, df = 1, p-value = 0.5685
# Nhóm A (n = 7): 14, 4, 10, 6, 3, 11, 12
# Nhóm B (n = 9): 16, 17, 13, 12, 7, 16, 11, 8, 7
# Observed values for the two groups (7 observations in A, 9 in B).
NhomA <- c(14, 4, 10, 6, 3, 11, 12)
NhomB <- c(16, 17, 13, 12, 7, 16, 11, 8, 7)
# Shapiro-Wilk normality test for group A.
shapiro.test(NhomA)
##
## Shapiro-Wilk normality test
##
## data: NhomA
## W = 0.92541, p-value = 0.5126
# Shapiro-Wilk normality test for group B.
shapiro.test(NhomB)
##
## Shapiro-Wilk normality test
##
## data: NhomB
## W = 0.89641, p-value = 0.2319
# Normal Q-Q plot for group A with its reference line.
# NOTE(review): no matching Q-Q plot is drawn for NhomB — confirm whether
# that omission is intentional.
qqnorm(NhomA)
qqline(NhomA)
## 2.3 Mô tả đặc điểm về tải trọng giữa 2 nhóm
### Xem mô tả tóm tắt của Nhóm A:
# Five-number summary plus mean for each group.
summary(NhomA)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 3.000 5.000 10.000 8.571 11.500 14.000
summary(NhomB)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 7.00 8.00 12.00 11.89 16.00 17.00
# Sample standard deviation of each group.
sd(NhomA)
## [1] 4.237025
sd(NhomB)
## [1] 3.95109
# Compare the two group means; with default arguments t.test() runs the
# Welch (unequal-variance) two-sample test, as the output below shows.
t.test(NhomA, NhomB)
##
## Welch Two Sample t-test
##
## data: NhomA and NhomB
## t = -1.6, df = 12.554, p-value = 0.1345
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -7.813114 1.178194
## sample estimates:
## mean of x mean of y
## 8.571429 11.888889
# Make the 'boot' package available. Install it only when it is missing:
# calling install.packages() on every run (and after the package is already
# attached) just produces a "package is in use" warning and needs network
# access for no benefit.
if (!requireNamespace("boot", quietly = TRUE)) {
  install.packages("boot")
}
library(boot)

# Observed values for the two groups (7 observations in A, 9 in B).
NhomA <- c(14, 4, 10, 6, 3, 11, 12)
NhomB <- c(16, 17, 13, 12, 7, 16, 11, 8, 7)

# Stack both groups into one long data frame with a `value` column and a
# `group` label ("A" or "B"), so rows can be resampled together.
dataA <- data.frame(value = NhomA, group = "A")
dataB <- data.frame(value = NhomB, group = "B")
all_data <- rbind(dataA, dataB)
# Difference in group means (A minus B) on a resampled data set.
#
# The (data, indices) signature is the one boot::boot() requires of its
# `statistic` argument. The previous body only subset the rows and returned
# the data frame, so no mean difference was ever computed.
#
# Args:
#   data:    data frame with a numeric `value` column and a `group` column
#            whose labels include "A" and "B".
#   indices: row indices selecting the resample from `data`.
#
# Returns:
#   mean(value | group == "A") - mean(value | group == "B") over the selected
#   rows. The result is NaN if a resample contains no rows of one group.
mean_diff <- function(data, indices) {
  d <- data[indices, ]
  mean_a <- mean(d$value[d$group == "A"])
  mean_b <- mean(d$value[d$group == "B"])
  mean_a - mean_b
}
# Difference in group medians (A minus B) on a resampled data set.
#
# Args:
#   data:    data frame with a numeric `value` column and a `group` column
#            whose labels include "A" and "B".
#   indices: row indices selecting the resample from `data`.
#
# Returns:
#   median of `value` in group "A" minus median of `value` in group "B",
#   computed over the selected rows only.
median_diff <- function(data, indices) {
  resampled <- data[indices, ]
  in_a <- resampled$group == "A"
  in_b <- resampled$group == "B"
  median(resampled$value[in_a]) - median(resampled$value[in_b])
}