# Load the obesity dataset from a CSV file into the data frame `ob`.
# NOTE(review): the path is absolute and machine-specific, so the script only
# runs where this exact file location exists.
ob <- read.csv(file = "D:\\HUYEN\\DAU\\Tap huan R\\Obesity data.csv")

# Number of rows and columns.
dim(ob)
## [1] 1217 13

# First six rows, last six rows, then per-column summary statistics.
head(ob, n = 6L)
tail(ob, n = 6L)
summary(ob)
## id gender height weight
## Min. : 1.0 Length:1217 Min. :136.0 Min. :34.00
## 1st Qu.: 309.0 Class :character 1st Qu.:151.0 1st Qu.:49.00
## Median : 615.0 Mode :character Median :155.0 Median :54.00
## Mean : 614.5 Mean :156.7 Mean :55.14
## 3rd Qu.: 921.0 3rd Qu.:162.0 3rd Qu.:61.00
## Max. :1227.0 Max. :185.0 Max. :95.00
## bmi age WBBMC wbbmd fat
## Min. :14.5 Min. :13.00 Min. : 695 Min. :0.650 Min. : 4277
## 1st Qu.:20.2 1st Qu.:35.00 1st Qu.:1481 1st Qu.:0.930 1st Qu.:13768
## Median :22.2 Median :48.00 Median :1707 Median :1.010 Median :16955
## Mean :22.4 Mean :47.15 Mean :1725 Mean :1.009 Mean :17288
## 3rd Qu.:24.3 3rd Qu.:58.00 3rd Qu.:1945 3rd Qu.:1.090 3rd Qu.:20325
## Max. :37.1 Max. :88.00 Max. :3040 Max. :1.350 Max. :40825
## lean pcfat hypertension diabetes
## Min. :19136 Min. : 9.2 Min. :0.000 Min. :0.0000
## 1st Qu.:30325 1st Qu.:27.0 1st Qu.:0.000 1st Qu.:0.0000
## Median :33577 Median :32.4 Median :1.000 Median :0.0000
## Mean :35463 Mean :31.6 Mean :0.507 Mean :0.1109
## 3rd Qu.:39761 3rd Qu.:36.8 3rd Qu.:1.000 3rd Qu.:0.0000
## Max. :63059 Max. :48.4 Max. :1.000 Max. :1.0000
## 5.1 Encoding gender
# Numeric sex indicator built with a named lookup: "F" -> 1, "M" -> 0.
# Any other gender value (including NA) yields NA.
ob$sex <- unname(c(F = 1, M = 0)[as.character(ob$gender)])
# Cross-tabulate the new code against the original labels to check it.
table(ob[["sex"]], ob[["gender"]])
##
## F M
## 0 0 355
## 1 862 0
# Second way to build the indicator, in one vectorised step:
# 1 where gender is "F", 0 otherwise.
ob$sex.b <- with(ob, ifelse(test = gender == "F", yes = 1, no = 0))
# Cross-tabulate against the original labels to check the coding.
table(ob[["sex.b"]], ob[["gender"]])
##
## F M
## 0 0 355
## 1 862 0
# Preview the data frame after the recoding steps above.
head(ob, n = 6L)

# Classify BMI into four bands, stored as the character column `obese`:
#   bmi < 18.5        -> "Underweight"
#   18.5 <= bmi < 25  -> "Normal"
#   25 <= bmi < 30    -> "Overweight"
#   bmi >= 30         -> "Obese"
# findInterval() returns 0..3 for these left-closed bands; adding 1 turns that
# into an index into the label vector. A missing bmi stays NA.
ob$obese <- c("Underweight", "Normal", "Overweight", "Obese")[
  findInterval(ob$bmi, c(18.5, 25, 30)) + 1L
]

# Divide the lean and fat columns by 1000 into new *.kg columns.
# NOTE(review): presumably the source values are in grams -- confirm.
ob[["lean.kg"]] <- ob[["lean"]] / 1000
ob[["fat.kg"]] <- ob[["fat"]] / 1000
# Rows for men with bmi >= 25. which() drops rows where the condition is NA,
# which is also what subset() does.
men.overweight <- ob[which(ob$gender == "M" & ob$bmi >= 25), , drop = FALSE]
dim(men.overweight)
## [1] 85 18
# Distribution of the BMI categories within this subgroup.
table(men.overweight[["obese"]])
##
## Obese Overweight
## 4 81
# Six-column subset of `ob`, selected by name with matrix-style indexing;
# all rows are kept.
Demo <- ob[
  ,
  c("id", "age", "gender", "weight", "height", "pcfat"),
  drop = FALSE
]
dim(Demo)
## [1] 1217 6
head(Demo, n = 6L)
# The same six columns again, this time with list-style indexing.
Demo.2 <- ob[c("id", "age", "gender", "weight", "height", "pcfat")]
head(Demo.2, n = 6L)