# Task 3. Read "Obesity data.csv" into R.
# NOTE(review): the task statement asks for the data to be named "ob" and read
# with read.csv(); this script uses readr::read_csv() and the name
# `Obesity_data`, which all later steps rely on.
library(readr)
Obesity_data <- read_csv(
  file = "C:/Users/YEN VINH/OneDrive - Truong Dai Hoc Kien Truc Da Nang/Desktop/PHAN TICH DU LIEU R/DATA/Obesity data.csv"
)
## Rows: 1217 Columns: 13
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (1): gender
## dbl (12): id, height, weight, bmi, age, WBBMC, wbbmd, fat, lean, pcfat, hype...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Task 4. Basic information about the data set
# Show the first six rows.
head(Obesity_data, n = 6L)
## # A tibble: 6 × 13
## id gender height weight bmi age WBBMC wbbmd fat lean pcfat
## <dbl> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 1 F 150 49 21.8 53 1312 0.88 17802 28600 37.3
## 2 2 M 165 52 19.1 65 1309 0.84 8381 40229 16.8
## 3 3 F 157 57 23.1 64 1230 0.84 19221 36057 34
## 4 4 F 156 53 21.8 56 1171 0.8 17472 33094 33.8
## 5 5 M 160 51 19.9 54 1681 0.98 7336 40621 14.8
## 6 6 F 153 47 20.1 52 1358 0.91 14904 30068 32.2
## # ℹ 2 more variables: hypertension <dbl>, diabetes <dbl>
# Check the size of the data: number of rows (observations) and columns
# (variables).
dim(Obesity_data)
## [1] 1217 13
# Show the last six rows.
tail(Obesity_data, n = 6L)
## # A tibble: 6 × 13
## id gender height weight bmi age WBBMC wbbmd fat lean pcfat
## <dbl> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 1222 F 153 50 21.4 59 1309 0.87 18328 29147 37.6
## 2 1223 F 150 44 19.6 44 1474 0.95 12906 28534 30.1
## 3 1224 F 148 51 23.3 58 1522 0.97 14938 33931 29.6
## 4 1225 F 149 50 22.5 57 1409 0.93 16777 30598 34.4
## 5 1226 F 144 49 23.6 67 1266 0.9 20094 27272 41.3
## 6 1227 F 141 45 22.6 58 1228 0.91 14567 28111 33.2
## # ℹ 2 more variables: hypertension <dbl>, diabetes <dbl>
# Per-column summary (quantiles and mean for numeric columns, length/class
# for character columns).
summary(object = Obesity_data)
## id gender height weight
## Min. : 1.0 Length:1217 Min. :136.0 Min. :34.00
## 1st Qu.: 309.0 Class :character 1st Qu.:151.0 1st Qu.:49.00
## Median : 615.0 Mode :character Median :155.0 Median :54.00
## Mean : 614.5 Mean :156.7 Mean :55.14
## 3rd Qu.: 921.0 3rd Qu.:162.0 3rd Qu.:61.00
## Max. :1227.0 Max. :185.0 Max. :95.00
## bmi age WBBMC wbbmd fat
## Min. :14.5 Min. :13.00 Min. : 695 Min. :0.650 Min. : 4277
## 1st Qu.:20.2 1st Qu.:35.00 1st Qu.:1481 1st Qu.:0.930 1st Qu.:13768
## Median :22.2 Median :48.00 Median :1707 Median :1.010 Median :16955
## Mean :22.4 Mean :47.15 Mean :1725 Mean :1.009 Mean :17288
## 3rd Qu.:24.3 3rd Qu.:58.00 3rd Qu.:1945 3rd Qu.:1.090 3rd Qu.:20325
## Max. :37.1 Max. :88.00 Max. :3040 Max. :1.350 Max. :40825
## lean pcfat hypertension diabetes
## Min. :19136 Min. : 9.2 Min. :0.000 Min. :0.0000
## 1st Qu.:30325 1st Qu.:27.0 1st Qu.:0.000 1st Qu.:0.0000
## Median :33577 Median :32.4 Median :1.000 Median :0.0000
## Mean :35463 Mean :31.6 Mean :0.507 Mean :0.1109
## 3rd Qu.:39761 3rd Qu.:36.8 3rd Qu.:1.000 3rd Qu.:0.0000
## Max. :63059 Max. :48.4 Max. :1.000 Max. :1.0000
# Task 5. Edit the data

# sex: numeric indicator, 1 when gender is "F", 0 otherwise.
Obesity_data$sex <- ifelse(Obesity_data$gender == "F", 1, 0)

# obese: BMI category. Thresholds are checked in ascending order, so each
# category is a left-closed interval:
#   bmi < 18.5 -> "Underweight", [18.5, 25) -> "Normal",
#   [25, 30)   -> "Overweight",  bmi >= 30  -> "Obese".
Obesity_data$obese <- ifelse(
  Obesity_data$bmi < 18.5, "Underweight",
  ifelse(
    Obesity_data$bmi < 25.0, "Normal",
    ifelse(Obesity_data$bmi < 30.0, "Overweight", "Obese")
  )
)

# lean.kg / fat.kg: lean and fat mass expressed in kilograms.
# The raw `lean` and `fat` columns appear to be masses in grams, not
# percentages: for the first row, fat 17802 + lean 28600 + WBBMC 1312 is about
# 47.7 kg against a recorded weight of 49 kg. Multiplying by weight / 100
# would give values in the thousands, so convert grams to kilograms instead.
Obesity_data$lean.kg <- Obesity_data$lean / 1000
Obesity_data$fat.kg <- Obesity_data$fat / 1000

# men.overweight: male participants with BMI of 25 or more. subset() drops
# rows where the condition is NA.
men.overweight <- subset(Obesity_data, gender == "M" & bmi >= 25.0)

# Demo: demographic subset containing only the six listed columns.
Demo <- Obesity_data[, c("id", "age", "gender", "height", "weight", "pcfat")]
# Show the first six observations of Demo.
head(Demo, n = 6L)
## # A tibble: 6 × 6
## id age gender height weight pcfat
## <dbl> <dbl> <chr> <dbl> <dbl> <dbl>
## 1 1 53 F 150 49 37.3
## 2 2 65 M 165 52 16.8
## 3 3 64 F 157 57 34
## 4 4 56 F 156 53 33.8
## 5 5 54 M 160 51 14.8
## 6 6 52 F 153 47 32.2
# Number of observations and number of variables.
dim(x = Demo)
## [1] 1217 6
# Confirm the variable names.
colnames(Demo)
## [1] "id" "age" "gender" "height" "weight" "pcfat"