library(lessR);
## 
## lessR 4.4.5                         feedback: gerbing@pdx.edu 
## --------------------------------------------------------------
## > d <- Read("")  Read data file, many formats available, e.g., Excel
##   d is default data frame, data= in analysis routines optional
## 
## Many examples of reading, writing, and manipulating data, 
## graphics, testing means and proportions, regression, factor analysis,
## customization, forecasting, and aggregation from pivot tables
##   Enter: browseVignettes("lessR")
## 
## View lessR updates, now including time series forecasting
##   Enter: news(package="lessR")
## 
## Interactive data analysis
##   Enter: interact()
library(table1)
## 
## Attaching package: 'table1'
## The following object is masked from 'package:lessR':
## 
##     label
## The following objects are masked from 'package:base':
## 
##     units, units<-
library(lessR)
library(readr)

# Load the obesity dataset from a local CSV file.
# NOTE(review): the path below is machine-specific; adjust it (or the working
# directory) before running this script on another computer.
Obesity_data <- read_csv("C:/Users/DELL/Downloads/Obesity data.csv")
## Rows: 1217 Columns: 13
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr  (1): gender
## dbl (12): id, height, weight, bmi, age, WBBMC, wbbmd, fat, lean, pcfat, hype...
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

# Short working alias used by the rest of the script.
ob <- Obesity_data

# View() opens a spreadsheet-style viewer, which only makes sense in an
# interactive session; skip it when the script is run in batch mode or knitted.
if (interactive()) {
  View(Obesity_data)
  View(ob)
}
# Number of observations (rows)
NROW(ob)
## [1] 1217
# Number of variables (columns)
NCOL(ob)
## [1] 13
# Or both dimensions at once
dim(ob)
## [1] 1217   13
# Preview the first six rows of the data
head(ob, n = 6L)
## # A tibble: 6 × 13
##      id gender height weight   bmi   age WBBMC wbbmd   fat  lean pcfat
##   <dbl> <chr>   <dbl>  <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1     1 F         150     49  21.8    53  1312  0.88 17802 28600  37.3
## 2     2 M         165     52  19.1    65  1309  0.84  8381 40229  16.8
## 3     3 F         157     57  23.1    64  1230  0.84 19221 36057  34  
## 4     4 F         156     53  21.8    56  1171  0.8  17472 33094  33.8
## 5     5 M         160     51  19.9    54  1681  0.98  7336 40621  14.8
## 6     6 F         153     47  20.1    52  1358  0.91 14904 30068  32.2
## # ℹ 2 more variables: hypertension <dbl>, diabetes <dbl>
# Preview the last six rows of the data
tail(ob, n = 6L)
## # A tibble: 6 × 13
##      id gender height weight   bmi   age WBBMC wbbmd   fat  lean pcfat
##   <dbl> <chr>   <dbl>  <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1  1222 F         153     50  21.4    59  1309  0.87 18328 29147  37.6
## 2  1223 F         150     44  19.6    44  1474  0.95 12906 28534  30.1
## 3  1224 F         148     51  23.3    58  1522  0.97 14938 33931  29.6
## 4  1225 F         149     50  22.5    57  1409  0.93 16777 30598  34.4
## 5  1226 F         144     49  23.6    67  1266  0.9  20094 27272  41.3
## 6  1227 F         141     45  22.6    58  1228  0.91 14567 28111  33.2
## # ℹ 2 more variables: hypertension <dbl>, diabetes <dbl>
# Per-column summary of every variable in the data
summary(object = ob)
##        id            gender              height          weight     
##  Min.   :   1.0   Length:1217        Min.   :136.0   Min.   :34.00  
##  1st Qu.: 309.0   Class :character   1st Qu.:151.0   1st Qu.:49.00  
##  Median : 615.0   Mode  :character   Median :155.0   Median :54.00  
##  Mean   : 614.5                      Mean   :156.7   Mean   :55.14  
##  3rd Qu.: 921.0                      3rd Qu.:162.0   3rd Qu.:61.00  
##  Max.   :1227.0                      Max.   :185.0   Max.   :95.00  
##       bmi            age            WBBMC          wbbmd            fat       
##  Min.   :14.5   Min.   :13.00   Min.   : 695   Min.   :0.650   Min.   : 4277  
##  1st Qu.:20.2   1st Qu.:35.00   1st Qu.:1481   1st Qu.:0.930   1st Qu.:13768  
##  Median :22.2   Median :48.00   Median :1707   Median :1.010   Median :16955  
##  Mean   :22.4   Mean   :47.15   Mean   :1725   Mean   :1.009   Mean   :17288  
##  3rd Qu.:24.3   3rd Qu.:58.00   3rd Qu.:1945   3rd Qu.:1.090   3rd Qu.:20325  
##  Max.   :37.1   Max.   :88.00   Max.   :3040   Max.   :1.350   Max.   :40825  
##       lean           pcfat       hypertension      diabetes     
##  Min.   :19136   Min.   : 9.2   Min.   :0.000   Min.   :0.0000  
##  1st Qu.:30325   1st Qu.:27.0   1st Qu.:0.000   1st Qu.:0.0000  
##  Median :33577   Median :32.4   Median :1.000   Median :0.0000  
##  Mean   :35463   Mean   :31.6   Mean   :0.507   Mean   :0.1109  
##  3rd Qu.:39761   3rd Qu.:36.8   3rd Qu.:1.000   3rd Qu.:0.0000  
##  Max.   :63059   Max.   :48.4   Max.   :1.000   Max.   :1.0000
# Task 5
# Numeric indicator for gender: 1 when gender is "F", 0 otherwise.
ob[["Sex"]] <- as.numeric(ob[["gender"]] == "F")

# BMI category. right = FALSE makes every interval closed on the left and
# open on the right, so e.g. "Normal" covers [18.5, 25.0).
ob[["obese"]] <- cut(
  x = ob[["bmi"]],
  breaks = c(-Inf, 18.5, 25, 30, Inf),
  labels = c("Underweight", "Normal", "Overweight", "Obese"),
  right = FALSE
)
# Task 5.3
# Fat mass computed as weight times percent body fat (pcfat) over 100;
# lean mass is whatever remains of the weight.
ob[["fat.kg"]] <- with(ob, weight * pcfat / 100)
ob[["lean.kg"]] <- with(ob, weight - fat.kg)
# Task 5.4
# Check which codes the gender variable actually uses
unique(ob[["gender"]])
## [1] "F" "M"
# Keep only men with BMI >= 25.0. which() drops rows where the condition is
# NA, matching how subset() treats missing values.
men.overweight <- ob[which(ob[["gender"]] == "M" & ob[["bmi"]] >= 25.0), ]

# If gender were coded as "Male" instead, the filter would be:
# men.overweight <- ob[which(ob[["gender"]] == "Male" & ob[["bmi"]] >= 25.0), ]
dim(men.overweight)
## [1] 85 17
# Task 5.5
# Demographic subset: keep only the identifier and basic body measurements.
Demo <- ob[c(
  "id", "age", "gender",
  "height", "weight", "pcfat"
)]