Bài 1

Việc 1: Đọc dữ liệu

ob=read.csv("D:\\DuLieu\\Tap huan\\Nam 2025\\Phuong phap NCKH\\Obesity data.csv")
head(ob)
##   id gender height weight  bmi age WBBMC wbbmd   fat  lean pcfat hypertension
## 1  1      F    150     49 21.8  53  1312  0.88 17802 28600  37.3            0
## 2  2      M    165     52 19.1  65  1309  0.84  8381 40229  16.8            1
## 3  3      F    157     57 23.1  64  1230  0.84 19221 36057  34.0            1
## 4  4      F    156     53 21.8  56  1171  0.80 17472 33094  33.8            1
## 5  5      M    160     51 19.9  54  1681  0.98  7336 40621  14.8            0
## 6  6      F    153     47 20.1  52  1358  0.91 14904 30068  32.2            1
##   diabetes
## 1        1
## 2        0
## 3        0
## 4        0
## 5        0
## 6        0

Việc 2: Mô tả đặc điểm

library(table1)
## 
## Attaching package: 'table1'
## The following objects are masked from 'package:base':
## 
##     units, units<-
table1( ~ age + gender + weight + height + pcfat + hypertension + diabetes, data=ob)
Overall
(N=1217)
age
Mean (SD) 47.2 (17.3)
Median [Min, Max] 48.0 [13.0, 88.0]
gender
F 862 (70.8%)
M 355 (29.2%)
weight
Mean (SD) 55.1 (9.40)
Median [Min, Max] 54.0 [34.0, 95.0]
height
Mean (SD) 157 (7.98)
Median [Min, Max] 155 [136, 185]
pcfat
Mean (SD) 31.6 (7.18)
Median [Min, Max] 32.4 [9.20, 48.4]
hypertension
Mean (SD) 0.507 (0.500)
Median [Min, Max] 1.00 [0, 1.00]
diabetes
Mean (SD) 0.111 (0.314)
Median [Min, Max] 0 [0, 1.00]

Việc 3: Tạo biến số định tính

ob$hyper=as.factor(ob$hypertension)
ob$dm=as.factor(ob$diabetes)
table1( ~ age + gender + weight + height + pcfat + hyper + dm, data=ob)
Overall
(N=1217)
age
Mean (SD) 47.2 (17.3)
Median [Min, Max] 48.0 [13.0, 88.0]
gender
F 862 (70.8%)
M 355 (29.2%)
weight
Mean (SD) 55.1 (9.40)
Median [Min, Max] 54.0 [34.0, 95.0]
height
Mean (SD) 157 (7.98)
Median [Min, Max] 155 [136, 185]
pcfat
Mean (SD) 31.6 (7.18)
Median [Min, Max] 32.4 [9.20, 48.4]
hyper
0 600 (49.3%)
1 617 (50.7%)
dm
0 1082 (88.9%)
1 135 (11.1%)

Việc 4:So sánh Nam và Nữ

library(compareGroups) createTable(compareGroups(gender ~ age + weight + height + pcfat + hyper + dm, data = ob)) ## Việc 5:Mô tả theo giới tính

table1(~ age + weight + height + pcfat + hyper + dm | gender, data = ob)
F
(N=862)
M
(N=355)
Overall
(N=1217)
age
Mean (SD) 48.6 (16.4) 43.7 (18.8) 47.2 (17.3)
Median [Min, Max] 49.0 [14.0, 85.0] 44.0 [13.0, 88.0] 48.0 [13.0, 88.0]
weight
Mean (SD) 52.3 (7.72) 62.0 (9.59) 55.1 (9.40)
Median [Min, Max] 51.0 [34.0, 95.0] 62.0 [38.0, 95.0] 54.0 [34.0, 95.0]
height
Mean (SD) 153 (5.55) 165 (6.73) 157 (7.98)
Median [Min, Max] 153 [136, 170] 165 [146, 185] 155 [136, 185]
pcfat
Mean (SD) 34.7 (5.19) 24.2 (5.76) 31.6 (7.18)
Median [Min, Max] 34.7 [14.6, 48.4] 24.6 [9.20, 39.0] 32.4 [9.20, 48.4]
hyper
0 430 (49.9%) 170 (47.9%) 600 (49.3%)
1 432 (50.1%) 185 (52.1%) 617 (50.7%)
dm
0 760 (88.2%) 322 (90.7%) 1082 (88.9%)
1 102 (11.8%) 33 (9.3%) 135 (11.1%)

Bài 2

Việc 1 : Dữ liệu tải trọng

A = c(14, 4, 10, 6, 3, 11, 12)
B = c(16, 17, 13, 12, 7, 16, 11, 8, 7)
wt = c(A, B)
group = c(rep("A", 7), rep("B", 9))
df = data.frame(wt, group)
dim(df)
## [1] 16  2

Việc 2: Đánh giá phân bố

library(lessR)
## 
## lessR 4.4.5                         feedback: gerbing@pdx.edu 
## --------------------------------------------------------------
## > d <- Read("")  Read data file, many formats available, e.g., Excel
##   d is default data frame, data= in analysis routines optional
## 
## Many examples of reading, writing, and manipulating data, 
## graphics, testing means and proportions, regression, factor analysis,
## customization, forecasting, and aggregation from pivot tables
##   Enter: browseVignettes("lessR")
## 
## View lessR updates, now including time series forecasting
##   Enter: news(package="lessR")
## 
## Interactive data analysis
##   Enter: interact()
## 
## Attaching package: 'lessR'
## The following object is masked from 'package:table1':
## 
##     label
Histogram(wt, data = df)
## >>> Note: wt is not in a data frame (table)
## >>> Note: wt is not in a data frame (table)

## >>> Suggestions 
## bin_width: set the width of each bin 
## bin_start: set the start of the first bin 
## bin_end: set the end of the last bin 
## Histogram(wt, density=TRUE)  # smoothed curve + histogram 
## Plot(wt)  # Violin/Box/Scatterplot (VBS) plot 
## 
## --- wt --- 
##  
##      n   miss     mean       sd      min      mdn      max 
##      16      0    10.44     4.29     3.00    11.00    17.00 
##  
## 
## No (Box plot) outliers 
## 
## 
## Bin Width: 2 
## Number of Bins: 8 
##  
##      Bin  Midpnt  Count    Prop  Cumul.c  Cumul.p 
## ------------------------------------------------- 
##   2 >  4       3      2    0.12        2     0.12 
##   4 >  6       5      1    0.06        3     0.19 
##   6 >  8       7      3    0.19        6     0.38 
##   8 > 10       9      1    0.06        7     0.44 
##  10 > 12      11      4    0.25       11     0.69 
##  12 > 14      13      2    0.12       13     0.81 
##  14 > 16      15      2    0.12       15     0.94 
##  16 > 18      17      1    0.06       16     1.00 
##