library(lessR)
## Warning: package 'lessR' was built under R version 4.4.3
##
## lessR 4.4.3 feedback: gerbing@pdx.edu
## --------------------------------------------------------------
## > d <- Read("") Read data file, many formats available, e.g., Excel
## d is default data frame, data= in analysis routines optional
##
## Many examples of reading, writing, and manipulating data,
## graphics, testing means and proportions, regression, factor analysis,
## customization, forecasting, and aggregation from pivot tables
## Enter: browseVignettes("lessR")
##
## View lessR updates, now including time series forecasting
## Enter: news(package="lessR")
##
## Interactive data analysis
## Enter: interact()
##
## Attaching package: 'lessR'
## The following object is masked from 'package:base':
##
## sort_by
library(table1)
##
## Attaching package: 'table1'
## The following object is masked from 'package:lessR':
##
## label
## The following objects are masked from 'package:base':
##
## units, units<-
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:lessR':
##
## order_by, recode, rename
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
# Load the birth-weight data set from a local CSV file; the first row holds
# the column names.
bw <- utils::read.csv(
  file = "C:/Users/lehoa/Desktop/thuế/học như một NCS/R với thầy Tuấn/birthwt.csv",
  header = TRUE
)
### 4.1: Số biến và số quan sát
# Number of observations (rows) followed by number of variables (columns).
c(nrow(bw), ncol(bw))
## [1] 189 11
### 4.2: 6 quan sát đầu tiên
# Show the first six observations.
head(bw, n = 6L)
## id low age lwt race smoke ptl ht ui ftv bwt
## 1 85 0 19 182 2 0 0 0 1 0 2523
## 2 86 0 33 155 3 0 0 0 0 3 2551
## 3 87 0 20 105 1 1 0 0 0 1 2557
## 4 88 0 21 108 1 1 0 0 1 2 2594
## 5 89 0 18 107 1 1 0 0 1 0 2600
## 6 91 0 21 124 3 0 0 0 0 0 2622
## Biên tập dữ liệu
### 5.1: Cân nặng mẹ (kg)
# Derive mwt from lwt by multiplying with 0.453592 (pounds-to-kilograms
# factor) and rounding to two decimals, then preview the result.
bw <- transform(bw, mwt = round(lwt * 0.453592, digits = 2))
head(bw, n = 6L)
## id low age lwt race smoke ptl ht ui ftv bwt mwt
## 1 85 0 19 182 2 0 0 0 1 0 2523 82.55
## 2 86 0 33 155 3 0 0 0 0 3 2551 70.31
## 3 87 0 20 105 1 1 0 0 0 1 2557 47.63
## 4 88 0 21 108 1 1 0 0 1 2 2594 48.99
## 5 89 0 18 107 1 1 0 0 1 0 2600 48.53
## 6 91 0 21 124 3 0 0 0 0 0 2622 56.25
### 5.2: Biến ethnicity từ race
# Recode the numeric race codes 1/2/3 into a labelled factor.
bw[["ethnicity"]] <- factor(
  x = bw[["race"]],
  levels = 1:3,
  labels = c("White", "Black", "Other")
)
### 5.3: Tập dữ liệu bw1
# bw1 keeps only the id, low and bwt columns of bw; then report its size.
bw1 <- subset(bw, select = c(id, low, bwt))
c(nrow(bw1), ncol(bw1))
## [1] 189 3
# Show the first 20 rows of bw1.
head(bw1, n = 20L)
## id low bwt
## 1 85 0 2523
## 2 86 0 2551
## 3 87 0 2557
## 4 88 0 2594
## 5 89 0 2600
## 6 91 0 2622
## 7 92 0 2637
## 8 93 0 2637
## 9 94 0 2663
## 10 95 0 2665
## 11 96 0 2722
## 12 97 0 2733
## 13 98 0 2751
## 14 99 0 2750
## 15 100 0 2769
## 16 101 0 2769
## 17 102 0 2778
## 18 103 0 2782
## 19 104 0 2807
## 20 105 0 2821
### 5.4: bw2: chỉ các mẹ có trẻ thiếu cân
# bw2 holds the rows of bw where low equals 1; then report its size.
# The dplyr namespace is spelled out so the call does not depend on which
# attached package currently owns the name filter.
bw2 <- bw |>
  dplyr::filter(low == 1)
c(nrow(bw2), ncol(bw2))
## [1] 59 13
# Show the first 20 rows of bw2.
head(bw2, n = 20L)
## id low age lwt race smoke ptl ht ui ftv bwt mwt ethnicity
## 1 4 1 28 120 3 1 1 0 1 0 709 54.43 Other
## 2 10 1 29 130 1 0 0 0 1 2 1021 58.97 White
## 3 11 1 34 187 2 1 0 1 0 0 1135 84.82 Black
## 4 13 1 25 105 3 0 1 1 0 0 1330 47.63 Other
## 5 15 1 25 85 3 0 0 0 1 0 1474 38.56 Other
## 6 16 1 27 150 3 0 0 0 0 0 1588 68.04 Other
## 7 17 1 23 97 3 0 0 0 1 1 1588 44.00 Other
## 8 18 1 24 128 2 0 1 0 0 1 1701 58.06 Black
## 9 19 1 24 132 3 0 0 1 0 0 1729 59.87 Other
## 10 20 1 21 165 1 1 0 1 0 1 1790 74.84 White
## 11 22 1 32 105 1 1 0 0 0 0 1818 47.63 White
## 12 23 1 19 91 1 1 2 0 1 0 1885 41.28 White
## 13 24 1 25 115 3 0 0 0 0 0 1893 52.16 Other
## 14 25 1 16 130 3 0 0 0 0 1 1899 58.97 Other
## 15 26 1 25 92 1 1 0 0 0 0 1928 41.73 White
## 16 27 1 20 150 1 1 0 0 0 2 1928 68.04 White
## 17 28 1 21 200 2 0 0 0 1 2 1928 90.72 Black
## 18 29 1 24 155 1 1 1 0 0 0 1936 70.31 White
## 19 30 1 21 103 3 0 0 0 0 0 1970 46.72 Other
## 20 31 1 20 125 3 0 0 0 1 0 2055 56.70 Other
### 5.5: bw3: thiếu cân và mẹ hút thuốc
# bw3 holds the rows of bw where both low and smoke equal 1; then report its
# size. Comma-separated conditions in dplyr::filter() are combined with AND.
bw3 <- bw |>
  dplyr::filter(low == 1, smoke == 1)
c(nrow(bw3), ncol(bw3))
## [1] 30 13
# Show the first 20 rows of bw3.
head(bw3, n = 20L)
## id low age lwt race smoke ptl ht ui ftv bwt mwt ethnicity
## 1 4 1 28 120 3 1 1 0 1 0 709 54.43 Other
## 2 11 1 34 187 2 1 0 1 0 0 1135 84.82 Black
## 3 20 1 21 165 1 1 0 1 0 1 1790 74.84 White
## 4 22 1 32 105 1 1 0 0 0 0 1818 47.63 White
## 5 23 1 19 91 1 1 2 0 1 0 1885 41.28 White
## 6 26 1 25 92 1 1 0 0 0 0 1928 41.73 White
## 7 27 1 20 150 1 1 0 0 0 2 1928 68.04 White
## 8 29 1 24 155 1 1 1 0 0 0 1936 70.31 White
## 9 34 1 19 112 1 1 0 0 1 0 2084 50.80 White
## 10 35 1 26 117 1 1 1 0 0 0 2084 53.07 White
## 11 37 1 17 130 3 1 1 0 1 0 2125 58.97 Other
## 12 40 1 20 120 2 1 0 0 0 3 2126 54.43 Black
## 13 42 1 22 130 1 1 1 0 1 1 2187 58.97 White
## 14 44 1 20 80 3 1 0 0 1 0 2211 36.29 Other
## 15 45 1 17 110 1 1 0 0 0 0 2225 49.90 White
## 16 50 1 18 110 2 1 1 0 0 0 2296 49.90 Black
## 17 51 1 20 121 1 1 1 0 1 0 2296 54.88 White
## 18 56 1 31 102 1 1 1 0 0 1 2353 46.27 White
## 19 59 1 23 187 2 1 0 0 0 1 2367 84.82 Black
## 20 60 1 20 122 2 1 0 0 0 0 2381 55.34 Black
## 6.1: Mô tả age, lwt, bwt
# Overall descriptive table for age, lwt and bwt.
table1::table1(~ age + lwt + bwt, data = bw)
| | Overall (N=189) |
|---|---|
| age | |
| Mean (SD) | 23.2 (5.30) |
| Median [Min, Max] | 23.0 [14.0, 45.0] |
| lwt | |
| Mean (SD) | 130 (30.6) |
| Median [Min, Max] | 121 [80.0, 250] |
| bwt | |
| Mean (SD) | 2940 (729) |
| Median [Min, Max] | 2980 [709, 4990] |
## 6.2: Mô tả theo nhóm "low"
# Descriptive table of age, lwt, smoking, ethnicity and bwt, stratified by
# the low indicator.
# smoke is recoded into a labelled factor with explicit levels, the same way
# ethnicity is built from race. Unlike ifelse(smoke == 1, "yes", "no"), a code
# other than 0 or 1 becomes NA instead of being silently counted as "no".
bw$smoking <- factor(bw$smoke,
                     levels = c(0, 1),
                     labels = c("no", "yes"))
table1(~ age + lwt + smoking + ethnicity + bwt | factor(low), data = bw)
| | 0 (N=130) | 1 (N=59) | Overall (N=189) |
|---|---|---|---|
| age | |||
| Mean (SD) | 23.7 (5.58) | 22.3 (4.51) | 23.2 (5.30) |
| Median [Min, Max] | 23.0 [14.0, 45.0] | 22.0 [14.0, 34.0] | 23.0 [14.0, 45.0] |
| lwt | |||
| Mean (SD) | 133 (31.7) | 122 (26.6) | 130 (30.6) |
| Median [Min, Max] | 124 [85.0, 250] | 120 [80.0, 200] | 121 [80.0, 250] |
| smoking | |||
| no | 86 (66.2%) | 29 (49.2%) | 115 (60.8%) |
| yes | 44 (33.8%) | 30 (50.8%) | 74 (39.2%) |
| ethnicity | |||
| White | 73 (56.2%) | 23 (39.0%) | 96 (50.8%) |
| Black | 15 (11.5%) | 11 (18.6%) | 26 (13.8%) |
| Other | 42 (32.3%) | 25 (42.4%) | 67 (35.4%) |
| bwt | |||
| Mean (SD) | 3330 (478) | 2100 (391) | 2940 (729) |
| Median [Min, Max] | 3270 [2520, 4990] | 2210 [709, 2500] | 2980 [709, 4990] |
## 7.1 Biểu đồ phân tán (scatter plot) giữa cân nặng mẹ và cân nặng con
# Scatter plot of bwt (y) against mwt (x), drawn as solid blue points.
with(
  bw,
  plot(
    x = mwt,
    y = bwt,
    main = "Mối liên hệ giữa cân nặng mẹ và cân nặng trẻ",
    xlab = "Cân nặng mẹ (kg)",
    ylab = "Cân nặng trẻ lúc sinh (g)",
    pch = 16,
    col = "blue"
  )
)
## 7.2 Biểu đồ histogram cho cân nặng trẻ (bwt)
# Histogram of bwt with the y-axis expressed as a percentage of observations.
# hist() draws raw counts by default, which contradicts the "tỉ lệ (%)" axis
# label. The bins are therefore computed without plotting, the counts are
# rescaled to percentages of the non-missing values, and the histogram object
# is then drawn (plot() on a histogram with equal-width bins uses $counts).
bwt_hist <- hist(bw$bwt, plot = FALSE)
bwt_hist$counts <- 100 * bwt_hist$counts / sum(bwt_hist$counts)
plot(bwt_hist,
     main = "Phân bố cân nặng trẻ lúc sinh",
     xlab = "Cân nặng trẻ (g)", ylab = "tỉ lệ (%)",
     col = "lightgreen", border = "white")
## 7.3 Histogram với lessR::Histogram() có đường cong chuẩn
# lessR histogram of bwt with a normal curve overlaid.
# A plain Histogram() call draws bars only, so the normal curve announced for
# this section was missing. density = TRUE switches on the density overlay and
# type = "normal" selects the normal curve.
# NOTE(review): in density mode lessR may colour the bars through fill_hist
# rather than fill -- confirm the bars still render in sky blue.
library(lessR)
Histogram(bwt, data = bw, fill = "skyblue", density = TRUE, type = "normal")
## >>> Suggestions
## bin_width: set the width of each bin
## bin_start: set the start of the first bin
## bin_end: set the end of the last bin
## Histogram(bwt, density=TRUE) # smoothed curve + histogram
## Plot(bwt) # Violin/Box/Scatterplot (VBS) plot
##
## --- bwt ---
##
## n miss mean sd min mdn max
## 189 0 2944.59 729.21 709.00 2977.00 4990.00
##
##
##
## --- Outliers --- from the box plot: 1
##
## Small Large
## ----- -----
## 709.0
##
##
## Bin Width: 500
## Number of Bins: 9
##
## Bin Midpnt Count Prop Cumul.c Cumul.p
## -----------------------------------------------------
## 500 > 1000 750 1 0.01 1 0.01
## 1000 > 1500 1250 4 0.02 5 0.03
## 1500 > 2000 1750 14 0.07 19 0.10
## 2000 > 2500 2250 40 0.21 59 0.31
## 2500 > 3000 2750 38 0.20 97 0.51
## 3000 > 3500 3250 45 0.24 142 0.75
## 3500 > 4000 3750 38 0.20 180 0.95
## 4000 > 4500 4250 7 0.04 187 0.99
## 4500 > 5000 4750 2 0.01 189 1.00
##
## 7.4 Biểu đồ cột (bar chart) cho biến phân loại smoke và ethnicity
### Bar chart: hút thuốc
# Bar chart of the frequency of each smoke code; the bars are relabelled in
# the order the codes appear in the table.
bw$smoke |>
  table() |>
  barplot(
    names.arg = c("Không hút", "Có hút"),
    col = c("gray70", "tomato"),
    main = "Tình trạng hút thuốc trong thai kỳ"
  )
### Bar chart: sắc tộc
# Bar chart of the frequency of each ethnicity level.
bw$ethnicity |>
  table() |>
  barplot(
    col = c("orange", "lightblue", "gray"),
    main = "Phân bố sắc tộc"
  )
## 7.5 Biểu đồ hộp (boxplot): cân nặng trẻ theo tình trạng hút thuốc
# Side-by-side box plots of bwt, one box per smoke code, relabelled in the
# order the codes are sorted.
boxplot(
  formula = bwt ~ smoke,
  data = bw,
  names = c("Không hút", "Có hút"),
  col = c("green", "red"),
  main = "So sánh cân nặng trẻ theo tình trạng hút thuốc",
  xlab = "Hút thuốc",
  ylab = "Cân nặng trẻ (g)"
)