# 1. Read the data into R and name it bw
# file.choose() only opens an interactive file picker and its return value is
# not used below, so it is restricted to interactive sessions; an unattended
# run (Rscript, knitting) must not stop to wait for a dialog.
if (interactive()) {
  file.choose()
}
## [1] "D:\\HOC TAP\\TAP HUAN UNG DUNG AI TRONG PT DU LIEU SU DUNG R\\THUC HANH TAI LOP\\hocRngay2\\bt1Chatgpt.Rmd"
# Store the data frame under the name `bw`, as this step requires, and print it
# explicitly so the listing below is still produced.
bw <- read.csv("D:\\HOC TAP\\TAP HUAN UNG DUNG AI TRONG PT DU LIEU SU DUNG R\\thuc hanh\\birthwt.csv")
print(bw)
## id low age lwt race smoke ptl ht ui ftv bwt
## 1 85 0 19 182 2 0 0 0 1 0 2523
## 2 86 0 33 155 3 0 0 0 0 3 2551
## 3 87 0 20 105 1 1 0 0 0 1 2557
## 4 88 0 21 108 1 1 0 0 1 2 2594
## 5 89 0 18 107 1 1 0 0 1 0 2600
## 6 91 0 21 124 3 0 0 0 0 0 2622
## 7 92 0 22 118 1 0 0 0 0 1 2637
## 8 93 0 17 103 3 0 0 0 0 1 2637
## 9 94 0 29 123 1 1 0 0 0 1 2663
## 10 95 0 26 113 1 1 0 0 0 0 2665
## 11 96 0 19 95 3 0 0 0 0 0 2722
## 12 97 0 19 150 3 0 0 0 0 1 2733
## 13 98 0 22 95 3 0 0 1 0 0 2751
## 14 99 0 30 107 3 0 1 0 1 2 2750
## 15 100 0 18 100 1 1 0 0 0 0 2769
## 16 101 0 18 100 1 1 0 0 0 0 2769
## 17 102 0 15 98 2 0 0 0 0 0 2778
## 18 103 0 25 118 1 1 0 0 0 3 2782
## 19 104 0 20 120 3 0 0 0 1 0 2807
## 20 105 0 28 120 1 1 0 0 0 1 2821
## 21 106 0 32 121 3 0 0 0 0 2 2835
## 22 107 0 31 100 1 0 0 0 1 3 2835
## 23 108 0 36 202 1 0 0 0 0 1 2836
## 24 109 0 28 120 3 0 0 0 0 0 2863
## 25 111 0 25 120 3 0 0 0 1 2 2877
## 26 112 0 28 167 1 0 0 0 0 0 2877
## 27 113 0 17 122 1 1 0 0 0 0 2906
## 28 114 0 29 150 1 0 0 0 0 2 2920
## 29 115 0 26 168 2 1 0 0 0 0 2920
## 30 116 0 17 113 2 0 0 0 0 1 2920
## 31 117 0 17 113 2 0 0 0 0 1 2920
## 32 118 0 24 90 1 1 1 0 0 1 2948
## 33 119 0 35 121 2 1 1 0 0 1 2948
## 34 120 0 25 155 1 0 0 0 0 1 2977
## 35 121 0 25 125 2 0 0 0 0 0 2977
## 36 123 0 29 140 1 1 0 0 0 2 2977
## 37 124 0 19 138 1 1 0 0 0 2 2977
## 38 125 0 27 124 1 1 0 0 0 0 2922
## 39 126 0 31 215 1 1 0 0 0 2 3005
## 40 127 0 33 109 1 1 0 0 0 1 3033
## 41 128 0 21 185 2 1 0 0 0 2 3042
## 42 129 0 19 189 1 0 0 0 0 2 3062
## 43 130 0 23 130 2 0 0 0 0 1 3062
## 44 131 0 21 160 1 0 0 0 0 0 3062
## 45 132 0 18 90 1 1 0 0 1 0 3062
## 46 133 0 18 90 1 1 0 0 1 0 3062
## 47 134 0 32 132 1 0 0 0 0 4 3080
## 48 135 0 19 132 3 0 0 0 0 0 3090
## 49 136 0 24 115 1 0 0 0 0 2 3090
## 50 137 0 22 85 3 1 0 0 0 0 3090
## 51 138 0 22 120 1 0 0 1 0 1 3100
## 52 139 0 23 128 3 0 0 0 0 0 3104
## 53 140 0 22 130 1 1 0 0 0 0 3132
## 54 141 0 30 95 1 1 0 0 0 2 3147
## 55 142 0 19 115 3 0 0 0 0 0 3175
## 56 143 0 16 110 3 0 0 0 0 0 3175
## 57 144 0 21 110 3 1 0 0 1 0 3203
## 58 145 0 30 153 3 0 0 0 0 0 3203
## 59 146 0 20 103 3 0 0 0 0 0 3203
## 60 147 0 17 119 3 0 0 0 0 0 3225
## 61 148 0 17 119 3 0 0 0 0 0 3225
## 62 149 0 23 119 3 0 0 0 0 2 3232
## 63 150 0 24 110 3 0 0 0 0 0 3232
## 64 151 0 28 140 1 0 0 0 0 0 3234
## 65 154 0 26 133 3 1 2 0 0 0 3260
## 66 155 0 20 169 3 0 1 0 1 1 3274
## 67 156 0 24 115 3 0 0 0 0 2 3274
## 68 159 0 28 250 3 1 0 0 0 6 3303
## 69 160 0 20 141 1 0 2 0 1 1 3317
## 70 161 0 22 158 2 0 1 0 0 2 3317
## 71 162 0 22 112 1 1 2 0 0 0 3317
## 72 163 0 31 150 3 1 0 0 0 2 3321
## 73 164 0 23 115 3 1 0 0 0 1 3331
## 74 166 0 16 112 2 0 0 0 0 0 3374
## 75 167 0 16 135 1 1 0 0 0 0 3374
## 76 168 0 18 229 2 0 0 0 0 0 3402
## 77 169 0 25 140 1 0 0 0 0 1 3416
## 78 170 0 32 134 1 1 1 0 0 4 3430
## 79 172 0 20 121 2 1 0 0 0 0 3444
## 80 173 0 23 190 1 0 0 0 0 0 3459
## 81 174 0 22 131 1 0 0 0 0 1 3460
## 82 175 0 32 170 1 0 0 0 0 0 3473
## 83 176 0 30 110 3 0 0 0 0 0 3544
## 84 177 0 20 127 3 0 0 0 0 0 3487
## 85 179 0 23 123 3 0 0 0 0 0 3544
## 86 180 0 17 120 3 1 0 0 0 0 3572
## 87 181 0 19 105 3 0 0 0 0 0 3572
## 88 182 0 23 130 1 0 0 0 0 0 3586
## 89 183 0 36 175 1 0 0 0 0 0 3600
## 90 184 0 22 125 1 0 0 0 0 1 3614
## 91 185 0 24 133 1 0 0 0 0 0 3614
## 92 186 0 21 134 3 0 0 0 0 2 3629
## 93 187 0 19 235 1 1 0 1 0 0 3629
## 94 188 0 25 95 1 1 3 0 1 0 3637
## 95 189 0 16 135 1 1 0 0 0 0 3643
## 96 190 0 29 135 1 0 0 0 0 1 3651
## 97 191 0 29 154 1 0 0 0 0 1 3651
## 98 192 0 19 147 1 1 0 0 0 0 3651
## 99 193 0 19 147 1 1 0 0 0 0 3651
## 100 195 0 30 137 1 0 0 0 0 1 3699
## 101 196 0 24 110 1 0 0 0 0 1 3728
## 102 197 0 19 184 1 1 0 1 0 0 3756
## 103 199 0 24 110 3 0 1 0 0 0 3770
## 104 200 0 23 110 1 0 0 0 0 1 3770
## 105 201 0 20 120 3 0 0 0 0 0 3770
## 106 202 0 25 241 2 0 0 1 0 0 3790
## 107 203 0 30 112 1 0 0 0 0 1 3799
## 108 204 0 22 169 1 0 0 0 0 0 3827
## 109 205 0 18 120 1 1 0 0 0 2 3856
## 110 206 0 16 170 2 0 0 0 0 4 3860
## 111 207 0 32 186 1 0 0 0 0 2 3860
## 112 208 0 18 120 3 0 0 0 0 1 3884
## 113 209 0 29 130 1 1 0 0 0 2 3884
## 114 210 0 33 117 1 0 0 0 1 1 3912
## 115 211 0 20 170 1 1 0 0 0 0 3940
## 116 212 0 28 134 3 0 0 0 0 1 3941
## 117 213 0 14 135 1 0 0 0 0 0 3941
## 118 214 0 28 130 3 0 0 0 0 0 3969
## 119 215 0 25 120 1 0 0 0 0 2 3983
## 120 216 0 16 95 3 0 0 0 0 1 3997
## 121 217 0 20 158 1 0 0 0 0 1 3997
## 122 218 0 26 160 3 0 0 0 0 0 4054
## 123 219 0 21 115 1 0 0 0 0 1 4054
## 124 220 0 22 129 1 0 0 0 0 0 4111
## 125 221 0 25 130 1 0 0 0 0 2 4153
## 126 222 0 31 120 1 0 0 0 0 2 4167
## 127 223 0 35 170 1 0 1 0 0 1 4174
## 128 224 0 19 120 1 1 0 0 0 0 4238
## 129 225 0 24 116 1 0 0 0 0 1 4593
## 130 226 0 45 123 1 0 0 0 0 1 4990
## 131 4 1 28 120 3 1 1 0 1 0 709
## 132 10 1 29 130 1 0 0 0 1 2 1021
## 133 11 1 34 187 2 1 0 1 0 0 1135
## 134 13 1 25 105 3 0 1 1 0 0 1330
## 135 15 1 25 85 3 0 0 0 1 0 1474
## 136 16 1 27 150 3 0 0 0 0 0 1588
## 137 17 1 23 97 3 0 0 0 1 1 1588
## 138 18 1 24 128 2 0 1 0 0 1 1701
## 139 19 1 24 132 3 0 0 1 0 0 1729
## 140 20 1 21 165 1 1 0 1 0 1 1790
## 141 22 1 32 105 1 1 0 0 0 0 1818
## 142 23 1 19 91 1 1 2 0 1 0 1885
## 143 24 1 25 115 3 0 0 0 0 0 1893
## 144 25 1 16 130 3 0 0 0 0 1 1899
## 145 26 1 25 92 1 1 0 0 0 0 1928
## 146 27 1 20 150 1 1 0 0 0 2 1928
## 147 28 1 21 200 2 0 0 0 1 2 1928
## 148 29 1 24 155 1 1 1 0 0 0 1936
## 149 30 1 21 103 3 0 0 0 0 0 1970
## 150 31 1 20 125 3 0 0 0 1 0 2055
## 151 32 1 25 89 3 0 2 0 0 1 2055
## 152 33 1 19 102 1 0 0 0 0 2 2082
## 153 34 1 19 112 1 1 0 0 1 0 2084
## 154 35 1 26 117 1 1 1 0 0 0 2084
## 155 36 1 24 138 1 0 0 0 0 0 2100
## 156 37 1 17 130 3 1 1 0 1 0 2125
## 157 40 1 20 120 2 1 0 0 0 3 2126
## 158 42 1 22 130 1 1 1 0 1 1 2187
## 159 43 1 27 130 2 0 0 0 1 0 2187
## 160 44 1 20 80 3 1 0 0 1 0 2211
## 161 45 1 17 110 1 1 0 0 0 0 2225
## 162 46 1 25 105 3 0 1 0 0 1 2240
## 163 47 1 20 109 3 0 0 0 0 0 2240
## 164 49 1 18 148 3 0 0 0 0 0 2282
## 165 50 1 18 110 2 1 1 0 0 0 2296
## 166 51 1 20 121 1 1 1 0 1 0 2296
## 167 52 1 21 100 3 0 1 0 0 4 2301
## 168 54 1 26 96 3 0 0 0 0 0 2325
## 169 56 1 31 102 1 1 1 0 0 1 2353
## 170 57 1 15 110 1 0 0 0 0 0 2353
## 171 59 1 23 187 2 1 0 0 0 1 2367
## 172 60 1 20 122 2 1 0 0 0 0 2381
## 173 61 1 24 105 2 1 0 0 0 0 2381
## 174 62 1 15 115 3 0 0 0 1 0 2381
## 175 63 1 23 120 3 0 0 0 0 0 2410
## 176 65 1 30 142 1 1 1 0 0 0 2410
## 177 67 1 22 130 1 1 0 0 0 1 2410
## 178 68 1 17 120 1 1 0 0 0 3 2414
## 179 69 1 23 110 1 1 1 0 0 0 2424
## 180 71 1 17 120 2 0 0 0 0 2 2438
## 181 75 1 26 154 3 0 1 1 0 1 2442
## 182 76 1 20 105 3 0 0 0 0 3 2450
## 183 77 1 26 190 1 1 0 0 0 0 2466
## 184 78 1 14 101 3 1 1 0 0 0 2466
## 185 79 1 28 95 1 1 0 0 0 2 2466
## 186 81 1 14 100 3 0 0 0 0 2 2495
## 187 82 1 23 94 3 1 0 0 0 0 2495
## 188 83 1 17 142 2 0 0 1 0 0 2495
## 189 84 1 21 130 1 1 0 1 0 3 2495
# Load the birth-weight CSV file into the data frame `bw`.
bw <- read.csv(
  file = "D:\\HOC TAP\\TAP HUAN UNG DUNG AI TRONG PT DU LIEU SU DUNG R\\thuc hanh\\birthwt.csv",
  header = TRUE
)
# 2. Recode the numeric race code (1/2/3) into the labelled factor `ethnicity`.
bw[["ethnicity"]] <- factor(
  bw[["race"]],
  levels = 1:3,
  labels = c("White", "Black", "Others")
)

# 3. Recode the smoke indicator (0/1) into the labelled factor `smoking`.
bw[["smoking"]] <- factor(
  bw[["smoke"]],
  levels = 0:1,
  labels = c("No", "Yes")
)

# 4. Recode the low indicator (0/1) into the labelled factor `low.bw`.
bw[["low.bw"]] <- factor(
  bw[["low"]],
  levels = 0:1,
  labels = c("Normal", "Low BW")
)

# 5. Create `mwt`: the mother's weight converted to kg using the
#    exercise's factor (lwt * 0.45).
bw[["mwt"]] <- 0.45 * bw[["lwt"]]
#6. Dùng table1 để mô tả các biến theo low.bw
library(table1)
## Warning: package 'table1' was built under R version 4.4.3
##
## Attaching package: 'table1'
## The following objects are masked from 'package:base':
##
## units, units<-
# Install the Hmisc package if it is not available yet.
# requireNamespace() only tests whether the package can be loaded; it does not
# attach it, so attaching is left to the explicit library(Hmisc) call.
if (!requireNamespace("Hmisc", quietly = TRUE)) {
  install.packages("Hmisc")
}
## Loading required package: Hmisc
##
## Attaching package: 'Hmisc'
## The following objects are masked from 'package:table1':
##
## label, label<-, units
## The following objects are masked from 'package:base':
##
## format.pval, units
library(Hmisc)

# Attach a descriptive label to each variable shown in the summary table.
label(bw[["age"]]) <- "Age of Mother"
label(bw[["ethnicity"]]) <- "Ethnicity"
label(bw[["smoking"]]) <- "Smoking Status"
label(bw[["mwt"]]) <- "Mother's Weight (kg)"
label(bw[["bwt"]]) <- "Birth Weight"

# Build the descriptive table, stratified by birth-weight group (low.bw).
table1(
  ~ age + ethnicity + smoking + mwt + bwt | low.bw,
  data = bw
)
## |   | Normal (N=130) | Low BW (N=59) | Overall (N=189) |
## |---|---|---|---|
## | Age of Mother | | | |
## | Mean (SD) | 23.7 (5.58) | 22.3 (4.51) | 23.2 (5.30) |
## | Median [Min, Max] | 23.0 [14.0, 45.0] | 22.0 [14.0, 34.0] | 23.0 [14.0, 45.0] |
## | Ethnicity | | | |
## | White | 73 (56.2%) | 23 (39.0%) | 96 (50.8%) |
## | Black | 15 (11.5%) | 11 (18.6%) | 26 (13.8%) |
## | Others | 42 (32.3%) | 25 (42.4%) | 67 (35.4%) |
## | Smoking Status | | | |
## | No | 86 (66.2%) | 29 (49.2%) | 115 (60.8%) |
## | Yes | 44 (33.8%) | 30 (50.8%) | 74 (39.2%) |
## | Mother's Weight (kg) | | | |
## | Mean (SD) | 60.0 (14.3) | 55.0 (12.0) | 58.4 (13.8) |
## | Median [Min, Max] | 55.6 [38.3, 113] | 54.0 [36.0, 90.0] | 54.5 [36.0, 113] |
## | Birth Weight | | | |
## | Mean (SD) | 3330 (478) | 2100 (391) | 2940 (729) |
## | Median [Min, Max] | 3270 [2520, 4990] | 2210 [709, 2500] | 2980 [709, 4990] |
# 7. Use lessR to draw a histogram of bwt
# Install lessR if it is not available yet. requireNamespace() only tests
# whether the package can be loaded; it does not attach it, so attaching is
# left to the explicit library(lessR) call.
if (!requireNamespace("lessR", quietly = TRUE)) {
  install.packages("lessR")
}
## Loading required package: lessR
## Warning: package 'lessR' was built under R version 4.4.3
##
## lessR 4.4.3 feedback: gerbing@pdx.edu
## --------------------------------------------------------------
## > d <- Read("") Read data file, many formats available, e.g., Excel
## d is default data frame, data= in analysis routines optional
##
## Many examples of reading, writing, and manipulating data,
## graphics, testing means and proportions, regression, factor analysis,
## customization, forecasting, and aggregation from pivot tables
## Enter: browseVignettes("lessR")
##
## View lessR updates, now including time series forecasting
## Enter: news(package="lessR")
##
## Interactive data analysis
## Enter: interact()
##
## Attaching package: 'lessR'
## The following objects are masked from 'package:Hmisc':
##
## label, Merge
## The following object is masked from 'package:table1':
##
## label
## The following object is masked from 'package:base':
##
## sort_by
library(lessR)

# Histogram of the newborn birth weight (bwt).
Histogram(
  bwt,
  data = bw,
  main = "Histogram of Birth Weight",
  xlab = "Birth Weight (grams)"
)
## >>> Suggestions
## bin_width: set the width of each bin
## bin_start: set the start of the first bin
## bin_end: set the end of the last bin
## Histogram(bwt, density=TRUE) # smoothed curve + histogram
## Plot(bwt) # Violin/Box/Scatterplot (VBS) plot
##
## --- bwt ---
##
## n miss mean sd min mdn max
## 189 0 2944.59 729.21 709.00 2977.00 4990.00
##
##
##
## --- Outliers --- from the box plot: 1
##
## Small Large
## ----- -----
## 709.0
##
##
## Bin Width: 500
## Number of Bins: 9
##
## Bin Midpnt Count Prop Cumul.c Cumul.p
## -----------------------------------------------------
## 500 > 1000 750 1 0.01 1 0.01
## 1000 > 1500 1250 4 0.02 5 0.03
## 1500 > 2000 1750 14 0.07 19 0.10
## 2000 > 2500 2250 40 0.21 59 0.31
## 2500 > 3000 2750 38 0.20 97 0.51
## 3000 > 3500 3250 45 0.24 142 0.75
## 3500 > 4000 3750 38 0.20 180 0.95
## 4000 > 4500 4250 7 0.04 187 0.99
## 4500 > 5000 4750 2 0.01 189 1.00
##
# 8. Draw a bar chart showing the distribution of ethnicity
BarChart(
  ethnicity,
  data = bw,
  xlab = "Ethnicity",
  main = "Distribution of Ethnicity"
)
## >>> Suggestions
## BarChart(ethnicity, horiz=TRUE) # horizontal bar chart
## BarChart(ethnicity, fill="reds") # red bars of varying lightness
## PieChart(ethnicity) # doughnut (ring) chart
## Plot(ethnicity) # bubble plot
## Plot(ethnicity, stat="count") # lollipop plot
##
## --- ethnicity ---
##
## Missing Values: 0
##
## White Black Others Total
## Frequencies: 96 26 67 189
## Proportions: 0.508 0.138 0.354 1.000
##
## Chi-squared test of null hypothesis of equal probabilities
## Chisq = 39.270, df = 2, p-value = 0.000
# 9. Draw a scatterplot of the relationship between mwt and bwt
ScatterPlot(
  mwt, bwt,
  data = bw,
  main = "Scatterplot of Mother's Weight vs Birth Weight",
  xlab = "Mother's Weight (kg)",
  ylab = "Birth Weight (grams)"
)
##
## >>> Suggestions or enter: style(suggest=FALSE)
## Plot(mwt, bwt, enhance=TRUE) # many options
## Plot(mwt, bwt, fill="skyblue") # interior fill color of points
## Plot(mwt, bwt, fit="lm", fit_se=c(.90,.99)) # fit line, stnd errors
## Plot(mwt, bwt, MD_cut=6) # Mahalanobis distance from center > 6 is an outlier
##
##
## >>> Pearson's product-moment correlation
##
## Number of paired values with neither missing, n = 189
## Sample Correlation of mwt and bwt: r = 0.186
##
## Hypothesis Test of 0 Correlation: t = 2.585, df = 187, p-value = 0.011
## 95% Confidence Interval for Correlation: 0.044 to 0.320
##