# 1. Read the data into R and name it bw

# Browse for the data file to discover its path. The dialog needs a user at
# the console, so it is only opened in interactive sessions; the selected path
# is merely displayed and is not used by any later statement.
if (interactive()) file.choose()
## [1] "D:\\HOC TAP\\TAP HUAN UNG DUNG AI TRONG PT DU LIEU SU DUNG R\\THUC HANH TAI LOP\\hocRngay2\\bt1Chatgpt.Rmd"
# Preview the raw CSV: the result is not assigned, so the whole data frame is
# printed to the console (the listing that follows).
read.csv("D:\\HOC TAP\\TAP HUAN UNG DUNG AI TRONG PT DU LIEU SU DUNG R\\thuc hanh\\birthwt.csv")
##      id low age lwt race smoke ptl ht ui ftv  bwt
## 1    85   0  19 182    2     0   0  0  1   0 2523
## 2    86   0  33 155    3     0   0  0  0   3 2551
## 3    87   0  20 105    1     1   0  0  0   1 2557
## 4    88   0  21 108    1     1   0  0  1   2 2594
## 5    89   0  18 107    1     1   0  0  1   0 2600
## 6    91   0  21 124    3     0   0  0  0   0 2622
## 7    92   0  22 118    1     0   0  0  0   1 2637
## 8    93   0  17 103    3     0   0  0  0   1 2637
## 9    94   0  29 123    1     1   0  0  0   1 2663
## 10   95   0  26 113    1     1   0  0  0   0 2665
## 11   96   0  19  95    3     0   0  0  0   0 2722
## 12   97   0  19 150    3     0   0  0  0   1 2733
## 13   98   0  22  95    3     0   0  1  0   0 2751
## 14   99   0  30 107    3     0   1  0  1   2 2750
## 15  100   0  18 100    1     1   0  0  0   0 2769
## 16  101   0  18 100    1     1   0  0  0   0 2769
## 17  102   0  15  98    2     0   0  0  0   0 2778
## 18  103   0  25 118    1     1   0  0  0   3 2782
## 19  104   0  20 120    3     0   0  0  1   0 2807
## 20  105   0  28 120    1     1   0  0  0   1 2821
## 21  106   0  32 121    3     0   0  0  0   2 2835
## 22  107   0  31 100    1     0   0  0  1   3 2835
## 23  108   0  36 202    1     0   0  0  0   1 2836
## 24  109   0  28 120    3     0   0  0  0   0 2863
## 25  111   0  25 120    3     0   0  0  1   2 2877
## 26  112   0  28 167    1     0   0  0  0   0 2877
## 27  113   0  17 122    1     1   0  0  0   0 2906
## 28  114   0  29 150    1     0   0  0  0   2 2920
## 29  115   0  26 168    2     1   0  0  0   0 2920
## 30  116   0  17 113    2     0   0  0  0   1 2920
## 31  117   0  17 113    2     0   0  0  0   1 2920
## 32  118   0  24  90    1     1   1  0  0   1 2948
## 33  119   0  35 121    2     1   1  0  0   1 2948
## 34  120   0  25 155    1     0   0  0  0   1 2977
## 35  121   0  25 125    2     0   0  0  0   0 2977
## 36  123   0  29 140    1     1   0  0  0   2 2977
## 37  124   0  19 138    1     1   0  0  0   2 2977
## 38  125   0  27 124    1     1   0  0  0   0 2922
## 39  126   0  31 215    1     1   0  0  0   2 3005
## 40  127   0  33 109    1     1   0  0  0   1 3033
## 41  128   0  21 185    2     1   0  0  0   2 3042
## 42  129   0  19 189    1     0   0  0  0   2 3062
## 43  130   0  23 130    2     0   0  0  0   1 3062
## 44  131   0  21 160    1     0   0  0  0   0 3062
## 45  132   0  18  90    1     1   0  0  1   0 3062
## 46  133   0  18  90    1     1   0  0  1   0 3062
## 47  134   0  32 132    1     0   0  0  0   4 3080
## 48  135   0  19 132    3     0   0  0  0   0 3090
## 49  136   0  24 115    1     0   0  0  0   2 3090
## 50  137   0  22  85    3     1   0  0  0   0 3090
## 51  138   0  22 120    1     0   0  1  0   1 3100
## 52  139   0  23 128    3     0   0  0  0   0 3104
## 53  140   0  22 130    1     1   0  0  0   0 3132
## 54  141   0  30  95    1     1   0  0  0   2 3147
## 55  142   0  19 115    3     0   0  0  0   0 3175
## 56  143   0  16 110    3     0   0  0  0   0 3175
## 57  144   0  21 110    3     1   0  0  1   0 3203
## 58  145   0  30 153    3     0   0  0  0   0 3203
## 59  146   0  20 103    3     0   0  0  0   0 3203
## 60  147   0  17 119    3     0   0  0  0   0 3225
## 61  148   0  17 119    3     0   0  0  0   0 3225
## 62  149   0  23 119    3     0   0  0  0   2 3232
## 63  150   0  24 110    3     0   0  0  0   0 3232
## 64  151   0  28 140    1     0   0  0  0   0 3234
## 65  154   0  26 133    3     1   2  0  0   0 3260
## 66  155   0  20 169    3     0   1  0  1   1 3274
## 67  156   0  24 115    3     0   0  0  0   2 3274
## 68  159   0  28 250    3     1   0  0  0   6 3303
## 69  160   0  20 141    1     0   2  0  1   1 3317
## 70  161   0  22 158    2     0   1  0  0   2 3317
## 71  162   0  22 112    1     1   2  0  0   0 3317
## 72  163   0  31 150    3     1   0  0  0   2 3321
## 73  164   0  23 115    3     1   0  0  0   1 3331
## 74  166   0  16 112    2     0   0  0  0   0 3374
## 75  167   0  16 135    1     1   0  0  0   0 3374
## 76  168   0  18 229    2     0   0  0  0   0 3402
## 77  169   0  25 140    1     0   0  0  0   1 3416
## 78  170   0  32 134    1     1   1  0  0   4 3430
## 79  172   0  20 121    2     1   0  0  0   0 3444
## 80  173   0  23 190    1     0   0  0  0   0 3459
## 81  174   0  22 131    1     0   0  0  0   1 3460
## 82  175   0  32 170    1     0   0  0  0   0 3473
## 83  176   0  30 110    3     0   0  0  0   0 3544
## 84  177   0  20 127    3     0   0  0  0   0 3487
## 85  179   0  23 123    3     0   0  0  0   0 3544
## 86  180   0  17 120    3     1   0  0  0   0 3572
## 87  181   0  19 105    3     0   0  0  0   0 3572
## 88  182   0  23 130    1     0   0  0  0   0 3586
## 89  183   0  36 175    1     0   0  0  0   0 3600
## 90  184   0  22 125    1     0   0  0  0   1 3614
## 91  185   0  24 133    1     0   0  0  0   0 3614
## 92  186   0  21 134    3     0   0  0  0   2 3629
## 93  187   0  19 235    1     1   0  1  0   0 3629
## 94  188   0  25  95    1     1   3  0  1   0 3637
## 95  189   0  16 135    1     1   0  0  0   0 3643
## 96  190   0  29 135    1     0   0  0  0   1 3651
## 97  191   0  29 154    1     0   0  0  0   1 3651
## 98  192   0  19 147    1     1   0  0  0   0 3651
## 99  193   0  19 147    1     1   0  0  0   0 3651
## 100 195   0  30 137    1     0   0  0  0   1 3699
## 101 196   0  24 110    1     0   0  0  0   1 3728
## 102 197   0  19 184    1     1   0  1  0   0 3756
## 103 199   0  24 110    3     0   1  0  0   0 3770
## 104 200   0  23 110    1     0   0  0  0   1 3770
## 105 201   0  20 120    3     0   0  0  0   0 3770
## 106 202   0  25 241    2     0   0  1  0   0 3790
## 107 203   0  30 112    1     0   0  0  0   1 3799
## 108 204   0  22 169    1     0   0  0  0   0 3827
## 109 205   0  18 120    1     1   0  0  0   2 3856
## 110 206   0  16 170    2     0   0  0  0   4 3860
## 111 207   0  32 186    1     0   0  0  0   2 3860
## 112 208   0  18 120    3     0   0  0  0   1 3884
## 113 209   0  29 130    1     1   0  0  0   2 3884
## 114 210   0  33 117    1     0   0  0  1   1 3912
## 115 211   0  20 170    1     1   0  0  0   0 3940
## 116 212   0  28 134    3     0   0  0  0   1 3941
## 117 213   0  14 135    1     0   0  0  0   0 3941
## 118 214   0  28 130    3     0   0  0  0   0 3969
## 119 215   0  25 120    1     0   0  0  0   2 3983
## 120 216   0  16  95    3     0   0  0  0   1 3997
## 121 217   0  20 158    1     0   0  0  0   1 3997
## 122 218   0  26 160    3     0   0  0  0   0 4054
## 123 219   0  21 115    1     0   0  0  0   1 4054
## 124 220   0  22 129    1     0   0  0  0   0 4111
## 125 221   0  25 130    1     0   0  0  0   2 4153
## 126 222   0  31 120    1     0   0  0  0   2 4167
## 127 223   0  35 170    1     0   1  0  0   1 4174
## 128 224   0  19 120    1     1   0  0  0   0 4238
## 129 225   0  24 116    1     0   0  0  0   1 4593
## 130 226   0  45 123    1     0   0  0  0   1 4990
## 131   4   1  28 120    3     1   1  0  1   0  709
## 132  10   1  29 130    1     0   0  0  1   2 1021
## 133  11   1  34 187    2     1   0  1  0   0 1135
## 134  13   1  25 105    3     0   1  1  0   0 1330
## 135  15   1  25  85    3     0   0  0  1   0 1474
## 136  16   1  27 150    3     0   0  0  0   0 1588
## 137  17   1  23  97    3     0   0  0  1   1 1588
## 138  18   1  24 128    2     0   1  0  0   1 1701
## 139  19   1  24 132    3     0   0  1  0   0 1729
## 140  20   1  21 165    1     1   0  1  0   1 1790
## 141  22   1  32 105    1     1   0  0  0   0 1818
## 142  23   1  19  91    1     1   2  0  1   0 1885
## 143  24   1  25 115    3     0   0  0  0   0 1893
## 144  25   1  16 130    3     0   0  0  0   1 1899
## 145  26   1  25  92    1     1   0  0  0   0 1928
## 146  27   1  20 150    1     1   0  0  0   2 1928
## 147  28   1  21 200    2     0   0  0  1   2 1928
## 148  29   1  24 155    1     1   1  0  0   0 1936
## 149  30   1  21 103    3     0   0  0  0   0 1970
## 150  31   1  20 125    3     0   0  0  1   0 2055
## 151  32   1  25  89    3     0   2  0  0   1 2055
## 152  33   1  19 102    1     0   0  0  0   2 2082
## 153  34   1  19 112    1     1   0  0  1   0 2084
## 154  35   1  26 117    1     1   1  0  0   0 2084
## 155  36   1  24 138    1     0   0  0  0   0 2100
## 156  37   1  17 130    3     1   1  0  1   0 2125
## 157  40   1  20 120    2     1   0  0  0   3 2126
## 158  42   1  22 130    1     1   1  0  1   1 2187
## 159  43   1  27 130    2     0   0  0  1   0 2187
## 160  44   1  20  80    3     1   0  0  1   0 2211
## 161  45   1  17 110    1     1   0  0  0   0 2225
## 162  46   1  25 105    3     0   1  0  0   1 2240
## 163  47   1  20 109    3     0   0  0  0   0 2240
## 164  49   1  18 148    3     0   0  0  0   0 2282
## 165  50   1  18 110    2     1   1  0  0   0 2296
## 166  51   1  20 121    1     1   1  0  1   0 2296
## 167  52   1  21 100    3     0   1  0  0   4 2301
## 168  54   1  26  96    3     0   0  0  0   0 2325
## 169  56   1  31 102    1     1   1  0  0   1 2353
## 170  57   1  15 110    1     0   0  0  0   0 2353
## 171  59   1  23 187    2     1   0  0  0   1 2367
## 172  60   1  20 122    2     1   0  0  0   0 2381
## 173  61   1  24 105    2     1   0  0  0   0 2381
## 174  62   1  15 115    3     0   0  0  1   0 2381
## 175  63   1  23 120    3     0   0  0  0   0 2410
## 176  65   1  30 142    1     1   1  0  0   0 2410
## 177  67   1  22 130    1     1   0  0  0   1 2410
## 178  68   1  17 120    1     1   0  0  0   3 2414
## 179  69   1  23 110    1     1   1  0  0   0 2424
## 180  71   1  17 120    2     0   0  0  0   2 2438
## 181  75   1  26 154    3     0   1  1  0   1 2442
## 182  76   1  20 105    3     0   0  0  0   3 2450
## 183  77   1  26 190    1     1   0  0  0   0 2466
## 184  78   1  14 101    3     1   1  0  0   0 2466
## 185  79   1  28  95    1     1   0  0  0   2 2466
## 186  81   1  14 100    3     0   0  0  0   2 2495
## 187  82   1  23  94    3     1   0  0  0   0 2495
## 188  83   1  17 142    2     0   0  1  0   0 2495
## 189  84   1  21 130    1     1   0  1  0   3 2495
# Load the birth-weight CSV and keep it as the data frame bw
bw <- utils::read.csv(
  file = "D:\\HOC TAP\\TAP HUAN UNG DUNG AI TRONG PT DU LIEU SU DUNG R\\thuc hanh\\birthwt.csv"
)

# 2. Recode the variable race into the factor ethnicity

# Codes 1, 2 and 3 are labelled "White", "Black" and "Others"; a value that is
# not one of the listed levels becomes NA.
bw[["ethnicity"]] <- factor(
  bw[["race"]],
  levels = 1:3,
  labels = c("White", "Black", "Others")
)

# 3. Recode the variable smoke into the factor smoking

# Code 0 is labelled "No" and code 1 "Yes"; a value that is not one of the
# listed levels becomes NA.
bw[["smoking"]] <- factor(
  bw[["smoke"]],
  levels = 0:1,
  labels = c("No", "Yes")
)

# 4. Recode the variable low into the factor low.bw

# Code 0 is labelled "Normal" and code 1 "Low BW"; a value that is not one of
# the listed levels becomes NA.
bw[["low.bw"]] <- factor(
  bw[["low"]],
  levels = 0:1,
  labels = c("Normal", "Low BW")
)

# 5. Create the new variable mwt: the mother's weight converted to kg
#    (lwt * 0.45)

# lwt is scaled by the fixed factor 0.45 given in the exercise statement.
bw[["mwt"]] <- 0.45 * bw[["lwt"]]

# 6. Use table1 to describe the variables, stratified by low.bw

library(table1)
## Warning: package 'table1' was built under R version 4.4.3
## 
## Attaching package: 'table1'
## The following objects are masked from 'package:base':
## 
##     units, units<-
# Install Hmisc only when it is missing. requireNamespace() tests availability
# without attaching the package, so it is attached exactly once, by library()
# below.
if (!requireNamespace("Hmisc", quietly = TRUE)) install.packages("Hmisc")
library(Hmisc)
## 
## Attaching package: 'Hmisc'
## The following objects are masked from 'package:table1':
## 
##     label, label<-, units
## The following objects are masked from 'package:base':
## 
##     format.pval, units

# Attach descriptive labels to the variables shown in the table.
# label() is exported by table1, Hmisc and lessR (see the masking messages in
# this file), so the setter is namespace-qualified: the Hmisc version that was
# in effect here keeps being used regardless of the order packages are attached.
Hmisc::label(bw$age) <- "Age of Mother"
Hmisc::label(bw$ethnicity) <- "Ethnicity"
Hmisc::label(bw$smoking) <- "Smoking Status"
Hmisc::label(bw$mwt) <- "Mother's Weight (kg)"
Hmisc::label(bw$bwt) <- "Birth Weight"

# Build the descriptive table: one column per level of low.bw plus an overall
# column (see the rendered output below).
table1(~ age + ethnicity + smoking + mwt + bwt | low.bw, data = bw)
##                        Normal               Low BW               Overall
##                        (N=130)              (N=59)               (N=189)
## Age of Mother
##   Mean (SD)            23.7 (5.58)          22.3 (4.51)          23.2 (5.30)
##   Median [Min, Max]    23.0 [14.0, 45.0]    22.0 [14.0, 34.0]    23.0 [14.0, 45.0]
## Ethnicity
##   White                73 (56.2%)           23 (39.0%)           96 (50.8%)
##   Black                15 (11.5%)           11 (18.6%)           26 (13.8%)
##   Others               42 (32.3%)           25 (42.4%)           67 (35.4%)
## Smoking Status
##   No                   86 (66.2%)           29 (49.2%)           115 (60.8%)
##   Yes                  44 (33.8%)           30 (50.8%)           74 (39.2%)
## Mother's Weight (kg)
##   Mean (SD)            60.0 (14.3)          55.0 (12.0)          58.4 (13.8)
##   Median [Min, Max]    55.6 [38.3, 113]     54.0 [36.0, 90.0]    54.5 [36.0, 113]
## Birth Weight
##   Mean (SD)            3330 (478)           2100 (391)           2940 (729)
##   Median [Min, Max]    3270 [2520, 4990]    2210 [709, 2500]     2980 [709, 4990]

# 7. Use lessR to draw a histogram of bwt

# Install lessR only when it is missing. requireNamespace() tests availability
# without attaching the package, so it is attached exactly once, by library()
# below.
if (!requireNamespace("lessR", quietly = TRUE)) install.packages("lessR")
library(lessR)
## Warning: package 'lessR' was built under R version 4.4.3
## 
## lessR 4.4.3                         feedback: gerbing@pdx.edu 
## --------------------------------------------------------------
## > d <- Read("")  Read data file, many formats available, e.g., Excel
##   d is default data frame, data= in analysis routines optional
## 
## Many examples of reading, writing, and manipulating data, 
## graphics, testing means and proportions, regression, factor analysis,
## customization, forecasting, and aggregation from pivot tables
##   Enter: browseVignettes("lessR")
## 
## View lessR updates, now including time series forecasting
##   Enter: news(package="lessR")
## 
## Interactive data analysis
##   Enter: interact()
## 
## Attaching package: 'lessR'
## The following objects are masked from 'package:Hmisc':
## 
##     label, Merge
## The following object is masked from 'package:table1':
## 
##     label
## The following object is masked from 'package:base':
## 
##     sort_by

# Histogram of the newborns' birth weight (bwt)

Histogram(
  bwt,
  data = bw,
  main = "Histogram of Birth Weight",
  xlab = "Birth Weight (grams)"
)

## >>> Suggestions 
## bin_width: set the width of each bin 
## bin_start: set the start of the first bin 
## bin_end: set the end of the last bin 
## Histogram(bwt, density=TRUE)  # smoothed curve + histogram 
## Plot(bwt)  # Violin/Box/Scatterplot (VBS) plot 
## 
## --- bwt --- 
##  
##       n   miss       mean         sd        min        mdn        max 
##      189      0    2944.59     729.21     709.00    2977.00    4990.00 
##  
## 
##   
## --- Outliers ---     from the box plot: 1 
##  
## Small        Large 
## -----        ----- 
##  709.0            
## 
## 
## Bin Width: 500 
## Number of Bins: 9 
##  
##          Bin  Midpnt  Count    Prop  Cumul.c  Cumul.p 
## ----------------------------------------------------- 
##   500 > 1000     750      1    0.01        1     0.01 
##  1000 > 1500    1250      4    0.02        5     0.03 
##  1500 > 2000    1750     14    0.07       19     0.10 
##  2000 > 2500    2250     40    0.21       59     0.31 
##  2500 > 3000    2750     38    0.20       97     0.51 
##  3000 > 3500    3250     45    0.24      142     0.75 
##  3500 > 4000    3750     38    0.20      180     0.95 
##  4000 > 4500    4250      7    0.04      187     0.99 
##  4500 > 5000    4750      2    0.01      189     1.00 
## 

# 8. Draw the distribution chart (bar plot) for ethnicity

BarChart(
  ethnicity,
  data = bw,
  xlab = "Ethnicity",
  main = "Distribution of Ethnicity"
)

## >>> Suggestions
## BarChart(ethnicity, horiz=TRUE)  # horizontal bar chart
## BarChart(ethnicity, fill="reds")  # red bars of varying lightness
## PieChart(ethnicity)  # doughnut (ring) chart
## Plot(ethnicity)  # bubble plot
## Plot(ethnicity, stat="count")  # lollipop plot 
## 
## --- ethnicity --- 
## 
## Missing Values: 0 
## 
##                White  Black  Others     Total 
## Frequencies:      96     26      67       189 
## Proportions:   0.508  0.138   0.354     1.000 
## 
## Chi-squared test of null hypothesis of equal probabilities 
##   Chisq = 39.270, df = 2, p-value = 0.000

# 9. Plot the relationship between mwt and bwt

# mwt goes on the x axis and bwt on the y axis.
ScatterPlot(
  mwt, bwt,
  data = bw,
  main = "Scatterplot of Mother's Weight vs Birth Weight",
  xlab = "Mother's Weight (kg)",
  ylab = "Birth Weight (grams)"
)

## 
## >>> Suggestions  or  enter: style(suggest=FALSE)
## Plot(mwt, bwt, enhance=TRUE)  # many options
## Plot(mwt, bwt, fill="skyblue")  # interior fill color of points
## Plot(mwt, bwt, fit="lm", fit_se=c(.90,.99))  # fit line, stnd errors
## Plot(mwt, bwt, MD_cut=6)  # Mahalanobis distance from center > 6 is an outlier 
## 
## 
## >>> Pearson's product-moment correlation 
##  
## Number of paired values with neither missing, n = 189 
## Sample Correlation of mwt and bwt: r = 0.186 
##   
## Hypothesis Test of 0 Correlation:  t = 2.585,  df = 187,  p-value = 0.011 
## 95% Confidence Interval for Correlation:  0.044 to 0.320 
##