# Prompt cho ChatGPT
#Tôi có 1 file về cân nặng của trẻ em tên là “birthwt.csv”
#Hãy viết R codes cho R Markdown để làm những thao tác sau đây:

#1.Đọc dữ liệu vào R và gọi tên là “bw”
#2.Coding biến race={1,2,3} thành ethnicity={"White","Black","Others"}
#3.Coding biến smoke={1,0} thành smoking={"Yes","No"}
#4.Coding biến low={0,1} thành low.bw={"Normal","Low BW"}
#5.Tạo ra biến mới mwt=lwt*0.45
#6.Dùng package table1 để mô tả các biến số sau đây theo low.bw: age, ethnicity, smoking, mwt, bwt
#7.Dùng package lessR để vẽ biểu đồ histogram cho biến số bwt
#8.Dùng package lessR để vẽ biểu đồ phân bố cho biến số ethnicity
#9.Dùng package lessR để vẽ biểu đồ tương quan giữa mwt và bwt
# 1. Read the data into R as the data frame `bw`.
# NOTE(review): this is an absolute, machine-specific Windows path -- adjust it
# (or switch to a project-relative path) before running on another machine.
pathfile <- "D:\\CuDiHoc\\TaiLieuHoc\\ThucHanh\\Data\\birthwt.csv"
bw <- read.csv(pathfile)

# 2. Recode race (1, 2, 3) into the factor ethnicity
#    ("White", "Black", "Others"); any other value becomes NA.
bw[["ethnicity"]] <- factor(
  bw[["race"]],
  levels = c(1, 2, 3),
  labels = c("White", "Black", "Others")
)

# 3. Recode smoke (1 = smoker, 0 = non-smoker) into the factor smoking.
#    "Yes" is listed first so it is the first level in tables and plots.
bw[["smoking"]] <- factor(
  bw[["smoke"]],
  levels = c(1, 0),
  labels = c("Yes", "No")
)

# 4. Recode low (0, 1) into the factor low.bw ("Normal", "Low BW").
bw[["low.bw"]] <- factor(
  bw[["low"]],
  levels = c(0, 1),
  labels = c("Normal", "Low BW")
)

# 5. Derive mwt as lwt scaled by 0.45.
# NOTE(review): presumably a pounds-to-kilograms conversion -- confirm units.
bw[["mwt"]] <- 0.45 * bw[["lwt"]]

# 6. Describe the variables by low.bw using the table1 package.
# One-time setup if the package is missing: install.packages("table1")
library("table1")
## 
## Attaching package: 'table1'
## The following objects are masked from 'package:base':
## 
##     units, units<-
# Preview the first six rows to check the newly created columns.
head(bw, n = 6L)
##   id low age lwt race smoke ptl ht ui ftv  bwt ethnicity smoking low.bw   mwt
## 1 85   0  19 182    2     0   0  0  1   0 2523     Black      No Normal 81.90
## 2 86   0  33 155    3     0   0  0  0   3 2551    Others      No Normal 69.75
## 3 87   0  20 105    1     1   0  0  0   1 2557     White     Yes Normal 47.25
## 4 88   0  21 108    1     1   0  0  1   2 2594     White     Yes Normal 48.60
## 5 89   0  18 107    1     1   0  0  1   0 2600     White     Yes Normal 48.15
## 6 91   0  21 124    3     0   0  0  0   0 2622    Others      No Normal 55.80
# Descriptive table: the variables left of `|` are summarised within each
# level of low.bw, the stratifying variable on the right.
table1(
  ~ age + ethnicity + smoking + mwt + bwt | low.bw,
  data = bw
)
##                       Normal               Low BW               Overall
##                       (N=130)              (N=59)               (N=189)
## age
##   Mean (SD)           23.7 (5.58)          22.3 (4.51)          23.2 (5.30)
##   Median [Min, Max]   23.0 [14.0, 45.0]    22.0 [14.0, 34.0]    23.0 [14.0, 45.0]
## ethnicity
##   White               73 (56.2%)           23 (39.0%)           96 (50.8%)
##   Black               15 (11.5%)           11 (18.6%)           26 (13.8%)
##   Others              42 (32.3%)           25 (42.4%)           67 (35.4%)
## smoking
##   Yes                 44 (33.8%)           30 (50.8%)           74 (39.2%)
##   No                  86 (66.2%)           29 (49.2%)           115 (60.8%)
## mwt
##   Mean (SD)           60.0 (14.3)          55.0 (12.0)          58.4 (13.8)
##   Median [Min, Max]   55.6 [38.3, 113]     54.0 [36.0, 90.0]    54.5 [36.0, 113]
## bwt
##   Mean (SD)           3330 (478)           2100 (391)           2940 (729)
##   Median [Min, Max]   3270 [2520, 4990]    2210 [709, 2500]     2980 [709, 4990]
# 7, 8, 9. Use lessR for the histogram, bar chart and scatterplot.
# One-time setup if the package is missing: install.packages("lessR")
library("lessR")
## 
## lessR 4.4.3                         feedback: gerbing@pdx.edu 
## --------------------------------------------------------------
## > d <- Read("")  Read data file, many formats available, e.g., Excel
##   d is default data frame, data= in analysis routines optional
## 
## Many examples of reading, writing, and manipulating data, 
## graphics, testing means and proportions, regression, factor analysis,
## customization, forecasting, and aggregation from pivot tables
##   Enter: browseVignettes("lessR")
## 
## View lessR updates, now including time series forecasting
##   Enter: news(package="lessR")
## 
## Interactive data analysis
##   Enter: interact()
## 
## Attaching package: 'lessR'
## The following object is masked from 'package:table1':
## 
##     label
## The following object is masked from 'package:base':
## 
##     sort_by
# 7. Histogram of bwt, drawn with lessR.
Histogram(bwt, data = bw)

## >>> Suggestions 
## bin_width: set the width of each bin 
## bin_start: set the start of the first bin 
## bin_end: set the end of the last bin 
## Histogram(bwt, density=TRUE)  # smoothed curve + histogram 
## Plot(bwt)  # Violin/Box/Scatterplot (VBS) plot 
## 
## --- bwt --- 
##  
##       n   miss       mean         sd        min        mdn        max 
##      189      0    2944.59     729.21     709.00    2977.00    4990.00 
##  
## 
##   
## --- Outliers ---     from the box plot: 1 
##  
## Small        Large 
## -----        ----- 
##  709.0            
## 
## 
## Bin Width: 500 
## Number of Bins: 9 
##  
##          Bin  Midpnt  Count    Prop  Cumul.c  Cumul.p 
## ----------------------------------------------------- 
##   500 > 1000     750      1    0.01        1     0.01 
##  1000 > 1500    1250      4    0.02        5     0.03 
##  1500 > 2000    1750     14    0.07       19     0.10 
##  2000 > 2500    2250     40    0.21       59     0.31 
##  2500 > 3000    2750     38    0.20       97     0.51 
##  3000 > 3500    3250     45    0.24      142     0.75 
##  3500 > 4000    3750     38    0.20      180     0.95 
##  4000 > 4500    4250      7    0.04      187     0.99 
##  4500 > 5000    4750      2    0.01      189     1.00 
## 
# 8. Bar chart of the ethnicity distribution, drawn with lessR.
BarChart(ethnicity, data = bw)

## >>> Suggestions
## BarChart(ethnicity, horiz=TRUE)  # horizontal bar chart
## BarChart(ethnicity, fill="reds")  # red bars of varying lightness
## PieChart(ethnicity)  # doughnut (ring) chart
## Plot(ethnicity)  # bubble plot
## Plot(ethnicity, stat="count")  # lollipop plot 
## 
## --- ethnicity --- 
## 
## Missing Values: 0 
## 
##                White  Black  Others     Total 
## Frequencies:      96     26      67       189 
## Proportions:   0.508  0.138   0.354     1.000 
## 
## Chi-squared test of null hypothesis of equal probabilities 
##   Chisq = 39.270, df = 2, p-value = 0.000
# 9. Scatterplot of mwt against bwt, drawn with lessR.
# NOTE(review): bwt is passed first, so it presumably lands on the x-axis;
# swap the two arguments if mwt is meant to be the explanatory variable.
Plot(bwt, mwt, data = bw)

## 
## >>> Suggestions  or  enter: style(suggest=FALSE)
## Plot(bwt, mwt, enhance=TRUE)  # many options
## Plot(bwt, mwt, fill="skyblue")  # interior fill color of points
## Plot(bwt, mwt, fit="lm", fit_se=c(.90,.99))  # fit line, stnd errors
## Plot(bwt, mwt, MD_cut=6)  # Mahalanobis distance from center > 6 is an outlier 
## 
## 
## >>> Pearson's product-moment correlation 
##  
## Number of paired values with neither missing, n = 189 
## Sample Correlation of bwt and mwt: r = 0.186 
##   
## Hypothesis Test of 0 Correlation:  t = 2.585,  df = 187,  p-value = 0.011 
## 95% Confidence Interval for Correlation:  0.044 to 0.320 
##