Khóa học sử dụng R - Ngày 1

Việc 3: đọc dữ liệu

Yêu cầu: đọc dữ liệu birthwt.csv vào R

# Load the birth-weight CSV into the data frame `bw`.
bw <- read.csv(
  file = "C:/Users/Pham Hong Chau Oanh/Downloads/DỮ LIỆU THỰC HÀNH (TS Thạch gửi)/birthwt.csv"
)

Prompt Chat GPT

# Second read of the same CSV into `bw2`, with header = TRUE written out.
bw2 <- read.csv(
  file = "C:/Users/Pham Hong Chau Oanh/Downloads/DỮ LIỆU THỰC HÀNH (TS Thạch gửi)/birthwt.csv",
  header = TRUE
)

Việc 4: Thông tin

Biến số và quan sát

# Report the size of `bw` as (number of rows, number of columns).
dim(bw)
## [1] 189  11

Liệt kê quan sát

# Show the first five observations of `bw`.
head(bw, n = 5)
##   id low age lwt race smoke ptl ht ui ftv  bwt
## 1 85   0  19 182    2     0   0  0  1   0 2523
## 2 86   0  33 155    3     0   0  0  0   3 2551
## 3 87   0  20 105    1     1   0  0  0   1 2557
## 4 88   0  21 108    1     1   0  0  1   2 2594
## 5 89   0  18 107    1     1   0  0  1   0 2600

Prompt Chat GPT

# Show the last six observations of `bw` (six is tail()'s default).
tail(bw, n = 6)
##     id low age lwt race smoke ptl ht ui ftv  bwt
## 184 78   1  14 101    3     1   1  0  0   0 2466
## 185 79   1  28  95    1     1   0  0  0   2 2466
## 186 81   1  14 100    3     0   0  0  0   2 2495
## 187 82   1  23  94    3     1   0  0  0   0 2495
## 188 83   1  17 142    2     0   0  1  0   0 2495
## 189 84   1  21 130    1     1   0  1  0   3 2495

Việc 5: Biên tập dữ liệu

Tạo biến mwt

# Derive `mwt` as `lwt` scaled by 0.45.
# NOTE(review): presumably a pounds-to-kilograms conversion -- confirm units.
bw[["mwt"]] <- bw[["lwt"]] * 0.45

Biến số ethnicity

# Recode the numeric `race` codes into text labels in one vectorised lookup:
#   1 -> "White", 2 -> "Black", 3 -> "Other"; any other code (or NA) -> NA.
# The whole column is rebuilt on every run, so re-running this step after
# `race` has changed can never leave stale labels behind, and the column no
# longer has to be grown implicitly by a series of masked assignments.
bw$ethnicity <- c("White", "Black", "Other")[match(bw$race, c(1, 2, 3))]

Dữ liệu bw1

# Keep only the id, low and bwt columns in `bw1`, then report its size.
bw1 <- bw[c("id", "low", "bwt")]
dim(bw1)
## [1] 189   3

Tập bw3

# Rows of `bw` with low equal to 1; which() drops rows where the test is NA,
# as subset() does. Then report the size of the result.
bw3 <- bw[which(bw$low == 1), , drop = FALSE]
dim(bw3)
## [1] 59 13

Tập bw4

# Rows of `bw` where both low and smoke equal 1 (rows whose test is NA are
# dropped). Then report the size of the result.
bw4 <- bw[which(bw$low == 1 & bw$smoke == 1), , drop = FALSE]
dim(bw4)
## [1] 30 13

Prompt Chat GPT

# Same filter applied to `bw2`: rows where both low and smoke equal 1
# (rows whose test is NA are dropped). Then report the size of the result.
bw5 <- bw2[which(bw2$low == 1 & bw2$smoke == 1), , drop = FALSE]
dim(bw5)
## [1] 30 11

Việc 6: Gói lệnh lessR

Histogram

library(lessR)
## 
## lessR 4.4.3                         feedback: gerbing@pdx.edu 
## --------------------------------------------------------------
## > d <- Read("")  Read data file, many formats available, e.g., Excel
##   d is default data frame, data= in analysis routines optional
## 
## Many examples of reading, writing, and manipulating data, 
## graphics, testing means and proportions, regression, factor analysis,
## customization, forecasting, and aggregation from pivot tables
##   Enter: browseVignettes("lessR")
## 
## View lessR updates, now including time series forecasting
##   Enter: news(package="lessR")
## 
## Interactive data analysis
##   Enter: interact()
## 
## Attaching package: 'lessR'
## The following object is masked from 'package:base':
## 
##     sort_by
# Histogram of the bwt column of `bw` using lessR's Histogram(); the text
# summary printed by the call (statistics, outliers, bin table) follows.
Histogram(bwt, data = bw)

## >>> Suggestions 
## bin_width: set the width of each bin 
## bin_start: set the start of the first bin 
## bin_end: set the end of the last bin 
## Histogram(bwt, density=TRUE)  # smoothed curve + histogram 
## Plot(bwt)  # Violin/Box/Scatterplot (VBS) plot 
## 
## --- bwt --- 
##  
##       n   miss       mean         sd        min        mdn        max 
##      189      0    2944.59     729.21     709.00    2977.00    4990.00 
##  
## 
##   
## --- Outliers ---     from the box plot: 1 
##  
## Small        Large 
## -----        ----- 
##  709.0            
## 
## 
## Bin Width: 500 
## Number of Bins: 9 
##  
##          Bin  Midpnt  Count    Prop  Cumul.c  Cumul.p 
## ----------------------------------------------------- 
##   500 > 1000     750      1    0.01        1     0.01 
##  1000 > 1500    1250      4    0.02        5     0.03 
##  1500 > 2000    1750     14    0.07       19     0.10 
##  2000 > 2500    2250     40    0.21       59     0.31 
##  2500 > 3000    2750     38    0.20       97     0.51 
##  3000 > 3500    3250     45    0.24      142     0.75 
##  3500 > 4000    3750     38    0.20      180     0.95 
##  4000 > 4500    4250      7    0.04      187     0.99 
##  4500 > 5000    4750      2    0.01      189     1.00 
##