R Markdown

Read in data

## Read in data
# NOTE(review): the path is absolute and machine-specific, so this only runs
# where this exact file location exists.
# `TRUE` is used instead of `T`, because `T` is an ordinary variable that can
# be reassigned.
birth <- read.csv("C:\\Users\\Luke Do\\Dropbox\\PC\\Downloads\\DỮ LIỆU ĐÍNH KÈM BÀI TẬP\\birthwt.csv", header = TRUE)
# Remove every row that has a missing value in any column.
birth <- na.omit(birth)

## Inspect the structure of the data: one line per column with its type
utils::str(object = birth)
## 'data.frame':    189 obs. of  11 variables:
##  $ id   : int  85 86 87 88 89 91 92 93 94 95 ...
##  $ low  : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ age  : int  19 33 20 21 18 21 22 17 29 26 ...
##  $ lwt  : int  182 155 105 108 107 124 118 103 123 113 ...
##  $ race : int  2 3 1 1 1 3 1 3 1 1 ...
##  $ smoke: int  0 0 1 1 1 0 0 0 1 1 ...
##  $ ptl  : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ ht   : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ ui   : int  1 0 0 1 1 0 0 0 0 0 ...
##  $ ftv  : int  0 3 1 2 0 0 1 1 1 0 ...
##  $ bwt  : int  2523 2551 2557 2594 2600 2622 2637 2637 2663 2665 ...
# Preview the first six rows.
utils::head(birth, n = 6L)
##   id low age lwt race smoke ptl ht ui ftv  bwt
## 1 85   0  19 182    2     0   0  0  1   0 2523
## 2 86   0  33 155    3     0   0  0  0   3 2551
## 3 87   0  20 105    1     1   0  0  0   1 2557
## 4 88   0  21 108    1     1   0  0  1   2 2594
## 5 89   0  18 107    1     1   0  0  1   0 2600
## 6 91   0  21 124    3     0   0  0  0   0 2622
# Preview the last six rows.
utils::tail(birth, n = 6L)
##     id low age lwt race smoke ptl ht ui ftv  bwt
## 184 78   1  14 101    3     1   1  0  0   0 2466
## 185 79   1  28  95    1     1   0  0  0   2 2466
## 186 81   1  14 100    3     0   0  0  0   2 2495
## 187 82   1  23  94    3     1   0  0  0   0 2495
## 188 83   1  17 142    2     0   0  1  0   0 2495
## 189 84   1  21 130    1     1   0  1  0   3 2495
# Add `mwt`, the `lwt` column rescaled by 0.453592. That is the
# pound-to-kilogram factor, so this assumes `lwt` is recorded in pounds.
birth[["mwt"]] <- 0.453592 * birth[["lwt"]]
# Preview the data with the new column.
utils::head(birth, n = 6L)
##   id low age lwt race smoke ptl ht ui ftv  bwt      mwt
## 1 85   0  19 182    2     0   0  0  1   0 2523 82.55374
## 2 86   0  33 155    3     0   0  0  0   3 2551 70.30676
## 3 87   0  20 105    1     1   0  0  0   1 2557 47.62716
## 4 88   0  21 108    1     1   0  0  1   2 2594 48.98794
## 5 89   0  18 107    1     1   0  0  1   0 2600 48.53434
## 6 91   0  21 124    3     0   0  0  0   0 2622 56.24541
# Translate the numeric `race` code into a text label
# (1 = "White", 2 = "Black", 3 = "Other"); any other code yields NA.
birth$ethnicity <- c("White", "Black", "Other")[match(birth$race, c(1, 2, 3))]
# Preview the data with the new column.
utils::head(birth, n = 6L)
##   id low age lwt race smoke ptl ht ui ftv  bwt      mwt ethnicity
## 1 85   0  19 182    2     0   0  0  1   0 2523 82.55374     Black
## 2 86   0  33 155    3     0   0  0  0   3 2551 70.30676     Other
## 3 87   0  20 105    1     1   0  0  0   1 2557 47.62716     White
## 4 88   0  21 108    1     1   0  0  1   2 2594 48.98794     White
## 5 89   0  18 107    1     1   0  0  1   0 2600 48.53434     White
## 6 91   0  21 124    3     0   0  0  0   0 2622 56.24541     Other
# Text version of the `smoke` indicator: "Yes" when the code is 1,
# "No" for any other non-missing value.
birth[["smoking"]] <- ifelse(birth[["smoke"]] == 1, yes = "Yes", no = "No")
# Preview the data with the new column.
utils::head(birth, n = 6L)
##   id low age lwt race smoke ptl ht ui ftv  bwt      mwt ethnicity smoking
## 1 85   0  19 182    2     0   0  0  1   0 2523 82.55374     Black      No
## 2 86   0  33 155    3     0   0  0  0   3 2551 70.30676     Other      No
## 3 87   0  20 105    1     1   0  0  0   1 2557 47.62716     White     Yes
## 4 88   0  21 108    1     1   0  0  1   2 2594 48.98794     White     Yes
## 5 89   0  18 107    1     1   0  0  1   0 2600 48.53434     White     Yes
## 6 91   0  21 124    3     0   0  0  0   0 2622 56.24541     Other      No
# Keep only the `id`, `low` and `bwt` columns, then check the structure of
# the reduced data frame.
birth1 <- birth[c("id", "low", "bwt")]
utils::str(object = birth1)
## 'data.frame':    189 obs. of  3 variables:
##  $ id : int  85 86 87 88 89 91 92 93 94 95 ...
##  $ low: int  0 0 0 0 0 0 0 0 0 0 ...
##  $ bwt: int  2523 2551 2557 2594 2600 2622 2637 2637 2663 2665 ...
# Keep the rows where `low` equals 1; rows with a missing `low` are dropped.
birth2 <- birth[which(birth$low == 1), ]
utils::str(object = birth2)
## 'data.frame':    59 obs. of  14 variables:
##  $ id       : int  4 10 11 13 15 16 17 18 19 20 ...
##  $ low      : int  1 1 1 1 1 1 1 1 1 1 ...
##  $ age      : int  28 29 34 25 25 27 23 24 24 21 ...
##  $ lwt      : int  120 130 187 105 85 150 97 128 132 165 ...
##  $ race     : int  3 1 2 3 3 3 3 2 3 1 ...
##  $ smoke    : int  1 0 1 0 0 0 0 0 0 1 ...
##  $ ptl      : int  1 0 0 1 0 0 0 1 0 0 ...
##  $ ht       : int  0 0 1 1 0 0 0 0 1 1 ...
##  $ ui       : int  1 1 0 0 1 0 1 0 0 0 ...
##  $ ftv      : int  0 2 0 0 0 0 1 1 0 1 ...
##  $ bwt      : int  709 1021 1135 1330 1474 1588 1588 1701 1729 1790 ...
##  $ mwt      : num  54.4 59 84.8 47.6 38.6 ...
##  $ ethnicity: chr  "Other" "White" "Black" "Other" ...
##  $ smoking  : chr  "Yes" "No" "Yes" "No" ...
# Keep the rows where both `low` and `smoke` equal 1; rows where the
# combined condition is missing are dropped.
birth3 <- birth[which(birth$low == 1 & birth$smoke == 1), ]
utils::str(object = birth3)
## 'data.frame':    30 obs. of  14 variables:
##  $ id       : int  4 11 20 22 23 26 27 29 34 35 ...
##  $ low      : int  1 1 1 1 1 1 1 1 1 1 ...
##  $ age      : int  28 34 21 32 19 25 20 24 19 26 ...
##  $ lwt      : int  120 187 165 105 91 92 150 155 112 117 ...
##  $ race     : int  3 2 1 1 1 1 1 1 1 1 ...
##  $ smoke    : int  1 1 1 1 1 1 1 1 1 1 ...
##  $ ptl      : int  1 0 0 0 2 0 0 1 0 1 ...
##  $ ht       : int  0 1 1 0 0 0 0 0 0 0 ...
##  $ ui       : int  1 0 0 0 1 0 0 0 1 0 ...
##  $ ftv      : int  0 0 1 0 0 0 2 0 0 0 ...
##  $ bwt      : int  709 1135 1790 1818 1885 1928 1928 1936 2084 2084 ...
##  $ mwt      : num  54.4 84.8 74.8 47.6 41.3 ...
##  $ ethnicity: chr  "Other" "Black" "White" "White" ...
##  $ smoking  : chr  "Yes" "Yes" "Yes" "Yes" ...
library(table1)
## 
## Attaching package: 'table1'
## The following objects are masked from 'package:base':
## 
##     units, units<-
# Descriptive summary of the continuous variables `age`, `lwt` and `bwt`
# for the whole sample.
table1(x = ~ age + lwt + bwt, data = birth)
Overall
(N=189)
age
Mean (SD) 23.2 (5.30)
Median [Min, Max] 23.0 [14.0, 45.0]
lwt
Mean (SD) 130 (30.6)
Median [Min, Max] 121 [80.0, 250]
bwt
Mean (SD) 2940 (729)
Median [Min, Max] 2980 [709, 4990]
# Same summary, split into one column per level of `low` plus an overall
# column. table1() expects the variable to the right of `|` to be a factor;
# passing the raw integer code triggers a warning, so `low` is converted to a
# factor for this call only (the `birth` data frame itself is not modified).
table1(~ age + lwt + bwt | low, data = transform(birth, low = factor(low)))
0
(N=130)
1
(N=59)
Overall
(N=189)
age
Mean (SD) 23.7 (5.58) 22.3 (4.51) 23.2 (5.30)
Median [Min, Max] 23.0 [14.0, 45.0] 22.0 [14.0, 34.0] 23.0 [14.0, 45.0]
lwt
Mean (SD) 133 (31.7) 122 (26.6) 130 (30.6)
Median [Min, Max] 124 [85.0, 250] 120 [80.0, 200] 121 [80.0, 250]
bwt
Mean (SD) 3330 (478) 2100 (391) 2940 (729)
Median [Min, Max] 3270 [2520, 4990] 2210 [709, 2500] 2980 [709, 4990]
# Summarise the categorical variables as categories rather than as numbers.
# `low`, `smoke` and `race` are stored as integer codes, so passing them as-is
# makes table1() report a mean and SD (e.g. a "mean race"), which is
# meaningless. `smoking` and `ethnicity` are the text versions of `smoke` and
# `race` created earlier in this script.
# NOTE: the rendered table needs to be regenerated after this change.
table1(~ factor(low) + smoking + ethnicity, data = birth)
Overall
(N=189)
low
Mean (SD) 0.312 (0.465)
Median [Min, Max] 0 [0, 1.00]
smoke
Mean (SD) 0.392 (0.489)
Median [Min, Max] 0 [0, 1.00]
race
Mean (SD) 1.85 (0.918)
Median [Min, Max] 1.00 [1.00, 3.00]
library(lessR)
## 
## lessR 4.3.9                         feedback: gerbing@pdx.edu 
## --------------------------------------------------------------
## > d <- Read("")   Read text, Excel, SPSS, SAS, or R data file
##   d is default data frame, data= in analysis routines optional
## 
## Many examples of reading, writing, and manipulating data, 
## graphics, testing means and proportions, regression, factor analysis,
## customization, and descriptive statistics from pivot tables
##   Enter: browseVignettes("lessR")
## 
## View lessR updates, now including time series forecasting
##   Enter: news(package="lessR")
## 
## Interactive data analysis
##   Enter: interact()
## 
## Attaching package: 'lessR'
## The following object is masked from 'package:table1':
## 
##     label
## The following object is masked from 'package:base':
## 
##     sort_by
# Light-blue histogram of `bwt` from `birth` with custom axis labels, drawn
# with lessR's Histogram(); the call also prints the summary statistics,
# outliers and bin table shown after it.
Histogram(bwt, fill="lightblue", xlab="Birthweight (g)", ylab="Frequency", data = birth)

## >>> Suggestions 
## bin_width: set the width of each bin 
## bin_start: set the start of the first bin 
## bin_end: set the end of the last bin 
## Histogram(bwt, density=TRUE)  # smoothed curve + histogram 
## Plot(bwt)  # Violin/Box/Scatterplot (VBS) plot 
## 
## --- bwt --- 
##  
##       n   miss       mean         sd        min        mdn        max 
##      189      0    2944.59     729.21     709.00    2977.00    4990.00 
## 
##   
## --- Outliers ---     from the box plot: 1 
##  
## Small        Large 
## -----        ----- 
##  709.0            
## 
## 
## Bin Width: 500 
## Number of Bins: 9 
##  
##          Bin  Midpnt  Count    Prop  Cumul.c  Cumul.p 
## ----------------------------------------------------- 
##   500 > 1000     750      1    0.01        1     0.01 
##  1000 > 1500    1250      4    0.02        5     0.03 
##  1500 > 2000    1750     14    0.07       19     0.10 
##  2000 > 2500    2250     40    0.21       59     0.31 
##  2500 > 3000    2750     38    0.20       97     0.51 
##  3000 > 3500    3250     45    0.24      142     0.75 
##  3500 > 4000    3750     38    0.20      180     0.95 
##  4000 > 4500    4250      7    0.04      187     0.99 
##  4500 > 5000    4750      2    0.01      189     1.00
# Bar chart of the number of records per `ethnicity` category in `birth`;
# the call also prints the frequency table and chi-squared test shown after it.
BarChart(ethnicity, data=birth)

## >>> Suggestions
## BarChart(ethnicity, horiz=TRUE)  # horizontal bar chart
## BarChart(ethnicity, fill="reds")  # red bars of varying lightness
## PieChart(ethnicity)  # doughnut (ring) chart
## Plot(ethnicity)  # bubble plot
## Plot(ethnicity, stat="count")  # lollipop plot 
## 
## --- ethnicity --- 
## 
## Missing Values: 0 
## 
##                Black  Other  White     Total 
## Frequencies:      26     67     96       189 
## Proportions:   0.138  0.354  0.508     1.000 
## 
## Chi-squared test of null hypothesis of equal probabilities 
##   Chisq = 39.270, df = 2, p-value = 0.000
# Scatter plot of birth weight against mother's weight, with the fitted
# least-squares line overlaid in red. Explicit axis labels replace the default
# "birth$lwt" / "birth$bwt" so the plot matches the labelled histogram.
# The "(lb)" unit assumes `lwt` is in pounds, as the kilogram conversion
# earlier in this script implies.
plot(birth$lwt, birth$bwt,
     xlab = "Mother's weight (lb)", ylab = "Birthweight (g)")
abline(lm(bwt ~ lwt, data = birth), col = "red")

# Scatter plot of birth weight against mother's weight, coloured by ethnicity,
# with the overall least-squares line in red.
# Base plot() has no `by` argument: it is treated as an unknown graphical
# parameter, so the points were never grouped. The grouping is mapped to `col`
# instead, and a legend identifies the groups.
# The "(lb)" unit assumes `lwt` is in pounds, as the kilogram conversion
# earlier in this script implies.
ethnicity_cols <- c(White = "steelblue", Black = "black", Other = "darkorange")
plot(birth$lwt, birth$bwt,
     col = ethnicity_cols[birth$ethnicity], pch = 19,
     xlab = "Mother's weight (lb)", ylab = "Birthweight (g)")
legend("topright", legend = names(ethnicity_cols), col = ethnicity_cols,
       pch = 19, title = "Ethnicity")
abline(lm(bwt ~ lwt, data = birth), col = "red")