Read in data
## Read in data
# Load the birth-weight data from CSV; the first row holds the column names.
# NOTE(review): this is an absolute, machine-specific path, so the script only
# runs where this exact file exists. Consider a project-relative path.
birth <- read.csv("C:\\Users\\Luke Do\\Dropbox\\PC\\Downloads\\DỮ LIỆU ĐÍNH KÈM BÀI TẬP\\birthwt.csv", header = TRUE)
# Drop every row that contains at least one missing value.
birth <- na.omit(birth)
## Show classification of variables
# Print the structure of `birth`: its dimensions plus the type and first
# values of every column.
str(birth)
## 'data.frame': 189 obs. of 11 variables:
## $ id : int 85 86 87 88 89 91 92 93 94 95 ...
## $ low : int 0 0 0 0 0 0 0 0 0 0 ...
## $ age : int 19 33 20 21 18 21 22 17 29 26 ...
## $ lwt : int 182 155 105 108 107 124 118 103 123 113 ...
## $ race : int 2 3 1 1 1 3 1 3 1 1 ...
## $ smoke: int 0 0 1 1 1 0 0 0 1 1 ...
## $ ptl : int 0 0 0 0 0 0 0 0 0 0 ...
## $ ht : int 0 0 0 0 0 0 0 0 0 0 ...
## $ ui : int 1 0 0 1 1 0 0 0 0 0 ...
## $ ftv : int 0 3 1 2 0 0 1 1 1 0 ...
## $ bwt : int 2523 2551 2557 2594 2600 2622 2637 2637 2663 2665 ...
# Show the first rows of `birth` as a quick sanity check of the import.
head(birth)
## id low age lwt race smoke ptl ht ui ftv bwt
## 1 85 0 19 182 2 0 0 0 1 0 2523
## 2 86 0 33 155 3 0 0 0 0 3 2551
## 3 87 0 20 105 1 1 0 0 0 1 2557
## 4 88 0 21 108 1 1 0 0 1 2 2594
## 5 89 0 18 107 1 1 0 0 1 0 2600
## 6 91 0 21 124 3 0 0 0 0 0 2622
# Show the last rows of `birth` to confirm the file was read to the end.
tail(birth)
## id low age lwt race smoke ptl ht ui ftv bwt
## 184 78 1 14 101 3 1 1 0 0 0 2466
## 185 79 1 28 95 1 1 0 0 0 2 2466
## 186 81 1 14 100 3 0 0 0 0 2 2495
## 187 82 1 23 94 3 1 0 0 0 0 2495
## 188 83 1 17 142 2 0 0 1 0 0 2495
## 189 84 1 21 130 1 1 0 1 0 3 2495
# Add `mwt`: the `lwt` column rescaled by 0.453592 (kilograms per pound).
birth[["mwt"]] <- birth[["lwt"]] * 0.453592
head(birth)
## id low age lwt race smoke ptl ht ui ftv bwt mwt
## 1 85 0 19 182 2 0 0 0 1 0 2523 82.55374
## 2 86 0 33 155 3 0 0 0 0 3 2551 70.30676
## 3 87 0 20 105 1 1 0 0 0 1 2557 47.62716
## 4 88 0 21 108 1 1 0 0 1 2 2594 48.98794
## 5 89 0 18 107 1 1 0 0 1 0 2600 48.53434
## 6 91 0 21 124 3 0 0 0 0 0 2622 56.24541
# Add `ethnicity`: a text label for the numeric `race` code
# (1 = "White", 2 = "Black", 3 = "Other"); any other code yields NA.
birth[["ethnicity"]] <- c("White", "Black", "Other")[match(birth[["race"]], 1:3)]
head(birth)
## id low age lwt race smoke ptl ht ui ftv bwt mwt ethnicity
## 1 85 0 19 182 2 0 0 0 1 0 2523 82.55374 Black
## 2 86 0 33 155 3 0 0 0 0 3 2551 70.30676 Other
## 3 87 0 20 105 1 1 0 0 0 1 2557 47.62716 White
## 4 88 0 21 108 1 1 0 0 1 2 2594 48.98794 White
## 5 89 0 18 107 1 1 0 0 1 0 2600 48.53434 White
## 6 91 0 21 124 3 0 0 0 0 0 2622 56.24541 Other
# Add `smoking`: "Yes" where `smoke` equals 1, "No" otherwise.
# The comparison gives FALSE/TRUE, so adding 1L picks element 1 ("No") or
# element 2 ("Yes") of the label vector.
birth[["smoking"]] <- c("No", "Yes")[(birth[["smoke"]] == 1) + 1L]
head(birth)
## id low age lwt race smoke ptl ht ui ftv bwt mwt ethnicity smoking
## 1 85 0 19 182 2 0 0 0 1 0 2523 82.55374 Black No
## 2 86 0 33 155 3 0 0 0 0 3 2551 70.30676 Other No
## 3 87 0 20 105 1 1 0 0 0 1 2557 47.62716 White Yes
## 4 88 0 21 108 1 1 0 0 1 2 2594 48.98794 White Yes
## 5 89 0 18 107 1 1 0 0 1 0 2600 48.53434 White Yes
## 6 91 0 21 124 3 0 0 0 0 0 2622 56.24541 Other No
# birth1: all rows of `birth`, restricted to the id, low and bwt columns.
birth1 <- subset(birth, select = c(id, low, bwt))
str(birth1)
## 'data.frame': 189 obs. of 3 variables:
## $ id : int 85 86 87 88 89 91 92 93 94 95 ...
## $ low: int 0 0 0 0 0 0 0 0 0 0 ...
## $ bwt: int 2523 2551 2557 2594 2600 2622 2637 2637 2663 2665 ...
# birth2: only the rows of `birth` where low == 1, with all columns kept.
# which() skips NA comparisons, just as subset() does.
birth2 <- birth[which(birth$low == 1), ]
str(birth2)
## 'data.frame': 59 obs. of 14 variables:
## $ id : int 4 10 11 13 15 16 17 18 19 20 ...
## $ low : int 1 1 1 1 1 1 1 1 1 1 ...
## $ age : int 28 29 34 25 25 27 23 24 24 21 ...
## $ lwt : int 120 130 187 105 85 150 97 128 132 165 ...
## $ race : int 3 1 2 3 3 3 3 2 3 1 ...
## $ smoke : int 1 0 1 0 0 0 0 0 0 1 ...
## $ ptl : int 1 0 0 1 0 0 0 1 0 0 ...
## $ ht : int 0 0 1 1 0 0 0 0 1 1 ...
## $ ui : int 1 1 0 0 1 0 1 0 0 0 ...
## $ ftv : int 0 2 0 0 0 0 1 1 0 1 ...
## $ bwt : int 709 1021 1135 1330 1474 1588 1588 1701 1729 1790 ...
## $ mwt : num 54.4 59 84.8 47.6 38.6 ...
## $ ethnicity: chr "Other" "White" "Black" "Other" ...
## $ smoking : chr "Yes" "No" "Yes" "No" ...
# birth3: only the rows of `birth` where both low == 1 and smoke == 1.
# which() skips NA comparisons, just as subset() does.
birth3 <- birth[which(birth$low == 1 & birth$smoke == 1), ]
str(birth3)
## 'data.frame': 30 obs. of 14 variables:
## $ id : int 4 11 20 22 23 26 27 29 34 35 ...
## $ low : int 1 1 1 1 1 1 1 1 1 1 ...
## $ age : int 28 34 21 32 19 25 20 24 19 26 ...
## $ lwt : int 120 187 165 105 91 92 150 155 112 117 ...
## $ race : int 3 2 1 1 1 1 1 1 1 1 ...
## $ smoke : int 1 1 1 1 1 1 1 1 1 1 ...
## $ ptl : int 1 0 0 0 2 0 0 1 0 1 ...
## $ ht : int 0 1 1 0 0 0 0 0 0 0 ...
## $ ui : int 1 0 0 0 1 0 0 0 1 0 ...
## $ ftv : int 0 0 1 0 0 0 2 0 0 0 ...
## $ bwt : int 709 1135 1790 1818 1885 1928 1928 1936 2084 2084 ...
## $ mwt : num 54.4 84.8 74.8 47.6 41.3 ...
## $ ethnicity: chr "Other" "Black" "White" "White" ...
## $ smoking : chr "Yes" "Yes" "Yes" "Yes" ...
library(table1)
##
## Attaching package: 'table1'
## The following objects are masked from 'package:base':
##
## units, units<-
# Descriptive table of the continuous variables age, lwt and bwt for the
# whole sample.
table1(
  ~ age + lwt + bwt,
  data = birth
)
| Overall (N=189) |
|
|---|---|
| age | |
| Mean (SD) | 23.2 (5.30) |
| Median [Min, Max] | 23.0 [14.0, 45.0] |
| lwt | |
| Mean (SD) | 130 (30.6) |
| Median [Min, Max] | 121 [80.0, 250] |
| bwt | |
| Mean (SD) | 2940 (729) |
| Median [Min, Max] | 2980 [709, 4990] |
# Descriptive table of age, lwt and bwt with one column per level of `low`.
# table1 expects the stratifying term (right of `|`) to be a factor; passing
# the raw integer column raised a warning. The conversion is done on a
# temporary copy, so `birth` itself is left unchanged and the column headers
# remain "0" and "1".
table1(~ age + lwt + bwt | low, data = transform(birth, low = factor(low)))
| 0 (N=130) |
1 (N=59) |
Overall (N=189) |
|
|---|---|---|---|
| age | |||
| Mean (SD) | 23.7 (5.58) | 22.3 (4.51) | 23.2 (5.30) |
| Median [Min, Max] | 23.0 [14.0, 45.0] | 22.0 [14.0, 34.0] | 23.0 [14.0, 45.0] |
| lwt | |||
| Mean (SD) | 133 (31.7) | 122 (26.6) | 130 (30.6) |
| Median [Min, Max] | 124 [85.0, 250] | 120 [80.0, 200] | 121 [80.0, 250] |
| bwt | |||
| Mean (SD) | 3330 (478) | 2100 (391) | 2940 (729) |
| Median [Min, Max] | 3270 [2520, 4990] | 2210 [709, 2500] | 2980 [709, 4990] |
# low, smoke and race are categorical codes, so a mean/SD summary of them is
# not meaningful. Convert them to labelled factors on a temporary copy so that
# table1 reports counts and percentages per category instead. The labels
# mirror the `smoking` and `ethnicity` recodings used elsewhere in this script.
# NOTE(review): re-knit the document so the rendered table reflects this call.
table1(
  ~ low + smoke + race,
  data = transform(
    birth,
    low = factor(low),
    smoke = factor(smoke, levels = c(0, 1), labels = c("No", "Yes")),
    race = factor(race, levels = 1:3, labels = c("White", "Black", "Other"))
  )
)
| Overall (N=189) |
|
|---|---|
| low | |
| Mean (SD) | 0.312 (0.465) |
| Median [Min, Max] | 0 [0, 1.00] |
| smoke | |
| Mean (SD) | 0.392 (0.489) |
| Median [Min, Max] | 0 [0, 1.00] |
| race | |
| Mean (SD) | 1.85 (0.918) |
| Median [Min, Max] | 1.00 [1.00, 3.00] |
library(lessR)
##
## lessR 4.3.9 feedback: gerbing@pdx.edu
## --------------------------------------------------------------
## > d <- Read("") Read text, Excel, SPSS, SAS, or R data file
## d is default data frame, data= in analysis routines optional
##
## Many examples of reading, writing, and manipulating data,
## graphics, testing means and proportions, regression, factor analysis,
## customization, and descriptive statistics from pivot tables
## Enter: browseVignettes("lessR")
##
## View lessR updates, now including time series forecasting
## Enter: news(package="lessR")
##
## Interactive data analysis
## Enter: interact()
##
## Attaching package: 'lessR'
## The following object is masked from 'package:table1':
##
## label
## The following object is masked from 'package:base':
##
## sort_by
# Histogram of the `bwt` column of `birth`, with light-blue bars and explicit
# axis titles.
Histogram(
  bwt,
  data = birth,
  fill = "lightblue",
  xlab = "Birthweight (g)",
  ylab = "Frequency"
)
## >>> Suggestions
## bin_width: set the width of each bin
## bin_start: set the start of the first bin
## bin_end: set the end of the last bin
## Histogram(bwt, density=TRUE) # smoothed curve + histogram
## Plot(bwt) # Violin/Box/Scatterplot (VBS) plot
##
## --- bwt ---
##
## n miss mean sd min mdn max
## 189 0 2944.59 729.21 709.00 2977.00 4990.00
##
##
## --- Outliers --- from the box plot: 1
##
## Small Large
## ----- -----
## 709.0
##
##
## Bin Width: 500
## Number of Bins: 9
##
## Bin Midpnt Count Prop Cumul.c Cumul.p
## -----------------------------------------------------
## 500 > 1000 750 1 0.01 1 0.01
## 1000 > 1500 1250 4 0.02 5 0.03
## 1500 > 2000 1750 14 0.07 19 0.10
## 2000 > 2500 2250 40 0.21 59 0.31
## 2500 > 3000 2750 38 0.20 97 0.51
## 3000 > 3500 3250 45 0.24 142 0.75
## 3500 > 4000 3750 38 0.20 180 0.95
## 4000 > 4500 4250 7 0.04 187 0.99
## 4500 > 5000 4750 2 0.01 189 1.00
# Bar chart of the number of rows in `birth` per `ethnicity` category.
BarChart(
  ethnicity,
  data = birth
)
## >>> Suggestions
## BarChart(ethnicity, horiz=TRUE) # horizontal bar chart
## BarChart(ethnicity, fill="reds") # red bars of varying lightness
## PieChart(ethnicity) # doughnut (ring) chart
## Plot(ethnicity) # bubble plot
## Plot(ethnicity, stat="count") # lollipop plot
##
## --- ethnicity ---
##
## Missing Values: 0
##
## Black Other White Total
## Frequencies: 26 67 96 189
## Proportions: 0.138 0.354 0.508 1.000
##
## Chi-squared test of null hypothesis of equal probabilities
## Chisq = 39.270, df = 2, p-value = 0.000
# Scatter plot of bwt (y) against lwt (x).
plot(x = birth$lwt, y = birth$bwt)
# Overlay the least-squares line of bwt regressed on lwt, drawn in red.
abline(lm(bwt ~ lwt, data = birth), col = "red")
# Scatter plot of bwt (y) against lwt (x) with the points grouped by ethnicity.
# Base plot() has no `by` argument, so the original call never separated the
# groups. Here each ethnicity gets its own point colour and a legend instead.
ethnicity_cols <- c(White = "steelblue", Black = "black", Other = "darkorange")
plot(
  birth$lwt, birth$bwt,
  col = unname(ethnicity_cols[birth$ethnicity])
)
legend(
  "topright",
  legend = names(ethnicity_cols),
  col = ethnicity_cols,
  pch = 1,
  title = "Ethnicity"
)
# Overlay the least-squares line of bwt regressed on lwt (all groups pooled).
abline(lm(birth$bwt ~ birth$lwt), col = "red")