# Load the birth-weight data from a local CSV file.
# NOTE(review): the path is absolute and machine-specific, so the script only
# runs where this exact file exists -- consider a relative path.
bw <- read.csv(file = "D:/1. KOREAMARCH/LOPR/bai tap/birthwt.csv")

# Turn the numeric codes into labelled factors. Any code that is not listed
# in `levels` becomes NA.
bw[["ethnicity"]] <- factor(
  bw[["race"]],
  levels = c(1, 2, 3),
  labels = c("White", "Black", "Others")
)
# "Yes" (code 1) is listed first, so it is the first factor level.
bw[["smoking"]] <- factor(
  bw[["smoke"]],
  levels = c(1, 0),
  labels = c("Yes", "No")
)
bw[["low.bw"]] <- factor(
  bw[["low"]],
  levels = c(0, 1),
  labels = c("Normal", "Low BW")
)

# Derived maternal weight: `lwt` scaled by 0.45.
# NOTE(review): presumably a pound-to-kilogram conversion (the exact factor
# is about 0.4536) -- confirm the unit of `lwt` and the intended precision.
bw[["mother.wt"]] <- 0.45 * bw[["lwt"]]
# Install table1 only when it is not available yet. requireNamespace() checks
# for the package without attaching it (require() would attach it as a side
# effect and merely warn when it is missing); the library(table1) call below
# does the attaching.
if (!requireNamespace("table1", quietly = TRUE)) {
  install.packages("table1")
}
## Loading required package: table1
##
## Attaching package: 'table1'
## The following objects are masked from 'package:base':
##
## units, units<-
# Attach table1 and build the descriptive summary table: the variables on the
# left of `|` are summarised, split by the birth-weight group on the right.
library(table1)
table1(
  ~ age + ethnicity + smoking + mother.wt + bwt | low.bw,
  data = bw
)
| | Normal (N=130) | Low BW (N=59) | Overall (N=189) |
|---|---|---|---|
| age | |||
| Mean (SD) | 23.7 (5.58) | 22.3 (4.51) | 23.2 (5.30) |
| Median [Min, Max] | 23.0 [14.0, 45.0] | 22.0 [14.0, 34.0] | 23.0 [14.0, 45.0] |
| ethnicity | |||
| White | 73 (56.2%) | 23 (39.0%) | 96 (50.8%) |
| Black | 15 (11.5%) | 11 (18.6%) | 26 (13.8%) |
| Others | 42 (32.3%) | 25 (42.4%) | 67 (35.4%) |
| smoking | |||
| Yes | 44 (33.8%) | 30 (50.8%) | 74 (39.2%) |
| No | 86 (66.2%) | 29 (49.2%) | 115 (60.8%) |
| mother.wt | |||
| Mean (SD) | 60.0 (14.3) | 55.0 (12.0) | 58.4 (13.8) |
| Median [Min, Max] | 55.6 [38.3, 113] | 54.0 [36.0, 90.0] | 54.5 [36.0, 113] |
| bwt | |||
| Mean (SD) | 3330 (478) | 2100 (391) | 2940 (729) |
| Median [Min, Max] | 3270 [2520, 4990] | 2210 [709, 2500] | 2980 [709, 4990] |
# Install lessR only when it is not available yet. requireNamespace() checks
# for the package without attaching it (require() would attach it as a side
# effect and merely warn when it is missing); the library(lessR) call below
# does the attaching.
if (!requireNamespace("lessR", quietly = TRUE)) {
  install.packages("lessR")
}
## Loading required package: lessR
##
## lessR 4.4.3 feedback: gerbing@pdx.edu
## --------------------------------------------------------------
## > d <- Read("") Read data file, many formats available, e.g., Excel
## d is default data frame, data= in analysis routines optional
##
## Many examples of reading, writing, and manipulating data,
## graphics, testing means and proportions, regression, factor analysis,
## customization, forecasting, and aggregation from pivot tables
## Enter: browseVignettes("lessR")
##
## View lessR updates, now including time series forecasting
## Enter: news(package="lessR")
##
## Interactive data analysis
## Enter: interact()
##
## Attaching package: 'lessR'
## The following object is masked from 'package:table1':
##
## label
## The following object is masked from 'package:base':
##
## sort_by
# Attach lessR for its Histogram(), BarChart() and Plot() functions.
library(lessR)

# Birth weight as a standalone vector; the name `Y` is what lessR prints in
# its output, and the scatterplot further down reuses it.
Y <- bw[["bwt"]]
Histogram(
  Y,
  main = "Histogram of Birth Weight"
)
## >>> Note: Y is not in a data frame (table)
## >>> Note: Y is not in a data frame (table)
## >>> Suggestions
## bin_width: set the width of each bin
## bin_start: set the start of the first bin
## bin_end: set the end of the last bin
## Histogram(Y, density=TRUE) # smoothed curve + histogram
## Plot(Y) # Violin/Box/Scatterplot (VBS) plot
##
## --- Y ---
##
## n miss mean sd min mdn max
## 189 0 2944.59 729.21 709.00 2977.00 4990.00
##
##
##
## --- Outliers --- from the box plot: 1
##
## Small Large
## ----- -----
## 709.0
##
##
## Bin Width: 500
## Number of Bins: 9
##
## Bin Midpnt Count Prop Cumul.c Cumul.p
## -----------------------------------------------------
## 500 > 1000 750 1 0.01 1 0.01
## 1000 > 1500 1250 4 0.02 5 0.03
## 1500 > 2000 1750 14 0.07 19 0.10
## 2000 > 2500 2250 40 0.21 59 0.31
## 2500 > 3000 2750 38 0.20 97 0.51
## 3000 > 3500 3250 45 0.24 142 0.75
## 3500 > 4000 3750 38 0.20 180 0.95
## 4000 > 4500 4250 7 0.04 187 0.99
## 4500 > 5000 4750 2 0.01 189 1.00
##
# Ethnicity factor pulled out as a standalone vector and charted by category;
# the name `E` is what lessR prints in its output.
E <- bw[["ethnicity"]]
BarChart(
  E,
  main = "Ethnicity Distribution"
)
## >>> Note: E is not in a data frame (table)
## >>> Suggestions
## BarChart(E, horiz=TRUE) # horizontal bar chart
## BarChart(E, fill="reds") # red bars of varying lightness
## PieChart(E) # doughnut (ring) chart
## Plot(E) # bubble plot
## Plot(E, stat="count") # lollipop plot
##
## --- E ---
##
## Missing Values: 0
##
## White Black Others Total
## Frequencies: 96 26 67 189
## Proportions: 0.508 0.138 0.354 1.000
##
## Chi-squared test of null hypothesis of equal probabilities
## Chisq = 39.270, df = 2, p-value = 0.000
# Mother's weight as a standalone vector, plotted against birth weight (`Y`,
# assigned earlier in the script).
# NOTE(review): lessR's Plot() appears to put its first argument on the
# horizontal axis, so birth weight is drawn on x and mother's weight on y --
# confirm this orientation is the intended one.
X <- bw[["mother.wt"]]
Plot(
  Y,
  X,
  main = "Scatterplot: Mother's Weight vs Baby's Weight"
)
## >>> Note: Y is not in a data frame (table)
## >>> Note: X is not in a data frame (table)
##
## >>> Suggestions or enter: style(suggest=FALSE)
## Plot(Y, X, enhance=TRUE) # many options
## Plot(Y, X, fill="skyblue") # interior fill color of points
## Plot(Y, X, fit="lm", fit_se=c(.90,.99)) # fit line, stnd errors
## Plot(Y, X, MD_cut=6) # Mahalanobis distance from center > 6 is an outlier
##
##
## >>> Pearson's product-moment correlation
##
## Number of paired values with neither missing, n = 189
## Sample Correlation of Y and X: r = 0.186
##
## Hypothesis Test of 0 Correlation: t = 2.585, df = 187, p-value = 0.011
## 95% Confidence Interval for Correlation: 0.044 to 0.320
##