# Load the birth-weight data set; the first row holds the column names and a
# single-space field is read as a missing value (NA).
rw <- read.csv(
  file = "D:\\R\\birthwt.csv",
  header = TRUE,
  na.strings = " "
)
# Load the table1 library (mean, median, …)
library(table1)
##
## Attaching package: 'table1'
## The following objects are masked from 'package:base':
##
## units, units<-
library(lessR)
##
## lessR 4.4.3 feedback: gerbing@pdx.edu
## --------------------------------------------------------------
## > d <- Read("") Read data file, many formats available, e.g., Excel
## d is default data frame, data= in analysis routines optional
##
## Many examples of reading, writing, and manipulating data,
## graphics, testing means and proportions, regression, factor analysis,
## customization, forecasting, and aggregation from pivot tables
## Enter: browseVignettes("lessR")
##
## View lessR updates, now including time series forecasting
## Enter: news(package="lessR")
##
## Interactive data analysis
## Enter: interact()
##
## Attaching package: 'lessR'
## The following object is masked from 'package:table1':
##
## label
## The following object is masked from 'package:base':
##
## sort_by
# Clean data ----
# Derived weight column: lwt scaled by 0.4536 (presumably pounds -> kilograms;
# confirm the unit of lwt against the data dictionary).
rw$mwt <- rw$lwt * 0.4536
# Recode the numeric race codes 1/2/3 as a labelled factor; any other code
# becomes NA.
rw$race <- factor(
  rw$race,
  levels = c(1, 2, 3),
  labels = c("white", "black", "others")
)
# Binary indicator derived from smoke. The original test (smoke == "Yes")
# never matched because smoke is coded 0/1 in this file, so fein was always 0.
# Both codings are accepted here; a missing smoke value stays NA.
rw$fein <- ifelse(rw$smoke == 1 | rw$smoke == "Yes", 1, 0)
# Keep only the second and third columns of rw as a separate data frame.
rw1 <- rw[c(2, 3)]
# Descriptive analysis ----
# Drop the rows whose race is missing before tabulating.
rw <- rw[!is.na(rw[["race"]]), , drop = FALSE]
# Summary statistics of age, bwt and lwt, stratified by race.
table1(~ age + bwt + lwt | race, data = rw)
| | white (N=96) | black (N=26) | others (N=67) | Overall (N=189) |
|---|---|---|---|---|
| age | | | | |
| Mean (SD) | 24.3 (5.65) | 21.5 (5.11) | 22.4 (4.54) | 23.2 (5.30) |
| Median [Min, Max] | 23.5 [14.0, 45.0] | 20.5 [15.0, 35.0] | 22.0 [14.0, 33.0] | 23.0 [14.0, 45.0] |
| bwt | | | | |
| Mean (SD) | 3100 (728) | 2720 (639) | 2810 (722) | 2940 (729) |
| Median [Min, Max] | 3060 [1020, 4990] | 2850 [1140, 3860] | 2840 [709, 4050] | 2980 [709, 4990] |
| lwt | | | | |
| Mean (SD) | 132 (29.1) | 147 (39.6) | 120 (25.1) | 130 (30.6) |
| Median [Min, Max] | 130 [90.0, 235] | 129 [98.0, 241] | 119 [80.0, 250] | 121 [80.0, 250] |
# Counts and percentages of smoke and race, stratified by low.
table1(
  ~ factor(smoke) + factor(race) | factor(low),
  data = rw
)
| | 0 (N=130) | 1 (N=59) | Overall (N=189) |
|---|---|---|---|
| factor(smoke) | | | |
| 0 | 86 (66.2%) | 29 (49.2%) | 115 (60.8%) |
| 1 | 44 (33.8%) | 30 (50.8%) | 74 (39.2%) |
| factor(race) | | | |
| white | 73 (56.2%) | 23 (39.0%) | 96 (50.8%) |
| black | 15 (11.5%) | 11 (18.6%) | 26 (13.8%) |
| others | 42 (32.3%) | 25 (42.4%) | 67 (35.4%) |
# Bar chart of the smoke variable.
BarChart(smoke, data = rw)
## >>> Suggestions
## BarChart(smoke, horiz=TRUE) # horizontal bar chart
## BarChart(smoke, fill="reds") # red bars of varying lightness
## PieChart(smoke) # doughnut (ring) chart
## Plot(smoke) # bubble plot
## Plot(smoke, stat="count") # lollipop plot
##
## --- smoke ---
##
## Missing Values: 0
##
## 0 1 Total
## Frequencies: 115 74 189
## Proportions: 0.608 0.392 1.000
##
## Chi-squared test of null hypothesis of equal probabilities
## Chisq = 8.894, df = 1, p-value = 0.003
# Histogram of bwt with blue bars. The axis labels are Vietnamese:
# "can nang cua con" = child's weight, "tan so" = frequency.
Histogram(
  bwt,
  data = rw,
  fill = "blue",
  xlab = "can nang cua con",
  ylab = "tan so"
)
## >>> Suggestions
## bin_width: set the width of each bin
## bin_start: set the start of the first bin
## bin_end: set the end of the last bin
## Histogram(bwt, density=TRUE) # smoothed curve + histogram
## Plot(bwt) # Violin/Box/Scatterplot (VBS) plot
##
## --- bwt ---
##
## n miss mean sd min mdn max
## 189 0 2944.59 729.21 709.00 2977.00 4990.00
##
##
##
## --- Outliers --- from the box plot: 1
##
## Small Large
## ----- -----
## 709.0
##
##
## Bin Width: 500
## Number of Bins: 9
##
## Bin Midpnt Count Prop Cumul.c Cumul.p
## -----------------------------------------------------
## 500 > 1000 750 1 0.01 1 0.01
## 1000 > 1500 1250 4 0.02 5 0.03
## 1500 > 2000 1750 14 0.07 19 0.10
## 2000 > 2500 2250 40 0.21 59 0.31
## 2500 > 3000 2750 38 0.20 97 0.51
## 3000 > 3500 3250 45 0.24 142 0.75
## 3500 > 4000 3750 38 0.20 180 0.95
## 4000 > 4500 4250 7 0.04 187 0.99
## 4500 > 5000 4750 2 0.01 189 1.00
##
# Per-column summary of the cleaned data frame.
summary(object = rw)
## id low age lwt race
## Min. : 4.0 Min. :0.0000 Min. :14.00 Min. : 80.0 white :96
## 1st Qu.: 68.0 1st Qu.:0.0000 1st Qu.:19.00 1st Qu.:110.0 black :26
## Median :123.0 Median :0.0000 Median :23.00 Median :121.0 others:67
## Mean :121.1 Mean :0.3122 Mean :23.24 Mean :129.8
## 3rd Qu.:176.0 3rd Qu.:1.0000 3rd Qu.:26.00 3rd Qu.:140.0
## Max. :226.0 Max. :1.0000 Max. :45.00 Max. :250.0
## smoke ptl ht ui
## Min. :0.0000 Min. :0.0000 Min. :0.00000 Min. :0.0000
## 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.00000 1st Qu.:0.0000
## Median :0.0000 Median :0.0000 Median :0.00000 Median :0.0000
## Mean :0.3915 Mean :0.1958 Mean :0.06349 Mean :0.1481
## 3rd Qu.:1.0000 3rd Qu.:0.0000 3rd Qu.:0.00000 3rd Qu.:0.0000
## Max. :1.0000 Max. :3.0000 Max. :1.00000 Max. :1.0000
## ftv bwt mwt fein
## Min. :0.0000 Min. : 709 Min. : 36.29 Min. :0
## 1st Qu.:0.0000 1st Qu.:2414 1st Qu.: 49.90 1st Qu.:0
## Median :0.0000 Median :2977 Median : 54.89 Median :0
## Mean :0.7937 Mean :2945 Mean : 58.88 Mean :0
## 3rd Qu.:1.0000 3rd Qu.:3487 3rd Qu.: 63.50 3rd Qu.:0
## Max. :6.0000 Max. :4990 Max. :113.40 Max. :0
This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown see http://rmarkdown.rstudio.com.
When you click the Knit button a document will be generated that includes both content as well as the output of any embedded R code chunks within the document. You can embed an R code chunk like this:
# Example chunk from the R Markdown template: summarise the cars data set.
summary(object = cars)
## speed dist
## Min. : 4.0 Min. : 2.00
## 1st Qu.:12.0 1st Qu.: 26.00
## Median :15.0 Median : 36.00
## Mean :15.4 Mean : 42.98
## 3rd Qu.:19.0 3rd Qu.: 56.00
## Max. :25.0 Max. :120.00
You can also embed plots, for example:
Note that the echo = FALSE parameter was added to the
code chunk to prevent printing of the R code that generated the
plot.