Loading libraries

library(table1)
## 
## Attaching package: 'table1'
## The following objects are masked from 'package:base':
## 
##     units, units<-
library(lessR)
## 
## lessR 4.3.9                         feedback: gerbing@pdx.edu 
## --------------------------------------------------------------
## > d <- Read("")   Read text, Excel, SPSS, SAS, or R data file
##   d is default data frame, data= in analysis routines optional
## 
## Many examples of reading, writing, and manipulating data, 
## graphics, testing means and proportions, regression, factor analysis,
## customization, and descriptive statistics from pivot tables
##   Enter: browseVignettes("lessR")
## 
## View lessR updates, now including time series forecasting
##   Enter: news(package="lessR")
## 
## Interactive data analysis
##   Enter: interact()
## 
## Attaching package: 'lessR'
## The following object is masked from 'package:table1':
## 
##     label
## The following object is masked from 'package:base':
## 
##     sort_by
library(ggplot2)

Reading data into R

# Load the arrest dataset from CSV into the data frame used by every
# analysis step below.
# NOTE(review): the path is absolute and machine-specific; the script will
# only run where this file exists at exactly this location.
df <- read.csv("/Users/121493/Dropbox/_Conferences and Workshops/Datasets/Arrest dataset.csv")

# Preview the first rows to confirm the columns were parsed as expected.
head(df)
##   id age finance week arrest  race work.exp     married parole prior educ
## 1  1  27      no   20      1 black       no not married    yes     3    3
## 2  2  18      no   17      1 black       no not married    yes     8    4
## 3  3  19      no   25      1 other      yes not married    yes    13    3
## 4  4  23     yes   52      0 black      yes     married    yes     1    5
## 5  5  19      no   52      0 other      yes not married    yes     3    3
## 6  6  24      no   52      0 black      yes not married     no     2    4
##   employ1
## 1      no
## 2      no
## 3      no
## 4      no
## 5      no
## 6      no

Phân tích mô tả

# Descriptive summary of age, finance, race, married and parole,
# stratified by the arrest indicator (one column per arrest value plus
# an overall column).
table1(
  ~ age + finance + race + married + parole | arrest,
  data = df
)
0
(N=318)
1
(N=114)
Overall
(N=432)
age
Mean (SD) 25.3 (6.31) 22.8 (5.12) 24.6 (6.11)
Median [Min, Max] 23.0 [17.0, 44.0] 21.0 [17.0, 44.0] 23.0 [17.0, 44.0]
finance
no 150 (47.2%) 66 (57.9%) 216 (50.0%)
yes 168 (52.8%) 48 (42.1%) 216 (50.0%)
race
black 277 (87.1%) 102 (89.5%) 379 (87.7%)
other 41 (12.9%) 12 (10.5%) 53 (12.3%)
married
married 45 (14.2%) 8 (7.0%) 53 (12.3%)
not married 273 (85.8%) 106 (93.0%) 379 (87.7%)
parole
no 119 (37.4%) 46 (40.4%) 165 (38.2%)
yes 199 (62.6%) 68 (59.6%) 267 (61.8%)

Vẽ biểu đồ dùng lessR

# Histogram of age with blue bars and custom axis labels; lessR also
# prints summary statistics, box-plot outliers and the bin table.
Histogram(
  age,
  data = df,
  fill = "blue",
  xlab = "Tuoi",
  ylab = "Freq"
)

## >>> Suggestions 
## bin_width: set the width of each bin 
## bin_start: set the start of the first bin 
## bin_end: set the end of the last bin 
## Histogram(age, density=TRUE)  # smoothed curve + histogram 
## Plot(age)  # Violin/Box/Scatterplot (VBS) plot 
## 
## --- age --- 
##  
##       n   miss     mean       sd      min      mdn      max 
##      432      0    24.60     6.11    17.00    23.00    44.00 
## 
##   
## --- Outliers ---     from the box plot: 28 
##  
## Small      Large 
## -----      ----- 
##             44.0 
##             44.0 
##             44.0 
##             43.0 
##             43.0 
##             43.0 
##             43.0 
##             42.0 
##             42.0 
##             42.0 
##             42.0 
##             42.0 
##             42.0 
##             41.0 
##             40.0 
##             40.0 
##             40.0 
##             40.0 
## 
## + 10 more outliers 
## 
## 
## Bin Width: 2 
## Number of Bins: 14 
##  
##      Bin  Midpnt  Count    Prop  Cumul.c  Cumul.p 
## ------------------------------------------------- 
##  16 > 18      17     27    0.06       27     0.06 
##  18 > 20      19    100    0.23      127     0.29 
##  20 > 22      21     79    0.18      206     0.48 
##  22 > 24      23     72    0.17      278     0.64 
##  24 > 26      25     42    0.10      320     0.74 
##  26 > 28      27     25    0.06      345     0.80 
##  28 > 30      29     23    0.05      368     0.85 
##  30 > 32      31     14    0.03      382     0.88 
##  32 > 34      33      8    0.02      390     0.90 
##  34 > 36      35     11    0.03      401     0.93 
##  36 > 38      37      8    0.02      409     0.95 
##  38 > 40      39      9    0.02      418     0.97 
##  40 > 42      41      7    0.02      425     0.98 
##  42 > 44      43      7    0.02      432     1.00
# Bar chart of the married categories; lessR also prints the frequency
# table and a chi-squared test of equal probabilities.
BarChart(married, data = df)

## >>> Suggestions
## BarChart(married, horiz=TRUE)  # horizontal bar chart
## BarChart(married, fill="reds")  # red bars of varying lightness
## PieChart(married)  # doughnut (ring) chart
## Plot(married)  # bubble plot
## Plot(married, stat="count")  # lollipop plot 
## 
## --- married --- 
## 
## Missing Values: 0 
## 
##                married  not married     Total 
## Frequencies:        53          379       432 
## Proportions:     0.123        0.877     1.000 
## 
## Chi-squared test of null hypothesis of equal probabilities 
##   Chisq = 246.009, df = 1, p-value = 0.000
# Scatterplot of week against age with a fitted linear ("lm") line;
# lessR also prints the Pearson correlation and the fitted coefficients.
Plot(
  age,
  week,
  data = df,
  fit = "lm"
)

## 
## >>> Suggestions  or  enter: style(suggest=FALSE)
## Plot(age, week, enhance=TRUE)  # many options
## Plot(age, week, fill="skyblue")  # interior fill color of points
## Plot(age, week, MD_cut=6)  # Mahalanobis distance from center > 6 is an outlier 
## 
## 
## >>> Pearson's product-moment correlation 
##  
## Number of paired values with neither missing, n = 432 
## Sample Correlation of age and week: r = 0.099 
##   
## Hypothesis Test of 0 Correlation:  t = 2.072,  df = 430,  p-value = 0.039 
## 95% Confidence Interval for Correlation:  0.005 to 0.192 
##   
## 
##  Line: b0 = 40.79   b1 = 0.21    Fit: MSE = 159.117   Rsq = 0.010
##