library(table1)
##
## Attaching package: 'table1'
## The following objects are masked from 'package:base':
##
## units, units<-
library(lessR)
##
## lessR 4.3.9 feedback: gerbing@pdx.edu
## --------------------------------------------------------------
## > d <- Read("") Read text, Excel, SPSS, SAS, or R data file
## d is default data frame, data= in analysis routines optional
##
## Many examples of reading, writing, and manipulating data,
## graphics, testing means and proportions, regression, factor analysis,
## customization, and descriptive statistics from pivot tables
## Enter: browseVignettes("lessR")
##
## View lessR updates, now including time series forecasting
## Enter: news(package="lessR")
##
## Interactive data analysis
## Enter: interact()
##
## Attaching package: 'lessR'
## The following object is masked from 'package:table1':
##
## label
## The following object is masked from 'package:base':
##
## sort_by
library(ggplot2)
# Read the arrest dataset from a local CSV file into the data frame `df`,
# which the later summary and plotting calls receive via `data = df`.
# NOTE(review): the path is absolute and machine-specific; the script only
# runs where the file exists at exactly this location.
df <- read.csv(
  "/Users/121493/Dropbox/_Conferences and Workshops/Datasets/Arrest dataset.csv"
)
# Show the first rows to confirm the import.
head(df)
## id age finance week arrest race work.exp married parole prior educ
## 1 1 27 no 20 1 black no not married yes 3 3
## 2 2 18 no 17 1 black no not married yes 8 4
## 3 3 19 no 25 1 other yes not married yes 13 3
## 4 4 23 yes 52 0 black yes married yes 1 5
## 5 5 19 no 52 0 other yes not married yes 3 3
## 6 6 24 no 52 0 black yes not married no 2 4
## employ1
## 1 no
## 2 no
## 3 no
## 4 no
## 5 no
## 6 no
# Descriptive summary of age, finance, race, married and parole,
# with one column per value of `arrest` plus an overall column.
table1(
  ~ age + finance + race + married + parole | arrest,
  data = df
)
## | | 0 (N=318) | 1 (N=114) | Overall (N=432) |
## |---|---|---|---|
## | age | | | |
## | Mean (SD) | 25.3 (6.31) | 22.8 (5.12) | 24.6 (6.11) |
## | Median [Min, Max] | 23.0 [17.0, 44.0] | 21.0 [17.0, 44.0] | 23.0 [17.0, 44.0] |
## | finance | | | |
## | no | 150 (47.2%) | 66 (57.9%) | 216 (50.0%) |
## | yes | 168 (52.8%) | 48 (42.1%) | 216 (50.0%) |
## | race | | | |
## | black | 277 (87.1%) | 102 (89.5%) | 379 (87.7%) |
## | other | 41 (12.9%) | 12 (10.5%) | 53 (12.3%) |
## | married | | | |
## | married | 45 (14.2%) | 8 (7.0%) | 53 (12.3%) |
## | not married | 273 (85.8%) | 106 (93.0%) | 379 (87.7%) |
## | parole | | | |
## | no | 119 (37.4%) | 46 (40.4%) | 165 (38.2%) |
## | yes | 199 (62.6%) | 68 (59.6%) | 267 (61.8%) |
# Histogram of `age` from `df`, drawn with blue bars and custom axis labels.
Histogram(
  age,
  data = df,
  fill = "blue",
  xlab = "Tuoi",
  ylab = "Freq"
)
## >>> Suggestions
## bin_width: set the width of each bin
## bin_start: set the start of the first bin
## bin_end: set the end of the last bin
## Histogram(age, density=TRUE) # smoothed curve + histogram
## Plot(age) # Violin/Box/Scatterplot (VBS) plot
##
## --- age ---
##
## n miss mean sd min mdn max
## 432 0 24.60 6.11 17.00 23.00 44.00
##
##
## --- Outliers --- from the box plot: 28
##
## Small Large
## ----- -----
## 44.0
## 44.0
## 44.0
## 43.0
## 43.0
## 43.0
## 43.0
## 42.0
## 42.0
## 42.0
## 42.0
## 42.0
## 42.0
## 41.0
## 40.0
## 40.0
## 40.0
## 40.0
##
## + 10 more outliers
##
##
## Bin Width: 2
## Number of Bins: 14
##
## Bin Midpnt Count Prop Cumul.c Cumul.p
## -------------------------------------------------
## 16 > 18 17 27 0.06 27 0.06
## 18 > 20 19 100 0.23 127 0.29
## 20 > 22 21 79 0.18 206 0.48
## 22 > 24 23 72 0.17 278 0.64
## 24 > 26 25 42 0.10 320 0.74
## 26 > 28 27 25 0.06 345 0.80
## 28 > 30 29 23 0.05 368 0.85
## 30 > 32 31 14 0.03 382 0.88
## 32 > 34 33 8 0.02 390 0.90
## 34 > 36 35 11 0.03 401 0.93
## 36 > 38 37 8 0.02 409 0.95
## 38 > 40 39 9 0.02 418 0.97
## 40 > 42 41 7 0.02 425 0.98
## 42 > 44 43 7 0.02 432 1.00
# Bar chart of the category counts of `married` in `df`.
BarChart(
  married,
  data = df
)
## >>> Suggestions
## BarChart(married, horiz=TRUE) # horizontal bar chart
## BarChart(married, fill="reds") # red bars of varying lightness
## PieChart(married) # doughnut (ring) chart
## Plot(married) # bubble plot
## Plot(married, stat="count") # lollipop plot
##
## --- married ---
##
## Missing Values: 0
##
## married not married Total
## Frequencies: 53 379 432
## Proportions: 0.123 0.877 1.000
##
## Chi-squared test of null hypothesis of equal probabilities
## Chisq = 246.009, df = 1, p-value = 0.000
# Scatterplot of `week` against `age` with a fitted linear ("lm") line.
Plot(
  age,
  week,
  data = df,
  fit = "lm"
)
##
## >>> Suggestions or enter: style(suggest=FALSE)
## Plot(age, week, enhance=TRUE) # many options
## Plot(age, week, fill="skyblue") # interior fill color of points
## Plot(age, week, MD_cut=6) # Mahalanobis distance from center > 6 is an outlier
##
##
## >>> Pearson's product-moment correlation
##
## Number of paired values with neither missing, n = 432
## Sample Correlation of age and week: r = 0.099
##
## Hypothesis Test of 0 Correlation: t = 2.072, df = 430, p-value = 0.039
## 95% Confidence Interval for Correlation: 0.005 to 0.192
##
##
## Line: b0 = 40.79 b1 = 0.21 Fit: MSE = 159.117 Rsq = 0.010
##