Libraries

library(vtable)
library(table1)
library(psych)

Base R

# Read the arrests workbook into `usar`.
# NOTE(review): the path below is absolute and machine-specific, so this step
# only runs where that exact file exists.
library(readxl)
usar <- read_excel(path = 'C:/Users/я/Documents/USArrests.xlsx')



# Per-column summary (min, quartiles, median, mean, max) of USArrests.
# Note that this is the USArrests object, not the `usar` table read above.
summary(object = USArrests)
##      Murder          Assault         UrbanPop          Rape      
##  Min.   : 0.800   Min.   : 45.0   Min.   :32.00   Min.   : 7.30  
##  1st Qu.: 4.075   1st Qu.:109.0   1st Qu.:54.50   1st Qu.:15.07  
##  Median : 7.250   Median :159.0   Median :66.00   Median :20.10  
##  Mean   : 7.788   Mean   :170.8   Mean   :65.54   Mean   :21.23  
##  3rd Qu.:11.250   3rd Qu.:249.0   3rd Qu.:77.75   3rd Qu.:26.18  
##  Max.   :17.400   Max.   :337.0   Max.   :91.00   Max.   :46.00

Some improvements to the presentation of the tables with knitr:

library(knitr)
library(boot)
## 
## Attaching package: 'boot'
## The following object is masked from 'package:psych':
## 
##     logit
# Render the column-wise summary of `usar` as a styled table.
# NOTE(review): kable_styling() is not a knitr function; presumably kableExtra
# is attached somewhere outside this view -- confirm.
kable_styling(kable(summary(usar)))
State TypCrime Arrests UrbanPop
Length:150 Length:150 Min. : 0.80 Min. :32.00
Class :character Class :character 1st Qu.: 9.55 1st Qu.:54.00
Mode :character Mode :character Median : 20.10 Median :66.00
NA NA Mean : 66.59 Mean :65.54
NA NA 3rd Qu.:108.25 3rd Qu.:78.00
NA NA Max. :337.00 Max. :91.00
# Show the first rows of `usar` as a styled table.
# NOTE(review): kable_styling() is not a knitr function; presumably kableExtra
# is attached somewhere outside this view -- confirm.
kable_styling(kable(head(usar)))
State TypCrime Arrests UrbanPop
Alabama Murder 13.2 58
Alaska Murder 10.0 48
Arizona Murder 8.1 80
Arkansas Murder 8.8 50
California Murder 9.0 91
Colorado Murder 7.9 78
# Show the first rows of `melanoma` as a styled table.
# NOTE(review): `melanoma` is presumably the dataset shipped with boot (attached
# above), and kable_styling() presumably comes from kableExtra loaded outside
# this view -- confirm both.
kable_styling(kable(head(melanoma)))
time status sex age year thickness ulcer
10 3 1 76 1972 6.76 1
30 3 1 56 1968 0.65 0
35 2 1 41 1977 1.34 0
99 3 0 71 1968 2.90 0
185 1 1 52 1965 12.08 1
204 1 1 28 1971 4.84 1

TABLE1

## First example: the modified USArrests table (`usar`)
# Recode on a copy so that `usar` itself keeps its original columns.
usar1 <- usar

# State becomes a factor and gets a display label for the table row header.
usar1$State <- factor(usar1$State)
label(usar1$State) <- "State"

# Fix the crime order explicitly; "Murder" comes first (reference level).
# The labels would equal the levels, so they are left at their default.
usar1$TypCrime <- factor(
  usar1$TypCrime,
  levels = c("Murder", "Assault", "Rape")
)

# Counts per state, stratified by crime type.
table1(~ State | TypCrime, data = usar1)
Murder
(N=50)
Assault
(N=50)
Rape
(N=50)
Overall
(N=150)
State
Alabama 1 (2.0%) 1 (2.0%) 1 (2.0%) 3 (2.0%)
Alaska 1 (2.0%) 1 (2.0%) 1 (2.0%) 3 (2.0%)
Arizona 1 (2.0%) 1 (2.0%) 1 (2.0%) 3 (2.0%)
Arkansas 1 (2.0%) 1 (2.0%) 1 (2.0%) 3 (2.0%)
California 1 (2.0%) 1 (2.0%) 1 (2.0%) 3 (2.0%)
Colorado 1 (2.0%) 1 (2.0%) 1 (2.0%) 3 (2.0%)
Connecticut 1 (2.0%) 1 (2.0%) 1 (2.0%) 3 (2.0%)
Delaware 1 (2.0%) 1 (2.0%) 1 (2.0%) 3 (2.0%)
Florida 1 (2.0%) 1 (2.0%) 1 (2.0%) 3 (2.0%)
Georgia 1 (2.0%) 1 (2.0%) 1 (2.0%) 3 (2.0%)
Hawaii 1 (2.0%) 1 (2.0%) 1 (2.0%) 3 (2.0%)
Idaho 1 (2.0%) 1 (2.0%) 1 (2.0%) 3 (2.0%)
Illinois 1 (2.0%) 1 (2.0%) 1 (2.0%) 3 (2.0%)
Indiana 1 (2.0%) 1 (2.0%) 1 (2.0%) 3 (2.0%)
Iowa 1 (2.0%) 1 (2.0%) 1 (2.0%) 3 (2.0%)
Kansas 1 (2.0%) 1 (2.0%) 1 (2.0%) 3 (2.0%)
Kentucky 1 (2.0%) 1 (2.0%) 1 (2.0%) 3 (2.0%)
Louisiana 1 (2.0%) 1 (2.0%) 1 (2.0%) 3 (2.0%)
Maine 1 (2.0%) 1 (2.0%) 1 (2.0%) 3 (2.0%)
Maryland 1 (2.0%) 1 (2.0%) 1 (2.0%) 3 (2.0%)
Massachusetts 1 (2.0%) 1 (2.0%) 1 (2.0%) 3 (2.0%)
Michigan 1 (2.0%) 1 (2.0%) 1 (2.0%) 3 (2.0%)
Minnesota 1 (2.0%) 1 (2.0%) 1 (2.0%) 3 (2.0%)
Mississippi 1 (2.0%) 1 (2.0%) 1 (2.0%) 3 (2.0%)
Missouri 1 (2.0%) 1 (2.0%) 1 (2.0%) 3 (2.0%)
Montana 1 (2.0%) 1 (2.0%) 1 (2.0%) 3 (2.0%)
Nebraska 1 (2.0%) 1 (2.0%) 1 (2.0%) 3 (2.0%)
Nevada 1 (2.0%) 1 (2.0%) 1 (2.0%) 3 (2.0%)
New Hampshire 1 (2.0%) 1 (2.0%) 1 (2.0%) 3 (2.0%)
New Jersey 1 (2.0%) 1 (2.0%) 1 (2.0%) 3 (2.0%)
New Mexico 1 (2.0%) 1 (2.0%) 1 (2.0%) 3 (2.0%)
New York 1 (2.0%) 1 (2.0%) 1 (2.0%) 3 (2.0%)
North Carolina 1 (2.0%) 1 (2.0%) 1 (2.0%) 3 (2.0%)
North Dakota 1 (2.0%) 1 (2.0%) 1 (2.0%) 3 (2.0%)
Ohio 1 (2.0%) 1 (2.0%) 1 (2.0%) 3 (2.0%)
Oklahoma 1 (2.0%) 1 (2.0%) 1 (2.0%) 3 (2.0%)
Oregon 1 (2.0%) 1 (2.0%) 1 (2.0%) 3 (2.0%)
Pennsylvania 1 (2.0%) 1 (2.0%) 1 (2.0%) 3 (2.0%)
Rhode Island 1 (2.0%) 1 (2.0%) 1 (2.0%) 3 (2.0%)
South Carolina 1 (2.0%) 1 (2.0%) 1 (2.0%) 3 (2.0%)
South Dakota 1 (2.0%) 1 (2.0%) 1 (2.0%) 3 (2.0%)
Tennessee 1 (2.0%) 1 (2.0%) 1 (2.0%) 3 (2.0%)
Texas 1 (2.0%) 1 (2.0%) 1 (2.0%) 3 (2.0%)
Utah 1 (2.0%) 1 (2.0%) 1 (2.0%) 3 (2.0%)
Vermont 1 (2.0%) 1 (2.0%) 1 (2.0%) 3 (2.0%)
Virginia 1 (2.0%) 1 (2.0%) 1 (2.0%) 3 (2.0%)
Washington 1 (2.0%) 1 (2.0%) 1 (2.0%) 3 (2.0%)
West Virginia 1 (2.0%) 1 (2.0%) 1 (2.0%) 3 (2.0%)
Wisconsin 1 (2.0%) 1 (2.0%) 1 (2.0%) 3 (2.0%)
Wyoming 1 (2.0%) 1 (2.0%) 1 (2.0%) 3 (2.0%)
# Distribution of the Arrests column, used to pick the banding cut-off.
summary(usar[["Arrests"]])
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    0.80    9.55   20.10   66.59  108.25  337.00
library(ggpubr)
## Loading required package: ggplot2
## Warning: package 'ggplot2' was built under R version 4.0.2
## 
## Attaching package: 'ggplot2'
## The following objects are masked from 'package:psych':
## 
##     %+%, alpha
# Histogram of the Arrests column of `usar`.
# `bins` is set explicitly to the value stat_bin() was falling back to, so the
# plot is unchanged but the "Pick better value with `binwidth`" message is no
# longer emitted.
ggplot(usar, aes(Arrests)) +
  geom_histogram(
    bins = 30,
    color = "Lightsteelblue2",
    fill = "lightsteelblue1"
  ) +
  theme_pubclean() +
  labs(y = NULL) +
  ylim(0, 60)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

# Split arrests into two bands at 40: values up to and including 40 are "Low",
# anything above is "High". A missing value stays NA because the comparison
# itself evaluates to NA.
# Note: this replaces the numeric Arrests column of usar1 with character labels.
usar1$Arrests <- ifelse(usar$Arrests <= 40, "Low", "High")

# State and crime-type counts, stratified by the arrest band.
table1(~ State + TypCrime | Arrests, data = usar1)
High
(N=53)
Low
(N=97)
Overall
(N=150)
State
Alabama 1 (1.9%) 2 (2.1%) 3 (2.0%)
Alaska 2 (3.8%) 1 (1.0%) 3 (2.0%)
Arizona 1 (1.9%) 2 (2.1%) 3 (2.0%)
Arkansas 1 (1.9%) 2 (2.1%) 3 (2.0%)
California 2 (3.8%) 1 (1.0%) 3 (2.0%)
Colorado 1 (1.9%) 2 (2.1%) 3 (2.0%)
Connecticut 1 (1.9%) 2 (2.1%) 3 (2.0%)
Delaware 1 (1.9%) 2 (2.1%) 3 (2.0%)
Florida 1 (1.9%) 2 (2.1%) 3 (2.0%)
Georgia 1 (1.9%) 2 (2.1%) 3 (2.0%)
Hawaii 1 (1.9%) 2 (2.1%) 3 (2.0%)
Idaho 1 (1.9%) 2 (2.1%) 3 (2.0%)
Illinois 1 (1.9%) 2 (2.1%) 3 (2.0%)
Indiana 1 (1.9%) 2 (2.1%) 3 (2.0%)
Iowa 1 (1.9%) 2 (2.1%) 3 (2.0%)
Kansas 1 (1.9%) 2 (2.1%) 3 (2.0%)
Kentucky 1 (1.9%) 2 (2.1%) 3 (2.0%)
Louisiana 1 (1.9%) 2 (2.1%) 3 (2.0%)
Maine 1 (1.9%) 2 (2.1%) 3 (2.0%)
Maryland 1 (1.9%) 2 (2.1%) 3 (2.0%)
Massachusetts 1 (1.9%) 2 (2.1%) 3 (2.0%)
Michigan 1 (1.9%) 2 (2.1%) 3 (2.0%)
Minnesota 1 (1.9%) 2 (2.1%) 3 (2.0%)
Mississippi 1 (1.9%) 2 (2.1%) 3 (2.0%)
Missouri 1 (1.9%) 2 (2.1%) 3 (2.0%)
Montana 1 (1.9%) 2 (2.1%) 3 (2.0%)
Nebraska 1 (1.9%) 2 (2.1%) 3 (2.0%)
Nevada 2 (3.8%) 1 (1.0%) 3 (2.0%)
New Hampshire 1 (1.9%) 2 (2.1%) 3 (2.0%)
New Jersey 1 (1.9%) 2 (2.1%) 3 (2.0%)
New Mexico 1 (1.9%) 2 (2.1%) 3 (2.0%)
New York 1 (1.9%) 2 (2.1%) 3 (2.0%)
North Carolina 1 (1.9%) 2 (2.1%) 3 (2.0%)
North Dakota 1 (1.9%) 2 (2.1%) 3 (2.0%)
Ohio 1 (1.9%) 2 (2.1%) 3 (2.0%)
Oklahoma 1 (1.9%) 2 (2.1%) 3 (2.0%)
Oregon 1 (1.9%) 2 (2.1%) 3 (2.0%)
Pennsylvania 1 (1.9%) 2 (2.1%) 3 (2.0%)
Rhode Island 1 (1.9%) 2 (2.1%) 3 (2.0%)
South Carolina 1 (1.9%) 2 (2.1%) 3 (2.0%)
South Dakota 1 (1.9%) 2 (2.1%) 3 (2.0%)
Tennessee 1 (1.9%) 2 (2.1%) 3 (2.0%)
Texas 1 (1.9%) 2 (2.1%) 3 (2.0%)
Utah 1 (1.9%) 2 (2.1%) 3 (2.0%)
Vermont 1 (1.9%) 2 (2.1%) 3 (2.0%)
Virginia 1 (1.9%) 2 (2.1%) 3 (2.0%)
Washington 1 (1.9%) 2 (2.1%) 3 (2.0%)
West Virginia 1 (1.9%) 2 (2.1%) 3 (2.0%)
Wisconsin 1 (1.9%) 2 (2.1%) 3 (2.0%)
Wyoming 1 (1.9%) 2 (2.1%) 3 (2.0%)
TypCrime
Murder 0 (0%) 50 (51.5%) 50 (33.3%)
Assault 50 (94.3%) 0 (0%) 50 (33.3%)
Rape 3 (5.7%) 47 (48.5%) 50 (33.3%)
###-------------------------------------------------------------------------------------

## Second example: the melanoma dataset
# Recode on a copy so that `melanoma` itself is left as loaded.
melanoma2 <- melanoma

# Outcome: code 2 is listed first so that "Alive" is the reference level.
melanoma2$status <- factor(
  melanoma2$status,
  levels = c(2, 1, 3),
  labels = c("Alive", "Melanoma death", "Non-melanoma death")
)

# Sex: code 1 -> "Male", code 0 -> "Female"; "Male" is listed first.
melanoma2$sex <- factor(
  melanoma2$sex,
  levels = c(1, 0),
  labels = c("Male", "Female")
)
label(melanoma2$sex) <- "Sex"

# Ulceration: code 0 -> "Absent", code 1 -> "Present".
melanoma2$ulcer <- factor(
  melanoma2$ulcer,
  levels = c(0, 1),
  labels = c("Absent", "Present")
)
label(melanoma2$ulcer) <- "Ulceration"

# Continuous variables carry a display label and a unit for the row header.
label(melanoma2$age) <- "Age"
units(melanoma2$age) <- "years"

label(melanoma2$thickness) <- "Thickness"
units(melanoma2$thickness) <- "mm"

# Descriptive table stratified by outcome; the pooled column is titled "Total".
table1(
  ~ sex + age + ulcer + thickness | status,
  data = melanoma2,
  overall = "Total"
)
Alive
(N=134)
Melanoma death
(N=57)
Non-melanoma death
(N=14)
Total
(N=205)
Sex
Male 43 (32.1%) 29 (50.9%) 7 (50.0%) 79 (38.5%)
Female 91 (67.9%) 28 (49.1%) 7 (50.0%) 126 (61.5%)
Age (years)
Mean (SD) 50.0 (15.9) 55.1 (17.9) 65.3 (10.9) 52.5 (16.7)
Median [Min, Max] 52.0 [4.00, 84.0] 56.0 [14.0, 95.0] 65.0 [49.0, 86.0] 54.0 [4.00, 95.0]
Ulceration
Absent 92 (68.7%) 16 (28.1%) 7 (50.0%) 115 (56.1%)
Present 42 (31.3%) 41 (71.9%) 7 (50.0%) 90 (43.9%)
Thickness (mm)
Mean (SD) 2.24 (2.33) 4.31 (3.57) 3.72 (3.63) 2.92 (2.96)
Median [Min, Max] 1.36 [0.100, 12.9] 3.54 [0.320, 17.4] 2.26 [0.160, 12.6] 1.94 [0.100, 17.4]

VTABLE and PSYCH

## Psych and vtable comparison: variable overview of `usar` with vtable
vtable(
  usar,
  data.title = "USA arrests",
  char.values = TRUE,
  lush = TRUE,
  col.width = c(2, 2, 5, 2, 4),
  col.align = "left"
)
USA arrests
Name Class Values Missing Summary
State character ‘Alabama’ ‘Alaska’ ‘Arizona’ ‘Arkansas’ ‘California’ and more 0 nuniq: 50
TypCrime character ‘Assault’ ‘Murder’ ‘Rape’ 0 nuniq: 3
Arrests numeric Num: 0.8 to 337 0 mean: 66.593<br>sd: 88.381<br>nuniq: 131
UrbanPop numeric Num: 32 to 91 0 mean: 65.54<br>sd: 14.377<br>nuniq: 36
# Descriptive statistics for the numeric columns of `usar`.
# The character columns (State, TypCrime) cannot be coerced to numbers: passing
# them produced NA-coercion warnings and rows of NaN / Inf / -Inf. They are
# dropped here, so the `vars` index now counts the numeric columns only.
describe(usar[vapply(usar, is.numeric, logical(1))], check = TRUE)
## Warning in describe(usar, check = TRUE): в результате преобразования созданы NA

## Warning in describe(usar, check = TRUE): в результате преобразования созданы NA
## Warning in FUN(newX[, i], ...): у 'min' нет не пропущенных аргументов; возвращаю
## Inf

## Warning in FUN(newX[, i], ...): у 'min' нет не пропущенных аргументов; возвращаю
## Inf
## Warning in FUN(newX[, i], ...): у 'max' нет не пропущенных аргументов; возвращаю
## -Inf

## Warning in FUN(newX[, i], ...): у 'max' нет не пропущенных аргументов; возвращаю
## -Inf
##           vars   n  mean    sd median trimmed   mad  min  max range  skew
## State*       1 150   NaN    NA     NA     NaN    NA  Inf -Inf  -Inf    NA
## TypCrime*    2 150   NaN    NA     NA     NaN    NA  Inf -Inf  -Inf    NA
## Arrests      3 150 66.59 88.38   20.1   48.50 20.90  0.8  337 336.2  1.48
## UrbanPop     4 150 65.54 14.38   66.0   65.88 17.79 32.0   91  59.0 -0.22
##           kurtosis   se
## State*          NA   NA
## TypCrime*       NA   NA
## Arrests       0.94 7.22
## UrbanPop     -0.81 1.17
# Summary-statistics table of `usar`, with left-aligned columns.
sumtable(data = usar, col.align = "left")
Summary Statistics
Variable N Mean Std. Dev. Min Pctl. 25 Pctl. 75 Max
TypCrime 150
… Assault 50 33.3%
… Murder 50 33.3%
… Rape 50 33.3%
Arrests 150 66.593 88.381 0.8 9.55 108.25 337
UrbanPop 150 65.54 14.377 32 54 78 91

A psych feature: a diagram of the regression model

## Linear model and its base R summary
# The summary is stored in `mod1`; it is not printed at this point.
model1 <- lm(formula = Arrests ~ UrbanPop, data = usar)
mod1 <- summary(object = model1)

## Diagram of the same model
# The same formula is fitted with setCor (std = FALSE is passed explicitly),
# and the fitted object is handed to setCor.diagram() for drawing.
p <- setCor(Arrests ~ UrbanPop, data = usar, std = FALSE)
setCor.diagram(p)