library(vtable)
library(table1)
library(psych)
library(readxl)
# Import the arrests workbook into `usar`
usar <- read_excel(
  path = 'C:/Users/я/Documents/USArrests.xlsx'
)
# Summarise the USArrests data frame (not the `usar` object loaded above)
summary(USArrests)
## Murder Assault UrbanPop Rape
## Min. : 0.800 Min. : 45.0 Min. :32.00 Min. : 7.30
## 1st Qu.: 4.075 1st Qu.:109.0 1st Qu.:54.50 1st Qu.:15.07
## Median : 7.250 Median :159.0 Median :66.00 Median :20.10
## Mean : 7.788 Mean :170.8 Mean :65.54 Mean :21.23
## 3rd Qu.:11.250 3rd Qu.:249.0 3rd Qu.:77.75 3rd Qu.:26.18
## Max. :17.400 Max. :337.0 Max. :91.00 Max. :46.00
Some improvements to the presentation using knitr:
library(knitr)
library(boot)
##
## Attaching package: 'boot'
## The following object is masked from 'package:psych':
##
## logit
# Column-wise summary of `usar`, rendered as a styled kable table
usar %>%
  summary() %>%
  kable() %>%
  kable_styling()
| State | TypCrime | Arrests | UrbanPop | |
|---|---|---|---|---|
| Length:150 | Length:150 | Min. : 0.80 | Min. :32.00 | |
| Class :character | Class :character | 1st Qu.: 9.55 | 1st Qu.:54.00 | |
| Mode :character | Mode :character | Median : 20.10 | Median :66.00 | |
| NA | NA | Mean : 66.59 | Mean :65.54 | |
| NA | NA | 3rd Qu.:108.25 | 3rd Qu.:78.00 | |
| NA | NA | Max. :337.00 | Max. :91.00 |
# First rows of `usar`, rendered as a styled kable table
usar %>%
  head() %>%
  kable() %>%
  kable_styling()
| State | TypCrime | Arrests | UrbanPop |
|---|---|---|---|
| Alabama | Murder | 13.2 | 58 |
| Alaska | Murder | 10.0 | 48 |
| Arizona | Murder | 8.1 | 80 |
| Arkansas | Murder | 8.8 | 50 |
| California | Murder | 9.0 | 91 |
| Colorado | Murder | 7.9 | 78 |
# First rows of the `melanoma` data, rendered as a styled kable table
melanoma %>%
  head() %>%
  kable() %>%
  kable_styling()
| time | status | sex | age | year | thickness | ulcer |
|---|---|---|---|---|---|---|
| 10 | 3 | 1 | 76 | 1972 | 6.76 | 1 |
| 30 | 3 | 1 | 56 | 1968 | 0.65 | 0 |
| 35 | 2 | 1 | 41 | 1977 | 1.34 | 0 |
| 99 | 3 | 0 | 71 | 1968 | 2.90 | 0 |
| 185 | 1 | 1 | 52 | 1965 | 12.08 | 1 |
| 204 | 1 | 1 | 28 | 1971 | 4.84 | 1 |
## One type of data - USArrests modified
# Work on a copy so the imported `usar` stays untouched
usar1 <- usar

# Crime type as a factor; the first level ("Murder") is the reference
usar1[["TypCrime"]] <- factor(
  usar[["TypCrime"]],
  levels = c("Murder", "Assault", "Rape"),
  labels = c("Murder", "Assault", "Rape")
)

# State as a factor, with a display label for the table
usar1[["State"]] <- factor(usar[["State"]])
label(usar1[["State"]]) <- "State"

# States tabulated by crime type
table1(~ State | TypCrime, data = usar1)
| | Murder (N=50) | Assault (N=50) | Rape (N=50) | Overall (N=150) |
|---|---|---|---|---|
| State | ||||
| Alabama | 1 (2.0%) | 1 (2.0%) | 1 (2.0%) | 3 (2.0%) |
| Alaska | 1 (2.0%) | 1 (2.0%) | 1 (2.0%) | 3 (2.0%) |
| Arizona | 1 (2.0%) | 1 (2.0%) | 1 (2.0%) | 3 (2.0%) |
| Arkansas | 1 (2.0%) | 1 (2.0%) | 1 (2.0%) | 3 (2.0%) |
| California | 1 (2.0%) | 1 (2.0%) | 1 (2.0%) | 3 (2.0%) |
| Colorado | 1 (2.0%) | 1 (2.0%) | 1 (2.0%) | 3 (2.0%) |
| Connecticut | 1 (2.0%) | 1 (2.0%) | 1 (2.0%) | 3 (2.0%) |
| Delaware | 1 (2.0%) | 1 (2.0%) | 1 (2.0%) | 3 (2.0%) |
| Florida | 1 (2.0%) | 1 (2.0%) | 1 (2.0%) | 3 (2.0%) |
| Georgia | 1 (2.0%) | 1 (2.0%) | 1 (2.0%) | 3 (2.0%) |
| Hawaii | 1 (2.0%) | 1 (2.0%) | 1 (2.0%) | 3 (2.0%) |
| Idaho | 1 (2.0%) | 1 (2.0%) | 1 (2.0%) | 3 (2.0%) |
| Illinois | 1 (2.0%) | 1 (2.0%) | 1 (2.0%) | 3 (2.0%) |
| Indiana | 1 (2.0%) | 1 (2.0%) | 1 (2.0%) | 3 (2.0%) |
| Iowa | 1 (2.0%) | 1 (2.0%) | 1 (2.0%) | 3 (2.0%) |
| Kansas | 1 (2.0%) | 1 (2.0%) | 1 (2.0%) | 3 (2.0%) |
| Kentucky | 1 (2.0%) | 1 (2.0%) | 1 (2.0%) | 3 (2.0%) |
| Louisiana | 1 (2.0%) | 1 (2.0%) | 1 (2.0%) | 3 (2.0%) |
| Maine | 1 (2.0%) | 1 (2.0%) | 1 (2.0%) | 3 (2.0%) |
| Maryland | 1 (2.0%) | 1 (2.0%) | 1 (2.0%) | 3 (2.0%) |
| Massachusetts | 1 (2.0%) | 1 (2.0%) | 1 (2.0%) | 3 (2.0%) |
| Michigan | 1 (2.0%) | 1 (2.0%) | 1 (2.0%) | 3 (2.0%) |
| Minnesota | 1 (2.0%) | 1 (2.0%) | 1 (2.0%) | 3 (2.0%) |
| Mississippi | 1 (2.0%) | 1 (2.0%) | 1 (2.0%) | 3 (2.0%) |
| Missouri | 1 (2.0%) | 1 (2.0%) | 1 (2.0%) | 3 (2.0%) |
| Montana | 1 (2.0%) | 1 (2.0%) | 1 (2.0%) | 3 (2.0%) |
| Nebraska | 1 (2.0%) | 1 (2.0%) | 1 (2.0%) | 3 (2.0%) |
| Nevada | 1 (2.0%) | 1 (2.0%) | 1 (2.0%) | 3 (2.0%) |
| New Hampshire | 1 (2.0%) | 1 (2.0%) | 1 (2.0%) | 3 (2.0%) |
| New Jersey | 1 (2.0%) | 1 (2.0%) | 1 (2.0%) | 3 (2.0%) |
| New Mexico | 1 (2.0%) | 1 (2.0%) | 1 (2.0%) | 3 (2.0%) |
| New York | 1 (2.0%) | 1 (2.0%) | 1 (2.0%) | 3 (2.0%) |
| North Carolina | 1 (2.0%) | 1 (2.0%) | 1 (2.0%) | 3 (2.0%) |
| North Dakota | 1 (2.0%) | 1 (2.0%) | 1 (2.0%) | 3 (2.0%) |
| Ohio | 1 (2.0%) | 1 (2.0%) | 1 (2.0%) | 3 (2.0%) |
| Oklahoma | 1 (2.0%) | 1 (2.0%) | 1 (2.0%) | 3 (2.0%) |
| Oregon | 1 (2.0%) | 1 (2.0%) | 1 (2.0%) | 3 (2.0%) |
| Pennsylvania | 1 (2.0%) | 1 (2.0%) | 1 (2.0%) | 3 (2.0%) |
| Rhode Island | 1 (2.0%) | 1 (2.0%) | 1 (2.0%) | 3 (2.0%) |
| South Carolina | 1 (2.0%) | 1 (2.0%) | 1 (2.0%) | 3 (2.0%) |
| South Dakota | 1 (2.0%) | 1 (2.0%) | 1 (2.0%) | 3 (2.0%) |
| Tennessee | 1 (2.0%) | 1 (2.0%) | 1 (2.0%) | 3 (2.0%) |
| Texas | 1 (2.0%) | 1 (2.0%) | 1 (2.0%) | 3 (2.0%) |
| Utah | 1 (2.0%) | 1 (2.0%) | 1 (2.0%) | 3 (2.0%) |
| Vermont | 1 (2.0%) | 1 (2.0%) | 1 (2.0%) | 3 (2.0%) |
| Virginia | 1 (2.0%) | 1 (2.0%) | 1 (2.0%) | 3 (2.0%) |
| Washington | 1 (2.0%) | 1 (2.0%) | 1 (2.0%) | 3 (2.0%) |
| West Virginia | 1 (2.0%) | 1 (2.0%) | 1 (2.0%) | 3 (2.0%) |
| Wisconsin | 1 (2.0%) | 1 (2.0%) | 1 (2.0%) | 3 (2.0%) |
| Wyoming | 1 (2.0%) | 1 (2.0%) | 1 (2.0%) | 3 (2.0%) |
# Five-number summary plus mean of the Arrests column
summary(usar[["Arrests"]])
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.80 9.55 20.10 66.59 108.25 337.00
library(ggpubr)
## Loading required package: ggplot2
## Warning: package 'ggplot2' was built under R version 4.0.2
##
## Attaching package: 'ggplot2'
## The following objects are masked from 'package:psych':
##
## %+%, alpha
# Histogram of Arrests (default binning), with the y-axis fixed to 0-60
ggplot(data = usar, mapping = aes(x = Arrests)) +
  geom_histogram(
    color = "Lightsteelblue2",
    fill = "lightsteelblue1"
  ) +
  theme_pubclean() +
  labs(y = NULL) +
  ylim(0, 60)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
# Dichotomise arrests at 40: values <= 40 become "Low", all others "High".
# A single ifelse() is enough: a missing Arrests value makes the test NA and
# therefore yields NA on its own, so the former nested
# ifelse(usar$Arrests > 40, "High", NA) had an NA branch that was unreachable.
usar1$Arrests <- ifelse(usar$Arrests <= 40, "Low", "High")

# States and crime types broken down by arrest level
table1(~ State + TypCrime | Arrests, data = usar1)
| | High (N=53) | Low (N=97) | Overall (N=150) |
|---|---|---|---|
| State | |||
| Alabama | 1 (1.9%) | 2 (2.1%) | 3 (2.0%) |
| Alaska | 2 (3.8%) | 1 (1.0%) | 3 (2.0%) |
| Arizona | 1 (1.9%) | 2 (2.1%) | 3 (2.0%) |
| Arkansas | 1 (1.9%) | 2 (2.1%) | 3 (2.0%) |
| California | 2 (3.8%) | 1 (1.0%) | 3 (2.0%) |
| Colorado | 1 (1.9%) | 2 (2.1%) | 3 (2.0%) |
| Connecticut | 1 (1.9%) | 2 (2.1%) | 3 (2.0%) |
| Delaware | 1 (1.9%) | 2 (2.1%) | 3 (2.0%) |
| Florida | 1 (1.9%) | 2 (2.1%) | 3 (2.0%) |
| Georgia | 1 (1.9%) | 2 (2.1%) | 3 (2.0%) |
| Hawaii | 1 (1.9%) | 2 (2.1%) | 3 (2.0%) |
| Idaho | 1 (1.9%) | 2 (2.1%) | 3 (2.0%) |
| Illinois | 1 (1.9%) | 2 (2.1%) | 3 (2.0%) |
| Indiana | 1 (1.9%) | 2 (2.1%) | 3 (2.0%) |
| Iowa | 1 (1.9%) | 2 (2.1%) | 3 (2.0%) |
| Kansas | 1 (1.9%) | 2 (2.1%) | 3 (2.0%) |
| Kentucky | 1 (1.9%) | 2 (2.1%) | 3 (2.0%) |
| Louisiana | 1 (1.9%) | 2 (2.1%) | 3 (2.0%) |
| Maine | 1 (1.9%) | 2 (2.1%) | 3 (2.0%) |
| Maryland | 1 (1.9%) | 2 (2.1%) | 3 (2.0%) |
| Massachusetts | 1 (1.9%) | 2 (2.1%) | 3 (2.0%) |
| Michigan | 1 (1.9%) | 2 (2.1%) | 3 (2.0%) |
| Minnesota | 1 (1.9%) | 2 (2.1%) | 3 (2.0%) |
| Mississippi | 1 (1.9%) | 2 (2.1%) | 3 (2.0%) |
| Missouri | 1 (1.9%) | 2 (2.1%) | 3 (2.0%) |
| Montana | 1 (1.9%) | 2 (2.1%) | 3 (2.0%) |
| Nebraska | 1 (1.9%) | 2 (2.1%) | 3 (2.0%) |
| Nevada | 2 (3.8%) | 1 (1.0%) | 3 (2.0%) |
| New Hampshire | 1 (1.9%) | 2 (2.1%) | 3 (2.0%) |
| New Jersey | 1 (1.9%) | 2 (2.1%) | 3 (2.0%) |
| New Mexico | 1 (1.9%) | 2 (2.1%) | 3 (2.0%) |
| New York | 1 (1.9%) | 2 (2.1%) | 3 (2.0%) |
| North Carolina | 1 (1.9%) | 2 (2.1%) | 3 (2.0%) |
| North Dakota | 1 (1.9%) | 2 (2.1%) | 3 (2.0%) |
| Ohio | 1 (1.9%) | 2 (2.1%) | 3 (2.0%) |
| Oklahoma | 1 (1.9%) | 2 (2.1%) | 3 (2.0%) |
| Oregon | 1 (1.9%) | 2 (2.1%) | 3 (2.0%) |
| Pennsylvania | 1 (1.9%) | 2 (2.1%) | 3 (2.0%) |
| Rhode Island | 1 (1.9%) | 2 (2.1%) | 3 (2.0%) |
| South Carolina | 1 (1.9%) | 2 (2.1%) | 3 (2.0%) |
| South Dakota | 1 (1.9%) | 2 (2.1%) | 3 (2.0%) |
| Tennessee | 1 (1.9%) | 2 (2.1%) | 3 (2.0%) |
| Texas | 1 (1.9%) | 2 (2.1%) | 3 (2.0%) |
| Utah | 1 (1.9%) | 2 (2.1%) | 3 (2.0%) |
| Vermont | 1 (1.9%) | 2 (2.1%) | 3 (2.0%) |
| Virginia | 1 (1.9%) | 2 (2.1%) | 3 (2.0%) |
| Washington | 1 (1.9%) | 2 (2.1%) | 3 (2.0%) |
| West Virginia | 1 (1.9%) | 2 (2.1%) | 3 (2.0%) |
| Wisconsin | 1 (1.9%) | 2 (2.1%) | 3 (2.0%) |
| Wyoming | 1 (1.9%) | 2 (2.1%) | 3 (2.0%) |
| TypCrime | |||
| Murder | 0 (0%) | 50 (51.5%) | 50 (33.3%) |
| Assault | 50 (94.3%) | 0 (0%) | 50 (33.3%) |
| Rape | 3 (5.7%) | 47 (48.5%) | 50 (33.3%) |
###-------------------------------------------------------------------------------------
## Another type - melanoma dataset
# Copy of the `melanoma` data (presumably from the boot package loaded earlier)
# with its coded columns converted to labelled factors
melanoma2 <- melanoma

# Outcome status; "Alive" (code 2) is listed first as the reference level
melanoma2[["status"]] <- factor(
  melanoma2[["status"]],
  levels = c(2, 1, 3),
  labels = c("Alive", "Melanoma death", "Non-melanoma death")
)

# Sex: code 1 -> "Male", code 0 -> "Female"
melanoma2[["sex"]] <- factor(
  melanoma2[["sex"]],
  levels = c(1, 0),
  labels = c("Male", "Female")
)

# Ulceration indicator: code 0 -> "Absent", code 1 -> "Present"
melanoma2[["ulcer"]] <- factor(
  melanoma2[["ulcer"]],
  levels = c(0, 1),
  labels = c("Absent", "Present")
)

# Display labels and units used in the table
label(melanoma2[["sex"]]) <- "Sex"
label(melanoma2[["age"]]) <- "Age"
label(melanoma2[["ulcer"]]) <- "Ulceration"
label(melanoma2[["thickness"]]) <- "Thickness"
units(melanoma2[["age"]]) <- "years"
units(melanoma2[["thickness"]]) <- "mm"

# Descriptive table stratified by status, with a "Total" column
table1(
  ~ sex + age + ulcer + thickness | status,
  data = melanoma2,
  overall = "Total"
)
| | Alive (N=134) | Melanoma death (N=57) | Non-melanoma death (N=14) | Total (N=205) |
|---|---|---|---|---|
| Sex | ||||
| Male | 43 (32.1%) | 29 (50.9%) | 7 (50.0%) | 79 (38.5%) |
| Female | 91 (67.9%) | 28 (49.1%) | 7 (50.0%) | 126 (61.5%) |
| Age (years) | ||||
| Mean (SD) | 50.0 (15.9) | 55.1 (17.9) | 65.3 (10.9) | 52.5 (16.7) |
| Median [Min, Max] | 52.0 [4.00, 84.0] | 56.0 [14.0, 95.0] | 65.0 [49.0, 86.0] | 54.0 [4.00, 95.0] |
| Ulceration | ||||
| Absent | 92 (68.7%) | 16 (28.1%) | 7 (50.0%) | 115 (56.1%) |
| Present | 42 (31.3%) | 41 (71.9%) | 7 (50.0%) | 90 (43.9%) |
| Thickness (mm) | ||||
| Mean (SD) | 2.24 (2.33) | 4.31 (3.57) | 3.72 (3.63) | 2.92 (2.96) |
| Median [Min, Max] | 1.36 [0.100, 12.9] | 3.54 [0.320, 17.4] | 2.26 [0.160, 12.6] | 1.94 [0.100, 17.4] |
## Psych and vtable comparison
# Variable overview of `usar` produced with vtable
vtable(
  usar,
  data.title = "USA arrests",
  char.values = TRUE,
  lush = TRUE,
  col.width = c(2, 2, 5, 2, 4),
  col.align = "left"
)
| Name | Class | Values | Missing | Summary |
|---|---|---|---|---|
| State | character | ‘Alabama’ ‘Alaska’ ‘Arizona’ ‘Arkansas’ ‘California’ and more | 0 | nuniq: 50 |
| TypCrime | character | ‘Assault’ ‘Murder’ ‘Rape’ | 0 | nuniq: 3 |
| Arrests | numeric | Num: 0.8 to 337 | 0 | mean: 66.593<br>sd: 88.381<br>nuniq: 131 |
| UrbanPop | numeric | Num: 32 to 91 | 0 | mean: 65.54<br>sd: 14.377<br>nuniq: 36 |
# describe() is applied to every column of `usar`; the output below shows
# coercion warnings and NaN/Inf statistics for the character columns
# State and TypCrime, while Arrests and UrbanPop are summarised normally.
describe(usar, check = TRUE)
## Warning in describe(usar, check = TRUE): в результате преобразования созданы NA
## Warning in describe(usar, check = TRUE): в результате преобразования созданы NA
## Warning in FUN(newX[, i], ...): у 'min' нет не пропущенных аргументов; возвращаю
## Inf
## Warning in FUN(newX[, i], ...): у 'min' нет не пропущенных аргументов; возвращаю
## Inf
## Warning in FUN(newX[, i], ...): у 'max' нет не пропущенных аргументов; возвращаю
## -Inf
## Warning in FUN(newX[, i], ...): у 'max' нет не пропущенных аргументов; возвращаю
## -Inf
## vars n mean sd median trimmed mad min max range skew
## State* 1 150 NaN NA NA NaN NA Inf -Inf -Inf NA
## TypCrime* 2 150 NaN NA NA NaN NA Inf -Inf -Inf NA
## Arrests 3 150 66.59 88.38 20.1 48.50 20.90 0.8 337 336.2 1.48
## UrbanPop 4 150 65.54 14.38 66.0 65.88 17.79 32.0 91 59.0 -0.22
## kurtosis se
## State* NA NA
## TypCrime* NA NA
## Arrests 0.94 7.22
## UrbanPop -0.81 1.17
# Summary-statistics table of `usar` with left-aligned columns
sumtable(
  usar,
  col.align = "left"
)
| Variable | N | Mean | Std. Dev. | Min | Pctl. 25 | Pctl. 75 | Max |
|---|---|---|---|---|---|---|---|
| TypCrime | 150 | ||||||
| … Assault | 50 | 33.3% | |||||
| … Murder | 50 | 33.3% | |||||
| … Rape | 50 | 33.3% | |||||
| Arrests | 150 | 66.593 | 88.381 | 0.8 | 9.55 | 108.25 | 337 |
| UrbanPop | 150 | 65.54 | 14.377 | 32 | 54 | 78 | 91 |
A feature of the psych package:
# Linear model of Arrests on UrbanPop, and its base R summary
model1 <- lm(formula = Arrests ~ UrbanPop, data = usar)
mod1 <- summary(model1)

# Diagram of the same model, built from a setCor() fit with std = FALSE
p <- setCor(Arrests ~ UrbanPop, data = usar, std = FALSE)
setCor.diagram(p)