Load libraries

library(table1)
## 
## Attaching package: 'table1'
## The following objects are masked from 'package:base':
## 
##     units, units<-
library(explore)
library(ggplot2)
library(GGally)
## Registered S3 method overwritten by 'GGally':
##   method from   
##   +.gg   ggplot2
## 
## Attaching package: 'GGally'
## The following object is masked from 'package:explore':
## 
##     rescale01
library(gridExtra)

Load data

The mtcars dataset includes, among others, the following variables: mpg: miles per gallon; cyl: number of cylinders; disp: engine displacement (cu. in.), a measure of engine size; hp: gross horsepower; wt: weight of each car (1000 lbs); gear: number of forward gears.

# Load the built-in mtcars data set and preview its first six rows.
data("mtcars")
head(mtcars, n = 6L)
##                    mpg cyl disp  hp drat    wt  qsec vs am gear carb
## Mazda RX4         21.0   6  160 110 3.90 2.620 16.46  0  1    4    4
## Mazda RX4 Wag     21.0   6  160 110 3.90 2.875 17.02  0  1    4    4
## Datsun 710        22.8   4  108  93 3.85 2.320 18.61  1  1    4    1
## Hornet 4 Drive    21.4   6  258 110 3.08 3.215 19.44  1  0    3    1
## Hornet Sportabout 18.7   8  360 175 3.15 3.440 17.02  0  0    3    2
## Valiant           18.1   6  225 105 2.76 3.460 20.22  1  0    3    1
# Working subset: fuel economy, cylinders, displacement, weight and gears.
temp <- mtcars[c("mpg", "cyl", "disp", "wt", "gear")]
head(temp)
##                    mpg cyl disp    wt gear
## Mazda RX4         21.0   6  160 2.620    4
## Mazda RX4 Wag     21.0   6  160 2.875    4
## Datsun 710        22.8   4  108 2.320    4
## Hornet 4 Drive    21.4   6  258 3.215    3
## Hornet Sportabout 18.7   8  360 3.440    3
## Valiant           18.1   6  225 3.460    3

Descriptive analysis with table1

# Overall summary (mean/SD, median/range) of five mtcars variables.
table1(~ mpg + cyl + disp + hp + wt, data = mtcars)
Overall
(n=32)
mpg
Mean (SD) 20.1 (6.03)
Median [Min, Max] 19.2 [10.4, 33.9]
cyl
Mean (SD) 6.19 (1.79)
Median [Min, Max] 6.00 [4.00, 8.00]
disp
Mean (SD) 231 (124)
Median [Min, Max] 196 [71.1, 472]
hp
Mean (SD) 147 (68.6)
Median [Min, Max] 123 [52.0, 335]
wt
Mean (SD) 3.22 (0.978)
Median [Min, Max] 3.33 [1.51, 5.42]
# Same summary stratified by number of forward gears. gear is wrapped in
# factor() because table1 expects the term to the right of '|' (which defines
# the table columns) to be a factor; passing the raw numeric column triggers
# a warning. cyl is likewise treated as categorical so counts are reported.
table1(~ mpg + factor(cyl) + disp + hp + wt | factor(gear), data = mtcars)
3
(n=15)
4
(n=12)
5
(n=5)
Overall
(n=32)
mpg
Mean (SD) 16.1 (3.37) 24.5 (5.28) 21.4 (6.66) 20.1 (6.03)
Median [Min, Max] 15.5 [10.4, 21.5] 22.8 [17.8, 33.9] 19.7 [15.0, 30.4] 19.2 [10.4, 33.9]
factor(cyl)
4 1 (6.7%) 8 (66.7%) 2 (40.0%) 11 (34.4%)
6 2 (13.3%) 4 (33.3%) 1 (20.0%) 7 (21.9%)
8 12 (80.0%) 0 (0%) 2 (40.0%) 14 (43.8%)
disp
Mean (SD) 326 (94.9) 123 (38.9) 202 (115) 231 (124)
Median [Min, Max] 318 [120, 472] 131 [71.1, 168] 145 [95.1, 351] 196 [71.1, 472]
hp
Mean (SD) 176 (47.7) 89.5 (25.9) 196 (103) 147 (68.6)
Median [Min, Max] 180 [97.0, 245] 94.0 [52.0, 123] 175 [91.0, 335] 123 [52.0, 335]
wt
Mean (SD) 3.89 (0.833) 2.62 (0.633) 2.63 (0.819) 3.22 (0.978)
Median [Min, Max] 3.73 [2.47, 5.42] 2.70 [1.62, 3.44] 2.77 [1.51, 3.57] 3.33 [1.51, 5.42]

Exploratory analysis with explore package

# One exploratory plot per variable in mtcars.
explore_all(data = mtcars)

More exploratory analysis with explore package

# Explore each variable of the subset against the target variable gear.
explore_all(data = temp, target = gear)

Correlation analysis with GGally

# Pairwise plot matrix for all mtcars columns.
ggpairs(data = mtcars)

Graphical analysis with ggplot2

# Three views of the mpg distribution, arranged side by side.
# The former `col = mpg` mapping is omitted: a continuous colour mapping is
# dropped by stat_bin (and overridden by the constant colours in p2 and p3),
# so it never affected the plots.

# Default histogram of miles per gallon.
p1 <- ggplot(data = mtcars, aes(x = mpg)) +
  geom_histogram()

# Same histogram with white bar outlines and blue fill.
p2 <- ggplot(data = mtcars, aes(x = mpg)) +
  geom_histogram(colour = "white", fill = "blue")

# Density-scaled histogram with a kernel density curve overlaid in red.
# after_stat(density) replaces the deprecated `..density..` notation.
p3 <- ggplot(data = mtcars, aes(x = mpg)) +
  geom_histogram(
    aes(y = after_stat(density)),
    colour = "white",
    fill = "blue"
  ) +
  geom_density(colour = "red")

grid.arrange(p1, p2, p3, ncol = 3)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

# Relationship between displacement and fuel economy, three variants:
# raw points, points with the default smoother, and points with a
# quadratic linear-model fit.
p <- ggplot(data = mtcars, aes(x = disp, y = mpg))
p1 <- p + geom_point()
p2 <- p1 + geom_smooth()
p3 <- p1 + geom_smooth(method = "lm", formula = y ~ x + I(x^2))
grid.arrange(p1, p2, p3, ncol = 3)
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'