Descriptive Statistics by Group

Sameer Mathur

Load the mtcars dataset

mt <- mtcars[c("mpg", "hp", "wt", "am")]
head(mt)
                   mpg  hp    wt am
Mazda RX4         21.0 110 2.620  1
Mazda RX4 Wag     21.0 110 2.875  1
Datsun 710        22.8  93 2.320  1
Hornet 4 Drive    21.4 110 3.215  0
Hornet Sportabout 18.7 175 3.440  0
Valiant           18.1 105 3.460  0

Descriptive statistics by group using aggregate

myvars <- c("mpg", "hp", "wt")
aggregate(mtcars[myvars], by=list(am=mtcars$am), mean)
  am      mpg       hp       wt
1  0 17.14737 160.2632 3.768895
2  1 24.39231 126.8462 2.411000
aggregate(mtcars[myvars], by=list(am=mtcars$am), sd)
  am      mpg       hp        wt
1  0 3.833966 53.90820 0.7774001
2  1 6.166504 84.06232 0.6169816

Descriptive statistics using sapply

mystats <- function(x, na.omit=FALSE){
  if (na.omit)
    x <- x[!is.na(x)]
  m <- mean(x)
  n <- length(x)
  s <- sd(x)
  skew <- sum((x-m)^3/s^3)/n
  kurt <- sum((x-m)^4/s^4)/n - 3
  return(c(n=n, mean=m, stdev=s, skew=skew, kurtosis=kurt))
}

Descriptive statistics by group using by

dstats <- function(x)sapply(x, mystats)
myvars <- c("mpg", "hp", "wt")
by(mtcars[myvars], mtcars$am, dstats)
mtcars$am: 0
                 mpg           hp         wt
n        19.00000000  19.00000000 19.0000000
mean     17.14736842 160.26315789  3.7688947
stdev     3.83396639  53.90819573  0.7774001
skew      0.01395038  -0.01422519  0.9759294
kurtosis -0.80317826  -1.20969733  0.1415676
-------------------------------------------------------- 
mtcars$am: 1
                 mpg          hp         wt
n        13.00000000  13.0000000 13.0000000
mean     24.39230769 126.8461538  2.4110000
stdev     6.16650381  84.0623243  0.6169816
skew      0.05256118   1.3598859  0.2103128
kurtosis -1.45535200   0.5634635 -1.1737358