Descriptive statistics with R

Download Rosner data: http://academic.cengage.com/resource_uploads/downloads/0538733497_245020.zip
Extract the data file so that you can read in BONEDEN.DAT.txt.

Functions in the default install

bone <- read.csv("./BONEDEN.DAT.txt", quote = "'")

## Mean
mean(bone$age)
[1] 48.85

## SD
sd(bone$age)
[1] 11.77

## Median
median(bone$age)
[1] 47

## Quantiles:
quantile(bone$age)
  0%  25%  50%  75% 100% 
  27   42   47   53   76 

## Interquartile range
IQR(bone$age)
[1] 11

## Maximum
max(bone$age)
[1] 76

## Minimum
min(bone$age)
[1] 27

Use e1071 package for skewness and kurtosis

## Load e1071
library(e1071)

## Skewness
skewness(bone$age)              # Type 3
[1] 0.6941
skewness(bone$age, type = 2)    # Type 2: SAS-method
[1] 0.748
skewness(bone$age, type = 1)    # Type 1: Stata-method
[1] 0.7203

## Kurtosis
kurtosis(bone$age)              # Type 3
[1] -0.1392
kurtosis(bone$age, type = 2)    # Type 2: SAS-method
[1] 0.1683
kurtosis(bone$age, type = 1)    # Type 1: Stata-method minus 3 (excess kurtosis; Stata does not subtract 3)
[1] 0.005628

Use summary() to summarize all variables at once

summary(bone)
       ID               age            zyg            ht1           wt1             tea1           cof1     
 Min.   :1002501   Min.   :27.0   Min.   :1.00   Min.   :149   Min.   : 47.0   Min.   : 0.0   Min.   : 0.0  
 1st Qu.:1273101   1st Qu.:42.0   1st Qu.:1.00   1st Qu.:159   1st Qu.: 57.0   1st Qu.: 1.0   1st Qu.: 0.0  
 Median :1528201   Median :47.0   Median :1.00   Median :162   Median : 61.0   Median :14.0   Median : 7.0  
 Mean   :1476714   Mean   :48.9   Mean   :1.49   Mean   :161   Mean   : 65.4   Mean   :19.3   Mean   :14.6  
 3rd Qu.:1628601   3rd Qu.:53.0   3rd Qu.:2.00   3rd Qu.:165   3rd Qu.: 70.0   3rd Qu.:35.0   3rd Qu.:28.0  
 Max.   :2609801   Max.   :76.0   Max.   :2.00   Max.   :171   Max.   :114.0   Max.   :56.0   Max.   :56.0  
      alc1            cur1            men1            pyr1            ls1             fn1             fs1      
 Min.   : 0.00   Min.   : 0.00   Min.   :0.000   Min.   : 0.00   Min.   :0.500   Min.   :0.420   Min.   :0.70  
 1st Qu.: 0.00   1st Qu.: 0.00   1st Qu.:0.000   1st Qu.: 0.00   1st Qu.:0.710   1st Qu.:0.600   1st Qu.:1.05  
 Median : 1.00   Median : 0.00   Median :1.000   Median : 0.00   Median :0.800   Median :0.660   Median :1.11  
 Mean   : 4.07   Mean   : 3.66   Mean   :0.707   Mean   : 4.36   Mean   :0.795   Mean   :0.665   Mean   :1.10  
 3rd Qu.: 6.00   3rd Qu.: 3.00   3rd Qu.:1.000   3rd Qu.: 0.00   3rd Qu.:0.870   3rd Qu.:0.720   3rd Qu.:1.15  
 Max.   :28.00   Max.   :28.00   Max.   :2.000   Max.   :33.75   Max.   :1.040   Max.   :0.900   Max.   :1.42  
      ht2           wt2            tea2           cof2           alc2            cur2            men2      
 Min.   :150   Min.   :43.0   Min.   : 0.0   Min.   : 0.0   Min.   : 0.00   Min.   : 0.00   Min.   :0.000  
 1st Qu.:159   1st Qu.:54.0   1st Qu.: 0.0   1st Qu.: 7.0   1st Qu.: 0.00   1st Qu.: 0.00   1st Qu.:0.000  
 Median :162   Median :58.0   Median : 7.0   Median :21.0   Median : 0.00   Median : 1.00   Median :1.000  
 Mean   :162   Mean   :61.8   Mean   :15.1   Mean   :23.1   Mean   : 5.29   Mean   : 5.07   Mean   :0.634  
 3rd Qu.:165   3rd Qu.:68.0   3rd Qu.:21.0   3rd Qu.:35.0   3rd Qu.: 9.00   3rd Qu.: 8.00   3rd Qu.:1.000  
 Max.   :170   Max.   :94.0   Max.   :91.0   Max.   :99.0   Max.   :35.00   Max.   :30.00   Max.   :2.000  
      pyr2           ls2             fn2             fs2      
 Min.   : 5.0   Min.   :0.470   Min.   :0.380   Min.   :0.64  
 1st Qu.:14.0   1st Qu.:0.650   1st Qu.:0.610   1st Qu.:1.01  
 Median :25.5   Median :0.770   Median :0.660   Median :1.08  
 Mean   :27.5   Mean   :0.759   Mean   :0.664   Mean   :1.07  
 3rd Qu.:39.0   3rd Qu.:0.860   3rd Qu.:0.740   3rd Qu.:1.16  
 Max.   :64.5   Max.   :0.980   Max.   :0.880   Max.   :1.33  

Use the psych package's describe() for a similar purpose

library(psych)
## describe all variables except for the first (ID)
describe(bone[,-1], type = 2)        # Type 2: SAS-method for skewness and kurtosis
     var  n   mean    sd median trimmed   mad    min    max range  skew kurtosis   se
age    1 41  48.85 11.77  47.00   47.94  8.90  27.00  76.00 49.00  0.75     0.17 1.84
zyg    2 41   1.49  0.51   1.00    1.48  0.00   1.00   2.00  1.00  0.05    -2.10 0.08
ht1    3 41 161.41  5.23 162.00  161.82  4.45 149.00 171.00 22.00 -0.71     0.22 0.82
wt1    4 41  65.39 14.89  61.00   63.18 10.38  47.00 114.00 67.00  1.77     3.83 2.32
tea1   5 41  19.32 18.11  14.00   17.64 20.76   0.00  56.00 56.00  0.58    -0.88 2.83
cof1   6 41  14.56 15.80   7.00   12.36 10.38   0.00  56.00 56.00  1.00     0.04 2.47
alc1   7 41   4.07  6.72   1.00    2.45  1.48   0.00  28.00 28.00  2.20     4.66 1.05
cur1   8 41   3.66  6.77   0.00    1.91  0.00   0.00  28.00 28.00  2.36     5.09 1.06
men1   9 41   0.71  0.64   1.00    0.64  0.00   0.00   2.00  2.00  0.35    -0.62 0.10
pyr1  10 41   4.36  9.33   0.00    2.04  0.00   0.00  33.75 33.75  1.98     2.62 1.46
ls1   11 41   0.79  0.13   0.80    0.80  0.13   0.50   1.04  0.54 -0.21    -0.12 0.02
fn1   12 41   0.66  0.10   0.66    0.66  0.09   0.42   0.90  0.48  0.05     0.11 0.02
fs1   13 41   1.10  0.13   1.11    1.10  0.09   0.70   1.42  0.72 -0.58     2.09 0.02
ht2   14 41 161.59  4.67 162.00  161.82  4.45 150.00 170.00 20.00 -0.49    -0.24 0.73
wt2   15 41  61.83 11.89  58.00   60.58  7.41  43.00  94.00 51.00  0.97     0.51 1.86
tea2  16 41  15.07 21.17   7.00   10.67 10.38   0.00  91.00 91.00  2.41     6.68 3.31
cof2  17 41  23.15 21.09  21.00   20.73 20.76   0.00  99.00 99.00  1.29     2.94 3.29
alc2  18 41   5.29  8.14   0.00    3.76  0.00   0.00  35.00 35.00  1.93     4.08 1.27
cur2  19 41   5.07  7.55   1.00    3.52  1.48   0.00  30.00 30.00  1.87     3.42 1.18
men2  20 41   0.63  0.66   1.00    0.55  1.48   0.00   2.00  2.00  0.57    -0.61 0.10
pyr2  21 41  27.51 14.82  25.50   26.80 18.24   5.00  64.50 59.50  0.43    -0.53 2.31
ls2   22 41   0.76  0.14   0.77    0.77  0.16   0.47   0.98  0.51 -0.45    -0.62 0.02
fn2   23 41   0.66  0.12   0.66    0.67  0.12   0.38   0.88  0.50 -0.24    -0.17 0.02
fs2   24 41   1.07  0.15   1.08    1.08  0.10   0.64   1.33  0.69 -1.01     1.42 0.02
## item name
## item number
## number of valid cases
## mean
## standard deviation
## trimmed mean (with trim defaulting to .1)
## median (standard or interpolated)
## mad: median absolute deviation (from the median)
## minimum
## maximum
## range
## skew
## kurtosis
## standard error

## describeBy()
## Same except for grouping by zyg status
describeBy(bone[,-1], bone$zyg, type = 2)
group: 1
     var  n   mean    sd median trimmed   mad    min    max range  skew kurtosis   se
age    1 21  51.38 10.74  49.00   50.71  7.41  36.00  73.00 37.00  0.85     0.16 2.34
zyg    2 21   1.00  0.00   1.00    1.00  0.00   1.00   1.00  0.00   NaN      NaN 0.00
ht1    3 21 160.76  5.66 162.00  161.12  4.45 149.00 169.00 20.00 -0.64    -0.40 1.23
wt1    4 21  66.95 13.63  66.00   65.71  7.41  48.00 112.00 64.00  1.69     5.30 2.97
tea1   5 21  19.29 20.50  14.00   17.24 20.76   0.00  56.00 56.00  0.79    -0.82 4.47
cof1   6 21  15.67 15.79  14.00   13.59 20.76   0.00  56.00 56.00  1.00     0.53 3.45
alc1   7 21   3.62  6.05   1.00    2.29  1.48   0.00  23.00 23.00  2.12     4.47 1.32
cur1   8 21   3.62  6.05   1.00    2.29  1.48   0.00  22.00 22.00  1.98     3.49 1.32
men1   9 21   0.76  0.62   1.00    0.71  0.00   0.00   2.00  2.00  0.20    -0.36 0.14
pyr1  10 21   7.70 11.71   0.00    5.88  0.00   0.00  33.75 33.75  1.12    -0.35 2.55
ls1   11 21   0.77  0.09   0.75    0.76  0.07   0.58   0.95  0.37  0.30    -0.09 0.02
fn1   12 21   0.67  0.10   0.66    0.67  0.09   0.42   0.85  0.43 -0.11     0.51 0.02
fs1   13 21   1.08  0.11   1.11    1.10  0.07   0.70   1.22  0.52 -1.99     5.60 0.02
ht2   14 21 162.38  4.35 163.00  162.76  2.97 153.00 169.00 16.00 -0.79     0.12 0.95
wt2   15 21  65.19 12.99  62.00   64.59 11.86  43.00  94.00 51.00  0.53    -0.25 2.84
tea2  16 21  13.67 20.47   7.00    9.88 10.38   0.00  91.00 91.00  2.88    10.42 4.47
cof2  17 21  23.95 22.85  28.00   21.29 20.76   0.00  99.00 99.00  1.70     4.88 4.99
alc2  18 21   5.10  6.46   0.00    4.53  0.00   0.00  16.00 16.00  0.66    -1.52 1.41
cur2  19 21   4.86  5.69   2.00    4.06  2.97   0.00  20.00 20.00  1.10     0.68 1.24
men2  20 21   0.71  0.64   1.00    0.65  0.00   0.00   2.00  2.00  0.33    -0.51 0.14
pyr2  21 21  28.21 12.73  30.00   28.15 14.46   6.00  56.00 50.00  0.09    -0.24 2.78
ls2   22 21   0.75  0.11   0.76    0.75  0.12   0.51   0.96  0.45 -0.23     0.06 0.02
fn2   23 21   0.67  0.11   0.65    0.67  0.09   0.46   0.88  0.42  0.17    -0.27 0.02
fs2   24 21   1.07  0.13   1.09    1.08  0.10   0.66   1.24  0.58 -1.67     4.87 0.03
--------------------------------------------------------------------------------------- 
group: 2
     var  n   mean    sd median trimmed   mad    min    max range  skew kurtosis   se
age    1 20  46.20 12.48  44.00   44.81  6.67  27.00  76.00 49.00  1.01     0.86 2.79
zyg    2 20   2.00  0.00   2.00    2.00  0.00   2.00   2.00  0.00   NaN      NaN 0.00
ht1    3 20 162.10  4.79 162.50  162.50  2.97 150.00 171.00 21.00 -0.76     1.60 1.07
wt1    4 20  63.75 16.29  58.50   60.50  5.93  47.00 114.00 67.00  2.06     4.48 3.64
tea1   5 20  19.35 15.75  21.00   18.94 20.76   0.00  42.00 42.00  0.11    -1.53 3.52
cof1   6 20  13.40 16.12   7.00   11.06 10.38   0.00  49.00 49.00  1.11    -0.02 3.61
alc1   7 20   4.55  7.50   1.00    2.62  1.48   0.00  28.00 28.00  2.29     5.14 1.68
cur1   8 20   3.70  7.62   0.00    1.50  0.00   0.00  28.00 28.00  2.63     6.37 1.70
men1   9 20   0.65  0.67   1.00    0.56  1.48   0.00   2.00  2.00  0.55    -0.55 0.15
pyr1  10 20   0.85  3.69   0.00    0.00  0.00   0.00  16.50 16.50  4.47    19.96 0.82
ls1   11 20   0.83  0.16   0.84    0.84  0.15   0.50   1.04  0.54 -0.76     0.06 0.04
fn1   12 20   0.66  0.11   0.66    0.66  0.08   0.47   0.90  0.43  0.19     0.11 0.02
fs1   13 20   1.11  0.14   1.12    1.12  0.16   0.81   1.42  0.61 -0.01     0.38 0.03
ht2   14 20 160.75  4.96 161.50  160.81  5.19 150.00 170.00 20.00 -0.22    -0.11 1.11
wt2   15 20  58.30  9.70  56.00   57.06  5.19  43.00  88.00 45.00  1.67     3.92 2.17
tea2  16 20  16.55 22.32  10.50   12.38 15.57   0.00  91.00 91.00  2.20     5.85 4.99
cof2  17 20  22.30 19.63  21.00   20.12 20.76   0.00  68.00 68.00  0.67    -0.01 4.39
alc2  18 20   5.50  9.78   0.50    2.94  0.74   0.00  35.00 35.00  2.24     4.59 2.19
cur2  19 20   5.30  9.26   0.00    3.00  0.00   0.00  30.00 30.00  1.93     2.92 2.07
men2  20 20   0.55  0.69   0.00    0.44  0.00   0.00   2.00  2.00  0.89    -0.24 0.15
pyr2  21 20  26.79 17.05  20.75   25.64 15.57   5.00  64.50 59.50  0.65    -0.67 3.81
ls2   22 20   0.77  0.17   0.84    0.78  0.12   0.47   0.98  0.51 -0.63    -1.00 0.04
fn2   23 20   0.66  0.12   0.68    0.67  0.11   0.38   0.87  0.49 -0.55     0.00 0.03
fs2   24 20   1.06  0.18   1.06    1.07  0.19   0.64   1.33  0.69 -0.71     0.30 0.04
## item name
## item number
## number of valid cases
## mean
## standard deviation
## median
## trimmed mean (with trim defaulting to .1)
## mad: median absolute deviation (from the median)
## minimum
## maximum
## range
## skew
## kurtosis
## standard error

Trimmed mean

## Density plot of age (outliers on the higher side)
library(lattice)
densityplot(bone$age)

plot of chunk unnamed-chunk-6


## mean is affected by the outliers
mean(bone$age)
[1] 48.85

## trimmed mean: the top 10% and bottom 10% of observations are removed before the mean is computed
mean(bone$age, trim = 0.1)
[1] 47.94

## trimmed mean is closer to median (less affected by the outliers)
median(bone$age)
[1] 47

Median absolute deviation

Compute the median absolute deviation, i.e., the (lo-/hi-) median of the absolute deviations from the median, and (by default) adjust by a factor for asymptotically normal consistency.

mad(bone$age)
[1] 8.896