Data Manipulation with actual examples

## Attach the datasets package, which supplies the example data sets used below
library(datasets)

## List all the datasets (suppressed due to too much output)
## data(package = "datasets")

## You have already seen vectors.


## Array example: Titanic is a 4-dimensional array
## (dimensions Class x Sex x Age x Survived). It prints as one Class-by-Sex
## table for each combination of Age and Survived.
data(Titanic)
Titanic
, , Age = Child, Survived = No

      Sex
Class  Male Female
  1st     0      0
  2nd     0      0
  3rd    35     17
  Crew    0      0

, , Age = Adult, Survived = No

      Sex
Class  Male Female
  1st   118      4
  2nd   154     13
  3rd   387     89
  Crew  670      3

, , Age = Child, Survived = Yes

      Sex
Class  Male Female
  1st     5      1
  2nd    11     13
  3rd    13     14
  Crew    0      0

, , Age = Adult, Survived = Yes

      Sex
Class  Male Female
  1st    57    140
  2nd    14     80
  3rd    75     76
  Crew  192     20

## Leaving the first two indices empty (all of Class and Sex) and fixing the
## last two (Age = "Child", Survived = "No") extracts the first stratum,
## which is a matrix (2-dimensional array)
stratum1 <- Titanic[,,"Child","No"]
stratum1
      Sex
Class  Male Female
  1st     0      0
  2nd     0      0
  3rd    35     17
  Crew    0      0

## List example: printing the object shows its three named elements
## ($cov, $center and $n.obs), each of a different shape
data(Harman23.cor)
Harman23.cor
$cov
               height arm.span forearm lower.leg weight bitro.diameter chest.girth chest.width
height          1.000    0.846   0.805     0.859  0.473          0.398       0.301       0.382
arm.span        0.846    1.000   0.881     0.826  0.376          0.326       0.277       0.415
forearm         0.805    0.881   1.000     0.801  0.380          0.319       0.237       0.345
lower.leg       0.859    0.826   0.801     1.000  0.436          0.329       0.327       0.365
weight          0.473    0.376   0.380     0.436  1.000          0.762       0.730       0.629
bitro.diameter  0.398    0.326   0.319     0.329  0.762          1.000       0.583       0.577
chest.girth     0.301    0.277   0.237     0.327  0.730          0.583       1.000       0.539
chest.width     0.382    0.415   0.345     0.365  0.629          0.577       0.539       1.000

$center
[1] 0 0 0 0 0 0 0 0

$n.obs
[1] 305

## A named element of a list can be extracted with the $ operator
Harman23.cor$center
[1] 0 0 0 0 0 0 0 0

## Data frame example: similar to a matrix, but each column (vector) can hold
## a different type of data (here, factor columns alongside numeric counts)
## Columns are named and can be accessed with the $ operator
data(esoph)
head(esoph, 20)
   agegp     alcgp    tobgp ncases ncontrols
1  25-34 0-39g/day 0-9g/day      0        40
2  25-34 0-39g/day    10-19      0        10
3  25-34 0-39g/day    20-29      0         6
4  25-34 0-39g/day      30+      0         5
5  25-34     40-79 0-9g/day      0        27
6  25-34     40-79    10-19      0         7
7  25-34     40-79    20-29      0         4
8  25-34     40-79      30+      0         7
9  25-34    80-119 0-9g/day      0         2
10 25-34    80-119    10-19      0         1
11 25-34    80-119      30+      0         2
12 25-34      120+ 0-9g/day      0         1
13 25-34      120+    10-19      1         1
14 25-34      120+    20-29      0         1
15 25-34      120+      30+      0         2
16 35-44 0-39g/day 0-9g/day      0        60
17 35-44 0-39g/day    10-19      1        14
18 35-44 0-39g/day    20-29      0         7
19 35-44 0-39g/day      30+      0         8
20 35-44     40-79 0-9g/day      0        35

## Let's break a data frame into a list. Take the first 10 rows, then reset the
## class to "list" so the object prints as its underlying structure: one named
## vector per column, with the row names kept as an attribute.
esoph10 <- head(esoph, 10)
class(esoph10) <- "list"
esoph10
$agegp
 [1] 25-34 25-34 25-34 25-34 25-34 25-34 25-34 25-34 25-34 25-34
Levels: 25-34 < 35-44 < 45-54 < 55-64 < 65-74 < 75+

$alcgp
 [1] 0-39g/day 0-39g/day 0-39g/day 0-39g/day 40-79     40-79     40-79     40-79     80-119    80-119   
Levels: 0-39g/day < 40-79 < 80-119 < 120+

$tobgp
 [1] 0-9g/day 10-19    20-29    30+      0-9g/day 10-19    20-29    30+      0-9g/day 10-19   
Levels: 0-9g/day < 10-19 < 20-29 < 30+

$ncases
 [1] 0 0 0 0 0 0 0 0 0 0

$ncontrols
 [1] 40 10  6  5 27  7  4  7  2  1

attr(,"row.names")
 [1]  1  2  3  4  5  6  7  8  9 10