Merging data

## Settings for RMarkdown http://yihui.name/knitr/options#chunk_options
## Chunk defaults: no prefix in front of printed output, warnings and messages
## hidden, code left unformatted and echoed, figures 5 x 5.
## `knitr::` is spelled out so the call does not rely on knitr being attached,
## and TRUE replaces T because T is an ordinary variable that can be reassigned.
knitr::opts_chunk$set(
  comment = "", warning = FALSE, message = FALSE, tidy = FALSE,
  echo = TRUE, fig.width = 5, fig.height = 5
)
## Console options: 150-character output lines, a penalty of 5 against
## scientific notation, and 5 significant digits when printing numbers.
options(width = 150, scipen = 5, digits = 5)

Prepare data. In practice you would simply read in two existing datasets, which is much simpler than the artificial split performed here.

## Attach the survival package, which provides the pbc data used below
library("survival")

## Show the first six rows of the PBC data
head(pbc, n = 6L)

  id time status trt    age sex ascites hepato spiders edema bili chol albumin copper alk.phos    ast trig platelet protime stage
1  1  400      2   1 58.765   f       1      1       1   1.0 14.5  261    2.60    156   1718.0 137.95  172      190    12.2     4
2  2 4500      0   1 56.446   f       0      1       1   0.0  1.1  302    4.14     54   7394.8 113.52   88      221    10.6     3
3  3 1012      2   1 70.073   m       0      0       0   0.5  1.4  176    3.48    210    516.0  96.10   55      151    12.0     4
4  4 1925      2   1 54.741   f       0      1       1   0.5  1.8  244    2.54     64   6121.8  60.63   92      183    10.3     4
5  5 1504      1   2 38.105   f       0      1       1   0.0  3.4  279    3.53    143    671.0 113.15   72      136    10.9     3
6  6 2503      2   2 66.259   f       0      1       0   0.0  0.8  248    3.98     50    944.0  93.00   63       NA    11.0     3


## Split data intentionally: both parts keep the id column (column 1) as key
partA <- pbc[, 1:10]
partB <- pbc[, c(1, 11:20)]

## Reduce part A by randomly sampling 200 of its rows (418 people in pbc).
## The row count is taken from the data rather than hard-coded. No seed is
## set, so a different subset is drawn on every run; call set.seed() first
## if a reproducible subset is needed.
partA <- partA[sample(seq_len(nrow(partA)), 200), ]

## Resort by id, because sampling returns the rows in random order
partA <- partA[order(partA$id), ]

## The two parts no longer have the same number of rows
lapply(X = list(partA, partB), FUN = nrow)

[[1]]
[1] 200

[[2]]
[1] 418


## The first rows of the two parts differ now as well
list(head(partA), head(partB))

[[1]]
   id time status trt    age sex ascites hepato spiders edema
2   2 4500      0   1 56.446   f       0      1       1     0
5   5 1504      1   2 38.105   f       0      1       1     0
7   7 1832      0   2 55.535   f       0      1       0     0
8   8 2466      2   2 53.057   f       0      0       0     0
11 11 3762      2   2 53.714   f       0      1       1     0
15 15 3584      2   1 64.646   f       0      0       0     0

[[2]]
  id bili chol albumin copper alk.phos    ast trig platelet protime stage
1  1 14.5  261    2.60    156   1718.0 137.95  172      190    12.2     4
2  2  1.1  302    4.14     54   7394.8 113.52   88      221    10.6     3
3  3  1.4  176    3.48    210    516.0  96.10   55      151    12.0     4
4  4  1.8  244    2.54     64   6121.8  60.63   92      183    10.3     4
5  5  3.4  279    3.53    143    671.0 113.15   72      136    10.9     3
6  6  0.8  248    3.98     50    944.0  93.00   63       NA    11.0     3

Merge two datasets

## Join the two parts on the shared id column. With all = FALSE (the default
## of merge) only ids present in both parts are kept.
merged <- merge(x = partA, y = partB, by = "id", all = FALSE)
head(merged, n = 6L)

  id time status trt    age sex ascites hepato spiders edema bili chol albumin copper alk.phos    ast trig platelet protime stage
1  2 4500      0   1 56.446   f       0      1       1     0  1.1  302    4.14     54   7394.8 113.52   88      221    10.6     3
2  5 1504      1   2 38.105   f       0      1       1     0  3.4  279    3.53    143    671.0 113.15   72      136    10.9     3
3  7 1832      0   2 55.535   f       0      1       0     0  1.0  322    4.09     52    824.0  60.45  213      204     9.7     3
4  8 2466      2   2 53.057   f       0      0       0     0  0.3  280    4.00     52   4651.2  28.38  189      373    11.0     3
5 11 3762      2   2 53.714   f       0      1       1     0  1.4  259    4.16     46   1104.0  79.05   79      258    12.0     4
6 15 3584      2   1 64.646   f       0      0       0     0  0.8  231    3.87    173   9009.8 127.71   96      295    11.0     3