First difference

# Toy data set: two groups (1 and 2) with three observations each, listed in
# the order in which differences are taken below.
dat <- data.frame(
  group = rep(c(1, 2), each = 3),
  value = c(10, 20, 25, 5, 10, 12)
)
dat

##   group value
## 1     1    10
## 2     1    20
## 3     1    25
## 4     2     5
## 5     2    10
## 6     2    12

Using dplyr to calculate first diff

library(dplyr)

## 
## Attaching package: 'dplyr'

## The following objects are masked from 'package:stats':
## 
##     filter, lag

## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union

# First difference within each group: every value minus the value on the
# previous row of the same group. The first row of a group has no previous
# row, so its diff is NA. The result is still grouped by `group`.
df <- dat %>%
  group_by(group) %>%
  mutate(diff = value - dplyr::lag(value))

df

## # A tibble: 6 x 3
## # Groups:   group [2]
##   group value  diff
##   <dbl> <dbl> <dbl>
## 1     1    10    NA
## 2     1    20    10
## 3     1    25     5
## 4     2     5    NA
## 5     2    10     5
## 6     2    12     2

Calculate difference from baseline value

# Change from baseline: each value minus the first value of its group.
#
# plyr is called through its namespace instead of being attached with
# library(plyr). Attaching plyr after dplyr masks dplyr's mutate(),
# summarise(), arrange(), count(), rename(), ... for the rest of the session.
# Without the attach, plyr's startup and masking messages are no longer
# printed.
dd <- plyr::ddply(dat, "group", transform, change = value - value[1])
dd

##   group value change
## 1     1    10      0
## 2     1    20     10
## 3     1    25     15
## 4     2     5      0
## 5     2    10      5
## 6     2    12      7

Descriptive analysis by group

library(dplyr)
# Example data for the grouped summary: IDs 1, 2, 3 repeated three times
# (nine rows) and three columns of standard-normal draws. No seed is set, so
# the drawn values differ from run to run.
dat <- data.frame(
  ID = rep(seq_len(3), times = 3),
  var_1 = rnorm(9),
  var_2 = rnorm(9),
  var_3 = rnorm(9)
)
dat

##   ID       var_1      var_2       var_3
## 1  1 -0.14739583 -0.4306836 -0.58153697
## 2  2  0.15094836 -2.0792531 -0.95066923
## 3  3  0.07662893  2.0491784  0.04407715
## 4  1  0.22336567  1.5970724 -0.33122437
## 5  2  0.08356196 -0.8409474 -0.34459661
## 6  3  1.47870567 -2.0511762 -1.23637334
## 7  1  1.18083213  0.5745588 -0.57190991
## 8  2 -1.03019609 -0.4572041  0.58422783
## 9  3 -0.59111929 -1.2593005  0.06166539

# Per-ID descriptive statistics: number of rows, mean and standard deviation
# of every non-grouping column. Result columns are named <variable>_<stat>,
# ordered by statistic (all lengths, then all means, then all sds).
#
# A named list of functions replaces the deprecated summarise_each() + funs()
# pair, so the funs() deprecation warning is no longer emitted.
df <- dat %>%
  group_by(ID) %>%
  summarise_all(list(length = length, mean = mean, sd = sd))

df

## # A tibble: 3 x 10
##      ID var_1_length var_2_length var_3_length var_1_mean var_2_mean
##   <int>        <int>        <int>        <int>      <dbl>      <dbl>
## 1     1            3            3            3      0.419      0.580
## 2     2            3            3            3     -0.265     -1.13 
## 3     3            3            3            3      0.321     -0.420
## # … with 4 more variables: var_3_mean <dbl>, var_1_sd <dbl>,
## #   var_2_sd <dbl>, var_3_sd <dbl>

Data manipulation first diff

T Nguyen

1/6/2020

First difference

Using dplyr to calculate first diff

Calculate difference from baseline value

Descriptive analysis by group