# Test 1

# rleid() is provided by the data.table R package.
# "rle" stands for "Run Length Encoding": rleid() gives each run of consecutive equal values its own id.


# Build a 17-element category vector made of consecutive runs:
# A x1, B x3, C x5, D x5, E x2, F x1.
# NOTE: the variable name `cat` shadows base::cat; calls such as cat("x") still
# resolve to the base function because R skips non-function bindings when it
# looks up a function.
cat <- LETTERS[c(1, 2, 2, 2, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 5, 5, 6)]
cat
##  [1] "A" "B" "B" "B" "C" "C" "C" "C" "C" "D" "D" "D" "D" "D" "E" "E" "F"
# One uniform draw in [10, 20] per element, rounded to 2 decimals. Written as a
# plain nested call so this step does not need a pipe-providing package to be
# attached. runif() is not seeded here, so the values printed below are one
# example run and will differ between sessions.
num <- round(runif(length(cat), 10, 20), 2)
myDF <- data.frame(cat, num)
myDF
##    cat   num
## 1    A 18.91
## 2    B 17.74
## 3    B 15.96
## 4    B 13.37
## 5    C 14.43
## 6    C 18.72
## 7    C 12.75
## 8    C 19.64
## 9    C 12.01
## 10   D 16.75
## 11   D 19.69
## 12   D 13.80
## 13   D 14.22
## 14   D 16.02
## 15   E 10.98
## 16   E 10.28
## 17   F 13.79
# Tag every row with its run id and its position inside that run.
#   group: data.table::rleid() id, incremented each time `cat` changes from the
#          previous row
#   index: 1-based row position within the run
# The index is computed per run id rather than per `cat` value, so a category
# that reappears later in a separate run restarts at 1 instead of continuing
# the earlier count. ungroup() removes the grouping afterwards so later verbs
# on myDF are not silently applied per group.
# Functions are namespace-qualified so this step works without attaching
# dplyr or data.table.
myDF <- dplyr::mutate(myDF, group = data.table::rleid(cat))
myDF <- dplyr::group_by(myDF, group)
myDF <- dplyr::mutate(myDF, index = dplyr::row_number())
myDF <- dplyr::ungroup(myDF)
myDF
## # A tibble: 17 x 4
##    cat     num group index
##    <chr> <dbl> <int> <int>
##  1 A      18.9     1     1
##  2 B      17.7     2     1
##  3 B      16.0     2     2
##  4 B      13.4     2     3
##  5 C      14.4     3     1
##  6 C      18.7     3     2
##  7 C      12.8     3     3
##  8 C      19.6     3     4
##  9 C      12.0     3     5
## 10 D      16.8     4     1
## 11 D      19.7     4     2
## 12 D      13.8     4     3
## 13 D      14.2     4     4
## 14 D      16.0     4     5
## 15 E      11.0     5     1
## 16 E      10.3     5     2
## 17 F      13.8     6     1
# Toy table: `grp` holds five consecutive runs (A x2, B x2, C x3, A x1, B x2),
# so A and B each occur in two separate runs; `value` counts 1 to 10.
DT <- data.table(
  grp = rep(c("A", "B", "C", "A", "B"), times = c(2, 2, 3, 1, 2)),
  value = seq_len(10)
)
DT
##     grp value
##  1:   A     1
##  2:   A     2
##  3:   B     3
##  4:   B     4
##  5:   C     5
##  6:   C     6
##  7:   C     7
##  8:   A     8
##  9:   B     9
## 10:   B    10
# Run-length ids of the grp column: the id goes up by one each time the value
# differs from the previous row.
rleid(DT[["grp"]])
##  [1] 1 1 2 2 3 3 3 4 5 5
# Same ids, obtained by passing the table and the column name to rleidv().
rleidv(DT, cols = "grp")
##  [1] 1 1 2 2 3 3 3 4 5 5
# Same ids again, returned as character labels prefixed with "grp".
rleid(DT[["grp"]], prefix = "grp")
##  [1] "grp1" "grp1" "grp2" "grp2" "grp3" "grp3" "grp3" "grp4" "grp5" "grp5"
# Sum `value` within each run of `grp`. Grouping on rleid(grp) in addition to
# grp keeps the two separate A runs (and the two B runs) apart instead of
# pooling them into one row per letter.
DT[, sum(value), by = list(grp, rleid(grp))]
##    grp rleid V1
## 1:   A     1  3
## 2:   B     2  7
## 3:   C     3 18
## 4:   A     4  8
## 5:   B     5 19
# Same aggregation, with the run ids shown as "grp"-prefixed labels.
DT[, sum(value), by = list(grp, rleid(grp, prefix = "grp"))]
##    grp rleid V1
## 1:   A  grp1  3
## 2:   B  grp2  7
## 3:   C  grp3 18
## 4:   A  grp4  8
## 5:   B  grp5 19