# Test 1
# rleid() is a function from the data.table R package.
# "rle" stands for "Run Length Encoding": rleid() gives every run of identical
# consecutive values its own id.
# Build a toy category vector made of consecutive runs of letters
# (run lengths: A x1, B x3, C x5, D x5, E x2, F x1).
# NOTE(review): the variable shares its name with base::cat(); the name is
# kept because the data frame column name below is derived from it.
cat <- LETTERS[rep(1:6, times = c(1, 3, 5, 5, 2, 1))]
cat
## [1] "A" "B" "B" "B" "C" "C" "C" "C" "C" "D" "D" "D" "D" "D" "E" "E" "F"
# One uniform random value in [10, 20] per element, rounded to 2 decimals.
# No seed is set, so the numbers in the printed output change on every run.
# round() is called directly so this step does not depend on the %>% pipe.
num <- round(runif(length(cat), 10, 20), 2)
# Column names ("cat", "num") are taken from the variable names.
myDF <- data.frame(cat, num)
myDF
## cat num
## 1 A 18.91
## 2 B 17.74
## 3 B 15.96
## 4 B 13.37
## 5 C 14.43
## 6 C 18.72
## 7 C 12.75
## 8 C 19.64
## 9 C 12.01
## 10 D 16.75
## 11 D 19.69
## 12 D 13.80
## 13 D 14.22
## 14 D 16.02
## 15 E 10.98
## 16 E 10.28
## 17 F 13.79
# Add two helper columns:
#   group - run-length id of `cat` (one id per run of identical consecutive
#           values)
#   index - row counter that restarts at 1 within each `cat` value
# The result remains grouped by `cat`.
# NOTE(review): `index` is computed per `cat` value, not per run. The two
# coincide here because every letter forms exactly one run - confirm whether
# grouping by `group` was intended for data where a value can reappear.
myDF <- myDF %>%
  mutate(group = rleid(cat)) %>%
  group_by(cat) %>%
  mutate(index = row_number())
myDF
## # A tibble: 17 x 4
## # Groups: cat [6]
## cat num group index
## <chr> <dbl> <int> <int>
## 1 A 18.9 1 1
## 2 B 17.7 2 1
## 3 B 16.0 2 2
## 4 B 13.4 2 3
## 5 C 14.4 3 1
## 6 C 18.7 3 2
## 7 C 12.8 3 3
## 8 C 19.6 3 4
## 9 C 12.0 3 5
## 10 D 16.8 4 1
## 11 D 19.7 4 2
## 12 D 13.8 4 3
## 13 D 14.2 4 4
## 14 D 16.0 4 5
## 15 E 11.0 5 1
## 16 E 10.3 5 2
## 17 F 13.8 6 1
# Example table: five runs of group labels (A x2, B x2, C x3, A x1, B x2)
# paired with the values 1..10. A and B each occur in two separate runs.
DT <- data.table(
  grp = rep(c("A", "B", "C", "A", "B"), times = c(2, 2, 3, 1, 2)),
  value = 1:10
)
DT
## grp value
## 1: A 1
## 2: A 2
## 3: B 3
## 4: B 4
## 5: C 5
## 6: C 6
## 7: C 7
## 8: A 8
## 9: B 9
## 10: B 10
# Run-length ids for the grp column: the id increases by one each time the
# value changes from one row to the next.
rleid(DT[["grp"]])
## [1] 1 1 2 2 3 3 3 4 5 5
# Same ids, obtained by passing the table and naming the column to scan.
rleidv(DT, cols = "grp")
## [1] 1 1 2 2 3 3 3 4 5 5
# Character ids: the prefix is put in front of each run id.
rleid(DT$grp, prefix = "grp")
## [1] "grp1" "grp1" "grp2" "grp2" "grp3" "grp3" "grp3" "grp4" "grp5" "grp5"
# Sum `value` within each run of consecutive identical `grp` values.
# Grouping by both grp and its run id keeps the two separate A runs (and the
# two B runs) apart instead of pooling them by letter.
DT[, sum(value), by = .(grp, rleid(grp))]
## grp rleid V1
## 1: A 1 3
## 2: B 2 7
## 3: C 3 18
## 4: A 4 8
## 5: B 5 19
# Same aggregation, with prefixed (character) run ids as the grouping key.
DT[, sum(value), by = .(grp, rleid(grp, prefix = "grp"))]
## grp rleid V1
## 1: A grp1 3
## 2: B grp2 7
## 3: C grp3 18
## 4: A grp4 8
## 5: B grp5 19