Introduction

Useful code for data management of group enumeration


Create some data

  l1<-2         
  l2<-2         
  l3<-2         
  l4<-2         
  rep<-2      
  N<-l1*l2*l3*l4*rep
 
  level1. <- (rep (1:(l1), each=rep*l2*l3*l4 )) 
  level2. <- (rep (1:(l1*l2), each=rep*l3*l4 ))
  level3. <- (rep (1:(l1*l2*l3), each=rep*l4 )) 
  level4. <- (rep (1:(l1*l2*l3*l4), each=rep ))
  reps<-rep(1:rep, times=N)
  d    <- data.frame(level1.,level2.,level3.,level4. )
  str(d)
'data.frame':   32 obs. of  4 variables:
 $ level1.: int  1 1 1 1 1 1 1 1 1 1 ...
 $ level2.: int  1 1 1 1 1 1 1 1 2 2 ...
 $ level3.: int  1 1 1 1 2 2 2 2 3 3 ...
 $ level4.: int  1 1 2 2 3 3 4 4 5 5 ...
  d
   level1. level2. level3. level4.
1        1       1       1       1
2        1       1       1       1
3        1       1       1       2
4        1       1       1       2
5        1       1       2       3
6        1       1       2       3
7        1       1       2       4
8        1       1       2       4
9        1       2       3       5
10       1       2       3       5
11       1       2       3       6
12       1       2       3       6
13       1       2       4       7
14       1       2       4       7
15       1       2       4       8
16       1       2       4       8
17       2       3       5       9
18       2       3       5       9
19       2       3       5      10
20       2       3       5      10
21       2       3       6      11
22       2       3       6      11
23       2       3       6      12
24       2       3       6      12
25       2       4       7      13
26       2       4       7      13
27       2       4       7      14
28       2       4       7      14
29       2       4       8      15
30       2       4       8      15
31       2       4       8      16
32       2       4       8      16

Running count within level1. variable

  ave(d$level1. , d$level1. ,  FUN = seq_along)  
 [1]  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16
  as.vector(unlist(tapply(X=d$level1., INDEX=list( d$level1.), FUN=seq_along)))  
 [1]  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16

Likewise running count within level2. variable

  ave(d$level2. , d$level2. ,  FUN = seq_along)  
 [1] 1 2 3 4 5 6 7 8 1 2 3 4 5 6 7 8 1 2 3 4 5 6 7 8 1 2 3 4 5 6 7 8
  as.vector(unlist(tapply(X=d$level2., INDEX=list( d$level2.), FUN=seq_along)))  
 [1] 1 2 3 4 5 6 7 8 1 2 3 4 5 6 7 8 1 2 3 4 5 6 7 8 1 2 3 4 5 6 7 8

Use MethComp::make.repl code to count replicates

  xx <- as.integer(factor(interaction(d$level1. ,d$level2., d$level3. )))
  as.vector(1:nrow(d) - tapply(1:nrow(d),  xx, min)[xx] + 1)
 [1] 1 2 3 4 1 2 3 4 1 2 3 4 1 2 3 4 1 2 3 4 1 2 3 4 1 2 3 4 1 2 3 4

Running count of level3. within level2., as if crossed

    test<-aggregate(level3. ~ level2., data=d,  FUN = 
           function(x){rep(1:length(unique(x)), 
                           c(rle(x)$lengths )
           )}) 

   as.vector((unlist(t(test$level3.))) )
 [1] 1 1 1 1 2 2 2 2 1 1 1 1 2 2 2 2 1 1 1 1 2 2 2 2 1 1 1 1 2 2 2 2

Function to identify odd and even

  is.even <- function(x){ x %% 2 == 0 }   
  ifelse(is.even(d [,"level4."] )==TRUE,2,1)
 [1] 1 1 2 2 1 1 2 2 1 1 2 2 1 1 2 2 1 1 2 2 1 1 2 2 1 1 2 2 1 1 2 2

More

   interaction(d$level1. , d$level2. )
 [1] 1.1 1.1 1.1 1.1 1.1 1.1 1.1 1.1 1.2 1.2 1.2 1.2 1.2 1.2 1.2 1.2 2.3 2.3 2.3 2.3 2.3 2.3 2.3 2.3 2.4 2.4 2.4 2.4 2.4
[30] 2.4 2.4 2.4
Levels: 1.1 2.1 1.2 2.2 1.3 2.3 1.4 2.4
   factor(factor(d$level1.):factor(d$level2. ))   
 [1] 1:1 1:1 1:1 1:1 1:1 1:1 1:1 1:1 1:2 1:2 1:2 1:2 1:2 1:2 1:2 1:2 2:3 2:3 2:3 2:3 2:3 2:3 2:3 2:3 2:4 2:4 2:4 2:4 2:4
[30] 2:4 2:4 2:4
Levels: 1:1 1:2 2:3 2:4
   rep(seq_along( rle(d$level3.)$values ), times = rle(d$level3.)$lengths )
 [1] 1 1 1 1 2 2 2 2 3 3 3 3 4 4 4 4 5 5 5 5 6 6 6 6 7 7 7 7 8 8 8 8
   cumsum(c(TRUE, d$level3.[-1]!=d$level3.[-length(d$level3.)]))
 [1] 1 1 1 1 2 2 2 2 3 3 3 3 4 4 4 4 5 5 5 5 6 6 6 6 7 7 7 7 8 8 8 8

A long-winded approach

  fooz<-list()  # this is required 
  
  foo<-d
  foo$grp<-paste(foo$level1., foo$level2., sep=".") #var to identify groups
  J<-unique(foo$grp)
  k<-length(J)                                      #how many groups?
  
  #loop 
  for (i in 1:k) {{
    
    foo1<-foo[grep(J[i], foo$grp), ]                #select a group
    foo1$grps<-i                                    #count indicator for group
    fooz[[i]]<-foo1                                 #collect group 
    
  }
    tmp<-do.call(rbind, fooz)                       #bind the groups
    tmp$grp<-NULL                                   #clean up
    foo99<-data.frame(tmp)                          #output
  }
  
  
  foo99 
   level1. level2. level3. level4. grps
1        1       1       1       1    1
2        1       1       1       1    1
3        1       1       1       2    1
4        1       1       1       2    1
5        1       1       2       3    1
6        1       1       2       3    1
7        1       1       2       4    1
8        1       1       2       4    1
9        1       2       3       5    2
10       1       2       3       5    2
11       1       2       3       6    2
12       1       2       3       6    2
13       1       2       4       7    2
14       1       2       4       7    2
15       1       2       4       8    2
16       1       2       4       8    2
17       2       3       5       9    3
18       2       3       5       9    3
19       2       3       5      10    3
20       2       3       5      10    3
21       2       3       6      11    3
22       2       3       6      11    3
23       2       3       6      12    3
24       2       3       6      12    3
25       2       4       7      13    4
26       2       4       7      13    4
27       2       4       7      14    4
28       2       4       7      14    4
29       2       4       8      15    4
30       2       4       8      15    4
31       2       4       8      16    4
32       2       4       8      16    4

Couple more

  x2 <- by(d$level1., d$level2., sum)  #sum up d by r
  do.call(rbind,as.list(x2))
  [,1]
1    8
2    8
3   16
4   16
  x<-table(d$level3.)
  as.vector(unlist(x))
[1] 4 4 4 4 4 4 4 4
  names(x)
[1] "1" "2" "3" "4" "5" "6" "7" "8"

Computing Environment

sessionInfo()
R version 3.2.2 (2015-08-14)
Platform: x86_64-w64-mingw32/x64 (64-bit)
Running under: Windows 8 x64 (build 9200)

locale:
[1] LC_COLLATE=English_United Kingdom.1252  LC_CTYPE=English_United Kingdom.1252   
[3] LC_MONETARY=English_United Kingdom.1252 LC_NUMERIC=C                           
[5] LC_TIME=English_United Kingdom.1252    

attached base packages:
[1] grid      stats     graphics  grDevices utils     datasets  methods   base     

other attached packages:
 [1] MASS_7.3-44     xlsx_0.5.7      xlsxjars_0.6.1  rJava_0.9-7     gdata_2.17.0    tables_0.7.79   car_2.1-0      
 [8] foreign_0.8-66  xtable_1.7-4    rms_4.4-0       SparseM_1.7     Hmisc_3.17-0    ggplot2_1.0.1   Formula_1.2-1  
[15] survival_2.38-3 lattice_0.20-33 knitr_1.11     

loaded via a namespace (and not attached):
 [1] gtools_3.5.0        zoo_1.7-12          reshape2_1.4.1      splines_3.2.2       colorspace_1.2-6   
 [6] htmltools_0.2.6     yaml_2.1.13         mgcv_1.8-7          nloptr_1.0.4        RColorBrewer_1.1-2 
[11] multcomp_1.4-1      plyr_1.8.3          stringr_1.0.0       MatrixModels_0.4-1  munsell_0.4.2      
[16] gtable_0.1.2        mvtnorm_1.0-3       codetools_0.2-14    evaluate_0.8        latticeExtra_0.6-26
[21] quantreg_5.19       pbkrtest_0.4-2      parallel_3.2.2      TH.data_1.0-6       proto_0.3-10       
[26] Rcpp_0.12.1         acepack_1.3-3.3     scales_0.3.0        formatR_1.2.1       lme4_1.1-10        
[31] gridExtra_2.0.0     digest_0.6.8        stringi_0.5-5       polspline_1.1.12    tools_3.2.2        
[36] sandwich_2.3-4      magrittr_1.5        cluster_2.0.3       Matrix_1.2-2        minqa_1.2.4        
[41] rmarkdown_0.8       rpart_4.1-10        nnet_7.3-10         nlme_3.1-122       

This took 5.28 seconds to execute.