Find the #Group

library(tidyverse)

## Warning: package 'tidyverse' was built under R version 3.4.3

## -- Attaching packages ---------------------------------------------------------------- tidyverse 1.2.1 --

## v ggplot2 2.2.1     v purrr   0.2.4
## v tibble  1.4.2     v dplyr   0.7.4
## v tidyr   0.7.2     v stringr 1.3.0
## v readr   1.1.1     v forcats 0.2.0

## Warning: package 'ggplot2' was built under R version 3.4.4

## Warning: package 'tibble' was built under R version 3.4.3

## Warning: package 'tidyr' was built under R version 3.4.3

## Warning: package 'purrr' was built under R version 3.4.3

## Warning: package 'dplyr' was built under R version 3.4.3

## Warning: package 'stringr' was built under R version 3.4.4

## -- Conflicts ------------------------------------------------------------------- tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()

Create the dataframe

Here is the dataframe with weird group inserts!

# create a dataframe for reprex
# Rows whose col1 contains "#GROUP" are marker rows; their col2/col3 are NA.
# stringsAsFactors = FALSE keeps col1 as character on every R version (before
# R 4.0 the default would silently turn it into a factor).
df <- data.frame(col1 = c("#GROUP 1", 1, 4, "#GROUP 2", 7, 10, "#GROUP 3", 13, 16),
                 col2 = c(NA, 2, 5, NA, 8, 11, NA, 14, 17),
                 col3 = c(NA, 3, 6, NA, 9, 12, NA, 15, 18),
                 stringsAsFactors = FALSE)

df

find where the groups are

We use grep to find the rows that contain a group marker, and then combine the results into a dataframe that tells us which marker rows sit above and below each group. In this case, group 1 is rows 2 and 3 (because #GROUP 1 is at row 1, and #GROUP 2 is at row 4).

# integer row indices of the col1 entries that contain "#GROUP"
group_range <- which(grepl("#GROUP", df[["col1"]]))

# show which rows hold a group marker
group_range

## [1] 1 4 7

Combine into a dataframe

This lets us see where the matches of group_number are with the range of rows.

# vector of group numbers, one per entry of group_range
# seq_along() yields an empty vector when group_range is empty, whereas
# 1:length() would produce c(1, 0) and break the data.frame() call below
group_number <- seq_along(group_range)

# combine the group_number with group_range
group_df <- data.frame(group_number, group_range)
group_df

Function for finding top and bottom rows for each of the groups

This spits out what the rows of actual data are for each of the groups.

# Print the first and last data row that lie between two marker rows.
#
# top_group:    row of df_name holding the marker above the data
# bottom_group: row of df_name holding the marker below the data
# df_name:      data frame whose second column stores the marker row numbers
#
# The last print() call's value (the "bottom" message) is what the function
# returns, invisibly.
find_range_fun <- function(top_group, bottom_group, df_name) {
    # data starts one row below the upper marker
    first_data_row <- df_name[top_group, 2] + 1
    print(paste0("The top number is ", first_data_row))

    # data ends one row above the lower marker
    last_data_row <- df_name[bottom_group, 2] - 1
    print(paste0("The bottom number is ", last_data_row))
}

Test the function

test the function for group 1

# group 1: rows strictly between the #GROUP 1 and #GROUP 2 markers
find_range_fun(top_group = 1, bottom_group = 2, df_name = group_df)

## [1] "The top number is 2"
## [1] "The bottom number is 3"

and for group 2.

# group 2: rows strictly between the #GROUP 2 and #GROUP 3 markers
find_range_fun(top_group = 2, bottom_group = 3, df_name = group_df)

## [1] "The top number is 5"
## [1] "The bottom number is 6"

Function for subsetting original dataframe

This function grabs the subset of rows for a group and tags it with the group number.

# Slice rows row_top:row_bottom out of a data frame and tag them with a group.
#
# row_top, row_bottom: first and last row index of the slice (inclusive)
# grp_num:    value written into the new `group_var` column
# df_name:    data frame to slice
# rep_length: how many times grp_num is repeated; defaults to the number of
#             rows in the slice so callers no longer have to count them by hand
#
# Returns the sliced rows with an extra `group_var` column appended.
find_fun <- function(row_top, row_bottom, grp_num, df_name,
                     rep_length = length(row_top:row_bottom)) {
    # drop = FALSE keeps a data frame even when df_name has a single column
    group_rows <- df_name[row_top:row_bottom, , drop = FALSE]
    cbind(group_rows, group_var = rep(grp_num, rep_length))
}

Testing the function

This works for group 1 and group 2.

# group 1: rows 2-3 of df, tagged with group number 1
group_1_subset <- find_fun(
    df_name = df,
    grp_num = 1,
    row_top = 2,
    row_bottom = 3,
    rep_length = 2
)
group_1_subset

# group 2: rows 5-6 of df, tagged with group number 2
group_2_subset <- find_fun(
    df_name = df,
    grp_num = 2,
    row_top = 5,
    row_bottom = 6,
    rep_length = 2
)
group_2_subset

Look at raw DF

# print the raw df again so the group subsets above can be checked against it
df

Compare that with the recombined subsets, where each row now carries its group number.

# stack the two tagged subsets back into a single data frame
do.call(rbind, list(group_1_subset, group_2_subset))