library(tidyverse)
## Warning: package 'tidyverse' was built under R version 3.4.3
## -- Attaching packages ---------------------------------------------------------------- tidyverse 1.2.1 --
## v ggplot2 2.2.1 v purrr 0.2.4
## v tibble 1.4.2 v dplyr 0.7.4
## v tidyr 0.7.2 v stringr 1.3.0
## v readr 1.1.1 v forcats 0.2.0
## Warning: package 'ggplot2' was built under R version 3.4.4
## Warning: package 'tibble' was built under R version 3.4.3
## Warning: package 'tidyr' was built under R version 3.4.3
## Warning: package 'purrr' was built under R version 3.4.3
## Warning: package 'dplyr' was built under R version 3.4.3
## Warning: package 'stringr' was built under R version 3.4.4
## -- Conflicts ------------------------------------------------------------------- tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
Here is the dataframe with weird group inserts!
# create a dataframe for reprex: "#GROUP n" marker rows are interleaved with
# the data rows, and col2/col3 are NA on every marker row
df <- data.frame(
  col1 = c("#GROUP 1", "1", "4", "#GROUP 2", "7", "10", "#GROUP 3", "13", "16"),
  col2 = c(NA, 2, 5, NA, 8, 11, NA, 14, 17),
  col3 = c(NA, 3, 6, NA, 9, 12, NA, 15, 18),
  # keep col1 as character on every R version (R < 4.0 would make it a factor)
  stringsAsFactors = FALSE
)
df
We use `grep()` to find the rows that hold the `#GROUP` markers, and then combine them into a dataframe that tells us which marker rows sit directly above and below the data rows of each group. In this case, group 1 is rows 2 and 3 (because `#GROUP 1` is at row 1, and `#GROUP 2` is at row 4).
# row positions in df whose col1 entry contains the `#GROUP` marker
is_marker_row <- grepl("#GROUP", df[["col1"]])
group_range <- which(is_marker_row)
# show the marker row positions
group_range
## [1] 1 4 7
This lets us pair each group_number with the row on which that group's `#GROUP` marker sits.
# one sequential id per `#GROUP` marker row; seq_along() gives an empty vector
# when no marker was found, whereas 1:length() would give c(1, 0) and make
# the data.frame() call below fail
group_number <- seq_along(group_range)
# pair each group id with the row on which its marker sits
group_df <- data.frame(group_number, group_range)
group_df
This function prints the first and last rows of actual data for a group, given the marker rows directly above and below it.
# Print the first and last data rows that lie between two marker rows.
#
# top_group:    row index into df_name of the marker above the data rows
# bottom_group: row index into df_name of the marker below the data rows
# df_name:      lookup table whose second column holds the marker row numbers
#
# The data starts one row after the top marker and ends one row before the
# bottom marker. Called for its printed output.
find_range_fun <- function(top_group, bottom_group, df_name) {
  top_msg <- paste0("The top number is ", df_name[top_group, 2] + 1)
  print(top_msg)
  bottom_msg <- paste0("The bottom number is ", df_name[bottom_group, 2] - 1)
  print(bottom_msg)
}
test the function for group 1
# check group 1: its data rows sit just below the #GROUP 1 marker and just
# above the #GROUP 2 marker
find_range_fun(top_group = 1, bottom_group = 2, df_name = group_df)
## [1] "The top number is 2"
## [1] "The bottom number is 3"
and for group 2.
# check group 2: its data rows sit just below the #GROUP 2 marker and just
# above the #GROUP 3 marker
find_range_fun(top_group = 2, bottom_group = 3, df_name = group_df)
## [1] "The top number is 5"
## [1] "The bottom number is 6"
This function grabs the rows of one group and labels them with the group number.
# Extract a run of rows and tag every extracted row with its group number.
#
# row_top, row_bottom: first and last row (inclusive) to take from df_name
# grp_num:             value written into the new `group_var` column
# df_name:             data frame to subset
# rep_length:          how many times to repeat grp_num; when NULL (the
#                      default) it is the number of extracted rows, so the
#                      caller no longer has to count them by hand
#
# Returns the selected rows with an extra `group_var` column bound on.
find_fun <- function(row_top, row_bottom, grp_num, df_name,
                     rep_length = NULL) {
  # drop = FALSE keeps a one-column data frame from collapsing to a vector,
  # which would otherwise make cbind() return a matrix
  group_rows <- df_name[row_top:row_bottom, , drop = FALSE]
  if (is.null(rep_length)) {
    rep_length <- nrow(group_rows)
  }
  cbind(group_rows, group_var = rep(grp_num, rep_length))
}
This works for group 1 and group 2.
# pull out the data rows of group 1 (rows 2 to 3 of df) and tag them
group_1_subset <- find_fun(2, 3, grp_num = 1, df_name = df, rep_length = 2)
group_1_subset
# pull out the data rows of group 2 (rows 5 to 6 of df) and tag them
group_2_subset <- find_fun(5, 6, grp_num = 2, df_name = df, rep_length = 2)
group_2_subset
# print the raw df, then the stacked subsets, to compare the two
df
rbind(group_1_subset, group_2_subset)