# Load libraries

library(tidyverse)

First we’ll create some random data

# Create df1
df1 <- data.frame(id = c(1, 1, 1, 1, 2, 2, 2, 3, 3, 3, 3, 3, 4, 4, 4),
                  date = c(1, 2, 4, 5, 2, 3, 6, 2, 4, 6, 7, 8, 2, 6, 7),
                  water = round(rnorm(n = 15, mean = .5, sd = .1), 2))

# Create Df2
df2 <- data.frame(id = c(1, 1, 1, 1, 2, 2, 2, 3, 3, 3, 3, 3, 4, 4, 4),
                  date = c(1, 4, 5, 8, 1, 2, 7, 1, 2, 3, 5, 8, 2, 3, 7),
                  compliance = round(rnorm(n = 15, mean = .5, sd = .1), 2))

Here are the results:

knitr::kable(df1)
id date water
1 1 0.54
1 2 0.48
1 4 0.54
1 5 0.42
2 2 0.47
2 3 0.33
2 6 0.38
3 2 0.51
3 4 0.52
3 6 0.59
3 7 0.61
3 8 0.50
4 2 0.48
4 6 0.58
4 7 0.48
knitr::kable(df2)
id date compliance
1 1 0.61
1 4 0.37
1 5 0.37
1 8 0.35
2 1 0.43
2 2 0.69
2 7 0.47
3 1 0.54
3 2 0.57
3 3 0.58
3 5 0.62
3 8 0.59
4 2 0.39
4 3 0.42
4 7 0.61

Our goal is to add matching values of compliance in df2 to df1 using a ‘fuzzy’ matching rule.

First, we’ll add some dummy columns to df1 and df2.

# First, add compliance = NA column to df 1

df1 <- df1 %>%
  mutate(
    compliance = NA,
    compliance_matched = 0,
    compliance_datediff = NA
  )

# Now add matched = 0 column to df2 

df2 <- df2 %>%
  mutate(
    compliance_matched = 0
  )

Here’s what we’ll do

Loop 1 – Look for exact matches on id and date

Run loop 1

for(i in 1:nrow(df1)) {
  
  # Get id of current row
  id.i <- df1$id[i]
  
  # Get date of current row
  date.i <- df1$date[i]
  
  # Look for exact match in df2 (and where there hasn't been a match)
  df2$match <- with(df2, id == id.i & date == date.i & compliance_matched == 0)
  
  # If there are any matches...
  if(any(df2$match)) {
    
    # Stop if there are more than 1 matches (should never happen)
    if(sum(any(df2$match)) > 1) {
      
      stop("More than 1 identical match!")}
    
    # Get the row number (match_i) of the match in df2
    match_i <- which(df2$match)
    
    # Get compliance
    compliance.i <- df2$compliance[match_i]
    
    # Add compliance and date_diff data to df1
    df1$compliance[i] <- compliance.i
    df1$compliance_datediff[i] <- 0 # 0 because there is a perfect match

    # Indicate that both have been matched
    
    df1$compliance_matched[i] <- 1
    df2$compliance_matched[match_i] <- 1

  }
}

# remove df2$match from df2
df2 <- df2 %>% select(-match)

Here is the result after running loop 1

knitr::kable(df1)
id date water compliance compliance_matched compliance_datediff
1 1 0.54 0.61 1 0
1 2 0.48 NA 0 NA
1 4 0.54 0.37 1 0
1 5 0.42 0.37 1 0
2 2 0.47 0.69 1 0
2 3 0.33 NA 0 NA
2 6 0.38 NA 0 NA
3 2 0.51 0.57 1 0
3 4 0.52 NA 0 NA
3 6 0.59 NA 0 NA
3 7 0.61 NA 0 NA
3 8 0.50 0.59 1 0
4 2 0.48 0.39 1 0
4 6 0.58 NA 0 NA
4 7 0.48 0.61 1 0

Loop 2 – Look for exact matches on id and ‘close’ matches on date

# Loop over rows in df1
for(i in 1:nrow(df1)) {
  
  # Get id of current row
  id.i <- df1$id[i]
  
  # Get date of current row
  date.i <- df1$date[i]
  
  # Only do the following if the row has not already been matched
  if(df1$compliance_matched[i] == 0) {
  
  # Add difference between date.i and date in df2
  df2 <- df2 %>% mutate(date_dev = date - date.i)
  
  # Look for fuzzy match in df 2
  #  id is the same, but abs(date_dev) must be less than 7
  df2$match <- with(df2, id == id.i & abs(date_dev) <= 7 & compliance_matched == 0)
  
  # If there are any matches...
  if(any(df2$match)) {
    
    # Which row has a match AND has the lowest date deviation of all matches
    match_i <- which(with(df2, match & abs(date_dev) == min(abs(date_dev[match]))))
    
    # In case there are 2 matches, take the first
    match_i <- match_i[1]
  
    # Get compliance
    compliance.i <- df2$compliance[match_i]
    
    # Get date difference for match
    datediff.i <- abs(df2$date_dev[match_i])
    
    # Add compliance and date difference data to df1
    df1$compliance[i] <- compliance.i
    df1$compliance_datediff[i] <- abs(df2$date_dev[match_i])
    
    # Indicate that both have been matched
    df1$compliance_matched[i] <- 1
    df2$compliance_matched[match_i] <- 1
    
  }
  }
}

# remove compliance matched from df1
df1 <- select(df1, -compliance_matched)

Here is the final result!

knitr::kable(df1)
id date water compliance compliance_datediff
1 1 0.54 0.61 0
1 2 0.48 0.35 6
1 4 0.54 0.37 0
1 5 0.42 0.37 0
2 2 0.47 0.69 0
2 3 0.33 0.43 2
2 6 0.38 0.47 1
3 2 0.51 0.57 0
3 4 0.52 0.58 1
3 6 0.59 0.62 1
3 7 0.61 0.54 6
3 8 0.50 0.59 0
4 2 0.48 0.39 0
4 6 0.58 0.42 3
4 7 0.48 0.61 0