2024 Data Prep Challenge W7

Loading In Packages

library(tidyverse)
── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
✔ dplyr     1.2.0     ✔ readr     2.2.0
✔ forcats   1.0.1     ✔ stringr   1.6.0
✔ ggplot2   4.0.2     ✔ tibble    3.3.1
✔ lubridate 1.9.5     ✔ tidyr     1.3.2
✔ purrr     1.2.1     
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag()    masks stats::lag()
ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(readxl)

Reading in the Input Data - List of couples and Relationship start date

Valentine_s_Input<- read_excel("Valentine's Preppin' Data.xlsx")
Valentine_s_Input
# A tibble: 10 × 2
   Couple        `Relationship Start`
   <chr>         <chr>               
 1 The Loves     January 15, 2021    
 2 The Roses     October 8, 1964     
 3 The Harts     May 28, 2018        
 4 The Darlings  December 3, 2017    
 5 The Doves     August 21, 1994     
 6 The Archers   February 12, 2020   
 7 The Potters   November 4, 2015    
 8 The Bakers    April 17, 1989      
 9 The Gardeners June 9, 1974        
10 The Lovelaces September 30, 2009  
Gifts_LookUp <- read_excel("Valentine's Preppin' Data.xlsx", sheet = "Gifts")
Gifts_LookUp
# A tibble: 20 × 2
   Year  Gift         
   <chr> <chr>        
 1 1st   Paper        
 2 2nd   Cotton       
 3 3rd   Leather      
 4 4th   Fruit/Flowers
 5 5th   Wood         
 6 6th   Iron         
 7 7th   Copper/Wool  
 8 8th   Bronze       
 9 9th   Pottery      
10 10th  Aluminium/Tin
11 15th  Crystal      
12 20th  China        
13 25th  Silver       
14 30th  Pearl        
15 35th  Coral        
16 40th  Ruby         
17 45th  Sapphire     
18 50th  Gold         
19 55th  Emerald      
20 60th  Diamond      

Step #1 - Convert Relationship Start Field into a Date Field

Valentine_Input <- Valentine_s_Input|> 
  dplyr::mutate(
    `Relationship Start` = as.Date(`Relationship Start`, format = "%B %d,%Y" )
  )
# Parameters within as.date([field/ string you want to convert],[The formatof the field/ string] )

Step #2 - Creating a Today’s Date Field

Valentine_Input <- Valentine_Input |> 
    dplyr::mutate(
      Todays_Date = as.Date("14/02/2024", format = "%d/%m/%Y")
    )

Step #3 - Raw Years together Count

#Finding the Raw Number of years between start and end date. 
Valentine_Input <- Valentine_Input |> 
  dplyr::mutate(
  Raw_NumberOfYrs = year( `Todays_Date`) - year(`Relationship Start`) 
  )

Step #4 - Started Before V day

Since we are trying to get how many V Days a couple has spent together we have to factor in the specific day the couples started dating.

##Adding 1 to Row difference if they started Dating before the 14/02 in their start year
#Valentine_Input |> 
 # dplyr::mutate(
  #  Actual_NumberYrs =
    #  if(month(`Relationship Start`) < 2) { Raw_NumberOfYrs + 1}   
    #  else if( month(`Relationship Start`) == 2 & day(`Relationship Start`) <= 14){ Raw_NumberOfYrs + 1}
     # else {Raw_NumberOfYrs}
  #)
#In R, a standard if statement is not "vectorized"—it can only handle one true/false value at a time. Since you are inside mutate(), you are dealing with a whole column of dates. To handle this, you should use dplyr::case_when() or ifelse().
 Valentine_Input <-  Valentine_Input |>
  dplyr::mutate(
    Actual_NumberYrs = dplyr::case_when(
      month(`Relationship Start`) < 2 ~ Raw_NumberOfYrs + 1,
      month(`Relationship Start`) == 2 & day(`Relationship Start`) <= 14 ~ Raw_NumberOfYrs + 1,
      .default = Raw_NumberOfYrs
    )
  )

Step #5 - Appending Text onto Field

Adding ‘th’ to the Actual_NumberYrs so that we can join to the look up table.

 # Valentine_Input |> 
 # dplyr::mutate(
 #   Actual_NumberYrs = 
 #    as.character(Actual_NumberYrs) + " "+"th"
 # )
  
# In many softwares // other coding languages the + can be used to concatnate onto a string if both are are the same data type, However in R, '+' is strictly for Maths, so we have to use a function to concatenate strings together. 
 Valentine_Input <- Valentine_Input |> 
  dplyr::mutate(
    Actual_NumberYrs = 
     str_c(Actual_NumberYrs, "th")
  ) |> 
   dplyr::select(
     Couple, `Relationship Start`, Todays_Date , Actual_NumberYrs
   )

Step 6 - Joining The Lookup Table

Now we can join the two Tables together!

Week7Output <- 
  Valentine_Input |> 
  dplyr::inner_join(Gifts_LookUp, by = c("Actual_NumberYrs" = "Year")) |> 
  #Selecting the output Fields 
  dplyr::select(
    Couple, Actual_NumberYrs , Gift
  )

🥳 Week 7 Output Complete 🥳