Load data

# Product-level table (processed Sephora product data)
crdf <- read.csv(
  "/Users/ra/Library/CloudStorage/Box-Box/2024-Summer/Misleading Claims/Data/Sephora product data/02_processed_data/anti_aging_moisture_3.3.csv"
)

# Brand-level table (processed brand equity survey constructs)
brand <- read.csv(
  "/Users/ra/Library/CloudStorage/Box-Box/2024-Summer/Misleading Claims/Data/Brand Equity Survey/02 - processed data/brand_equity_construct.csv"
)

# Left join on brand name: every row of crdf is kept, and rows whose
# Brand.Name has no counterpart in brand$brand get NA in the brand columns.
df <- merge(
  x = crdf,
  y = brand,
  by.x = "Brand.Name",
  by.y = "brand",
  all.x = TRUE
)

Non-bipartite matching with the average brand equity measure

library(nbpMatching)

## Warning: package 'nbpMatching' was built under R version 4.3.3

library(dplyr)
library(tableone)

# Select the ID column plus the covariates used for matching.
# `select` is namespace-qualified on purpose: this script later attaches
# Matching, which loads MASS, and MASS::select masks dplyr::select. With an
# unqualified call, re-running this step in the same session would dispatch
# to MASS::select and fail.
df_match <- df %>%
  dplyr::select(Product.ID, Rating, Reviews, h_price, avg_brand_equity_ta)

# Check data summary
summary(df_match)

##   Product.ID            Rating         Reviews          h_price     
##  Length:41          Min.   :3.924   Min.   :  18.0   Min.   :22.00  
##  Class :character   1st Qu.:4.242   1st Qu.: 205.0   1st Qu.:49.00  
##  Mode  :character   Median :4.396   Median : 493.0   Median :60.00  
##                     Mean   :4.400   Mean   : 912.1   Mean   :59.12  
##                     3rd Qu.:4.566   3rd Qu.: 809.0   3rd Qu.:72.00  
##                     Max.   :4.765   Max.   :7357.0   Max.   :89.00  
##  avg_brand_equity_ta
##  Min.   :17.10      
##  1st Qu.:19.73      
##  Median :20.94      
##  Mean   :21.93      
##  3rd Qu.:23.23      
##  Max.   :34.15

# Pairwise distances between products; column 1 (Product.ID) is the ID column
df.dist <- gendistance(df_match, idcol = 1)

# Wrap the distances in a distancematrix object for the matcher
df.mdm <- distancematrix(df.dist)

# Pair the products (non-bipartite matching)
df.match <- nonbimatch(df.mdm)

# Quality-of-match measures; stored here but not printed
df.qom <- qom(df.dist$cov, df.match$matches)

# Print the matched pairs with an A/B group for each member (seed fixed at 68)
assign.grp(df.match$matches, seed = 68)

##    Group1.ID Group1.Row Group2.ID Group2.Row  Distance treatment.grp
## 1    P455237          1   P503690         34 0.8152455             B
## 2    P507952          2   P440312         17 1.1519803             A
## 3    P433887          3   P432829         39 0.8606697             A
## 4    P503642          4   P454090         30 1.5663707             A
## 5    P509259          5   P467750          6 0.4955818             A
## 6    P467750          6   P509259          5 0.4955818             B
## 7    P472454          7   P433971         31 1.7823947             B
## 8    P467749          8   P509464         29 0.2906119             B
## 9    P511726          9   P501282         33 0.8437822             B
## 10   P500790         10   P503909         37 3.0273611             B
## 11   P500718         11   P500771         12 1.4565459             A
## 12   P500771         12   P500718         11 1.4565459             B
## 13   P504007         13   P393076         16 0.7639330             B
## 14   P427421         14 phantom42         42 0.0000000             A
## 15   P439926         15   P502656         21 1.2696684             B
## 16   P393076         16   P504007         13 0.7639330             A
## 17   P440312         17   P507952          2 1.1519803             B
## 18   P471037         18   P480447         19 0.2596045             B
## 19   P480447         19   P471037         18 0.2596045             A
## 20   P509828         20   P483699         38 0.8739744             B
## 21   P502656         21   P439926         15 1.2696684             A
## 22   P232906         22   P500138         41 0.7133054             B
## 23   P502197         23   P505739         35 0.6278739             A
## 24   P506691         24   P173652         36 0.8414059             A
## 25   P461948         25   P483664         26 0.9825537             A
## 26   P483664         26   P461948         25 0.9825537             B
## 27   P508186         27   P509690         28 0.5388116             A
## 28   P509690         28   P508186         27 0.5388116             B
## 29   P509464         29   P467749          8 0.2906119             A
## 30   P454090         30   P503642          4 1.5663707             B
## 31   P433971         31   P472454          7 1.7823947             A
## 32   P378852         32   P440307         40 1.4167662             B
## 33   P501282         33   P511726          9 0.8437822             A
## 34   P503690         34   P455237          1 0.8152455             A
## 35   P505739         35   P502197         23 0.6278739             B
## 36   P173652         36   P506691         24 0.8414059             B
## 37   P503909         37   P500790         10 3.0273611             A
## 38   P483699         38   P509828         20 0.8739744             A
## 39   P432829         39   P433887          3 0.8606697             B
## 40   P440307         40   P378852         32 1.4167662             A
## 41   P500138         41   P232906         22 0.7133054             A
## 42 phantom42         42   P427421         14 0.0000000             B

# Store the group assignment. This repeats the printed call above with the
# same seed (68), so it is expected to yield the same A/B table.
assignment <- assign.grp(df.match$matches,seed = 68)

# Merge assignment dataframe into df, keeping only treatment.grp column.
# Because this is a left join from df, the "phantom" row that appears in the
# assignment table has no Product.ID in df and is dropped.
df <- merge(
  x = df,                              # Target dataframe
  y = assignment[, c("Group1.ID", "treatment.grp")], # Source dataframe with only needed columns
  by.x = "Product.ID",                 # Column in df to match on
  by.y = "Group1.ID",                  # Column in assignment to match on
  all.x = TRUE                         # Keep all rows from df (left join)
)

# Rename the treatment.grp column to treatment_ta
names(df)[names(df) == "treatment.grp"] <- "treatment_ta"

# Recode treatment_ta: 'A' becomes 0, 'B' becomes 1; any other value is
# passed through from the original column.
# NOTE(review): because the innermost fallback is the original column,
# ifelse() may coerce the result to character ("0"/"1") rather than numeric.
# Downstream code compares with == 1 / == 0 and converts to factor, which
# works either way, but confirm the type with str(df$treatment_ta).
df$treatment_ta <- ifelse(df$treatment_ta == "A", 0, 
                         ifelse(df$treatment_ta == "B", 1, df$treatment_ta))

Non-bipartite matching with avg_awareness_pa, avg_loyalty_pa, avg_quality_pa

# Select the ID column plus the covariates used for the second matching.
# `select` is namespace-qualified on purpose: this script later attaches
# Matching, which loads MASS, and MASS::select masks dplyr::select. With an
# unqualified call, re-running this step in the same session would dispatch
# to MASS::select and fail.
df_match_2 <- df %>%
  dplyr::select(
    Product.ID, Rating, Reviews, h_price,
    avg_awareness_pa, avg_loyalty_pa, avg_quality_pa
  )

# Check data summary
summary(df_match_2)

##   Product.ID            Rating         Reviews          h_price     
##  Length:41          Min.   :3.924   Min.   :  18.0   Min.   :22.00  
##  Class :character   1st Qu.:4.242   1st Qu.: 205.0   1st Qu.:49.00  
##  Mode  :character   Median :4.396   Median : 493.0   Median :60.00  
##                     Mean   :4.400   Mean   : 912.1   Mean   :59.12  
##                     3rd Qu.:4.566   3rd Qu.: 809.0   3rd Qu.:72.00  
##                     Max.   :4.765   Max.   :7357.0   Max.   :89.00  
##  avg_awareness_pa avg_loyalty_pa  avg_quality_pa 
##  Min.   : 6.519   Min.   :4.125   Min.   :6.042  
##  1st Qu.: 7.319   1st Qu.:5.300   1st Qu.:6.660  
##  Median : 8.400   Median :5.627   Median :6.915  
##  Mean   : 9.227   Mean   :5.743   Mean   :6.964  
##  3rd Qu.: 9.833   3rd Qu.:6.038   3rd Qu.:7.122  
##  Max.   :18.146   Max.   :7.771   Max.   :8.229

# Pairwise distances between products; column 1 (Product.ID) is the ID column
df.dist_2 <- gendistance(df_match_2, idcol = 1)

# Wrap the distances in a distancematrix object for the matcher
df.mdm_2 <- distancematrix(df.dist_2)

# Pair the products (non-bipartite matching)
df.match_2 <- nonbimatch(df.mdm_2)

# Quality-of-match measures; stored here but not printed
df.qom_2 <- qom(df.dist_2$cov, df.match_2$matches)

# Print the matched pairs with an A/B group for each member (seed fixed at 68)
assign.grp(df.match_2$matches, seed = 68)

##    Group1.ID Group1.Row Group2.ID Group2.Row  Distance treatment.grp
## 1    P173652          1   P506691         34 1.4054847             B
## 2    P232906          2   P439926          9 1.2500112             A
## 3    P378852          3   P472454         18 2.2041778             A
## 4    P393076          4   P433971          8 1.5996962             A
## 5    P427421          5 phantom42         42 0.0000000             A
## 6    P432829          6   P440307         10 0.7226207             B
## 7    P433887          7   P503642         29 1.6256744             B
## 8    P433971          8   P393076          4 1.5996962             B
## 9    P439926          9   P232906          2 1.2500112             B
## 10   P440307         10   P432829          6 0.7226207             A
## 11   P440312         11   P508186         36 1.7172654             A
## 12   P454090         12   P504007         32 2.8147422             A
## 13   P455237         13   P507952         35 1.2574348             B
## 14   P461948         14   P500138         22 1.4652879             A
## 15   P467749         15   P509464         38 1.1388474             B
## 16   P467750         16   P509259         37 0.8206596             A
## 17   P471037         17   P480447         19 0.2879720             B
## 18   P472454         18   P378852          3 2.2041778             B
## 19   P480447         19   P471037         17 0.2879720             A
## 20   P483664         20   P503690         30 1.1052949             B
## 21   P483699         21   P502656         28 3.4860663             A
## 22   P500138         22   P461948         14 1.4652879             B
## 23   P500718         23   P500771         24 1.4739340             A
## 24   P500771         24   P500718         23 1.4739340             B
## 25   P500790         25   P503909         31 3.0327268             A
## 26   P501282         26   P511726         41 0.9991730             B
## 27   P502197         27   P505739         33 0.6826465             A
## 28   P502656         28   P483699         21 3.4860663             B
## 29   P503642         29   P433887          7 1.6256744             A
## 30   P503690         30   P483664         20 1.1052949             A
## 31   P503909         31   P500790         25 3.0327268             B
## 32   P504007         32   P454090         12 2.8147422             B
## 33   P505739         33   P502197         27 0.6826465             B
## 34   P506691         34   P173652          1 1.4054847             A
## 35   P507952         35   P455237         13 1.2574348             A
## 36   P508186         36   P440312         11 1.7172654             B
## 37   P509259         37   P467750         16 0.8206596             B
## 38   P509464         38   P467749         15 1.1388474             A
## 39   P509690         39   P509828         40 1.7762696             A
## 40   P509828         40   P509690         39 1.7762696             B
## 41   P511726         41   P501282         26 0.9991730             A
## 42 phantom42         42   P427421          5 0.0000000             B

# Store the group assignment. This repeats the printed call above with the
# same seed (68), so it is expected to yield the same A/B table.
assignment_2 <- assign.grp(df.match_2$matches,seed = 68)

# Merge assignment_2 into df, keeping only treatment.grp column.
# Because this is a left join from df, the "phantom" row that appears in the
# assignment table has no Product.ID in df and is dropped.
df <- merge(
  x = df,                              # Target dataframe
  y = assignment_2[, c("Group1.ID", "treatment.grp")], # Source dataframe with only needed columns
  by.x = "Product.ID",                 # Column in df to match on
  by.y = "Group1.ID",                  # Column in assignment_2 to match on
  all.x = TRUE                         # Keep all rows from df (left join)
)

# Rename the treatment.grp column to treatment_pa
names(df)[names(df) == "treatment.grp"] <- "treatment_pa"

# Recode treatment_pa: 'A' becomes 0, 'B' becomes 1; any other value is
# passed through from the original column.
# NOTE(review): because the innermost fallback is the original column,
# ifelse() may coerce the result to character ("0"/"1") rather than numeric.
# Downstream code compares with == 1 / == 0 and converts to factor, which
# works either way, but confirm the type with str(df$treatment_pa).
df$treatment_pa <- ifelse(df$treatment_pa == "A", 0, 
                         ifelse(df$treatment_pa == "B", 1, df$treatment_pa))

Non-bipartite matching with avg_awareness_fs, avg_loyalty_fs, avg_quality_fs

# Select the ID column plus the covariates used for the third matching.
# `select` is namespace-qualified on purpose: this script later attaches
# Matching, which loads MASS, and MASS::select masks dplyr::select. With an
# unqualified call, re-running this step in the same session would dispatch
# to MASS::select and fail.
df_match_3 <- df %>%
  dplyr::select(
    Product.ID, Rating, Reviews, h_price,
    avg_awareness_fs, avg_loyalty_fs, avg_quality_fs
  )

# Check data summary
summary(df_match_3)

##   Product.ID            Rating         Reviews          h_price     
##  Length:41          Min.   :3.924   Min.   :  18.0   Min.   :22.00  
##  Class :character   1st Qu.:4.242   1st Qu.: 205.0   1st Qu.:49.00  
##  Mode  :character   Median :4.396   Median : 493.0   Median :60.00  
##                     Mean   :4.400   Mean   : 912.1   Mean   :59.12  
##                     3rd Qu.:4.566   3rd Qu.: 809.0   3rd Qu.:72.00  
##                     Max.   :4.765   Max.   :7357.0   Max.   :89.00  
##  avg_awareness_fs   avg_loyalty_fs     avg_quality_fs    
##  Min.   :-0.65362   Min.   :-0.49057   Min.   :-0.42576  
##  1st Qu.:-0.46302   1st Qu.:-0.13224   1st Qu.:-0.11423  
##  Median :-0.16516   Median :-0.03502   Median :-0.01529  
##  Mean   : 0.04832   Mean   : 0.01304   Mean   : 0.02737  
##  3rd Qu.: 0.23782   3rd Qu.: 0.10952   3rd Qu.: 0.10370  
##  Max.   : 2.37043   Max.   : 0.73928   Max.   : 0.69084

# Pairwise distances between products; column 1 (Product.ID) is the ID column
df.dist_3 <- gendistance(df_match_3, idcol = 1)

# Wrap the distances in a distancematrix object for the matcher
df.mdm_3 <- distancematrix(df.dist_3)

# Pair the products (non-bipartite matching)
df.match_3 <- nonbimatch(df.mdm_3)

# Quality-of-match measures; stored here but not printed
df.qom_3 <- qom(df.dist_3$cov, df.match_3$matches)

# Print the matched pairs with an A/B group for each member (seed fixed at 68)
assign.grp(df.match_3$matches, seed = 68)

##    Group1.ID Group1.Row Group2.ID Group2.Row  Distance treatment.grp
## 1    P173652          1   P506691         34 1.4929742             B
## 2    P232906          2   P439926          9 1.2528062             A
## 3    P378852          3   P472454         18 2.1585636             A
## 4    P393076          4   P433971          8 1.5110983             A
## 5    P427421          5 phantom42         42 0.0000000             A
## 6    P432829          6   P440307         10 0.7188218             B
## 7    P433887          7   P503642         29 1.6262738             B
## 8    P433971          8   P393076          4 1.5110983             B
## 9    P439926          9   P232906          2 1.2528062             B
## 10   P440307         10   P432829          6 0.7188218             A
## 11   P440312         11   P508186         36 1.6607578             A
## 12   P454090         12   P504007         32 2.9046912             A
## 13   P455237         13   P507952         35 1.2506208             B
## 14   P461948         14   P500138         22 1.5478359             A
## 15   P467749         15   P509464         38 1.3006587             B
## 16   P467750         16   P509259         37 0.7574145             A
## 17   P471037         17   P480447         19 0.2915499             B
## 18   P472454         18   P378852          3 2.1585636             B
## 19   P480447         19   P471037         17 0.2915499             A
## 20   P483664         20   P503690         30 1.0801472             B
## 21   P483699         21   P502656         28 3.4373254             A
## 22   P500138         22   P461948         14 1.5478359             B
## 23   P500718         23   P500771         24 1.4809296             A
## 24   P500771         24   P500718         23 1.4809296             B
## 25   P500790         25   P503909         31 3.0467745             A
## 26   P501282         26   P511726         41 1.1483426             B
## 27   P502197         27   P505739         33 0.6851291             A
## 28   P502656         28   P483699         21 3.4373254             B
## 29   P503642         29   P433887          7 1.6262738             A
## 30   P503690         30   P483664         20 1.0801472             A
## 31   P503909         31   P500790         25 3.0467745             B
## 32   P504007         32   P454090         12 2.9046912             B
## 33   P505739         33   P502197         27 0.6851291             B
## 34   P506691         34   P173652          1 1.4929742             A
## 35   P507952         35   P455237         13 1.2506208             A
## 36   P508186         36   P440312         11 1.6607578             B
## 37   P509259         37   P467750         16 0.7574145             B
## 38   P509464         38   P467749         15 1.3006587             A
## 39   P509690         39   P509828         40 1.6914943             A
## 40   P509828         40   P509690         39 1.6914943             B
## 41   P511726         41   P501282         26 1.1483426             A
## 42 phantom42         42   P427421          5 0.0000000             B

# Store the group assignment. This repeats the printed call above with the
# same seed (68), so it is expected to yield the same A/B table.
assignment_3 <- assign.grp(df.match_3$matches,seed = 68)

# Merge assignment_3 into df, keeping only treatment.grp column.
# Because this is a left join from df, the "phantom" row that appears in the
# assignment table has no Product.ID in df and is dropped.
df <- merge(
  x = df,                              # Target dataframe
  y = assignment_3[, c("Group1.ID", "treatment.grp")], # Source dataframe with only needed columns
  by.x = "Product.ID",                 # Column in df to match on
  by.y = "Group1.ID",                  # Column in assignment_3 to match on
  all.x = TRUE                         # Keep all rows from df (left join)
)

# Rename the treatment.grp column to treatment_fs
names(df)[names(df) == "treatment.grp"] <- "treatment_fs"

# Recode treatment_fs: 'A' becomes 0, 'B' becomes 1; any other value is
# passed through from the original column.
# NOTE(review): because the innermost fallback is the original column,
# ifelse() may coerce the result to character ("0"/"1") rather than numeric.
# Downstream code compares with == 1 / == 0 and converts to factor, which
# works either way, but confirm the type with str(df$treatment_fs).
df$treatment_fs <- ifelse(df$treatment_fs == "A", 0, 
                         ifelse(df$treatment_fs == "B", 1, df$treatment_fs))

Check if the matching is balanced

https://cran.r-project.org/web/packages/MatchIt/vignettes/assessing-balance.html

# Load libraries
library(tableone)
library(randChecks)

## Warning: package 'randChecks' was built under R version 4.3.3

library(Matching)

## Warning: package 'Matching' was built under R version 4.3.3

## Loading required package: MASS

## 
## Attaching package: 'MASS'

## The following object is masked from 'package:dplyr':
## 
##     select

## ## 
## ##  Matching (Version 4.10-15, Build Date: 2024-10-14)
## ##  See https://www.jsekhon.com for additional documentation.
## ##  Please cite software as:
## ##   Jasjeet S. Sekhon. 2011. ``Multivariate and Propensity Score Matching
## ##   Software with Automated Balance Optimization: The Matching package for R.''
## ##   Journal of Statistical Software, 42(7): 1-52. 
## ##

library(ggplot2)
library(cobalt)

##  cobalt (Version 4.5.5, Build Date: 2024-04-02)

## 
## Attaching package: 'cobalt'

## The following object is masked from 'package:randChecks':
## 
##     lalonde

library(gridExtra)

## Warning: package 'gridExtra' was built under R version 4.3.3

## 
## Attaching package: 'gridExtra'

## The following object is masked from 'package:dplyr':
## 
##     combine

# Columns whose balance is checked across treatment groups. The balance and
# plotting helpers below read this vector from the enclosing environment.
covariates <- c(
  "Rating", "Reviews", "h_price",
  "avg_awareness_pa", "avg_loyalty_pa", "avg_quality_pa",
  "avg_awareness_fs", "avg_loyalty_fs", "avg_quality_fs",
  "avg_brand_equity_ta"
)

# Function to check balance for each treatment separately
#
# Prints four balance diagnostics for every column named in the `covariates`
# vector (looked up in the enclosing environment), comparing rows where the
# treatment equals 1 against rows where it equals 0:
#   1. a tableone summary stratified by treatment, including SMDs
#   2. two-sample t-test p-values
#   3. variance ratios (treatment == 1 over treatment == 0)
#   4. two-sample Kolmogorov-Smirnov statistics and p-values
#
# Args:
#   df:            data frame containing the covariates and the treatment.
#   treatment_var: name (single string) of a treatment column coded 0/1.
#
# Returns (invisibly) a list with
#   smd:   the value returned by printing the tableone object, and
#   tests: a data frame with one row per covariate holding the t-test
#          p-value, variance ratio, KS statistic and KS p-value.
# The printed output is the same as before; the return value is additional.
check_balance <- function(df, treatment_var) {
  # Fail early with a clear message instead of an obscure downstream error.
  stopifnot(
    is.data.frame(df),
    is.character(treatment_var),
    length(treatment_var) == 1L,
    treatment_var %in% names(df)
  )

  # Convert treatment to factor
  df[[treatment_var]] <- as.factor(df[[treatment_var]])

  # Group masks are computed once and reused by every test below.
  is_treated <- df[[treatment_var]] == 1
  is_control <- df[[treatment_var]] == 0

  # Preallocated, named result vectors (one slot per covariate).
  empty <- setNames(rep(NA_real_, length(covariates)), covariates)
  p_values <- empty
  var_ratios <- empty
  ks_stats <- empty
  ks_p_values <- empty

  # Compute Standardized Mean Differences (SMD)
  table1 <- CreateTableOne(
    vars = covariates,
    strata = treatment_var,
    data = df,
    test = FALSE
  )
  cat("\n### Standardized Mean Differences for", treatment_var, "###\n")
  smd_results <- print(table1, smd = TRUE)

  cat("\n### p-values for Mean Differences (t-test for continuous) for", treatment_var, "###\n")
  # The loop variable is not called `var`, so it no longer shadows stats::var.
  for (cov_name in covariates) {
    # Perform a t-test for continuous variables
    t_test <- t.test(df[[cov_name]][is_treated], df[[cov_name]][is_control])
    p_values[[cov_name]] <- t_test$p.value
    cat(cov_name, ": p-value =", round(t_test$p.value, 3), "\n")
  }

  # Compute Variance Ratios
  cat("\n### Variance Ratios for", treatment_var, "###\n")
  for (cov_name in covariates) {
    var_ratio <- var(df[[cov_name]][is_treated], na.rm = TRUE) /
      var(df[[cov_name]][is_control], na.rm = TRUE)
    var_ratios[[cov_name]] <- var_ratio
    cat(cov_name, ": Variance Ratio =", round(var_ratio, 3), "\n")
  }

  # Compute Empirical CDF Differences using Kolmogorov-Smirnov test
  cat("\n### Kolmogorov-Smirnov Test (eCDF) for", treatment_var, "###\n")
  for (cov_name in covariates) {
    ks_test <- ks.test(df[[cov_name]][is_treated], df[[cov_name]][is_control])
    ks_stats[[cov_name]] <- unname(ks_test$statistic)
    ks_p_values[[cov_name]] <- ks_test$p.value
    cat(cov_name, ": KS Statistic =", round(ks_test$statistic, 3), ", p-value =", round(ks_test$p.value, 3), "\n")
  }

  invisible(list(
    smd = smd_results,
    tests = data.frame(
      covariate = covariates,
      t_test_p = unname(p_values),
      variance_ratio = unname(var_ratios),
      ks_statistic = unname(ks_stats),
      ks_p = unname(ks_p_values),
      stringsAsFactors = FALSE
    )
  ))
}

# Function to generate density plots
#
# Prints one density plot per column named in the `covariates` vector (looked
# up in the enclosing environment), with the densities filled by treatment
# group.
#
# Args:
#   df:            data frame containing the covariates and the treatment.
#   treatment_var: name (single string) of the treatment column.
#
# Uses aes() with the `.data` pronoun instead of the deprecated aes_string().
# The fill is wrapped in factor() so a numeric 0/1 treatment column is still
# drawn as separate groups; the legend title is set explicitly so it keeps
# showing the column name.
plot_density <- function(df, treatment_var) {
  for (cov_name in covariates) {
    density_plot <- ggplot(
      df,
      aes(x = .data[[cov_name]], fill = factor(.data[[treatment_var]]))
    ) +
      geom_density(alpha = 0.5) +
      labs(
        title = paste("Density Plot:", cov_name, "by", treatment_var),
        x = cov_name,
        y = "Density",
        fill = treatment_var
      ) +
      theme_minimal()

    print(density_plot)
  }
}

# qq plot function
#
# Prints one QQ plot per column named in the `covariates` vector (looked up in
# the enclosing environment): quantiles of the rows with treatment == 0 on the
# x axis against quantiles of the rows with treatment == 1 on the y axis, with
# a dashed 45-degree reference line.
#
# Args:
#   df:            data frame containing the covariates and the treatment.
#   treatment_var: name (single string) of a treatment column coded 0/1.
plot_qq <- function(df, treatment_var) {
  for (cov_name in covariates) {
    arm <- df[[treatment_var]]
    cov_values <- df[[cov_name]]
    control_vals <- cov_values[arm == 0]
    treated_vals <- cov_values[arm == 1]

    # Use as many quantile points as the smaller group has observations,
    # evaluated on a common probability grid for both groups.
    n_points <- min(length(treated_vals), length(control_vals))
    prob_grid <- seq(0, 1, length.out = n_points)

    qq_points <- data.frame(
      control = quantile(control_vals, probs = prob_grid, na.rm = TRUE),
      treated = quantile(treated_vals, probs = prob_grid, na.rm = TRUE)
    )

    plot_title <- paste("QQ Plot:", cov_name, "by", treatment_var)

    print(
      ggplot(qq_points, aes(x = control, y = treated)) +
        geom_point(color = "blue") +
        geom_abline(slope = 1, intercept = 0, linetype = "dashed", color = "red") +
        labs(
          title = plot_title,
          x = "Control Group Quantiles",
          y = "Treated Group Quantiles"
        ) +
        theme_minimal()
    )
  }
}

# Print the balance diagnostics for the treatment_ta assignment
check_balance(df, "treatment_ta")

## 
## ### Standardized Mean Differences for treatment_ta ###
##                                  Stratified by treatment_ta
##                                   0                 1               SMD   
##   n                                    21               20                
##   Rating (mean (SD))                 4.39 (0.22)      4.41 (0.22)    0.114
##   Reviews (mean (SD))             1027.10 (1628.77) 791.30 (848.92)  0.182
##   h_price (mean (SD))               60.24 (16.30)    57.95 (17.03)   0.137
##   avg_awareness_pa (mean (SD))       9.10 (2.69)      9.36 (3.29)    0.086
##   avg_loyalty_pa (mean (SD))         5.64 (0.68)      5.85 (0.77)    0.283
##   avg_quality_pa (mean (SD))         6.90 (0.44)      7.03 (0.51)    0.275
##   avg_awareness_fs (mean (SD))       0.01 (0.69)      0.09 (0.85)    0.112
##   avg_loyalty_fs (mean (SD))        -0.02 (0.23)      0.05 (0.27)    0.260
##   avg_quality_fs (mean (SD))        -0.01 (0.22)      0.06 (0.27)    0.286
##   avg_brand_equity_ta (mean (SD))   21.64 (3.65)     22.24 (4.48)    0.146
## 
## ### p-values for Mean Differences (t-test for continuous) for treatment_ta ###
## Rating : p-value = 0.717 
## Reviews : p-value = 0.563 
## h_price : p-value = 0.663 
## avg_awareness_pa : p-value = 0.784 
## avg_loyalty_pa : p-value = 0.372 
## avg_quality_pa : p-value = 0.385 
## avg_awareness_fs : p-value = 0.723 
## avg_loyalty_fs : p-value = 0.412 
## avg_quality_fs : p-value = 0.367 
## avg_brand_equity_ta : p-value = 0.644 
## 
## ### Variance Ratios for treatment_ta ###
## Rating : Variance Ratio = 0.915 
## Reviews : Variance Ratio = 0.272 
## h_price : Variance Ratio = 1.092 
## avg_awareness_pa : Variance Ratio = 1.496 
## avg_loyalty_pa : Variance Ratio = 1.304 
## avg_quality_pa : Variance Ratio = 1.374 
## avg_awareness_fs : Variance Ratio = 1.513 
## avg_loyalty_fs : Variance Ratio = 1.383 
## avg_quality_fs : Variance Ratio = 1.42 
## avg_brand_equity_ta : Variance Ratio = 1.507 
## 
## ### Kolmogorov-Smirnov Test (eCDF) for treatment_ta ###
## Rating : KS Statistic = 0.167 , p-value = 0.873 
## Reviews : KS Statistic = 0.205 , p-value = 0.701 
## h_price : KS Statistic = 0.126 , p-value = 0.961 
## avg_awareness_pa : KS Statistic = 0.176 , p-value = 0.781 
## avg_loyalty_pa : KS Statistic = 0.219 , p-value = 0.549 
## avg_quality_pa : KS Statistic = 0.26 , p-value = 0.364 
## avg_awareness_fs : KS Statistic = 0.176 , p-value = 0.782 
## avg_loyalty_fs : KS Statistic = 0.219 , p-value = 0.551 
## avg_quality_fs : KS Statistic = 0.217 , p-value = 0.547 
## avg_brand_equity_ta : KS Statistic = 0.221 , p-value = 0.506

# Draw one density plot per covariate, filled by treatment_ta
plot_density(df, "treatment_ta")

## Warning: `aes_string()` was deprecated in ggplot2 3.0.0.
## ℹ Please use tidy evaluation idioms with `aes()`.
## ℹ See also `vignette("ggplot2-in-packages")` for more information.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.

# QQ plots (control vs. treated quantiles) for the treatment_ta assignment
plot_qq(df, "treatment_ta")

# Balance diagnostics for the treatment_pa assignment
check_balance(df, "treatment_pa")

## 
## ### Standardized Mean Differences for treatment_pa ###
##                                  Stratified by treatment_pa
##                                   0                 1               SMD   
##   n                                    21               20                
##   Rating (mean (SD))                 4.41 (0.24)      4.39 (0.19)    0.053
##   Reviews (mean (SD))             1030.71 (1699.78) 787.50 (685.54)  0.188
##   h_price (mean (SD))               58.95 (15.37)    59.30 (17.99)   0.021
##   avg_awareness_pa (mean (SD))       9.32 (3.27)      9.13 (2.68)    0.066
##   avg_loyalty_pa (mean (SD))         5.75 (0.84)      5.73 (0.59)    0.022
##   avg_quality_pa (mean (SD))         6.97 (0.53)      6.95 (0.43)    0.042
##   avg_awareness_fs (mean (SD))       0.06 (0.85)      0.03 (0.69)    0.043
##   avg_loyalty_fs (mean (SD))         0.02 (0.29)      0.01 (0.21)    0.018
##   avg_quality_fs (mean (SD))         0.03 (0.27)      0.02 (0.22)    0.033
##   avg_brand_equity_ta (mean (SD))   22.05 (4.53)     21.82 (3.56)    0.057
## 
## ### p-values for Mean Differences (t-test for continuous) for treatment_pa ###
## Rating : p-value = 0.866 
## Reviews : p-value = 0.55 
## h_price : p-value = 0.947 
## avg_awareness_pa : p-value = 0.834 
## avg_loyalty_pa : p-value = 0.943 
## avg_quality_pa : p-value = 0.893 
## avg_awareness_fs : p-value = 0.89 
## avg_loyalty_fs : p-value = 0.955 
## avg_quality_fs : p-value = 0.916 
## avg_brand_equity_ta : p-value = 0.855 
## 
## ### Variance Ratios for treatment_pa ###
## Rating : Variance Ratio = 0.623 
## Reviews : Variance Ratio = 0.163 
## h_price : Variance Ratio = 1.371 
## avg_awareness_pa : Variance Ratio = 0.667 
## avg_loyalty_pa : Variance Ratio = 0.495 
## avg_quality_pa : Variance Ratio = 0.655 
## avg_awareness_fs : Variance Ratio = 0.653 
## avg_loyalty_fs : Variance Ratio = 0.504 
## avg_quality_fs : Variance Ratio = 0.664 
## avg_brand_equity_ta : Variance Ratio = 0.616 
## 
## ### Kolmogorov-Smirnov Test (eCDF) for treatment_pa ###
## Rating : KS Statistic = 0.188 , p-value = 0.753 
## Reviews : KS Statistic = 0.276 , p-value = 0.317 
## h_price : KS Statistic = 0.124 , p-value = 0.968 
## avg_awareness_pa : KS Statistic = 0.129 , p-value = 0.957 
## avg_loyalty_pa : KS Statistic = 0.238 , p-value = 0.415 
## avg_quality_pa : KS Statistic = 0.119 , p-value = 0.975 
## avg_awareness_fs : KS Statistic = 0.129 , p-value = 0.96 
## avg_loyalty_fs : KS Statistic = 0.186 , p-value = 0.706 
## avg_quality_fs : KS Statistic = 0.119 , p-value = 0.974 
## avg_brand_equity_ta : KS Statistic = 0.129 , p-value = 0.949

# Density plots per covariate, filled by treatment_pa
plot_density(df, "treatment_pa")

# QQ plots (control vs. treated quantiles) for the treatment_pa assignment
plot_qq(df, "treatment_pa")

# Balance diagnostics for the treatment_fs assignment
check_balance(df, "treatment_fs")

## 
## ### Standardized Mean Differences for treatment_fs ###
##                                  Stratified by treatment_fs
##                                   0                 1               SMD   
##   n                                    21               20                
##   Rating (mean (SD))                 4.41 (0.24)      4.39 (0.19)    0.053
##   Reviews (mean (SD))             1030.71 (1699.78) 787.50 (685.54)  0.188
##   h_price (mean (SD))               58.95 (15.37)    59.30 (17.99)   0.021
##   avg_awareness_pa (mean (SD))       9.32 (3.27)      9.13 (2.68)    0.066
##   avg_loyalty_pa (mean (SD))         5.75 (0.84)      5.73 (0.59)    0.022
##   avg_quality_pa (mean (SD))         6.97 (0.53)      6.95 (0.43)    0.042
##   avg_awareness_fs (mean (SD))       0.06 (0.85)      0.03 (0.69)    0.043
##   avg_loyalty_fs (mean (SD))         0.02 (0.29)      0.01 (0.21)    0.018
##   avg_quality_fs (mean (SD))         0.03 (0.27)      0.02 (0.22)    0.033
##   avg_brand_equity_ta (mean (SD))   22.05 (4.53)     21.82 (3.56)    0.057
## 
## ### p-values for Mean Differences (t-test for continuous) for treatment_fs ###
## Rating : p-value = 0.866 
## Reviews : p-value = 0.55 
## h_price : p-value = 0.947 
## avg_awareness_pa : p-value = 0.834 
## avg_loyalty_pa : p-value = 0.943 
## avg_quality_pa : p-value = 0.893 
## avg_awareness_fs : p-value = 0.89 
## avg_loyalty_fs : p-value = 0.955 
## avg_quality_fs : p-value = 0.916 
## avg_brand_equity_ta : p-value = 0.855 
## 
## ### Variance Ratios for treatment_fs ###
## Rating : Variance Ratio = 0.623 
## Reviews : Variance Ratio = 0.163 
## h_price : Variance Ratio = 1.371 
## avg_awareness_pa : Variance Ratio = 0.667 
## avg_loyalty_pa : Variance Ratio = 0.495 
## avg_quality_pa : Variance Ratio = 0.655 
## avg_awareness_fs : Variance Ratio = 0.653 
## avg_loyalty_fs : Variance Ratio = 0.504 
## avg_quality_fs : Variance Ratio = 0.664 
## avg_brand_equity_ta : Variance Ratio = 0.616 
## 
## ### Kolmogorov-Smirnov Test (eCDF) for treatment_fs ###
## Rating : KS Statistic = 0.188 , p-value = 0.753 
## Reviews : KS Statistic = 0.276 , p-value = 0.317 
## h_price : KS Statistic = 0.124 , p-value = 0.968 
## avg_awareness_pa : KS Statistic = 0.129 , p-value = 0.957 
## avg_loyalty_pa : KS Statistic = 0.238 , p-value = 0.415 
## avg_quality_pa : KS Statistic = 0.119 , p-value = 0.975 
## avg_awareness_fs : KS Statistic = 0.129 , p-value = 0.96 
## avg_loyalty_fs : KS Statistic = 0.186 , p-value = 0.706 
## avg_quality_fs : KS Statistic = 0.119 , p-value = 0.974 
## avg_brand_equity_ta : KS Statistic = 0.129 , p-value = 0.949

# Density plots per covariate, filled by treatment_fs
plot_density(df, "treatment_fs")

# QQ plots (control vs. treated quantiles) for the treatment_fs assignment
plot_qq(df, "treatment_fs")

7a - product_randomization_brand_equity

Rui

2025-03-03

Load data

Non-bipartite matching with the average brand equity measure

Non-bipartite matching with avg_awareness_pa, avg_loyalty_pa, avg_quality_pa

Non-bipartite matching with avg_awareness_fs, avg_loyalty_fs, avg_quality_fs

Check if the matching is balanced