# Product-level file (processed Sephora data) and brand-level file (brand equity
# survey constructs); both paths are machine-specific absolute paths.
crdf <- read.csv(
  "/Users/ra/Library/CloudStorage/Box-Box/2024-Summer/Misleading Claims/Data/Sephora product data/02_processed_data/anti_aging_moisture_3.3.csv"
)
brand <- read.csv(
  "/Users/ra/Library/CloudStorage/Box-Box/2024-Summer/Misleading Claims/Data/Brand Equity Survey/02 - processed data/brand_equity_construct.csv"
)
# Left join on brand name: every row of crdf is kept, and the brand columns are
# NA for products whose Brand.Name has no match in brand$brand.
df <- merge(x = crdf, y = brand, by.x = "Brand.Name", by.y = "brand", all.x = TRUE)
library(nbpMatching)
## Warning: package 'nbpMatching' was built under R version 4.3.3
library(dplyr)
library(tableone)
# Covariates for the first matching run (total brand-equity score).
# Product.ID is kept as the first column because gendistance() below is told
# that column 1 holds the row IDs.
# select() is namespace-qualified: later in this file library(Matching)
# attaches MASS, whose select() masks dplyr's, so an unqualified call fails if
# this chunk is re-run in the same session.
df_match <- dplyr::select(
  df,
  Product.ID, Rating, Reviews, h_price, avg_brand_equity_ta
)
# Check data summary
summary(df_match)
## Product.ID Rating Reviews h_price
## Length:41 Min. :3.924 Min. : 18.0 Min. :22.00
## Class :character 1st Qu.:4.242 1st Qu.: 205.0 1st Qu.:49.00
## Mode :character Median :4.396 Median : 493.0 Median :60.00
## Mean :4.400 Mean : 912.1 Mean :59.12
## 3rd Qu.:4.566 3rd Qu.: 809.0 3rd Qu.:72.00
## Max. :4.765 Max. :7357.0 Max. :89.00
## avg_brand_equity_ta
## Min. :17.10
## 1st Qu.:19.73
## Median :20.94
## Mean :21.93
## 3rd Qu.:23.23
## Max. :34.15
# ---- Matching run 1: pair products on the df_match covariates ----
# create distances (column 1 of df_match, Product.ID, is the row identifier)
df.dist <- gendistance(df_match, idcol=1)
# create distancematrix object
df.mdm <- distancematrix(df.dist)
# create matches
df.match <- nonbimatch(df.mdm)
# review quality of matches (stored in df.qom; not printed in this section)
df.qom <- qom(df.dist$cov, df.match$matches)
# Print the group assignment for inspection. The table printed below lists each
# pair twice (once per member) and contains a "phantom42" row paired with one
# product -- presumably a padding unit because 41 products is odd; confirm
# against the nbpMatching documentation.
assign.grp(df.match$matches,seed = 68)
## Group1.ID Group1.Row Group2.ID Group2.Row Distance treatment.grp
## 1 P455237 1 P503690 34 0.8152455 B
## 2 P507952 2 P440312 17 1.1519803 A
## 3 P433887 3 P432829 39 0.8606697 A
## 4 P503642 4 P454090 30 1.5663707 A
## 5 P509259 5 P467750 6 0.4955818 A
## 6 P467750 6 P509259 5 0.4955818 B
## 7 P472454 7 P433971 31 1.7823947 B
## 8 P467749 8 P509464 29 0.2906119 B
## 9 P511726 9 P501282 33 0.8437822 B
## 10 P500790 10 P503909 37 3.0273611 B
## 11 P500718 11 P500771 12 1.4565459 A
## 12 P500771 12 P500718 11 1.4565459 B
## 13 P504007 13 P393076 16 0.7639330 B
## 14 P427421 14 phantom42 42 0.0000000 A
## 15 P439926 15 P502656 21 1.2696684 B
## 16 P393076 16 P504007 13 0.7639330 A
## 17 P440312 17 P507952 2 1.1519803 B
## 18 P471037 18 P480447 19 0.2596045 B
## 19 P480447 19 P471037 18 0.2596045 A
## 20 P509828 20 P483699 38 0.8739744 B
## 21 P502656 21 P439926 15 1.2696684 A
## 22 P232906 22 P500138 41 0.7133054 B
## 23 P502197 23 P505739 35 0.6278739 A
## 24 P506691 24 P173652 36 0.8414059 A
## 25 P461948 25 P483664 26 0.9825537 A
## 26 P483664 26 P461948 25 0.9825537 B
## 27 P508186 27 P509690 28 0.5388116 A
## 28 P509690 28 P508186 27 0.5388116 B
## 29 P509464 29 P467749 8 0.2906119 A
## 30 P454090 30 P503642 4 1.5663707 B
## 31 P433971 31 P472454 7 1.7823947 A
## 32 P378852 32 P440307 40 1.4167662 B
## 33 P501282 33 P511726 9 0.8437822 A
## 34 P503690 34 P455237 1 0.8152455 A
## 35 P505739 35 P502197 23 0.6278739 B
## 36 P173652 36 P506691 24 0.8414059 B
## 37 P503909 37 P500790 10 3.0273611 A
## 38 P483699 38 P509828 20 0.8739744 A
## 39 P432829 39 P433887 3 0.8606697 B
## 40 P440307 40 P378852 32 1.4167662 A
## 41 P500138 41 P232906 22 0.7133054 A
## 42 phantom42 42 P427421 14 0.0000000 B
# Same call (and seed) as the printed assignment above, this time stored.
assignment <- assign.grp(df.match$matches, seed = 68)
# Left-join the group label onto df by product ID. Only the ID and
# treatment.grp columns of the assignment table are carried over, and every
# row of df is kept.
df <- merge(
  df,
  assignment[, c("Group1.ID", "treatment.grp")],
  by.x = "Product.ID",
  by.y = "Group1.ID",
  all.x = TRUE
)
# The joined column becomes treatment_ta
colnames(df)[colnames(df) == "treatment.grp"] <- "treatment_ta"
# Recode: "A" -> 0, "B" -> 1; any other value is passed through unchanged
df$treatment_ta <- ifelse(
  df$treatment_ta == "A",
  0,
  ifelse(df$treatment_ta == "B", 1, df$treatment_ta)
)
# Covariates for the second matching run (the *_pa awareness / loyalty /
# quality scores). Product.ID stays first because gendistance() below is told
# that column 1 holds the row IDs.
# select() is namespace-qualified: later in this file library(Matching)
# attaches MASS, whose select() masks dplyr's, so an unqualified call fails if
# this chunk is re-run in the same session.
df_match_2 <- dplyr::select(
  df,
  Product.ID, Rating, Reviews, h_price,
  avg_awareness_pa, avg_loyalty_pa, avg_quality_pa
)
# Check data summary
summary(df_match_2)
## Product.ID Rating Reviews h_price
## Length:41 Min. :3.924 Min. : 18.0 Min. :22.00
## Class :character 1st Qu.:4.242 1st Qu.: 205.0 1st Qu.:49.00
## Mode :character Median :4.396 Median : 493.0 Median :60.00
## Mean :4.400 Mean : 912.1 Mean :59.12
## 3rd Qu.:4.566 3rd Qu.: 809.0 3rd Qu.:72.00
## Max. :4.765 Max. :7357.0 Max. :89.00
## avg_awareness_pa avg_loyalty_pa avg_quality_pa
## Min. : 6.519 Min. :4.125 Min. :6.042
## 1st Qu.: 7.319 1st Qu.:5.300 1st Qu.:6.660
## Median : 8.400 Median :5.627 Median :6.915
## Mean : 9.227 Mean :5.743 Mean :6.964
## 3rd Qu.: 9.833 3rd Qu.:6.038 3rd Qu.:7.122
## Max. :18.146 Max. :7.771 Max. :8.229
# ---- Matching run 2: pair products on the df_match_2 covariates ----
# create distances (column 1 of df_match_2, Product.ID, is the row identifier)
df.dist_2 <- gendistance(df_match_2, idcol=1)
# create distancematrix object
df.mdm_2 <- distancematrix(df.dist_2)
# create matches
df.match_2 <- nonbimatch(df.mdm_2)
# review quality of matches (stored in df.qom_2; not printed in this section)
df.qom_2 <- qom(df.dist_2$cov, df.match_2$matches)
# Print the group assignment for inspection. The table printed below lists each
# pair twice (once per member) and contains a "phantom42" row paired with one
# product -- presumably a padding unit because 41 products is odd; confirm
# against the nbpMatching documentation.
assign.grp(df.match_2$matches,seed = 68)
## Group1.ID Group1.Row Group2.ID Group2.Row Distance treatment.grp
## 1 P173652 1 P506691 34 1.4054847 B
## 2 P232906 2 P439926 9 1.2500112 A
## 3 P378852 3 P472454 18 2.2041778 A
## 4 P393076 4 P433971 8 1.5996962 A
## 5 P427421 5 phantom42 42 0.0000000 A
## 6 P432829 6 P440307 10 0.7226207 B
## 7 P433887 7 P503642 29 1.6256744 B
## 8 P433971 8 P393076 4 1.5996962 B
## 9 P439926 9 P232906 2 1.2500112 B
## 10 P440307 10 P432829 6 0.7226207 A
## 11 P440312 11 P508186 36 1.7172654 A
## 12 P454090 12 P504007 32 2.8147422 A
## 13 P455237 13 P507952 35 1.2574348 B
## 14 P461948 14 P500138 22 1.4652879 A
## 15 P467749 15 P509464 38 1.1388474 B
## 16 P467750 16 P509259 37 0.8206596 A
## 17 P471037 17 P480447 19 0.2879720 B
## 18 P472454 18 P378852 3 2.2041778 B
## 19 P480447 19 P471037 17 0.2879720 A
## 20 P483664 20 P503690 30 1.1052949 B
## 21 P483699 21 P502656 28 3.4860663 A
## 22 P500138 22 P461948 14 1.4652879 B
## 23 P500718 23 P500771 24 1.4739340 A
## 24 P500771 24 P500718 23 1.4739340 B
## 25 P500790 25 P503909 31 3.0327268 A
## 26 P501282 26 P511726 41 0.9991730 B
## 27 P502197 27 P505739 33 0.6826465 A
## 28 P502656 28 P483699 21 3.4860663 B
## 29 P503642 29 P433887 7 1.6256744 A
## 30 P503690 30 P483664 20 1.1052949 A
## 31 P503909 31 P500790 25 3.0327268 B
## 32 P504007 32 P454090 12 2.8147422 B
## 33 P505739 33 P502197 27 0.6826465 B
## 34 P506691 34 P173652 1 1.4054847 A
## 35 P507952 35 P455237 13 1.2574348 A
## 36 P508186 36 P440312 11 1.7172654 B
## 37 P509259 37 P467750 16 0.8206596 B
## 38 P509464 38 P467749 15 1.1388474 A
## 39 P509690 39 P509828 40 1.7762696 A
## 40 P509828 40 P509690 39 1.7762696 B
## 41 P511726 41 P501282 26 0.9991730 A
## 42 phantom42 42 P427421 5 0.0000000 B
# Same call (and seed) as the printed assignment above, this time stored.
assignment_2 <- assign.grp(df.match_2$matches, seed = 68)
# Left-join the group label onto df by product ID. Only the ID and
# treatment.grp columns of the assignment table are carried over, and every
# row of df is kept.
df <- merge(
  df,
  assignment_2[, c("Group1.ID", "treatment.grp")],
  by.x = "Product.ID",
  by.y = "Group1.ID",
  all.x = TRUE
)
# The joined column becomes treatment_pa
colnames(df)[colnames(df) == "treatment.grp"] <- "treatment_pa"
# Recode: "A" -> 0, "B" -> 1; any other value is passed through unchanged
df$treatment_pa <- ifelse(
  df$treatment_pa == "A",
  0,
  ifelse(df$treatment_pa == "B", 1, df$treatment_pa)
)
# Covariates for the third matching run (the *_fs awareness / loyalty /
# quality scores). Product.ID stays first because gendistance() below is told
# that column 1 holds the row IDs.
# select() is namespace-qualified: later in this file library(Matching)
# attaches MASS, whose select() masks dplyr's, so an unqualified call fails if
# this chunk is re-run in the same session.
df_match_3 <- dplyr::select(
  df,
  Product.ID, Rating, Reviews, h_price,
  avg_awareness_fs, avg_loyalty_fs, avg_quality_fs
)
# Check data summary
summary(df_match_3)
## Product.ID Rating Reviews h_price
## Length:41 Min. :3.924 Min. : 18.0 Min. :22.00
## Class :character 1st Qu.:4.242 1st Qu.: 205.0 1st Qu.:49.00
## Mode :character Median :4.396 Median : 493.0 Median :60.00
## Mean :4.400 Mean : 912.1 Mean :59.12
## 3rd Qu.:4.566 3rd Qu.: 809.0 3rd Qu.:72.00
## Max. :4.765 Max. :7357.0 Max. :89.00
## avg_awareness_fs avg_loyalty_fs avg_quality_fs
## Min. :-0.65362 Min. :-0.49057 Min. :-0.42576
## 1st Qu.:-0.46302 1st Qu.:-0.13224 1st Qu.:-0.11423
## Median :-0.16516 Median :-0.03502 Median :-0.01529
## Mean : 0.04832 Mean : 0.01304 Mean : 0.02737
## 3rd Qu.: 0.23782 3rd Qu.: 0.10952 3rd Qu.: 0.10370
## Max. : 2.37043 Max. : 0.73928 Max. : 0.69084
# ---- Matching run 3: pair products on the df_match_3 covariates ----
# create distances (column 1 of df_match_3, Product.ID, is the row identifier)
df.dist_3 <- gendistance(df_match_3, idcol=1)
# create distancematrix object
df.mdm_3 <- distancematrix(df.dist_3)
# create matches
df.match_3 <- nonbimatch(df.mdm_3)
# review quality of matches (stored in df.qom_3; not printed in this section)
df.qom_3 <- qom(df.dist_3$cov, df.match_3$matches)
# Print the group assignment for inspection. The table printed below lists each
# pair twice (once per member) and contains a "phantom42" row paired with one
# product -- presumably a padding unit because 41 products is odd; confirm
# against the nbpMatching documentation.
assign.grp(df.match_3$matches,seed = 68)
## Group1.ID Group1.Row Group2.ID Group2.Row Distance treatment.grp
## 1 P173652 1 P506691 34 1.4929742 B
## 2 P232906 2 P439926 9 1.2528062 A
## 3 P378852 3 P472454 18 2.1585636 A
## 4 P393076 4 P433971 8 1.5110983 A
## 5 P427421 5 phantom42 42 0.0000000 A
## 6 P432829 6 P440307 10 0.7188218 B
## 7 P433887 7 P503642 29 1.6262738 B
## 8 P433971 8 P393076 4 1.5110983 B
## 9 P439926 9 P232906 2 1.2528062 B
## 10 P440307 10 P432829 6 0.7188218 A
## 11 P440312 11 P508186 36 1.6607578 A
## 12 P454090 12 P504007 32 2.9046912 A
## 13 P455237 13 P507952 35 1.2506208 B
## 14 P461948 14 P500138 22 1.5478359 A
## 15 P467749 15 P509464 38 1.3006587 B
## 16 P467750 16 P509259 37 0.7574145 A
## 17 P471037 17 P480447 19 0.2915499 B
## 18 P472454 18 P378852 3 2.1585636 B
## 19 P480447 19 P471037 17 0.2915499 A
## 20 P483664 20 P503690 30 1.0801472 B
## 21 P483699 21 P502656 28 3.4373254 A
## 22 P500138 22 P461948 14 1.5478359 B
## 23 P500718 23 P500771 24 1.4809296 A
## 24 P500771 24 P500718 23 1.4809296 B
## 25 P500790 25 P503909 31 3.0467745 A
## 26 P501282 26 P511726 41 1.1483426 B
## 27 P502197 27 P505739 33 0.6851291 A
## 28 P502656 28 P483699 21 3.4373254 B
## 29 P503642 29 P433887 7 1.6262738 A
## 30 P503690 30 P483664 20 1.0801472 A
## 31 P503909 31 P500790 25 3.0467745 B
## 32 P504007 32 P454090 12 2.9046912 B
## 33 P505739 33 P502197 27 0.6851291 B
## 34 P506691 34 P173652 1 1.4929742 A
## 35 P507952 35 P455237 13 1.2506208 A
## 36 P508186 36 P440312 11 1.6607578 B
## 37 P509259 37 P467750 16 0.7574145 B
## 38 P509464 38 P467749 15 1.3006587 A
## 39 P509690 39 P509828 40 1.6914943 A
## 40 P509828 40 P509690 39 1.6914943 B
## 41 P511726 41 P501282 26 1.1483426 A
## 42 phantom42 42 P427421 5 0.0000000 B
# Same call (and seed) as the printed assignment above, this time stored.
assignment_3 <- assign.grp(df.match_3$matches, seed = 68)
# Left-join the group label onto df by product ID. Only the ID and
# treatment.grp columns of the assignment table are carried over, and every
# row of df is kept.
df <- merge(
  df,
  assignment_3[, c("Group1.ID", "treatment.grp")],
  by.x = "Product.ID",
  by.y = "Group1.ID",
  all.x = TRUE
)
# The joined column becomes treatment_fs
colnames(df)[colnames(df) == "treatment.grp"] <- "treatment_fs"
# Recode: "A" -> 0, "B" -> 1; any other value is passed through unchanged
df$treatment_fs <- ifelse(
  df$treatment_fs == "A",
  0,
  ifelse(df$treatment_fs == "B", 1, df$treatment_fs)
)
# Reference (assessing covariate balance): https://cran.r-project.org/web/packages/MatchIt/vignettes/assessing-balance.html
# Load libraries
library(tableone)
library(randChecks)
## Warning: package 'randChecks' was built under R version 4.3.3
library(Matching)
## Warning: package 'Matching' was built under R version 4.3.3
## Loading required package: MASS
##
## Attaching package: 'MASS'
## The following object is masked from 'package:dplyr':
##
## select
## ##
## ## Matching (Version 4.10-15, Build Date: 2024-10-14)
## ## See https://www.jsekhon.com for additional documentation.
## ## Please cite software as:
## ## Jasjeet S. Sekhon. 2011. ``Multivariate and Propensity Score Matching
## ## Software with Automated Balance Optimization: The Matching package for R.''
## ## Journal of Statistical Software, 42(7): 1-52.
## ##
library(ggplot2)
library(cobalt)
## cobalt (Version 4.5.5, Build Date: 2024-04-02)
##
## Attaching package: 'cobalt'
## The following object is masked from 'package:randChecks':
##
## lalonde
library(gridExtra)
## Warning: package 'gridExtra' was built under R version 4.3.3
##
## Attaching package: 'gridExtra'
## The following object is masked from 'package:dplyr':
##
## combine
# Columns whose balance is compared between treatment arms. The order here is
# the order in which the diagnostics and plots are produced.
covariates <- c(
  # product-level measures
  "Rating",
  "Reviews",
  "h_price",
  # brand measures, *_pa variants
  "avg_awareness_pa",
  "avg_loyalty_pa",
  "avg_quality_pa",
  # brand measures, *_fs variants
  "avg_awareness_fs",
  "avg_loyalty_fs",
  "avg_quality_fs",
  # total brand-equity score
  "avg_brand_equity_ta"
)
# Print covariate balance diagnostics for one treatment indicator.
#
# For every column named in the global `covariates` vector this prints, in
# order: a tableone summary with standardized mean differences, Welch t-test
# p-values, treated/control variance ratios, and two-sample
# Kolmogorov-Smirnov statistics.
#
# Args:
#   df:            data frame containing the `covariates` columns and
#                  `treatment_var`.
#   treatment_var: name (string) of the treatment column; rows equal to 1 are
#                  treated and rows equal to 0 are controls.
#
# Returns (invisibly) a list with
#   smd:   the matrix returned by printing the tableone object, and
#   tests: a data frame with one row per covariate holding the unrounded
#          t-test p-value, variance ratio, KS statistic and KS p-value.
# The previous version collected the p-values and then discarded them; they
# are now returned so callers can reuse the numbers instead of re-parsing the
# console output.
check_balance <- function(df, treatment_var) {
  # Convert treatment to factor so tableone stratifies on it
  df[[treatment_var]] <- as.factor(df[[treatment_var]])

  # Compute Standardized Mean Differences (SMD)
  balance_table <- CreateTableOne(
    vars = covariates, strata = treatment_var, data = df, test = FALSE
  )
  cat("\n### Standardized Mean Differences for", treatment_var, "###\n")
  smd_table <- print(balance_table, smd = TRUE)

  # Group masks do not depend on the covariate, so build them once
  is_treated <- df[[treatment_var]] == 1
  is_control <- df[[treatment_var]] == 0

  # Preallocated, name-indexed result vectors (one slot per covariate)
  blank <- stats::setNames(rep(NA_real_, length(covariates)), covariates)
  t_p_value <- blank
  variance_ratio <- blank
  ks_statistic <- blank
  ks_p_value <- blank

  cat("\n### p-values for Mean Differences (t-test for continuous) for", treatment_var, "###\n")
  for (covariate in covariates) {
    t_result <- t.test(df[[covariate]][is_treated], df[[covariate]][is_control])
    t_p_value[[covariate]] <- t_result$p.value
    cat(covariate, ": p-value =", round(t_result$p.value, 3), "\n")
  }

  # Variance ratio = var(treated) / var(control)
  cat("\n### Variance Ratios for", treatment_var, "###\n")
  for (covariate in covariates) {
    ratio <- var(df[[covariate]][is_treated], na.rm = TRUE) /
      var(df[[covariate]][is_control], na.rm = TRUE)
    variance_ratio[[covariate]] <- ratio
    cat(covariate, ": Variance Ratio =", round(ratio, 3), "\n")
  }

  # Empirical CDF differences via the two-sample Kolmogorov-Smirnov test
  cat("\n### Kolmogorov-Smirnov Test (eCDF) for", treatment_var, "###\n")
  for (covariate in covariates) {
    ks_result <- ks.test(df[[covariate]][is_treated], df[[covariate]][is_control])
    ks_statistic[[covariate]] <- unname(ks_result$statistic)
    ks_p_value[[covariate]] <- ks_result$p.value
    cat(covariate, ": KS Statistic =", round(ks_result$statistic, 3), ", p-value =", round(ks_result$p.value, 3), "\n")
  }

  invisible(list(
    smd = smd_table,
    tests = data.frame(
      covariate = covariates,
      t_p_value = unname(t_p_value),
      variance_ratio = unname(variance_ratio),
      ks_statistic = unname(ks_statistic),
      ks_p_value = unname(ks_p_value)
    )
  ))
}
# Draw one overlaid density plot per covariate, filled by treatment arm.
#
# Args:
#   df:            data frame containing the columns named in the global
#                  `covariates` vector and `treatment_var`.
#   treatment_var: name (string) of the treatment column.
#
# Each plot is printed as a side effect; the function returns NULL invisibly.
plot_density <- function(df, treatment_var) {
  # A numeric 0/1 indicator is a continuous aesthetic to ggplot2 and would not
  # split the data into one density per arm, so force it to be categorical
  # (check_balance() applies the same conversion).
  df[[treatment_var]] <- as.factor(df[[treatment_var]])

  for (covariate in covariates) {
    # aes_string() is deprecated; the .data pronoun is the supported way to map
    # columns whose names are held in strings.
    density_plot <- ggplot(
      df,
      aes(x = .data[[covariate]], fill = .data[[treatment_var]])
    ) +
      geom_density(alpha = 0.5) +
      labs(
        title = paste("Density Plot:", covariate, "by", treatment_var),
        x = covariate,
        y = "Density",
        # keep the legend titled with the column name, as aes_string() did
        fill = treatment_var
      ) +
      theme_minimal()
    print(density_plot)
  }
  invisible(NULL)
}
# Treated-vs-control QQ plot for every covariate.
#
# Args:
#   df:            data frame containing the columns named in the global
#                  `covariates` vector and `treatment_var`.
#   treatment_var: name (string) of the treatment column; rows equal to 1 are
#                  treated and rows equal to 0 are controls.
#
# Each plot is printed as a side effect.
plot_qq <- function(df, treatment_var) {
  for (covariate in covariates) {
    column_values <- df[[covariate]]
    control_vals <- column_values[df[[treatment_var]] == 0]
    treated_vals <- column_values[df[[treatment_var]] == 1]

    # Use as many quantile points as the smaller arm has observations, so both
    # arms are evaluated on one shared probability grid.
    n_points <- min(length(treated_vals), length(control_vals))
    prob_grid <- seq(0, 1, length.out = n_points)

    quantile_pairs <- data.frame(
      control = quantile(control_vals, probs = prob_grid, na.rm = TRUE),
      treated = quantile(treated_vals, probs = prob_grid, na.rm = TRUE)
    )

    plot_title <- paste("QQ Plot:", covariate, "by", treatment_var)
    qq_figure <- ggplot(quantile_pairs, aes(x = control, y = treated)) +
      geom_point(color = "blue") +
      # dashed identity line: points on it mean equal quantiles in both arms
      geom_abline(slope = 1, intercept = 0, linetype = "dashed", color = "red") +
      labs(
        title = plot_title,
        x = "Control Group Quantiles",
        y = "Treated Group Quantiles"
      ) +
      theme_minimal()
    print(qq_figure)
  }
}
# Balance diagnostics for the assignment stored in treatment_ta
check_balance(df = df, treatment_var = "treatment_ta")
##
## ### Standardized Mean Differences for treatment_ta ###
## Stratified by treatment_ta
## 0 1 SMD
## n 21 20
## Rating (mean (SD)) 4.39 (0.22) 4.41 (0.22) 0.114
## Reviews (mean (SD)) 1027.10 (1628.77) 791.30 (848.92) 0.182
## h_price (mean (SD)) 60.24 (16.30) 57.95 (17.03) 0.137
## avg_awareness_pa (mean (SD)) 9.10 (2.69) 9.36 (3.29) 0.086
## avg_loyalty_pa (mean (SD)) 5.64 (0.68) 5.85 (0.77) 0.283
## avg_quality_pa (mean (SD)) 6.90 (0.44) 7.03 (0.51) 0.275
## avg_awareness_fs (mean (SD)) 0.01 (0.69) 0.09 (0.85) 0.112
## avg_loyalty_fs (mean (SD)) -0.02 (0.23) 0.05 (0.27) 0.260
## avg_quality_fs (mean (SD)) -0.01 (0.22) 0.06 (0.27) 0.286
## avg_brand_equity_ta (mean (SD)) 21.64 (3.65) 22.24 (4.48) 0.146
##
## ### p-values for Mean Differences (t-test for continuous) for treatment_ta ###
## Rating : p-value = 0.717
## Reviews : p-value = 0.563
## h_price : p-value = 0.663
## avg_awareness_pa : p-value = 0.784
## avg_loyalty_pa : p-value = 0.372
## avg_quality_pa : p-value = 0.385
## avg_awareness_fs : p-value = 0.723
## avg_loyalty_fs : p-value = 0.412
## avg_quality_fs : p-value = 0.367
## avg_brand_equity_ta : p-value = 0.644
##
## ### Variance Ratios for treatment_ta ###
## Rating : Variance Ratio = 0.915
## Reviews : Variance Ratio = 0.272
## h_price : Variance Ratio = 1.092
## avg_awareness_pa : Variance Ratio = 1.496
## avg_loyalty_pa : Variance Ratio = 1.304
## avg_quality_pa : Variance Ratio = 1.374
## avg_awareness_fs : Variance Ratio = 1.513
## avg_loyalty_fs : Variance Ratio = 1.383
## avg_quality_fs : Variance Ratio = 1.42
## avg_brand_equity_ta : Variance Ratio = 1.507
##
## ### Kolmogorov-Smirnov Test (eCDF) for treatment_ta ###
## Rating : KS Statistic = 0.167 , p-value = 0.873
## Reviews : KS Statistic = 0.205 , p-value = 0.701
## h_price : KS Statistic = 0.126 , p-value = 0.961
## avg_awareness_pa : KS Statistic = 0.176 , p-value = 0.781
## avg_loyalty_pa : KS Statistic = 0.219 , p-value = 0.549
## avg_quality_pa : KS Statistic = 0.26 , p-value = 0.364
## avg_awareness_fs : KS Statistic = 0.176 , p-value = 0.782
## avg_loyalty_fs : KS Statistic = 0.219 , p-value = 0.551
## avg_quality_fs : KS Statistic = 0.217 , p-value = 0.547
## avg_brand_equity_ta : KS Statistic = 0.221 , p-value = 0.506
# Density plots by arm for the assignment stored in treatment_ta
plot_density(df = df, treatment_var = "treatment_ta")
## Warning: `aes_string()` was deprecated in ggplot2 3.0.0.
## ℹ Please use tidy evaluation idioms with `aes()`.
## ℹ See also `vignette("ggplot2-in-packages")` for more information.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
# QQ plots for treatment_ta, then balance diagnostics for treatment_pa
plot_qq(df = df, treatment_var = "treatment_ta")
check_balance(df = df, treatment_var = "treatment_pa")
##
## ### Standardized Mean Differences for treatment_pa ###
## Stratified by treatment_pa
## 0 1 SMD
## n 21 20
## Rating (mean (SD)) 4.41 (0.24) 4.39 (0.19) 0.053
## Reviews (mean (SD)) 1030.71 (1699.78) 787.50 (685.54) 0.188
## h_price (mean (SD)) 58.95 (15.37) 59.30 (17.99) 0.021
## avg_awareness_pa (mean (SD)) 9.32 (3.27) 9.13 (2.68) 0.066
## avg_loyalty_pa (mean (SD)) 5.75 (0.84) 5.73 (0.59) 0.022
## avg_quality_pa (mean (SD)) 6.97 (0.53) 6.95 (0.43) 0.042
## avg_awareness_fs (mean (SD)) 0.06 (0.85) 0.03 (0.69) 0.043
## avg_loyalty_fs (mean (SD)) 0.02 (0.29) 0.01 (0.21) 0.018
## avg_quality_fs (mean (SD)) 0.03 (0.27) 0.02 (0.22) 0.033
## avg_brand_equity_ta (mean (SD)) 22.05 (4.53) 21.82 (3.56) 0.057
##
## ### p-values for Mean Differences (t-test for continuous) for treatment_pa ###
## Rating : p-value = 0.866
## Reviews : p-value = 0.55
## h_price : p-value = 0.947
## avg_awareness_pa : p-value = 0.834
## avg_loyalty_pa : p-value = 0.943
## avg_quality_pa : p-value = 0.893
## avg_awareness_fs : p-value = 0.89
## avg_loyalty_fs : p-value = 0.955
## avg_quality_fs : p-value = 0.916
## avg_brand_equity_ta : p-value = 0.855
##
## ### Variance Ratios for treatment_pa ###
## Rating : Variance Ratio = 0.623
## Reviews : Variance Ratio = 0.163
## h_price : Variance Ratio = 1.371
## avg_awareness_pa : Variance Ratio = 0.667
## avg_loyalty_pa : Variance Ratio = 0.495
## avg_quality_pa : Variance Ratio = 0.655
## avg_awareness_fs : Variance Ratio = 0.653
## avg_loyalty_fs : Variance Ratio = 0.504
## avg_quality_fs : Variance Ratio = 0.664
## avg_brand_equity_ta : Variance Ratio = 0.616
##
## ### Kolmogorov-Smirnov Test (eCDF) for treatment_pa ###
## Rating : KS Statistic = 0.188 , p-value = 0.753
## Reviews : KS Statistic = 0.276 , p-value = 0.317
## h_price : KS Statistic = 0.124 , p-value = 0.968
## avg_awareness_pa : KS Statistic = 0.129 , p-value = 0.957
## avg_loyalty_pa : KS Statistic = 0.238 , p-value = 0.415
## avg_quality_pa : KS Statistic = 0.119 , p-value = 0.975
## avg_awareness_fs : KS Statistic = 0.129 , p-value = 0.96
## avg_loyalty_fs : KS Statistic = 0.186 , p-value = 0.706
## avg_quality_fs : KS Statistic = 0.119 , p-value = 0.974
## avg_brand_equity_ta : KS Statistic = 0.129 , p-value = 0.949
# Density and QQ plots for treatment_pa, then balance diagnostics for
# treatment_fs
plot_density(df = df, treatment_var = "treatment_pa")
plot_qq(df = df, treatment_var = "treatment_pa")
check_balance(df = df, treatment_var = "treatment_fs")
##
## ### Standardized Mean Differences for treatment_fs ###
## Stratified by treatment_fs
## 0 1 SMD
## n 21 20
## Rating (mean (SD)) 4.41 (0.24) 4.39 (0.19) 0.053
## Reviews (mean (SD)) 1030.71 (1699.78) 787.50 (685.54) 0.188
## h_price (mean (SD)) 58.95 (15.37) 59.30 (17.99) 0.021
## avg_awareness_pa (mean (SD)) 9.32 (3.27) 9.13 (2.68) 0.066
## avg_loyalty_pa (mean (SD)) 5.75 (0.84) 5.73 (0.59) 0.022
## avg_quality_pa (mean (SD)) 6.97 (0.53) 6.95 (0.43) 0.042
## avg_awareness_fs (mean (SD)) 0.06 (0.85) 0.03 (0.69) 0.043
## avg_loyalty_fs (mean (SD)) 0.02 (0.29) 0.01 (0.21) 0.018
## avg_quality_fs (mean (SD)) 0.03 (0.27) 0.02 (0.22) 0.033
## avg_brand_equity_ta (mean (SD)) 22.05 (4.53) 21.82 (3.56) 0.057
##
## ### p-values for Mean Differences (t-test for continuous) for treatment_fs ###
## Rating : p-value = 0.866
## Reviews : p-value = 0.55
## h_price : p-value = 0.947
## avg_awareness_pa : p-value = 0.834
## avg_loyalty_pa : p-value = 0.943
## avg_quality_pa : p-value = 0.893
## avg_awareness_fs : p-value = 0.89
## avg_loyalty_fs : p-value = 0.955
## avg_quality_fs : p-value = 0.916
## avg_brand_equity_ta : p-value = 0.855
##
## ### Variance Ratios for treatment_fs ###
## Rating : Variance Ratio = 0.623
## Reviews : Variance Ratio = 0.163
## h_price : Variance Ratio = 1.371
## avg_awareness_pa : Variance Ratio = 0.667
## avg_loyalty_pa : Variance Ratio = 0.495
## avg_quality_pa : Variance Ratio = 0.655
## avg_awareness_fs : Variance Ratio = 0.653
## avg_loyalty_fs : Variance Ratio = 0.504
## avg_quality_fs : Variance Ratio = 0.664
## avg_brand_equity_ta : Variance Ratio = 0.616
##
## ### Kolmogorov-Smirnov Test (eCDF) for treatment_fs ###
## Rating : KS Statistic = 0.188 , p-value = 0.753
## Reviews : KS Statistic = 0.276 , p-value = 0.317
## h_price : KS Statistic = 0.124 , p-value = 0.968
## avg_awareness_pa : KS Statistic = 0.129 , p-value = 0.957
## avg_loyalty_pa : KS Statistic = 0.238 , p-value = 0.415
## avg_quality_pa : KS Statistic = 0.119 , p-value = 0.975
## avg_awareness_fs : KS Statistic = 0.129 , p-value = 0.96
## avg_loyalty_fs : KS Statistic = 0.186 , p-value = 0.706
## avg_quality_fs : KS Statistic = 0.119 , p-value = 0.974
## avg_brand_equity_ta : KS Statistic = 0.129 , p-value = 0.949
# Density and QQ plots for the assignment stored in treatment_fs
plot_density(df = df, treatment_var = "treatment_fs")
plot_qq(df = df, treatment_var = "treatment_fs")