# Load the item-level brand equity survey data. Judging by how the table is
# used further down, `brand` identifies the rated brand and every other column
# is a numeric survey item.
file_path <- "/Users/ra/Library/CloudStorage/Box-Box/2024-Summer/Misleading Claims/Data/Brand Equity Survey/02 - processed data/df_brand_long.csv"
df <- read_csv(file_path, show_col_types = FALSE)
# Pairwise Pearson correlations among the survey items, rounded to 2 decimals.
# The identifier is excluded by name rather than by position, so the result
# does not depend on `brand` being the first column.
round(cor(select(df, -brand)), 2)
##                      awareness_1 awareness_2 awareness_3 awareness_4_reversed
## awareness_1                 1.00        0.91        0.89                 0.64
## awareness_2                 0.91        1.00        0.84                 0.61
## awareness_3                 0.89        0.84        1.00                 0.61
## awareness_4_reversed        0.64        0.61        0.61                 1.00
## loyalty_1                   0.54        0.51        0.58                 0.35
## loyalty_2                   0.46        0.43        0.47                 0.29
## loyalty_3                   0.39        0.37        0.41                 0.25
## quality_1                   0.47        0.47        0.44                 0.28
## quality_2                   0.44        0.44        0.40                 0.25
##                      loyalty_1 loyalty_2 loyalty_3 quality_1 quality_2
## awareness_1               0.54      0.46      0.39      0.47      0.44
## awareness_2               0.51      0.43      0.37      0.47      0.44
## awareness_3               0.58      0.47      0.41      0.44      0.40
## awareness_4_reversed      0.35      0.29      0.25      0.28      0.25
## loyalty_1                 1.00      0.73      0.71      0.38      0.33
## loyalty_2                 0.73      1.00      0.71      0.41      0.39
## loyalty_3                 0.71      0.71      1.00      0.33      0.29
## quality_1                 0.38      0.41      0.33      1.00      0.85
## quality_2                 0.33      0.39      0.29      0.85      1.00

Total aggregation and partial aggregation

We average the scores across respondents within each brand. Cronbach’s alpha is good, so the nine items form a reliable scale.

# Respondent-level composite scores. All of them are row SUMS of item scores:
#   brand_equity_ta - total aggregation: every column except `brand`
#   <construct>_pa  - partial aggregation: the columns whose names start with
#                     that construct's prefix
item_totals <- mutate(
  df,
  brand_equity_ta = rowSums(select(df, -brand)),
  awareness_pa = rowSums(select(df, starts_with("awareness"))),
  loyalty_pa = rowSums(select(df, starts_with("loyalty"))),
  quality_pa = rowSums(select(df, starts_with("quality")))
)
df_brand_equity_long <- select(
  item_totals,
  brand, brand_equity_ta, awareness_pa, loyalty_pa, quality_pa
)

# Collapse to one row per brand: the mean of each composite over that brand's
# rows, with an `avg_` prefix added to every column name.
df_brand_equity <- df_brand_equity_long %>%
  group_by(brand) %>%
  summarize(
    across(
      c(brand_equity_ta, awareness_pa, loyalty_pa, quality_pa),
      mean,
      .names = "avg_{.col}"
    )
  )
library(psych)
## 
## Attaching package: 'psych'
## The following objects are masked from 'package:ggplot2':
## 
##     %+%, alpha
# Internal consistency (Cronbach's alpha) of all survey items treated as one
# scale. The call is namespace-qualified because attaching psych masks
# ggplot2's `alpha`. The identifier column is dropped by name, not position.
alpha_result <- psych::alpha(select(df, -brand))
print(paste("Cronbach's alpha for brand equity:", round(alpha_result$total$raw_alpha, 3)))
## [1] "Cronbach's alpha for brand equity: 0.896"
library(lavaan)
## Warning: package 'lavaan' was built under R version 4.3.3
## This is lavaan 0.6-19
## lavaan is FREE software! Please report any bugs.
## 
## Attaching package: 'lavaan'
## The following object is masked from 'package:psych':
## 
##     cor2cov
## The following object is masked from 'package:tm':
## 
##     inspect
# One-factor CFA: all nine items load on a single latent `brand_equity` factor.
cfa_model <- '
  brand_equity =~ awareness_1 + awareness_2 + awareness_3 + awareness_4_reversed +
  loyalty_1 + loyalty_2 + loyalty_3 + quality_1 + quality_2
'

# Fit with lavaan's `cfa()` defaults.
configural_model <- cfa(cfa_model, data = df)

# Report fit indices together with standardized estimates.
summary(configural_model, standardized = TRUE, fit.measures = TRUE)
## lavaan 0.6-19 ended normally after 30 iterations
## 
##   Estimator                                         ML
##   Optimization method                           NLMINB
##   Number of model parameters                        18
## 
##   Number of observations                          1179
## 
## Model Test User Model:
##                                                       
##   Test statistic                              2785.546
##   Degrees of freedom                                27
##   P-value (Chi-square)                           0.000
## 
## Model Test Baseline Model:
## 
##   Test statistic                              8935.053
##   Degrees of freedom                                36
##   P-value                                        0.000
## 
## User Model versus Baseline Model:
## 
##   Comparative Fit Index (CFI)                    0.690
##   Tucker-Lewis Index (TLI)                       0.587
## 
## Loglikelihood and Information Criteria:
## 
##   Loglikelihood user model (H0)             -12905.722
##   Loglikelihood unrestricted model (H1)     -11512.949
##                                                       
##   Akaike (AIC)                               25847.444
##   Bayesian (BIC)                             25938.747
##   Sample-size adjusted Bayesian (SABIC)      25881.573
## 
## Root Mean Square Error of Approximation:
## 
##   RMSEA                                          0.294
##   90 Percent confidence interval - lower         0.285
##   90 Percent confidence interval - upper         0.304
##   P-value H_0: RMSEA <= 0.050                    0.000
##   P-value H_0: RMSEA >= 0.080                    1.000
## 
## Standardized Root Mean Square Residual:
## 
##   SRMR                                           0.154
## 
## Parameter Estimates:
## 
##   Standard errors                             Standard
##   Information                                 Expected
##   Information saturated (h1) model          Structured
## 
## Latent Variables:
##                    Estimate  Std.Err  z-value  P(>|z|)   Std.lv  Std.all
##   brand_equity =~                                                       
##     awareness_1       1.000                               1.335    0.964
##     awareness_2       1.042    0.016   66.339    0.000    1.391    0.928
##     awareness_3       0.911    0.014   63.293    0.000    1.217    0.917
##     awrnss_4_rvrsd    0.717    0.025   28.407    0.000    0.957    0.657
##     loyalty_1         0.416    0.017   24.563    0.000    0.556    0.598
##     loyalty_2         0.376    0.019   19.833    0.000    0.501    0.513
##     loyalty_3         0.299    0.018   16.658    0.000    0.399    0.447
##     quality_1         0.315    0.016   19.784    0.000    0.421    0.512
##     quality_2         0.293    0.016   18.061    0.000    0.391    0.477
## 
## Variances:
##                    Estimate  Std.Err  z-value  P(>|z|)   Std.lv  Std.all
##    .awareness_1       0.137    0.012   11.850    0.000    0.137    0.071
##    .awareness_2       0.313    0.017   18.011    0.000    0.313    0.139
##    .awareness_3       0.279    0.015   19.016    0.000    0.279    0.159
##    .awrnss_4_rvrsd    1.207    0.051   23.590    0.000    1.207    0.569
##    .loyalty_1         0.554    0.023   23.775    0.000    0.554    0.642
##    .loyalty_2         0.704    0.029   23.957    0.000    0.704    0.737
##    .loyalty_3         0.638    0.027   24.055    0.000    0.638    0.800
##    .quality_1         0.498    0.021   23.959    0.000    0.498    0.738
##    .quality_2         0.519    0.022   24.014    0.000    0.519    0.772
##     brand_equity      1.782    0.079   22.419    0.000    1.000    1.000
# Second-order CFA: Awareness, Loyalty and Quality are first-order factors that
# all load on one higher-order factor `f4`. Each higher-order loading is fixed
# to 1, and `f4 ~~ f4` lists the variance of `f4` as a model parameter.
# NOTE(review): fixing all three higher-order loadings to 1 looks stricter than
# identification alone requires - confirm the equal-loading constraint is
# intended.
cfa_model_second_order <- 'Awareness =~ awareness_1 + awareness_2 + awareness_3 + awareness_4_reversed
        Loyalty =~ loyalty_1 + loyalty_2 + loyalty_3
        Quality =~ quality_1 + quality_2  
        f4 =~ 1*Awareness + 1*Loyalty + 1*Quality
        f4 ~~ f4'
secondorder <- cfa(model = cfa_model_second_order, data = df)
summary(secondorder, standardized = TRUE, fit.measures = TRUE)
## lavaan 0.6-19 ended normally after 35 iterations
## 
##   Estimator                                         ML
##   Optimization method                           NLMINB
##   Number of model parameters                        19
## 
##   Number of observations                          1179
## 
## Model Test User Model:
##                                                       
##   Test statistic                               297.687
##   Degrees of freedom                                26
##   P-value (Chi-square)                           0.000
## 
## Model Test Baseline Model:
## 
##   Test statistic                              8935.053
##   Degrees of freedom                                36
##   P-value                                        0.000
## 
## User Model versus Baseline Model:
## 
##   Comparative Fit Index (CFI)                    0.969
##   Tucker-Lewis Index (TLI)                       0.958
## 
## Loglikelihood and Information Criteria:
## 
##   Loglikelihood user model (H0)             -11661.792
##   Loglikelihood unrestricted model (H1)     -11512.949
##                                                       
##   Akaike (AIC)                               23361.584
##   Bayesian (BIC)                             23457.960
##   Sample-size adjusted Bayesian (SABIC)      23397.609
## 
## Root Mean Square Error of Approximation:
## 
##   RMSEA                                          0.094
##   90 Percent confidence interval - lower         0.085
##   90 Percent confidence interval - upper         0.104
##   P-value H_0: RMSEA <= 0.050                    0.000
##   P-value H_0: RMSEA >= 0.080                    0.993
## 
## Standardized Root Mean Square Residual:
## 
##   SRMR                                           0.128
## 
## Parameter Estimates:
## 
##   Standard errors                             Standard
##   Information                                 Expected
##   Information saturated (h1) model          Structured
## 
## Latent Variables:
##                    Estimate  Std.Err  z-value  P(>|z|)   Std.lv  Std.all
##   Awareness =~                                                          
##     awareness_1       1.000                               1.227    0.968
##     awareness_2       1.041    0.017   62.231    0.000    1.277    0.917
##     awareness_3       0.905    0.016   57.791    0.000    1.110    0.898
##     awrnss_4_rvrsd    0.718    0.027   26.319    0.000    0.881    0.627
##   Loyalty =~                                                            
##     loyalty_1         1.000                               0.825    0.870
##     loyalty_2         1.029    0.028   36.139    0.000    0.849    0.859
##     loyalty_3         0.900    0.026   34.238    0.000    0.742    0.823
##   Quality =~                                                            
##     quality_1         1.000                               0.838    0.974
##     quality_2         0.897    0.027   33.715    0.000    0.752    0.887
##   f4 =~                                                                 
##     Awareness         1.000                               0.506    0.506
##     Loyalty           1.000                               0.752    0.752
##     Quality           1.000                               0.740    0.740
## 
## Variances:
##                    Estimate  Std.Err  z-value  P(>|z|)   Std.lv  Std.all
##     f4                0.385    0.024   16.334    0.000    1.000    1.000
##    .awareness_1       0.103    0.012    8.896    0.000    0.103    0.064
##    .awareness_2       0.307    0.017   17.649    0.000    0.307    0.158
##    .awareness_3       0.294    0.015   19.344    0.000    0.294    0.193
##    .awrnss_4_rvrsd    1.199    0.051   23.633    0.000    1.199    0.607
##    .loyalty_1         0.218    0.015   14.638    0.000    0.218    0.243
##    .loyalty_2         0.256    0.016   15.511    0.000    0.256    0.262
##    .loyalty_3         0.263    0.015   17.877    0.000    0.263    0.323
##    .quality_1         0.038    0.018    2.160    0.031    0.038    0.051
##    .quality_2         0.154    0.015    9.991    0.000    0.154    0.214
##    .Awareness         1.120    0.056   20.015    0.000    0.744    0.744
##    .Loyalty           0.295    0.027   11.036    0.000    0.434    0.434
##    .Quality           0.318    0.027   11.662    0.000    0.453    0.453
# Three-factor CFA: Awareness, Loyalty and Quality are each measured by their
# own items. No higher-order factor is specified here.
cfa_model_2 <- 'Awareness =~ awareness_1 + awareness_2 + awareness_3 + awareness_4_reversed
        Loyalty =~ loyalty_1 + loyalty_2 + loyalty_3
        Quality =~ quality_1 + quality_2  '
cfa_model_2_config <- cfa(model = cfa_model_2, data = df)
summary(cfa_model_2_config, standardized = TRUE, fit.measures = TRUE)
## lavaan 0.6-19 ended normally after 35 iterations
## 
##   Estimator                                         ML
##   Optimization method                           NLMINB
##   Number of model parameters                        21
## 
##   Number of observations                          1179
## 
## Model Test User Model:
##                                                       
##   Test statistic                               157.218
##   Degrees of freedom                                24
##   P-value (Chi-square)                           0.000
## 
## Model Test Baseline Model:
## 
##   Test statistic                              8935.053
##   Degrees of freedom                                36
##   P-value                                        0.000
## 
## User Model versus Baseline Model:
## 
##   Comparative Fit Index (CFI)                    0.985
##   Tucker-Lewis Index (TLI)                       0.978
## 
## Loglikelihood and Information Criteria:
## 
##   Loglikelihood user model (H0)             -11591.558
##   Loglikelihood unrestricted model (H1)     -11512.949
##                                                       
##   Akaike (AIC)                               23225.116
##   Bayesian (BIC)                             23331.637
##   Sample-size adjusted Bayesian (SABIC)      23264.933
## 
## Root Mean Square Error of Approximation:
## 
##   RMSEA                                          0.069
##   90 Percent confidence interval - lower         0.059
##   90 Percent confidence interval - upper         0.079
##   P-value H_0: RMSEA <= 0.050                    0.001
##   P-value H_0: RMSEA >= 0.080                    0.036
## 
## Standardized Root Mean Square Residual:
## 
##   SRMR                                           0.033
## 
## Parameter Estimates:
## 
##   Standard errors                             Standard
##   Information                                 Expected
##   Information saturated (h1) model          Structured
## 
## Latent Variables:
##                    Estimate  Std.Err  z-value  P(>|z|)   Std.lv  Std.all
##   Awareness =~                                                          
##     awareness_1       1.000                               1.348    0.973
##     awareness_2       1.033    0.015   68.953    0.000    1.393    0.929
##     awareness_3       0.899    0.014   64.138    0.000    1.211    0.913
##     awrnss_4_rvrsd    0.712    0.025   28.799    0.000    0.960    0.659
##   Loyalty =~                                                            
##     loyalty_1         1.000                               0.812    0.874
##     loyalty_2         1.021    0.029   34.853    0.000    0.829    0.848
##     loyalty_3         0.896    0.027   33.209    0.000    0.728    0.815
##   Quality =~                                                            
##     quality_1         1.000                               0.791    0.963
##     quality_2         0.918    0.030   30.821    0.000    0.726    0.886
## 
## Covariances:
##                    Estimate  Std.Err  z-value  P(>|z|)   Std.lv  Std.all
##   Awareness ~~                                                          
##     Loyalty           0.639    0.040   15.829    0.000    0.583    0.583
##     Quality           0.539    0.036   14.801    0.000    0.505    0.505
##   Loyalty ~~                                                            
##     Quality           0.294    0.023   12.965    0.000    0.458    0.458
## 
## Variances:
##                    Estimate  Std.Err  z-value  P(>|z|)   Std.lv  Std.all
##    .awareness_1       0.103    0.011    9.066    0.000    0.103    0.053
##    .awareness_2       0.309    0.017   17.926    0.000    0.309    0.137
##    .awareness_3       0.292    0.015   19.447    0.000    0.292    0.166
##    .awrnss_4_rvrsd    1.201    0.051   23.650    0.000    1.201    0.566
##    .loyalty_1         0.203    0.014   14.056    0.000    0.203    0.236
##    .loyalty_2         0.268    0.017   16.103    0.000    0.268    0.281
##    .loyalty_3         0.267    0.015   18.103    0.000    0.267    0.336
##    .quality_1         0.049    0.017    2.808    0.005    0.049    0.073
##    .quality_2         0.145    0.016    9.159    0.000    0.145    0.215
##     Awareness         1.816    0.080   22.815    0.000    1.000    1.000
##     Loyalty           0.660    0.037   18.075    0.000    1.000    1.000
##     Quality           0.625    0.033   19.124    0.000    1.000    1.000
# Per-observation scores on the three latent factors of the fitted
# three-factor model.
# NOTE(review): the seed is kept as in the original; nothing in this block
# visibly draws random numbers - confirm whether it is needed.
set.seed(123)
factor_scores <- lavaan::lavPredict(cfa_model_2_config)

# Data-frame copy of the scores, one column per latent factor.
factor_scores_df <- as.data.frame(factor_scores)

# Preview the leading rows as a sanity check.
head(factor_scores_df, n = 6L)
##    Awareness     Loyalty    Quality
## 1 -0.2145019  0.06555883 -0.4169187
## 2  1.3820715  0.18035722  0.5507155
## 3  2.5623720  0.03916644  1.5063882
## 4  1.6013624  0.23186990  0.7782253
## 5 -0.1801002  0.91625140 -0.3999667
## 6 -1.0252646 -0.82338523 -0.4477277
# Put the factor scores next to the composite scores. `cbind()` pairs rows
# purely by position, so both inputs must be in the same row order.
df_brand_factor_score <- cbind(df_brand_equity_long, factor_scores_df)

# Brand-level mean of each factor score, renamed to the `avg_*_fs` convention.
df_brand_equity_factorscore <- df_brand_factor_score %>%
  group_by(brand) %>%
  summarize(across(c(Awareness, Loyalty, Quality), mean)) %>%
  rename(
    avg_awareness_fs = Awareness,
    avg_loyalty_fs = Loyalty,
    avg_quality_fs = Quality
  )

# Join the composite-score means and the factor-score means on `brand`.
df_merged <- merge(
  x = df_brand_equity,
  y = df_brand_equity_factorscore,
  by = "brand"
)
#write.csv(df_merged, "brand_equity_construct.csv")

Compare these measures

# Brand-level agreement between the summed-item composite (`_pa`) and the mean
# factor score (`_fs`) of each construct, as 2 x 2 correlation matrices.
awareness_cor <- cor(df_merged[c("avg_awareness_pa", "avg_awareness_fs")])
loyalty_cor <- cor(df_merged[c("avg_loyalty_pa", "avg_loyalty_fs")])
quality_cor <- cor(df_merged[c("avg_quality_pa", "avg_quality_fs")])
print(awareness_cor)
##                  avg_awareness_pa avg_awareness_fs
## avg_awareness_pa        1.0000000        0.9977038
## avg_awareness_fs        0.9977038        1.0000000
print(loyalty_cor)
##                avg_loyalty_pa avg_loyalty_fs
## avg_loyalty_pa      1.0000000      0.9922517
## avg_loyalty_fs      0.9922517      1.0000000
print(quality_cor)
##                avg_quality_pa avg_quality_fs
## avg_quality_pa      1.0000000      0.9973586
## avg_quality_fs      0.9973586      1.0000000
# Add z-scored (mean 0, SD 1 across brands) versions of every brand-level
# measure so composites and factor scores can be compared on a common scale.
#
# `scale()` returns a one-column matrix carrying "scaled:center" and
# "scaled:scale" attributes. `as.numeric()` drops the dimensions and the
# attributes so each new column is a plain numeric vector rather than a matrix
# column inside the data frame.
df_merged <- df_merged %>%
  mutate(
    # Summed-item composites
    z_awareness_pa = as.numeric(scale(avg_awareness_pa)),
    z_loyalty_pa = as.numeric(scale(avg_loyalty_pa)),
    z_quality_pa = as.numeric(scale(avg_quality_pa)),

    # Mean factor scores. Brand-level means of factor scores are not
    # guaranteed to have mean 0 / SD 1, so they are standardized the same way.
    z_awareness_fs = as.numeric(scale(avg_awareness_fs)),
    z_loyalty_fs = as.numeric(scale(avg_loyalty_fs)),
    z_quality_fs = as.numeric(scale(avg_quality_fs))
  )
# Reshape data for easier plotting
library(tidyr)
library(ggplot2)

# Stack the two standardized measures of one construct into long format.
#
# data:      data frame holding `brand` plus the columns `z_<construct>_pa`
#            and `z_<construct>_fs`.
# construct: construct name as used in those column names, e.g. "awareness".
# Returns one row per brand and measure, with the source column name in
# `measure_type` and its value in `standardized_value`.
reshape_measures_long <- function(data, construct) {
  z_cols <- paste0("z_", construct, c("_pa", "_fs"))
  data %>%
    select(brand, all_of(z_cols)) %>%
    pivot_longer(
      cols = all_of(z_cols),
      names_to = "measure_type",
      values_to = "standardized_value"
    )
}

# Overlaid density curves of the two measures, one fill colour per measure.
#
# long_data: output of `reshape_measures_long()`.
# title:     plot title.
# Returns the ggplot object; it is drawn when auto-printed at top level.
plot_measure_density <- function(long_data, title) {
  ggplot(long_data, aes(x = standardized_value, fill = measure_type)) +
    geom_density(alpha = 0.5) +
    labs(
      title = title,
      x = "Standardized Value",
      y = "Density"
    ) +
    theme_minimal()
}

# Awareness: composite vs factor score
awareness_comparison <- reshape_measures_long(df_merged, "awareness")
plot_measure_density(awareness_comparison, "Comparison of Awareness Measures")

# Loyalty: composite vs factor score
loyalty_comparison <- reshape_measures_long(df_merged, "loyalty")
plot_measure_density(loyalty_comparison, "Comparison of Loyalty Measures")

# Quality: composite vs factor score
quality_comparison <- reshape_measures_long(df_merged, "quality")
plot_measure_density(quality_comparison, "Comparison of Quality Measures")