Data Cleaning &
Preprocessing
Splitting Target and
Isolating Lifestyle Features
target <- data_raw$WORK_LIFE_BALANCE_SCORE
data_features <- data_raw %>%
select(-Timestamp, -AGE, -GENDER, -BMI_RANGE, -WORK_LIFE_BALANCE_SCORE)
data_features <- data_features %>% select(where(is.numeric))
str(data_features)
## 'data.frame': 15972 obs. of 18 variables:
## $ FRUITS_VEGGIES : int 3 2 2 3 5 3 4 3 5 4 ...
## $ PLACES_VISITED : int 2 4 3 10 3 3 10 5 6 2 ...
## $ CORE_CIRCLE : int 5 3 4 3 3 9 6 3 4 6 ...
## $ SUPPORTING_OTHERS: int 0 8 4 10 10 10 10 5 3 10 ...
## $ SOCIAL_NETWORK : int 5 10 10 7 4 10 10 7 3 10 ...
## $ ACHIEVEMENT : int 2 5 3 2 2 2 3 4 5 0 ...
## $ DONATION : int 0 2 2 5 4 3 5 0 4 4 ...
## $ TODO_COMPLETED : int 6 5 2 3 5 6 8 8 10 3 ...
## $ FLOW : int 4 2 2 5 0 1 8 2 2 2 ...
## $ DAILY_STEPS : int 5 5 4 5 5 7 7 8 1 3 ...
## $ LIVE_VISION : int 0 5 5 0 0 10 5 10 5 0 ...
## $ SLEEP_HOURS : int 7 8 8 5 7 8 7 6 10 6 ...
## $ LOST_VACATION : int 5 2 10 7 0 0 10 0 0 0 ...
## $ DAILY_SHOUTING : int 5 2 2 5 0 2 0 2 2 0 ...
## $ SUFFICIENT_INCOME: int 1 2 2 1 2 2 2 2 2 1 ...
## $ PERSONAL_AWARDS : int 4 3 4 5 8 10 10 8 10 3 ...
## $ TIME_FOR_PASSION : int 0 2 8 2 1 8 8 2 3 8 ...
## $ WEEKLY_MEDITATION: int 5 6 3 0 5 3 10 2 10 1 ...
dim(data_features)
## [1] 15972 18
summary(data_features)
## FRUITS_VEGGIES PLACES_VISITED CORE_CIRCLE SUPPORTING_OTHERS
## Min. :0.000 Min. : 0.000 Min. : 0.000 Min. : 0.000
## 1st Qu.:2.000 1st Qu.: 2.000 1st Qu.: 3.000 1st Qu.: 3.000
## Median :3.000 Median : 5.000 Median : 5.000 Median : 5.000
## Mean :2.923 Mean : 5.233 Mean : 5.508 Mean : 5.616
## 3rd Qu.:4.000 3rd Qu.: 8.000 3rd Qu.: 8.000 3rd Qu.:10.000
## Max. :5.000 Max. :10.000 Max. :10.000 Max. :10.000
## SOCIAL_NETWORK ACHIEVEMENT DONATION TODO_COMPLETED
## Min. : 0.000 Min. : 0.000 Min. :0.000 Min. : 0.000
## 1st Qu.: 4.000 1st Qu.: 2.000 1st Qu.:1.000 1st Qu.: 4.000
## Median : 6.000 Median : 3.000 Median :3.000 Median : 6.000
## Mean : 6.474 Mean : 4.001 Mean :2.715 Mean : 5.746
## 3rd Qu.:10.000 3rd Qu.: 6.000 3rd Qu.:5.000 3rd Qu.: 8.000
## Max. :10.000 Max. :10.000 Max. :5.000 Max. :10.000
## FLOW DAILY_STEPS LIVE_VISION SLEEP_HOURS
## Min. : 0.000 Min. : 1.000 Min. : 0.000 Min. : 1.000
## 1st Qu.: 1.000 1st Qu.: 3.000 1st Qu.: 1.000 1st Qu.: 6.000
## Median : 3.000 Median : 5.000 Median : 3.000 Median : 7.000
## Mean : 3.195 Mean : 5.704 Mean : 3.752 Mean : 7.043
## 3rd Qu.: 5.000 3rd Qu.: 8.000 3rd Qu.: 5.000 3rd Qu.: 8.000
## Max. :10.000 Max. :10.000 Max. :10.000 Max. :10.000
## LOST_VACATION DAILY_SHOUTING SUFFICIENT_INCOME PERSONAL_AWARDS
## Min. : 0.000 Min. : 0.000 Min. :1.000 Min. : 0.000
## 1st Qu.: 0.000 1st Qu.: 1.000 1st Qu.:1.000 1st Qu.: 3.000
## Median : 0.000 Median : 2.000 Median :2.000 Median : 5.000
## Mean : 2.899 Mean : 2.931 Mean :1.729 Mean : 5.712
## 3rd Qu.: 5.000 3rd Qu.: 4.000 3rd Qu.:2.000 3rd Qu.: 9.000
## Max. :10.000 Max. :10.000 Max. :2.000 Max. :10.000
## TIME_FOR_PASSION WEEKLY_MEDITATION
## Min. : 0.000 Min. : 0.000
## 1st Qu.: 1.000 1st Qu.: 4.000
## Median : 3.000 Median : 7.000
## Mean : 3.327 Mean : 6.233
## 3rd Qu.: 5.000 3rd Qu.:10.000
## Max. :10.000 Max. :10.000
Missing Value Check
& Removal
colSums(is.na(data_features))
## FRUITS_VEGGIES PLACES_VISITED CORE_CIRCLE SUPPORTING_OTHERS
## 0 0 0 0
## SOCIAL_NETWORK ACHIEVEMENT DONATION TODO_COMPLETED
## 0 0 0 0
## FLOW DAILY_STEPS LIVE_VISION SLEEP_HOURS
## 0 0 0 0
## LOST_VACATION DAILY_SHOUTING SUFFICIENT_INCOME PERSONAL_AWARDS
## 0 0 0 0
## TIME_FOR_PASSION WEEKLY_MEDITATION
## 0 0
data_features <- na.omit(data_features)
target <- target[complete.cases(data_raw %>% select(-Timestamp, -AGE, -GENDER, -BMI_RANGE, -WORK_LIFE_BALANCE_SCORE))]
cat("Rows after removing NA:", nrow(data_features), "\n")
## Rows after removing NA: 15972
Variable Selection
for PCA & FA
Remove Zero
Variance Columns if Found
variances <- apply(data_features, 2, var, na.rm = TRUE)
zero_var_cols <- names(data_features)[!is.na(variances) & variances == 0]
print(paste("Number of columns with zero variance:", length(zero_var_cols)))
## [1] "Number of columns with zero variance: 0"
if (length(zero_var_cols) > 0) {
data_features <- data_features %>% select(-all_of(zero_var_cols))
}
Selecting Top
Features Based on Correlation with Work-Life Balance Score
cor_with_target <- sapply(data_features, function(x) cor(x, target, use = "complete.obs"))
abs_cor_with_target <- abs(cor_with_target)
n_top <- min(20, ncol(data_features))
top_features <- names(sort(abs_cor_with_target, decreasing = TRUE)[1:n_top])
print(paste("Top", n_top, "Selected Features:"))
## [1] "Top 18 Selected Features:"
print(top_features)
## [1] "ACHIEVEMENT" "SUPPORTING_OTHERS" "TODO_COMPLETED"
## [4] "PLACES_VISITED" "TIME_FOR_PASSION" "CORE_CIRCLE"
## [7] "PERSONAL_AWARDS" "FLOW" "LIVE_VISION"
## [10] "DONATION" "FRUITS_VEGGIES" "DAILY_STEPS"
## [13] "WEEKLY_MEDITATION" "SOCIAL_NETWORK" "SUFFICIENT_INCOME"
## [16] "DAILY_SHOUTING" "LOST_VACATION" "SLEEP_HOURS"
data_features <- data_features %>% select(all_of(top_features))
dim(data_features)
## [1] 15972 18
Scaling Data
data_scaled <- scale(data_features)
data_scaled <- as.data.frame(data_scaled)
summary(data_scaled[, 1:5])
## ACHIEVEMENT SUPPORTING_OTHERS TODO_COMPLETED PLACES_VISITED
## Min. :-1.4517 Min. :-1.7324 Min. :-2.1897 Min. :-1.58004
## 1st Qu.:-0.7260 1st Qu.:-0.8070 1st Qu.:-0.6654 1st Qu.:-0.97616
## Median :-0.3631 Median :-0.1901 Median : 0.0968 Median :-0.07034
## Mean : 0.0000 Mean : 0.0000 Mean : 0.0000 Mean : 0.00000
## 3rd Qu.: 0.7255 3rd Qu.: 1.3521 3rd Qu.: 0.8590 3rd Qu.: 0.83548
## Max. : 2.1769 Max. : 1.3521 Max. : 1.6211 Max. : 1.43936
## TIME_FOR_PASSION
## Min. :-1.2188
## 1st Qu.:-0.8524
## Median :-0.1197
## Mean : 0.0000
## 3rd Qu.: 0.6131
## Max. : 2.4451
Exploratory Data
Analysis (EDA)
Descriptive
Statistics Table
desc_stats <- describe(data_features)
desc_table <- desc_stats[, c("n", "mean", "sd", "median", "min", "max")]
round(desc_table, 2)
## n mean sd median min max
## ACHIEVEMENT 15972 4.00 2.76 3 0 10
## SUPPORTING_OTHERS 15972 5.62 3.24 5 0 10
## TODO_COMPLETED 15972 5.75 2.62 6 0 10
## PLACES_VISITED 15972 5.23 3.31 5 0 10
## TIME_FOR_PASSION 15972 3.33 2.73 3 0 10
## CORE_CIRCLE 15972 5.51 2.84 5 0 10
## PERSONAL_AWARDS 15972 5.71 3.09 5 0 10
## FLOW 15972 3.19 2.36 3 0 10
## LIVE_VISION 15972 3.75 3.23 3 0 10
## DONATION 15972 2.72 1.85 3 0 5
## FRUITS_VEGGIES 15972 2.92 1.44 3 0 5
## DAILY_STEPS 15972 5.70 2.89 5 1 10
## WEEKLY_MEDITATION 15972 6.23 3.02 7 0 10
## SOCIAL_NETWORK 15972 6.47 3.09 6 0 10
## SUFFICIENT_INCOME 15972 1.73 0.44 2 1 2
## DAILY_SHOUTING 15972 2.93 2.68 2 0 10
## LOST_VACATION 15972 2.90 3.69 0 0 10
## SLEEP_HOURS 15972 7.04 1.20 7 1 10
Target score
distribution plot
ggplot(data.frame(WLB_Score = target), aes(x = WLB_Score)) +
geom_histogram(binwidth = 10, fill = "#4C72B0", color = "white", alpha = 0.8) +
geom_vline(aes(xintercept = mean(WLB_Score)), color = "red", linetype = "dashed", size = 1) +
theme_minimal() +
labs(title = "Distribution of Respondents' Work-Life Balance Scores",
subtitle = "The dashed red line indicates the mean score",
x = "Work-Life Balance Score",
y = "Number of Respondents")

Correlation
Matrix
cor_matrix_top <- cor(data_scaled)
corrplot(cor_matrix_top,
method = "color",
type = "upper",
diag = TRUE,
tl.col = "black",
tl.cex = 0.75,
addCoef.col = "black",
number.cex = 0.65,
title = "Correlation Matrix of Selected Wellbeing Features",
mar = c(0, 0, 2, 0))

Principal Component
Analysis (PCA)
pca_result <- prcomp(
data_scaled,
center = FALSE,
scale. = FALSE
)
summary(pca_result)
## Importance of components:
## PC1 PC2 PC3 PC4 PC5 PC6 PC7
## Standard deviation 2.0146 1.20332 1.08968 1.00547 1.00077 0.96695 0.95449
## Proportion of Variance 0.2255 0.08044 0.06597 0.05616 0.05564 0.05194 0.05061
## Cumulative Proportion 0.2255 0.30593 0.37190 0.42806 0.48370 0.53565 0.58626
## PC8 PC9 PC10 PC11 PC12 PC13 PC14
## Standard deviation 0.94223 0.89834 0.86757 0.85100 0.84771 0.82002 0.81156
## Proportion of Variance 0.04932 0.04483 0.04182 0.04023 0.03992 0.03736 0.03659
## Cumulative Proportion 0.63558 0.68042 0.72223 0.76247 0.80239 0.83975 0.87634
## PC15 PC16 PC17 PC18
## Standard deviation 0.79355 0.74951 0.7410 0.69669
## Proportion of Variance 0.03498 0.03121 0.0305 0.02697
## Cumulative Proportion 0.91132 0.94253 0.9730 1.00000
Eigenvalues
eigenvalues <- pca_result$sdev^2
eigenvalues
## [1] 4.0587721 1.4479889 1.1874048 1.0109600 1.0015424 0.9349846 0.9110468
## [8] 0.8877936 0.8070231 0.7526704 0.7242091 0.7186093 0.6724290 0.6586346
## [15] 0.6297165 0.5617720 0.5490603 0.4853824
Cumulative
Variance
cumulative_variance <- cumsum(eigenvalues / sum(eigenvalues))
cumulative_variance
## [1] 0.2254873 0.3059312 0.3718981 0.4280625 0.4837038 0.5356474 0.5862611
## [8] 0.6355830 0.6804176 0.7222326 0.7624664 0.8023892 0.8397463 0.8763372
## [15] 0.9113214 0.9425310 0.9730343 1.0000000
Scree Plot
plot(
eigenvalues,
type = "b",
xlab = "Principal Component",
ylab = "Eigenvalue",
main = "Scree Plot - Wellbeing PCA"
)
abline(h = 1, col = "red", lty = 2)

PCA Loadings
pca_loadings <- pca_result$rotation
round(pca_loadings[, 1:5], 3)
## PC1 PC2 PC3 PC4 PC5
## ACHIEVEMENT 0.325 0.168 0.109 0.081 0.025
## SUPPORTING_OTHERS 0.314 0.167 -0.169 0.245 -0.138
## TODO_COMPLETED 0.285 -0.109 0.117 -0.234 -0.003
## PLACES_VISITED 0.249 -0.278 -0.209 0.113 -0.008
## TIME_FOR_PASSION 0.297 0.153 0.388 0.007 0.085
## CORE_CIRCLE 0.272 0.059 -0.160 -0.046 -0.233
## PERSONAL_AWARDS 0.282 0.077 -0.166 0.250 -0.121
## FLOW 0.288 0.227 0.375 -0.124 0.097
## LIVE_VISION 0.254 0.118 0.296 -0.122 -0.050
## DONATION 0.243 0.043 -0.263 0.441 -0.155
## FRUITS_VEGGIES 0.211 -0.295 -0.203 0.016 0.378
## DAILY_STEPS 0.191 -0.124 -0.242 -0.358 0.556
## WEEKLY_MEDITATION 0.180 -0.359 0.211 0.315 0.250
## SOCIAL_NETWORK 0.233 0.261 -0.255 -0.364 -0.025
## SUFFICIENT_INCOME 0.151 -0.205 -0.232 -0.378 -0.277
## DAILY_SHOUTING -0.083 0.281 -0.243 0.221 0.425
## LOST_VACATION -0.057 0.415 0.013 0.092 0.308
## SLEEP_HOURS 0.067 -0.402 0.281 0.128 0.026
PCA Scores
pca_scores <- pca_result$x
head(pca_scores)
## PC1 PC2 PC3 PC4 PC5 PC6
## 1 -2.6816520 0.3298010 0.5206310 -0.3317646 1.3030912 -0.7043307
## 2 -0.3389519 -0.0615512 0.1294993 -0.6937508 -0.6227377 -0.4152512
## 3 -0.8936857 1.5011407 0.8333821 -0.7298388 -0.1719247 0.1898784
## 4 -0.8318005 2.2645017 -1.8261743 1.2995597 0.3669549 0.5230629
## 5 -0.6358354 -1.4195633 -1.5576141 0.8225990 -0.8845768 1.4609239
## 6 1.9306658 0.4860832 -0.2151567 -0.7654128 -1.2831454 -0.8534223
## PC7 PC8 PC9 PC10 PC11 PC12
## 1 -0.170964298 0.3921832 0.42447723 0.6307385 -0.3682183 0.02849323
## 2 0.415118873 0.3990008 -0.45294949 -0.7459945 -0.1748733 0.60549492
## 3 1.113470308 1.5764466 -0.29299642 -0.2190018 1.0408243 -0.05282522
## 4 -1.175011126 0.6609088 -0.03582509 1.6739229 0.3201395 -0.58285676
## 5 -0.499526326 0.3378168 -0.71953282 0.8816812 -0.7047024 0.52951088
## 6 -0.009193782 0.2560299 -0.82727033 -0.5435535 -0.9721564 -0.48753653
## PC13 PC14 PC15 PC16 PC17 PC18
## 1 1.3697541 0.23816199 -0.5172795 0.7915267 0.37183856 0.51224345
## 2 -0.6915919 0.06240416 1.2857572 -0.9968389 0.63233563 0.06909384
## 3 -1.5793546 0.42267593 0.7832261 0.1843213 -0.68713167 -1.38037750
## 4 -0.5649755 -1.77173941 0.7603674 0.1111018 -0.07369325 1.21004560
## 5 -0.2123966 1.23632530 0.3460966 -0.8658571 -0.51748289 0.39682827
## 6 -1.0799762 1.22243786 0.2742712 -0.1942384 -2.36671741 -0.80558262
PCA Visualization
(PC1 vs PC2)
plot(
pca_scores[, 1],
pca_scores[, 2],
xlab = "PC1",
ylab = "PC2",
main = "PCA Score Plot - Wellbeing Data",
pch = 19,
col = rgb(0.2, 0.4, 0.8, 0.3)
)

Factor Analysis
(FA)
Sample Size
Check
n <- nrow(data_scaled)
p <- ncol(data_scaled)
ratio <- n / p
cat("Number of observations :", n, "\n")
## Number of observations : 15972
cat("Number of variables :", p, "\n")
## Number of variables : 18
cat("Observation-to-variable ratio:", round(ratio, 1), "\n")
## Observation-to-variable ratio: 887.3
cat("Adequate sample size (n > 200):", ifelse(n > 200, "YES", "NO"), "\n")
## Adequate sample size (n > 200): YES
cat("Adequate ratio (>= 5:1) :", ifelse(ratio >= 5, "YES", "NO"), "\n")
## Adequate ratio (>= 5:1) : YES
Determining Number of
Factors
fa_parallel <- fa.parallel(data_scaled, fm = "pa", fa = "fa", show.legend = FALSE,
main = "Parallel Analysis Scree Plot - Wellbeing Data")

## Parallel analysis suggests that the number of factors = 7 and the number of components = NA
nfactors_optimal <- fa_parallel$nfact
cat("Optimal number of factors:", nfactors_optimal, "\n")
## Optimal number of factors: 7
Factor Loadings
print(fa_result$loadings, cutoff = 0.3, sort = TRUE)
##
## Loadings:
## PA1 PA5 PA3 PA2 PA4 PA6 PA7
## TIME_FOR_PASSION 0.649
## FLOW 0.665
## SUPPORTING_OTHERS 0.535 0.306
## DONATION 0.564
## SOCIAL_NETWORK 0.539
## DAILY_STEPS 0.569
## ACHIEVEMENT 0.462 0.483
## TODO_COMPLETED 0.346 0.348
## PLACES_VISITED 0.338
## CORE_CIRCLE 0.450
## PERSONAL_AWARDS 0.445
## LIVE_VISION 0.414
## FRUITS_VEGGIES 0.364
## WEEKLY_MEDITATION 0.380
## SUFFICIENT_INCOME 0.365
## DAILY_SHOUTING
## LOST_VACATION -0.306
## SLEEP_HOURS 0.365
##
## PA1 PA5 PA3 PA2 PA4 PA6 PA7
## SS loadings 1.660 0.946 0.753 0.732 0.664 0.643 0.402
## Proportion Var 0.092 0.053 0.042 0.041 0.037 0.036 0.022
## Cumulative Var 0.092 0.145 0.187 0.227 0.264 0.300 0.322
loadings_matrix <- as.data.frame(unclass(fa_result$loadings))
colnames(loadings_matrix) <- paste0("F", 1:ncol(loadings_matrix))
round(loadings_matrix, 3)
## F1 F2 F3 F4 F5 F6 F7
## ACHIEVEMENT 0.462 0.157 0.157 0.021 0.115 0.483 0.072
## SUPPORTING_OTHERS 0.286 0.535 0.306 0.023 0.082 0.153 0.042
## TODO_COMPLETED 0.346 0.119 0.109 0.208 0.171 0.114 0.348
## PLACES_VISITED 0.068 0.194 0.166 0.338 0.214 0.272 0.112
## TIME_FOR_PASSION 0.649 0.171 0.087 0.118 0.086 0.060 -0.023
## CORE_CIRCLE 0.197 0.208 0.450 0.175 0.054 0.167 0.065
## PERSONAL_AWARDS 0.204 0.290 0.133 0.035 0.093 0.445 0.149
## FLOW 0.665 0.082 0.135 0.020 0.087 0.064 0.049
## LIVE_VISION 0.414 0.099 0.116 0.085 0.041 0.141 0.152
## DONATION 0.131 0.564 0.076 0.033 0.089 0.116 0.112
## FRUITS_VEGGIES 0.091 0.222 0.028 0.237 0.364 0.057 0.123
## DAILY_STEPS 0.105 0.032 0.139 0.046 0.569 0.068 0.078
## WEEKLY_MEDITATION 0.175 0.149 -0.098 0.380 0.184 0.109 -0.016
## SOCIAL_NETWORK 0.209 0.105 0.539 -0.127 0.182 0.060 0.136
## SUFFICIENT_INCOME 0.034 0.083 0.090 0.138 0.092 0.081 0.365
## DAILY_SHOUTING -0.091 -0.031 0.004 -0.232 0.004 0.041 -0.134
## LOST_VACATION 0.043 -0.007 -0.041 -0.306 -0.037 -0.004 -0.071
## SLEEP_HOURS 0.047 -0.033 -0.009 0.365 0.009 0.025 0.006
Communalities
communalities <- data.frame(
Variable = names(fa_result$communality),
Communality = round(fa_result$communality, 4)
)
communalities
## Variable Communality
## ACHIEVEMENT ACHIEVEMENT 0.5148
## SUPPORTING_OTHERS SUPPORTING_OTHERS 0.4942
## TODO_COMPLETED TODO_COMPLETED 0.3528
## PLACES_VISITED PLACES_VISITED 0.3159
## TIME_FOR_PASSION TIME_FOR_PASSION 0.4831
## CORE_CIRCLE CORE_CIRCLE 0.3506
## PERSONAL_AWARDS PERSONAL_AWARDS 0.3734
## FLOW FLOW 0.4814
## LIVE_VISION LIVE_VISION 0.2468
## DONATION DONATION 0.3759
## FRUITS_VEGGIES FRUITS_VEGGIES 0.2656
## DAILY_STEPS DAILY_STEPS 0.3683
## WEEKLY_MEDITATION WEEKLY_MEDITATION 0.2527
## SOCIAL_NETWORK SOCIAL_NETWORK 0.4163
## SUFFICIENT_INCOME SUFFICIENT_INCOME 0.1837
## DAILY_SHOUTING DAILY_SHOUTING 0.0830
## LOST_VACATION LOST_VACATION 0.1038
## SLEEP_HOURS SLEEP_HOURS 0.1373
Variance Explained
per Factor
nf <- ncol(fa_result$loadings)
vaccounted <- as.data.frame(fa_result$Vaccounted)
colnames(vaccounted) <- paste0("F", 1:nf)
fa_variance <- data.frame(
Factor = paste0("F", 1:nf),
SS_Loadings = round(as.numeric(vaccounted["SS loadings", ]), 4),
Prop_Var = round(as.numeric(vaccounted["Proportion Var", ]), 4),
Cumulative_Var = round(as.numeric(vaccounted["Cumulative Var", ]), 4)
)
fa_variance
## Factor SS_Loadings Prop_Var Cumulative_Var
## 1 F1 1.6595 0.0922 0.0922
## 2 F2 0.9462 0.0526 0.1448
## 3 F3 0.7529 0.0418 0.1866
## 4 F4 0.7318 0.0407 0.2272
## 5 F5 0.6644 0.0369 0.2642
## 6 F6 0.6428 0.0357 0.2999
## 7 F7 0.4022 0.0223 0.3222
Factor Loadings
Heatmap
nf <- ncol(fa_result$loadings)
loadings_df <- as.data.frame(unclass(fa_result$loadings))
colnames(loadings_df) <- paste0("F", 1:nf)
short_names <- c(
FRUITS_VEGGIES = "Fruits & Veggies",
DAILY_STRESS = "Daily Stress",
PLACES_VISITED = "Places Visited",
CORE_CIRCLE = "Core Circle",
SUPPORTING_OTHERS = "Supporting Others",
SOCIAL_NETWORK = "Social Network",
ACHIEVEMENT = "Achievement",
DONATION = "Donation",
TODO_COMPLETED = "Todo Completed",
FLOW = "Flow",
DAILY_STEPS = "Daily Steps",
LIVE_VISION = "Live Vision",
SLEEP_HOURS = "Sleep Hours",
LOST_VACATION = "Lost Vacation",
DAILY_SHOUTING = "Daily Shouting",
SUFFICIENT_INCOME = "Sufficient Income",
PERSONAL_AWARDS = "Personal Awards",
TIME_FOR_PASSION = "Time for Passion",
WEEKLY_MEDITATION = "Weekly Meditation"
)
rownames(loadings_df) <- ifelse(
rownames(loadings_df) %in% names(short_names),
short_names[rownames(loadings_df)],
rownames(loadings_df)
)
loadings_df$Variable <- rownames(loadings_df)
loadings_melt <- melt(loadings_df, id.vars = "Variable", variable.name = "Factor", value.name = "Loading")
ggplot(loadings_melt, aes(x = Factor, y = Variable, fill = Loading)) +
geom_tile(color = "white", linewidth = 0.5) +
geom_text(aes(label = round(Loading, 2)), size = 3.2, color = "black") +
scale_fill_gradient2(
low = "#C0392B", mid = "white", high = "#1A5276",
midpoint = 0, limits = c(-1, 1), name = "Loading",
guide = guide_colorbar(
barheight = unit(1, "npc") * 0.78,
barwidth = 1.2
)
) +
scale_y_discrete(limits = rev(unique(loadings_melt$Variable))) +
labs(title = "Factor Loadings Heatmap (Varimax Rotation) - Wellbeing Data",
x = "Factor", y = NULL) +
theme_minimal(base_size = 12) +
theme(
plot.title = element_text(hjust = 0.5, face = "bold", size = 13),
axis.text.x = element_text(face = "bold", size = 11),
axis.text.y = element_text(size = 10),
legend.position = "right",
panel.grid = element_blank(),
aspect.ratio = nrow(loadings_df) / nf
)

FA Diagram
fa_result_renamed <- fa_result
rownames(fa_result_renamed$loadings) <- ifelse(
rownames(fa_result$loadings) %in% names(short_names),
short_names[rownames(fa_result$loadings)],
rownames(fa_result$loadings)
)
fa.diagram(fa_result_renamed,
main = "Factor Analysis Diagram - Wellbeing Data",
cut = 0.3,
digits = 2,
cex = 0.85,
e.size = 0.05,
rsize = 0.15,
side = 2)

Factor Scores
fa_scores <- as.data.frame(fa_result$scores)
colnames(fa_scores) <- paste0("Factor_", 1:ncol(fa_scores))
head(fa_scores, 10)
## Factor_1 Factor_2 Factor_3 Factor_4 Factor_5 Factor_6
## 1 -0.36034334 -1.2724998 -0.35355679 -0.37580592 -0.02889258 -0.3714375
## 2 -0.17126763 -0.1318808 0.43024893 -0.16911010 -0.21239142 -0.1675657
## 3 0.47225864 -0.4316781 0.28634459 -0.69099843 -0.49695206 -0.5656426
## 4 -0.38580032 1.1291739 0.25604927 -1.33613131 -0.03838637 -0.3123650
## 5 -1.19757058 1.2017853 -0.46790448 0.13809198 0.04587299 -0.1499509
## 6 0.33074693 0.6347705 1.08463582 0.05754825 -0.14228308 -0.2257361
## 7 1.35592655 1.1198856 0.30975797 0.21717132 0.55848451 -0.1488977
## 8 -0.08020660 -0.7915100 0.07289726 -0.36727260 0.31230869 0.2733762
## 9 -0.07613462 0.2634400 -1.13464294 1.58040369 -0.45673411 0.7051340
## 10 0.04434552 0.9911836 0.75907358 -0.58196167 -0.49502725 -1.4947404
## Factor_7
## 1 -0.4177098
## 2 0.1605303
## 3 -0.3156680
## 4 -0.6845955
## 5 0.3193824
## 6 0.3494566
## 7 0.3119038
## 8 0.9279424
## 9 0.8000030
## 10 -0.7908381
Goodness of Fit
cat("RMSEA :", round(fa_result$RMSEA[1], 4), "\n")
## RMSEA : 0.0198
cat("TLI :", round(fa_result$TLI, 4), "\n")
## TLI : 0.9772
cat("BIC :", round(fa_result$BIC, 4), "\n")
## BIC : -114.9045
Factor
Interpretation Table
nf <- ncol(fa_result$loadings)
loadings_df2 <- as.data.frame(unclass(fa_result$loadings))
colnames(loadings_df2) <- paste0("F", 1:nf)
rn <- rownames(loadings_df2)
rownames(loadings_df2) <- ifelse(rn %in% names(short_names), short_names[rn], rn)
top_vars <- sapply(loadings_df2, function(f) {
vars <- rownames(loadings_df2)[abs(f) >= 0.3]
paste(vars, collapse = ", ")
})
factor_names_default <- c(
"Social Engagement & Achievements",
"Mental Wellbeing & Mindfulness",
"Physical Health & Activity",
"Stress & Emotional Strain",
"Financial & Life Satisfaction"
)
factor_table <- data.frame(
Factor = paste0("F", 1:nf),
Suggested_Name = factor_names_default[1:nf],
Key_Variables = top_vars,
row.names = NULL
)
factor_table
## Factor Suggested_Name
## 1 F1 Social Engagement & Achievements
## 2 F2 Mental Wellbeing & Mindfulness
## 3 F3 Physical Health & Activity
## 4 F4 Stress & Emotional Strain
## 5 F5 Financial & Life Satisfaction
## 6 F6 <NA>
## 7 F7 <NA>
## Key_Variables
## 1 Achievement, Todo Completed, Time for Passion, Flow, Live Vision
## 2 Supporting Others, Donation
## 3 Supporting Others, Core Circle, Social Network
## 4 Places Visited, Weekly Meditation, Lost Vacation, Sleep Hours
## 5 Fruits & Veggies, Daily Steps
## 6 Achievement, Personal Awards
## 7 Todo Completed, Sufficient Income