1 Executive Summary

This analysis applies dimension reduction techniques (PCA, t-SNE, UMAP) to the Apple App Store dataset to visualize high-dimensional app characteristics and validate the clustering structure identified in the companion clustering analysis.

Key Results:

Metric	Value
Original Dimensions	15 variables
Optimal PCs (80% variance)	8 components
PC1 + PC2 Variance	38.9%
Best Structure Preservation	PCA (Mantel r = 0.696)

Principal Component Interpretation:

Component	Variance	Interpretation
PC1	23.8%	International Reach & Popularity
PC2	15.1%	User Engagement & Quality

Method Comparison:

PCA: Best for interpretability and global structure preservation
t-SNE: Best for revealing local cluster patterns (perplexity = 50)
UMAP: Balances local and global structure, faster than t-SNE

2 Data Loading & Preparation

We load the pre-processed dataset from the clustering analysis, which already has anomalies removed and cluster assignments.

# Load pre-cleaned data with clusters from clustering analysis
clean_file <- "results/appstore_with_clusters.csv"

if (file.exists(clean_file)) {
  appstore <- read.csv(clean_file, stringsAsFactors = FALSE)
  has_clusters <- "cluster" %in% names(appstore)
  cat("Loaded pre-cleaned dataset with cluster assignments\n")
} else {
  # Fallback to original data if clustering hasn't been run
  appstore <- read.csv("DataSets/AppleStore.csv", stringsAsFactors = FALSE)
  has_clusters <- FALSE
  cat("Note: Run clustering_report.Rmd first for cluster-colored visualizations\n")
}

## Loaded pre-cleaned dataset with cluster assignments

data.frame(
  Metric = c("Total Apps", "Variables", "Has Clusters"),
  Value = c(format(nrow(appstore), big.mark = ","), ncol(appstore), ifelse(has_clusters, "Yes (3 clusters)", "No"))
) %>%
  kable(caption = "Dataset Overview") %>%
  kable_styling(bootstrap_options = c("striped", "hover"), full_width = FALSE)

Dataset Overview
Metric	Value
Total Apps	6,847
Variables	34
Has Clusters	Yes (3 clusters)

if (has_clusters) {
  cat("Cluster distribution:\n")
  print(table(appstore$cluster))
}

## Cluster distribution:
## 
##    1    2    3 
## 2517 2760 1570

cat("Dataset dimensions:", nrow(appstore), "apps ×", ncol(appstore), "variables\n")

## Dataset dimensions: 6847 apps × 34 variables

3 Feature Engineering

# Create numeric encoding for categorical variables
appstore$cont_rating_num <- as.numeric(factor(appstore$cont_rating))
appstore$prime_genre_num <- as.numeric(factor(appstore$prime_genre))

# Create price tiers and engineered features
appstore <- appstore %>%
  mutate(
    price_tier = cut(price,
                     breaks = c(-Inf, 0, 2.99, 9.99, 29.99, Inf),
                     labels = c("Free", "Budget", "Mid", "Premium", "Ultra")),
    
    # Log transformations for skewed variables
    log_rating_count = log1p(rating_count_tot),
    log_size = log1p(size_bytes),
    log_price = log1p(price),
    
    # Engagement and quality metrics
    engagement_ratio = ifelse(rating_count_tot > 0,
                              rating_count_ver / rating_count_tot, 0),
    quality_delta = user_rating_ver - user_rating,
    
    # Composite indices
    international_index = ifelse(size_bytes > 0,
                                 (lang.num * sup_devices.num) / log1p(size_bytes), 0),
    richness_score = ipadSc_urls.num + lang.num
  )

# Feature summary table
data.frame(
  Feature = c("log_size", "log_price", "log_rating_count", "engagement_ratio", 
              "quality_delta", "international_index", "richness_score", "price_tier"),
  Purpose = c("Normalize size distribution", "Normalize price distribution",
              "Normalize popularity distribution", "Version engagement rate",
              "Rating trend direction", "Global reach potential",
              "App comprehensiveness", "Categorical price segments")
) %>%
  kable(caption = "Engineered Features") %>%
  kable_styling(bootstrap_options = c("striped", "hover"))

Engineered Features
Feature	Purpose
log_size	Normalize size distribution
log_price	Normalize price distribution
log_rating_count	Normalize popularity distribution
engagement_ratio	Version engagement rate
quality_delta	Rating trend direction
international_index	Global reach potential
richness_score	App comprehensiveness
price_tier	Categorical price segments

3.0.1 Key Findings

8 new features created from original variables
Log transformations applied to highly skewed variables (size, price, ratings)
Composite indices capture multi-dimensional concepts (international reach, app richness)

4 Variable Selection & Standardization

# Select numerical variables for dimension reduction
clustering_vars <- c(
  "log_price", "log_size", "log_rating_count",
  "user_rating", "user_rating_ver",
  "cont_rating_num", "prime_genre_num",
  "sup_devices.num", "ipadSc_urls.num",
  "lang.num", "vpp_lic",
  "engagement_ratio", "quality_delta",
  "international_index", "richness_score"
)

cat("Selected", length(clustering_vars), "variables for dimension reduction:\n")

## Selected 15 variables for dimension reduction:

print(clustering_vars)

##  [1] "log_price"           "log_size"            "log_rating_count"   
##  [4] "user_rating"         "user_rating_ver"     "cont_rating_num"    
##  [7] "prime_genre_num"     "sup_devices.num"     "ipadSc_urls.num"    
## [10] "lang.num"            "vpp_lic"             "engagement_ratio"   
## [13] "quality_delta"       "international_index" "richness_score"

# Extract and clean the data
data_for_reduction <- appstore[, clustering_vars]

# Handle missing/infinite values
data_for_reduction[is.na(data_for_reduction)] <- 0
data_for_reduction[is.infinite(as.matrix(data_for_reduction))] <- 0

# Standardize the data (Z-score normalization)
data_scaled <- scale(data_for_reduction)

# Handle any NA/NaN values that may arise from zero-variance columns
data_scaled[is.na(data_scaled)] <- 0
data_scaled[is.nan(data_scaled)] <- 0

data_scaled_df <- as.data.frame(data_scaled)

cat("\nScaled data dimensions:", nrow(data_scaled_df), "×", ncol(data_scaled_df))

## 
## Scaled data dimensions: 6847 × 15

5 Anomaly Detection

Before dimension reduction, we remove outliers using Isolation Forest to prevent them from distorting principal components and visualizations.

# Set seed to match clustering analysis (ensures same anomalies removed)
set.seed(123)

# Train Isolation Forest
iso_forest <- isolationForest$new(sample_size = 256, num_trees = 100, max_depth = ceiling(log2(256)))
iso_forest$fit(as.data.frame(data_scaled))
anomaly_scores <- iso_forest$predict(as.data.frame(data_scaled))

# Add scores and identify anomalies (top 5%)
appstore$anomaly_score <- anomaly_scores$anomaly_score
threshold <- quantile(anomaly_scores$anomaly_score, 0.95)
is_anomaly <- anomaly_scores$anomaly_score > threshold

data.frame(
  Metric = c("Original dataset", "Anomalies detected", "Clean dataset"),
  Value = c(paste(nrow(appstore), "apps"), 
            paste(sum(is_anomaly), "(", round(mean(is_anomaly) * 100, 1), "%)"),
            paste(sum(!is_anomaly), "apps"))
) %>%
  kable(caption = "Anomaly Detection Results") %>%
  kable_styling(bootstrap_options = c("striped", "hover"), full_width = FALSE)

Anomaly Detection Results
Metric	Value
Original dataset	6847 apps
Anomalies detected	336 ( 4.9 %)
Clean dataset	6511 apps

par(mfrow = c(1, 2))

# Histogram of anomaly scores
hist(anomaly_scores$anomaly_score, breaks = 50, 
     main = "Distribution of Anomaly Scores", 
     xlab = "Anomaly Score", col = "lightblue", border = "white")
abline(v = threshold, col = "red", lwd = 2, lty = 2)
legend("topright", legend = "95th percentile", col = "red", lty = 2, lwd = 2)

# Anomaly score vs log rating count
plot(appstore$log_rating_count, anomaly_scores$anomaly_score, 
     main = "Anomaly Score vs Popularity", 
     xlab = "Log(Rating Count + 1)", ylab = "Anomaly Score", pch = 16, 
     col = ifelse(is_anomaly, "red", "gray70"), cex = 0.5)
abline(h = threshold, col = "red", lwd = 2, lty = 2)

Anomaly Score Distribution

# Remove anomalies from all datasets
data_scaled <- data_scaled[!is_anomaly, ]
data_scaled_df <- data_scaled_df[!is_anomaly, ]
appstore <- appstore[!is_anomaly, ]
data_for_reduction <- data_for_reduction[!is_anomaly, ]

cat("Clean dataset:", nrow(appstore), "apps (anomalies removed)")

## Clean dataset: 6511 apps (anomalies removed)

5.0.1 Key Findings

Isolation Forest identified extreme outliers based on multi-dimensional patterns
5% threshold removes apps with unusual feature combinations
Cleaner PCA - Anomalies would distort principal components and visualizations
Consistency with clustering analysis which used the same anomaly removal

6 Correlation Analysis

Understanding correlations between variables justifies the need for dimension reduction and helps interpret principal components.

# Remove cluster column if present for correlation calculation
cor_data <- data_scaled_df[, !names(data_scaled_df) %in% c("cluster")]

# Remove any columns with zero variance (cause NA in correlation)
col_vars <- apply(cor_data, 2, var, na.rm = TRUE)
valid_cols <- col_vars > 1e-10 & !is.na(col_vars)
cor_data <- cor_data[, valid_cols]

cor_matrix <- cor(cor_data, use = "pairwise.complete.obs")

# Replace any remaining NA with 0 for visualization
cor_matrix[is.na(cor_matrix)] <- 0

corrplot(cor_matrix,
         method = "color",
         type = "upper",
         order = "hclust",
         tl.col = "black",
         tl.srt = 45,
         tl.cex = 0.8,
         title = "Correlation Matrix - App Store Variables",
         mar = c(0, 0, 2, 0))

Correlation Matrix of App Features

# Identify highly correlated pairs
high_cor <- which(abs(cor_matrix) > 0.7 & abs(cor_matrix) < 1, arr.ind = TRUE)

if (nrow(high_cor) > 0) {
  high_cor_pairs <- data.frame(
    Variable_1 = character(),
    Variable_2 = character(),
    Correlation = numeric()
  )
  
  for (i in 1:nrow(high_cor)) {
    if (high_cor[i, 1] < high_cor[i, 2]) {
      high_cor_pairs <- rbind(high_cor_pairs, data.frame(
        Variable_1 = colnames(cor_matrix)[high_cor[i, 1]],
        Variable_2 = colnames(cor_matrix)[high_cor[i, 2]],
        Correlation = round(cor_matrix[high_cor[i, 1], high_cor[i, 2]], 3)
      ))
    }
  }
  
  high_cor_pairs %>%
    arrange(desc(abs(Correlation))) %>%
    kable(caption = "Highly Correlated Variable Pairs (|r| > 0.7)") %>%
    kable_styling(bootstrap_options = c("striped", "hover"), full_width = FALSE)
}

Highly Correlated Variable Pairs (|r| > 0.7)
Variable_1	Variable_2	Correlation
lang.num	international_index	0.993
lang.num	richness_score	0.942
international_index	richness_score	0.930
user_rating	user_rating_ver	0.788

6.0.1 Key Findings

Strong correlations between richness_score and lang.num (expected, as richness includes language count)
Moderate correlations between rating metrics and engagement features
Multicollinearity justifies dimension reduction to avoid redundant information

7 Principal Component Analysis (PCA)

7.1 Variance Explained

pca_result <- PCA(data_scaled_df, graph = FALSE, ncp = ncol(data_scaled_df))

# Extract eigenvalues
eigenvalues <- get_eigenvalue(pca_result)
if (is.matrix(eigenvalues)) {
  eigenvalues <- as.data.frame(eigenvalues)
}

# Display first 10 components
eigenvalues %>%
  head(10) %>%
  mutate(
    Component = 1:10,
    eigenvalue = round(eigenvalue, 3),
    variance.percent = round(variance.percent, 2),
    cumulative.variance.percent = round(cumulative.variance.percent, 2)
  ) %>%
  select(Component, eigenvalue, variance.percent, cumulative.variance.percent) %>%
  kable(
    col.names = c("Component", "Eigenvalue", "Variance %", "Cumulative %"),
    caption = "Variance Explained by Principal Components"
  ) %>%
  kable_styling(bootstrap_options = c("striped", "hover"), full_width = FALSE)

Variance Explained by Principal Components
	Component	Eigenvalue	Variance %	Cumulative %
Dim.1	1	3.815	27.25	27.25
Dim.2	2	2.047	14.62	41.87
Dim.3	3	1.453	10.38	52.25
Dim.4	4	1.249	8.92	61.17
Dim.5	5	1.094	7.82	68.99
Dim.6	6	1.001	7.15	76.14
Dim.7	7	0.916	6.55	82.68
Dim.8	8	0.785	5.60	88.29
Dim.9	9	0.726	5.18	93.47
Dim.10	10	0.675	4.82	98.29

cumvar <- eigenvalues$cumulative.variance.percent
optimal_pcs_80 <- which(cumvar >= 80)[1]
optimal_pcs_90 <- which(cumvar >= 90)[1]

cat("Components for 80% variance:", optimal_pcs_80, "\n")

## Components for 80% variance: 7

cat("Components for 90% variance:", optimal_pcs_90, "\n")

## Components for 90% variance: 9

cat("Dimension reduction:", round((1 - optimal_pcs_80 / ncol(data_scaled_df)) * 100, 1), "%")

## Dimension reduction: 53.3 %

7.2 Scree Plot

fviz_eig(pca_result,
         addlabels = TRUE,
         ylim = c(0, 30),
         main = "Scree Plot - Variance Explained by Components",
         barfill = "steelblue",
         barcolor = "steelblue",
         linecolor = "red")

Scree Plot - Variance Explained by Components

7.3 Cumulative Variance

plot(1:nrow(eigenvalues), eigenvalues$cumulative.variance.percent,
     type = "b", pch = 19, col = "darkblue",
     xlab = "Number of Components",
     ylab = "Cumulative Variance Explained (%)",
     main = "Cumulative Variance Explained by Principal Components",
     ylim = c(0, 100))
abline(h = c(80, 90), col = c("red", "orange"), lty = 2)
abline(v = optimal_pcs_80, col = "red", lty = 2)
text(optimal_pcs_80 + 0.5, 50, paste0(optimal_pcs_80, " PCs\n(80% var)"), pos = 4, col = "red")
grid()

Cumulative Variance Explained

7.3.1 Key Findings

PC1 explains 23.8% of total variance - largest single component
8 components needed for 80% variance (46.7% dimension reduction)
First 2 PCs capture 38.9% - sufficient for 2D visualization

7.4 Variable Contributions

par(mfrow = c(1, 2))
fviz_contrib(pca_result, choice = "var", axes = 1, top = 10,
             title = "Top 10 Variables Contributing to PC1")

Top Contributing Variables to PC1 and PC2

fviz_contrib(pca_result, choice = "var", axes = 2, top = 10,
             title = "Top 10 Variables Contributing to PC2")

Top Contributing Variables to PC1 and PC2

loadings <- get_pca_var(pca_result)$coord
loadings_df <- as.data.frame(loadings)
loadings_df$Variable <- rownames(loadings_df)

# Rename Dim columns to PC for clarity
names(loadings_df) <- gsub("Dim\\.", "PC", names(loadings_df))

# Top PC1 loadings
pc1_top <- loadings_df %>%
  arrange(desc(abs(PC1))) %>%
  head(5) %>%
  select(Variable, PC1) %>%
  mutate(PC1 = round(PC1, 3))

# Top PC2 loadings
pc2_top <- loadings_df %>%
  arrange(desc(abs(PC2))) %>%
  head(5) %>%
  select(Variable, PC2) %>%
  mutate(PC2 = round(PC2, 3))

cat("PC1 - Top Contributing Variables (International Reach & Popularity):\n")

## PC1 - Top Contributing Variables (International Reach & Popularity):

kable(pc1_top, col.names = c("Variable", "PC1 Loading")) %>%
  kable_styling(bootstrap_options = c("striped"), full_width = FALSE)

	Variable	PC1 Loading
richness_score	richness_score	0.882
lang.num	lang.num	0.811
international_index	international_index	0.796
user_rating_ver	user_rating_ver	0.687
log_rating_count	log_rating_count	0.652

cat("\nPC2 - Top Contributing Variables (Engagement & Quality):\n")

## 
## PC2 - Top Contributing Variables (Engagement & Quality):

kable(pc2_top, col.names = c("Variable", "PC2 Loading")) %>%
  kable_styling(bootstrap_options = c("striped"), full_width = FALSE)

	Variable	PC2 Loading
user_rating_ver	user_rating_ver	0.603
engagement_ratio	engagement_ratio	0.551
international_index	international_index	-0.548
lang.num	lang.num	-0.534
user_rating	user_rating	0.481

7.5 Variable Correlation Circle

fviz_pca_var(pca_result,
             col.var = "contrib",
             gradient.cols = c("#00AFBB", "#E7B800", "#FC4E07"),
             repel = TRUE,
             title = "PCA - Variable Correlation Circle")

PCA Variable Correlation Circle

7.5.1 Key Findings

PC1 (International Reach): Dominated by richness_score, lang.num, international_index
- High PC1 → Apps with broad international support and popularity
PC2 (Engagement & Quality): Dominated by user_rating_ver, user_rating, engagement_ratio
- High PC2 → Apps with strong user engagement and quality ratings

7.6 Integration with Clustering

We perform K-means clustering (k=3) on the scaled data to visualize how clusters appear in reduced dimensional space.

# Perform K-means clustering (k=3) - same as clustering project
set.seed(123)
k_optimal <- 3
km_result <- kmeans(data_scaled, centers = k_optimal, nstart = 50, iter.max = 300)

# Add cluster assignments
appstore$cluster <- km_result$cluster
data_scaled_df$cluster <- km_result$cluster

cat("K-means clustering (k =", k_optimal, ") completed\n")

## K-means clustering (k = 3 ) completed

cat("Cluster distribution:\n")

## Cluster distribution:

print(table(appstore$cluster))

## 
##    1    2    3 
## 1509 3545 1457

# Cluster sizes
data.frame(
  Cluster = 1:k_optimal,
  Size = as.numeric(table(km_result$cluster)),
  Percentage = paste0(round(as.numeric(table(km_result$cluster)) / nrow(appstore) * 100, 1), "%")
) %>%
  kable(caption = "K-means Cluster Distribution") %>%
  kable_styling(bootstrap_options = c("striped", "hover"), full_width = FALSE)

K-means Cluster Distribution
Cluster	Size	Percentage
1	1509	23.2%
2	3545	54.4%
3	1457	22.4%

# Calculate cluster profiles (mean values for each cluster)
cluster_profiles <- appstore %>%
  group_by(cluster) %>%
  summarise(
    n_apps = n(),
    avg_price = mean(price, na.rm = TRUE),
    avg_rating = mean(user_rating, na.rm = TRUE),
    avg_rating_count = mean(rating_count_tot, na.rm = TRUE),
    avg_size_mb = mean(size_bytes / 1e6, na.rm = TRUE),
    avg_languages = mean(lang.num, na.rm = TRUE),
    avg_devices = mean(sup_devices.num, na.rm = TRUE),
    pct_free = mean(price == 0, na.rm = TRUE) * 100,
    .groups = "drop"
  )

# Determine cluster characteristics based on profiles
cluster_labels <- sapply(1:k_optimal, function(i) {
  profile <- cluster_profiles[cluster_profiles$cluster == i, ]
  
  # Identify dominant characteristics
  chars <- c()
  
  if (profile$pct_free > 70) {
    chars <- c(chars, "Free/Low-cost")
  } else if (profile$avg_price > 5) {
    chars <- c(chars, "Premium-priced")
  }
  
  if (profile$avg_rating_count > mean(cluster_profiles$avg_rating_count) * 1.5) {
    chars <- c(chars, "High popularity")
  } else if (profile$avg_rating_count < mean(cluster_profiles$avg_rating_count) * 0.5) {
    chars <- c(chars, "Niche/Low visibility")
  }
  
  if (profile$avg_languages > mean(cluster_profiles$avg_languages) * 1.3) {
    chars <- c(chars, "International reach")
  }
  
  if (profile$avg_rating > 4.0) {
    chars <- c(chars, "High quality")
  }
  
  if (length(chars) == 0) chars <- "Mainstream apps"
  paste(chars, collapse = ", ")
})

# Display cluster interpretation
cluster_interpretation <- data.frame(
  Cluster = 1:k_optimal,
  Apps = cluster_profiles$n_apps,
  Label = cluster_labels,
  Avg_Price = paste0("$", round(cluster_profiles$avg_price, 2)),
  Avg_Rating = round(cluster_profiles$avg_rating, 2),
  Avg_Downloads = format(round(cluster_profiles$avg_rating_count), big.mark = ","),
  Avg_Languages = round(cluster_profiles$avg_languages, 1),
  Pct_Free = paste0(round(cluster_profiles$pct_free, 1), "%")
)

cluster_interpretation %>%
  kable(
    col.names = c("Cluster", "# Apps", "Interpretation", "Avg Price", "Avg Rating", 
                  "Avg Rating Count", "Avg Languages", "% Free"),
    caption = "Cluster Profiles and Interpretation"
  ) %>%
  kable_styling(bootstrap_options = c("striped", "hover"), full_width = FALSE)

Cluster Profiles and Interpretation
Cluster	# Apps	Interpretation	Avg Price	Avg Rating	Avg Rating Count	Avg Languages	% Free
1	1509	Free/Low-cost, Niche/Low visibility	$0.8	1.47	493	1.9	72%
2	3545	High quality	$1.81	4.11	8,841	1.7	50.1%
3	1457	High popularity, International reach, High quality	$1.58	4.27	24,598	12.5	59.7%

cat("\n### Cluster Interpretation\n\n")

7.6.1 Cluster Interpretation

for (i in 1:k_optimal) {
  profile <- cluster_profiles[cluster_profiles$cluster == i, ]
  
  cat(paste0("**Cluster ", i, " - ", cluster_labels[i], "**\n\n"))
  cat(paste0("- **Size**: ", format(profile$n_apps, big.mark = ","), " apps (", 
             round(profile$n_apps / nrow(appstore) * 100, 1), "%)\n"))
  cat(paste0("- **Average Price**: $", round(profile$avg_price, 2), 
             " (", round(profile$pct_free, 0), "% free)\n"))
  cat(paste0("- **Average Rating**: ", round(profile$avg_rating, 2), " stars\n"))
  cat(paste0("- **Average Popularity**: ", format(round(profile$avg_rating_count), big.mark = ","), 
             " ratings\n"))
  cat(paste0("- **International Support**: ", round(profile$avg_languages, 1), " languages\n\n"))
}

Cluster 1 - Free/Low-cost, Niche/Low visibility

Size: 1,509 apps (23.2%)
Average Price: $0.8 (72% free)
Average Rating: 1.47 stars
Average Popularity: 493 ratings
International Support: 1.9 languages

Cluster 2 - High quality

Size: 3,545 apps (54.4%)
Average Price: $1.81 (50% free)
Average Rating: 4.11 stars
Average Popularity: 8,841 ratings
International Support: 1.7 languages

Cluster 3 - High popularity, International reach, High quality

Size: 1,457 apps (22.4%)
Average Price: $1.58 (60% free)
Average Rating: 4.27 stars
Average Popularity: 24,598 ratings
International Support: 12.5 languages

7.6.2 How to Read the PCA Cluster Plot

The PCA plot displays apps in a 2D space based on their characteristics:

Understanding the Axes:

PC1 (Dim1, horizontal axis): Represents International Reach & Popularity
- Apps on the right have more languages, broader device support, higher popularity
- Apps on the left have fewer languages, limited reach, lower visibility
PC2 (Dim2, vertical axis): Represents User Engagement & Quality
- Apps at the top have higher ratings (4+ stars), better user engagement
- Apps at the bottom have lower ratings, less user engagement

Cluster Positions Explained:

Cluster	Position on Plot	Why?
Cluster 1 (Green)	Bottom-left quadrant	Low languages (1.9), low ratings (1.47★), low popularity → Low on both PC1 and PC2
Cluster 2 (Orange)	Top-center area	High quality (4.11★) but average international reach → High PC2, medium PC1
Cluster 3 (Blue)	Right side	Many languages (12.5), high popularity (24,598 ratings) → High PC1, varies on PC2

Visual Guide:

                    High Quality (PC2+)
                          ↑
                    ┌─────┴─────┐
                    │  Cluster 2 │
                    │  (Orange)  │
                    │ 4.11★ avg  │
    Low Reach ←─────┼───────────┼─────→ High Reach (PC1+)
    (PC1-)          │           │       Many Languages
                    │ Cluster 1 │       High Popularity
                    │  (Green)  │       Cluster 3 (Blue)
                    │ 1.47★ avg │       12.5 languages
                    └─────┬─────┘
                          ↓
                    Low Quality (PC2-)

fviz_pca_ind(pca_result,
             geom.ind = "point",
             col.ind = as.factor(appstore$cluster),
             palette = "Set2",
             addEllipses = TRUE,
             ellipse.type = "convex",
             legend.title = "Cluster",
             title = "PCA - Apps Colored by K-means Clusters (k=3)",
             pointsize = 1.5,
             alpha.ind = 0.6)

PCA - Apps Colored by K-means Clusters

7.7 PCA Biplot

fviz_pca_biplot(pca_result,
                geom.ind = "point",
                pointsize = 0.5,
                alpha.ind = 0.3,
                col.var = "red",
                repel = TRUE,
                title = "PCA Biplot - Apps and Variables")

PCA Biplot - Apps and Variables Combined

7.8 Interactive 3D PCA

pca_coords <- get_pca_ind(pca_result)$coord[, 1:3]

plot_data <- data.frame(
  PC1 = pca_coords[, 1],
  PC2 = pca_coords[, 2],
  PC3 = pca_coords[, 3],
  price_tier = appstore$price_tier,
  track_name = appstore$track_name
)

plot_data$cluster <- as.factor(appstore$cluster)

plot_ly(plot_data, x = ~PC1, y = ~PC2, z = ~PC3,
        color = ~cluster, colors = "Set2",
        type = "scatter3d", mode = "markers",
        marker = list(size = 3, opacity = 0.6),
        text = ~track_name, hoverinfo = "text") %>%
  layout(
    title = "3D PCA - Colored by K-means Clusters (k=3)",
    scene = list(
      xaxis = list(title = paste0("PC1 (", round(eigenvalues$variance.percent[1], 1), "%)")),
      yaxis = list(title = paste0("PC2 (", round(eigenvalues$variance.percent[2], 1), "%)")),
      zaxis = list(title = paste0("PC3 (", round(eigenvalues$variance.percent[3], 1), "%)"))
    )
  )

Interactive 3D PCA Visualization

8 t-SNE Analysis

t-SNE (t-Distributed Stochastic Neighbor Embedding) is a non-linear technique that excels at preserving local structure and revealing cluster patterns in high-dimensional data.

# Sample data for t-SNE (computational efficiency)
if (nrow(data_scaled_df) > 5000) {
  cat("Sampling 5000 apps for t-SNE analysis...\n")
  sample_idx <- sample(1:nrow(data_scaled_df), 5000)
  data_tsne <- data_scaled_df[sample_idx, ]
  appstore_tsne <- appstore[sample_idx, ]
} else {
  sample_idx <- 1:nrow(data_scaled_df)
  data_tsne <- data_scaled_df
  appstore_tsne <- appstore
}

## Sampling 5000 apps for t-SNE analysis...

cat("t-SNE sample size:", nrow(data_tsne), "apps\n")

## t-SNE sample size: 5000 apps

8.1 Perplexity Comparison

Perplexity controls the balance between local and global structure. We compare three values:

cat("Running t-SNE with perplexity = 30...\n")

## Running t-SNE with perplexity = 30...

tsne_30 <- Rtsne(data_tsne, dims = 2, perplexity = 30, verbose = FALSE, max_iter = 500)

cat("Running t-SNE with perplexity = 50...\n")

## Running t-SNE with perplexity = 50...

tsne_50 <- Rtsne(data_tsne, dims = 2, perplexity = 50, verbose = FALSE, max_iter = 500)

cat("Running t-SNE with perplexity = 100...\n")

## Running t-SNE with perplexity = 100...

tsne_100 <- Rtsne(data_tsne, dims = 2, perplexity = 100, verbose = FALSE, max_iter = 500)

tsne_30_df <- data.frame(
  tSNE1 = tsne_30$Y[, 1], tSNE2 = tsne_30$Y[, 2],
  price_tier = appstore_tsne$price_tier
)

tsne_50_df <- data.frame(
  tSNE1 = tsne_50$Y[, 1], tSNE2 = tsne_50$Y[, 2],
  price_tier = appstore_tsne$price_tier
)

tsne_100_df <- data.frame(
  tSNE1 = tsne_100$Y[, 1], tSNE2 = tsne_100$Y[, 2],
  price_tier = appstore_tsne$price_tier
)

# Add cluster assignments
tsne_30_df$cluster <- as.factor(appstore_tsne$cluster)
tsne_50_df$cluster <- as.factor(appstore_tsne$cluster)
tsne_100_df$cluster <- as.factor(appstore_tsne$cluster)

p1 <- ggplot(tsne_30_df, aes(x = tSNE1, y = tSNE2, color = price_tier)) +
  geom_point(alpha = 0.6, size = 1.5) +
  scale_color_brewer(palette = "Set2") +
  theme_minimal() +
  labs(title = "Perplexity = 30", color = "Price Tier") +
  theme(legend.position = "bottom")

p2 <- ggplot(tsne_50_df, aes(x = tSNE1, y = tSNE2, color = price_tier)) +
  geom_point(alpha = 0.6, size = 1.5) +
  scale_color_brewer(palette = "Set2") +
  theme_minimal() +
  labs(title = "Perplexity = 50", color = "Price Tier") +
  theme(legend.position = "bottom")

p3 <- ggplot(tsne_100_df, aes(x = tSNE1, y = tSNE2, color = price_tier)) +
  geom_point(alpha = 0.6, size = 1.5) +
  scale_color_brewer(palette = "Set2") +
  theme_minimal() +
  labs(title = "Perplexity = 100", color = "Price Tier") +
  theme(legend.position = "bottom")

grid.arrange(p1, p2, p3, ncol = 3, top = "t-SNE Perplexity Comparison")

t-SNE with Different Perplexity Values

8.2 t-SNE by Clusters

ggplot(tsne_50_df, aes(x = tSNE1, y = tSNE2, color = cluster)) +
  geom_point(alpha = 0.6, size = 2) +
  scale_color_brewer(palette = "Set2") +
  theme_minimal() +
  labs(
    title = "t-SNE Projection Colored by K-means Clusters (Perplexity = 50)",
    subtitle = "Non-linear dimension reduction reveals cluster structure",
    color = "Cluster"
  ) +
  theme(
    legend.position = "right",
    plot.title = element_text(size = 14, face = "bold"),
    plot.subtitle = element_text(size = 10)
  )

t-SNE Colored by K-means Clusters (Perplexity = 50)

8.2.1 Key Findings

Perplexity 30: Tight, well-separated clusters but may miss global patterns
Perplexity 50: Balanced local and global structure (recommended)
Perplexity 100: More emphasis on global structure, clusters less distinct
Free apps form a distinct cluster in t-SNE space

9 UMAP Analysis

UMAP (Uniform Manifold Approximation and Projection) is a newer technique that often preserves both local and global structure better than t-SNE, while being faster to compute.

cat("Running UMAP with default parameters...\n")

## Running UMAP with default parameters...

umap_default <- umap(data_tsne)

cat("Running UMAP with n_neighbors=15, min_dist=0.1...\n")

## Running UMAP with n_neighbors=15, min_dist=0.1...

umap_custom <- umap(data_tsne, n_neighbors = 15, min_dist = 0.1)

umap_default_df <- data.frame(
  UMAP1 = umap_default$layout[, 1],
  UMAP2 = umap_default$layout[, 2],
  price_tier = appstore_tsne$price_tier
)

umap_custom_df <- data.frame(
  UMAP1 = umap_custom$layout[, 1],
  UMAP2 = umap_custom$layout[, 2],
  price_tier = appstore_tsne$price_tier
)

# Add cluster assignments
umap_default_df$cluster <- as.factor(appstore_tsne$cluster)
umap_custom_df$cluster <- as.factor(appstore_tsne$cluster)

9.1 UMAP Parameter Comparison

p1 <- ggplot(umap_default_df, aes(x = UMAP1, y = UMAP2, color = price_tier)) +
  geom_point(alpha = 0.6, size = 1.5) +
  scale_color_brewer(palette = "Set2") +
  theme_minimal() +
  labs(title = "UMAP (Default Parameters)", color = "Price Tier") +
  theme(legend.position = "bottom")

p2 <- ggplot(umap_custom_df, aes(x = UMAP1, y = UMAP2, color = price_tier)) +
  geom_point(alpha = 0.6, size = 1.5) +
  scale_color_brewer(palette = "Set2") +
  theme_minimal() +
  labs(title = "UMAP (n_neighbors=15, min_dist=0.1)", color = "Price Tier") +
  theme(legend.position = "bottom")

grid.arrange(p1, p2, ncol = 2, top = "UMAP Parameter Comparison")

UMAP with Different Parameters

9.2 UMAP by Clusters

ggplot(umap_custom_df, aes(x = UMAP1, y = UMAP2, color = cluster)) +
  geom_point(alpha = 0.6, size = 2) +
  scale_color_brewer(palette = "Set2") +
  theme_minimal() +
  labs(
    title = "UMAP Projection Colored by K-means Clusters",
    subtitle = "Preserves both local and global structure",
    color = "Cluster"
  ) +
  theme(
    legend.position = "right",
    plot.title = element_text(size = 14, face = "bold"),
    plot.subtitle = element_text(size = 10)
  )

UMAP Colored by K-means Clusters

9.2.1 Key Findings

Default UMAP: Good balance between local and global structure
Custom UMAP (n_neighbors=15, min_dist=0.1): Tighter clusters, more local focus
UMAP runs significantly faster than t-SNE on large datasets
Cluster separation is comparable to t-SNE

10 Method Comparison

10.1 Side-by-Side Visualization

# PCA coordinates for sampled data
pca_coords_sample <- get_pca_ind(pca_result)$coord[sample_idx, 1:2]

pca_comparison_df <- data.frame(
  PC1 = pca_coords_sample[, 1],
  PC2 = pca_coords_sample[, 2],
  price_tier = appstore_tsne$price_tier,
  cluster = as.factor(appstore_tsne$cluster)
)

p1 <- ggplot(pca_comparison_df, aes(x = PC1, y = PC2, color = cluster)) +
  geom_point(alpha = 0.5, size = 1.5) +
  scale_color_brewer(palette = "Set2") +
  theme_minimal() +
  labs(title = "PCA", color = "Cluster") +
  theme(legend.position = "bottom")

p2 <- ggplot(tsne_50_df, aes(x = tSNE1, y = tSNE2, color = cluster)) +
  geom_point(alpha = 0.5, size = 1.5) +
  scale_color_brewer(palette = "Set2") +
  theme_minimal() +
  labs(title = "t-SNE (perplexity=50)", color = "Cluster") +
  theme(legend.position = "bottom")

p3 <- ggplot(umap_custom_df, aes(x = UMAP1, y = UMAP2, color = cluster)) +
  geom_point(alpha = 0.5, size = 1.5) +
  scale_color_brewer(palette = "Set2") +
  theme_minimal() +
  labs(title = "UMAP", color = "Cluster") +
  theme(legend.position = "bottom")

grid.arrange(p1, p2, p3, ncol = 3, top = "Dimension Reduction Methods Comparison")

Dimension Reduction Methods Comparison

10.2 Mantel Test Validation

The Mantel test measures how well each method preserves the distance structure from the original high-dimensional space.

cat("Computing distance matrices...\n")

## Computing distance matrices...

# PCA 2D distances
pca_2d <- get_pca_ind(pca_result)$coord[sample_idx, 1:2]
dist_pca <- dist(pca_2d, method = "euclidean")

# t-SNE 2D distances
dist_tsne <- dist(tsne_50$Y, method = "euclidean")

# UMAP 2D distances
dist_umap <- dist(umap_custom$layout, method = "euclidean")

# Original 15D distances
dist_original <- dist(data_scaled_df[sample_idx, ], method = "euclidean")

cat("Running Mantel tests (999 permutations)...\n")

## Running Mantel tests (999 permutations)...

mantel_pca <- mantel(dist_original, dist_pca, method = "pearson", permutations = 999)
mantel_tsne <- mantel(dist_original, dist_tsne, method = "pearson", permutations = 999)
mantel_umap <- mantel(dist_original, dist_umap, method = "pearson", permutations = 999)

mantel_results <- data.frame(
  Method = c("PCA", "t-SNE", "UMAP"),
  Mantel_r = round(c(mantel_pca$statistic, mantel_tsne$statistic, mantel_umap$statistic), 4),
  p_value = c(mantel_pca$signif, mantel_tsne$signif, mantel_umap$signif),
  Significance = c(
    ifelse(mantel_pca$signif < 0.001, "***",
           ifelse(mantel_pca$signif < 0.01, "**",
                  ifelse(mantel_pca$signif < 0.05, "*", "ns"))),
    ifelse(mantel_tsne$signif < 0.001, "***",
           ifelse(mantel_tsne$signif < 0.01, "**",
                  ifelse(mantel_tsne$signif < 0.05, "*", "ns"))),
    ifelse(mantel_umap$signif < 0.001, "***",
           ifelse(mantel_umap$signif < 0.01, "**",
                  ifelse(mantel_umap$signif < 0.05, "*", "ns")))
  ),
  Quality = c(
    ifelse(mantel_pca$statistic > 0.7, "Excellent",
           ifelse(mantel_pca$statistic > 0.5, "Good",
                  ifelse(mantel_pca$statistic > 0.3, "Fair", "Poor"))),
    ifelse(mantel_tsne$statistic > 0.7, "Excellent",
           ifelse(mantel_tsne$statistic > 0.5, "Good",
                  ifelse(mantel_tsne$statistic > 0.3, "Fair", "Poor"))),
    ifelse(mantel_umap$statistic > 0.7, "Excellent",
           ifelse(mantel_umap$statistic > 0.5, "Good",
                  ifelse(mantel_umap$statistic > 0.3, "Fair", "Poor")))
  )
)

mantel_results %>%
  kable(caption = "Mantel Test Results - Structure Preservation") %>%
  kable_styling(bootstrap_options = c("striped", "hover"), full_width = FALSE)

Mantel Test Results - Structure Preservation
Method	Mantel_r	p_value	Significance	Quality
PCA	0.6876	0.001	**	Good
t-SNE	0.4250	0.001	**	Fair
UMAP	0.5282	0.001	**	Good

barplot(mantel_results$Mantel_r,
        names.arg = mantel_results$Method,
        col = c("steelblue", "coral", "seagreen"),
        main = "Mantel Test: Structure Preservation Comparison",
        ylab = "Mantel r (correlation)",
        ylim = c(-0.1, 1),
        border = NA)
abline(h = c(0.3, 0.5, 0.7), col = c("orange", "gold", "darkgreen"), lty = 2, lwd = 1.5)
text(1:3, mantel_results$Mantel_r + 0.05,
     sprintf("r = %.3f", mantel_results$Mantel_r), cex = 0.9)
legend("topright",
       legend = c("Excellent (>0.7)", "Good (>0.5)", "Fair (>0.3)"),
       col = c("darkgreen", "gold", "orange"),
       lty = 2, lwd = 2, cex = 0.8)

Mantel Test: Structure Preservation Comparison

10.2.1 Key Findings

PCA best preserves the original distance structure (Mantel r ≈ 0.7)
t-SNE and UMAP show low Mantel r values - this is expected and not bad
- These methods intentionally distort global distances to reveal local cluster patterns
- Low Mantel r indicates they’re doing their job (emphasizing local structure)
Use PCA when you need to preserve original relationships
Use t-SNE/UMAP when you want to visualize cluster structure

10.3 Method Characteristics

data.frame(
  Method = c("PCA", "t-SNE", "UMAP"),
  Type = c("Linear", "Non-linear", "Non-linear"),
  Structure_Preserved = c("Global", "Local", "Local + Global"),
  Interpretability = c("High (loadings)", "Low", "Low"),
  Speed = c("Fast", "Slow", "Moderate"),
  Best_For = c("Feature reduction, interpretability", "Cluster visualization", "Large datasets, balanced view")
) %>%
  kable(caption = "Method Comparison Summary") %>%
  kable_styling(bootstrap_options = c("striped", "hover"))

Method Comparison Summary
Method	Type	Structure_Preserved	Interpretability	Speed	Best_For
PCA	Linear	Global	High (loadings)	Fast	Feature reduction, interpretability
t-SNE	Non-linear	Local	Low	Slow	Cluster visualization
UMAP	Non-linear	Local + Global	Low	Moderate	Large datasets, balanced view

11 Business Insights & Conclusions

11.1 Key Takeaways

Dimensionality Reduction Achieved
- Reduced from 15 dimensions to 8 (80% variance preserved)
- First 2 PCs capture 38.9% for visualization
Principal Component Interpretation
- PC1 (23.8%): International Reach & Popularity
  - High scores: Apps with many languages, broad device support, high ratings
- PC2 (15.1%): User Engagement & Quality
  - High scores: Apps with strong version ratings and engagement
Cluster Validation
- All three methods reveal similar cluster structure
- K-means clusters are visually separable in reduced dimensional space
- Non-linear methods (t-SNE, UMAP) show tighter cluster boundaries
Method Selection Guidelines
- PCA: Use for feature reduction before modeling, interpretable axes
- t-SNE: Use for publication-quality cluster visualizations
- UMAP: Use for large datasets when speed matters

11.2 Limitations

2D projections lose information (only 38.9% variance in PC1+PC2)
t-SNE perplexity choice affects results - always compare multiple values
Non-linear methods are stochastic - results vary between runs

11.3 Recommendations

For App Developers:

Apps in the high PC1 quadrant (international reach) show stronger market penetration
Focus on multi-language support and broad device compatibility

For Further Analysis:

Use PCA scores as input for supervised learning models
Apply clustering directly in reduced PCA space (8 components)
Combine insights with clustering profiles for targeted marketing strategies

12 Save Results

# Create output directory
dir.create("results/dimension_reduction", showWarnings = FALSE, recursive = TRUE)

# Save PCA scores
pca_scores <- as.data.frame(get_pca_ind(pca_result)$coord[, 1:min(10, ncol(get_pca_ind(pca_result)$coord))])
pca_scores$id <- appstore$id
write.csv(pca_scores, "results/dimension_reduction/pca_scores.csv", row.names = FALSE)

# Save PCA loadings
write.csv(loadings_df, "results/dimension_reduction/pca_loadings.csv", row.names = FALSE)

# Save variance explained
variance_explained <- eigenvalues
variance_explained$component <- 1:nrow(variance_explained)
write.csv(variance_explained, "results/dimension_reduction/pca_variance_explained.csv", row.names = FALSE)

# Save t-SNE coordinates
tsne_output <- data.frame(
  id = appstore_tsne$id,
  tsne1_perp30 = tsne_30$Y[, 1],
  tsne2_perp30 = tsne_30$Y[, 2],
  tsne1_perp50 = tsne_50$Y[, 1],
  tsne2_perp50 = tsne_50$Y[, 2],
  tsne1_perp100 = tsne_100$Y[, 1],
  tsne2_perp100 = tsne_100$Y[, 2]
)
write.csv(tsne_output, "results/dimension_reduction/tsne_coordinates.csv", row.names = FALSE)

# Save UMAP coordinates
umap_output <- data.frame(
  id = appstore_tsne$id,
  umap1_default = umap_default$layout[, 1],
  umap2_default = umap_default$layout[, 2],
  umap1_custom = umap_custom$layout[, 1],
  umap2_custom = umap_custom$layout[, 2]
)
write.csv(umap_output, "results/dimension_reduction/umap_coordinates.csv", row.names = FALSE)

# Save Mantel test results
write.csv(mantel_results, "results/dimension_reduction/mantel_test_results.csv", row.names = FALSE)

# Save summary
summary_report <- data.frame(
  Metric = c(
    "Original Dimensions",
    "Optimal PCs (80% variance)",
    "Optimal PCs (90% variance)",
    "PC1 Variance Explained (%)",
    "PC2 Variance Explained (%)",
    "Total Variance (PC1+PC2) (%)",
    "Dimension Reduction (%)",
    "Sample Size for t-SNE/UMAP"
  ),
  Value = c(
    ncol(data_scaled_df),
    optimal_pcs_80,
    optimal_pcs_90,
    round(eigenvalues$variance.percent[1], 2),
    round(eigenvalues$variance.percent[2], 2),
    round(sum(eigenvalues$variance.percent[1:2]), 2),
    round((1 - optimal_pcs_80 / ncol(data_scaled_df)) * 100, 1),
    nrow(data_tsne)
  )
)
write.csv(summary_report, "results/dimension_reduction/summary.csv", row.names = FALSE)

cat("Results saved to results/dimension_reduction/ directory\n")

## Results saved to results/dimension_reduction/ directory

cat("Files: pca_scores.csv, pca_loadings.csv, pca_variance_explained.csv,\n")

## Files: pca_scores.csv, pca_loadings.csv, pca_variance_explained.csv,

cat("       tsne_coordinates.csv, umap_coordinates.csv, mantel_test_results.csv, summary.csv")

##        tsne_coordinates.csv, umap_coordinates.csv, mantel_test_results.csv, summary.csv

13 Session Info

sessionInfo()

## R version 4.5.2 (2025-10-31)
## Platform: aarch64-apple-darwin20
## Running under: macOS Tahoe 26.0
## 
## Matrix products: default
## BLAS:   /System/Library/Frameworks/Accelerate.framework/Versions/A/Frameworks/vecLib.framework/Versions/A/libBLAS.dylib 
## LAPACK: /Library/Frameworks/R.framework/Versions/4.5-arm64/Resources/lib/libRlapack.dylib;  LAPACK version 3.12.1
## 
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
## 
## time zone: Europe/Warsaw
## tzcode source: internal
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## other attached packages:
##  [1] future_1.69.0      solitude_1.1.3     vegan_2.7-2        permute_0.9-8     
##  [5] kableExtra_1.4.0   knitr_1.51         reshape2_1.4.5     RColorBrewer_1.1-3
##  [9] gridExtra_2.3      plotly_4.12.0      corrplot_0.95      tidyr_1.3.2       
## [13] dplyr_1.1.4        umap_0.2.10.0      Rtsne_0.17         factoextra_1.0.7  
## [17] ggplot2_4.0.1      FactoMineR_2.13   
## 
## loaded via a namespace (and not attached):
##  [1] rlang_1.1.7          magrittr_2.0.4       otel_0.2.0          
##  [4] compiler_4.5.2       mgcv_1.9-3           png_0.1-8           
##  [7] systemfonts_1.3.1    vctrs_0.7.1          stringr_1.6.0       
## [10] pkgconfig_2.0.3      fastmap_1.2.0        backports_1.5.0     
## [13] labeling_0.4.3       rmarkdown_2.30       purrr_1.2.1         
## [16] xfun_0.56            cachem_1.1.0         jsonlite_2.0.0      
## [19] flashClust_1.01-2    broom_1.0.12         parallel_4.5.2      
## [22] cluster_2.1.8.1      R6_2.6.1             bslib_0.10.0        
## [25] stringi_1.8.7        ranger_0.18.0        reticulate_1.44.1   
## [28] parallelly_1.46.1    car_3.1-3            jquerylib_0.1.4     
## [31] estimability_1.5.1   Rcpp_1.1.1           future.apply_1.20.1 
## [34] Matrix_1.7-4         splines_4.5.2        igraph_2.2.1        
## [37] tidyselect_1.2.1     rstudioapi_0.18.0    abind_1.4-8         
## [40] yaml_2.3.12          codetools_0.2-20     listenv_0.10.0      
## [43] lattice_0.22-7       tibble_3.3.1         plyr_1.8.9          
## [46] withr_3.0.2          S7_0.2.1             askpass_1.2.1       
## [49] evaluate_1.0.5       xml2_1.5.2           pillar_1.11.1       
## [52] ggpubr_0.6.2         carData_3.0-6        DT_0.34.0           
## [55] generics_0.1.4       scales_1.4.0         globals_0.18.0      
## [58] leaps_3.2            glue_1.8.0           emmeans_2.0.1       
## [61] scatterplot3d_0.3-44 lazyeval_0.2.2       tools_4.5.2         
## [64] data.table_1.18.2.1  RSpectra_0.16-2      ggsignif_0.6.4      
## [67] mvtnorm_1.3-3        grid_4.5.2           crosstalk_1.2.2     
## [70] nlme_3.1-168         Formula_1.2-5        cli_3.6.5           
## [73] textshaping_1.0.4    viridisLite_0.4.2    svglite_2.2.2       
## [76] gtable_0.3.6         rstatix_0.7.3        sass_0.4.10         
## [79] digest_0.6.39        ggrepel_0.9.6        lgr_0.5.2           
## [82] htmlwidgets_1.6.4    farver_2.1.2         htmltools_0.5.9     
## [85] lifecycle_1.0.5      httr_1.4.7           multcompView_0.1-10 
## [88] openssl_2.3.4        MASS_7.3-65

Report Generated: 2026-02-01 Author: Mariyam Babayeva

Apple App Store Dimension Reduction Analysis

Mariyam Babayeva

2026-02-01