# Load tidyverse for convenience
library(tidyverse)
## Warning: package 'tidyverse' was built under R version 4.5.2
## Warning: package 'ggplot2' was built under R version 4.5.3
## Warning: package 'lubridate' was built under R version 4.5.2
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.0     ✔ stringr   1.5.2
## ✔ ggplot2   4.0.2     ✔ tibble    3.3.0
## ✔ lubridate 1.9.4     ✔ tidyr     1.3.1
## ✔ purrr     1.1.0     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
# Read the market-research spreadsheet export.
# tidyverse is already attached at the top of the script, so it is not
# re-attached here.
# NOTE(review): this is an absolute, machine-specific path; consider a
# project-relative path so the script runs on other machines.
data_path <- "C:/Users/user/Downloads/Market research.csv"

# The first CSV column has an empty header, which readr renames to `...1`
# (see the "New names" message printed below).
df <- read_csv(data_path)
## New names:
## Rows: 28 Columns: 12
## ── Column specification
## ──────────────────────────────────────────────────────── Delimiter: "," chr
## (1): ...1 dbl (11): DGA, Formalighting, XAL, Loupi, Ljusdesign, Stoane, UFO,
## SV, Zumto...
## ℹ Use `spec()` to retrieve the full column specification for this data. ℹ
## Specify the column types or set `show_col_types = FALSE` to quiet this message.
## • `` -> `...1`
# Print the column names of the imported table to check what was read.
colnames(df)
##  [1] "...1"              "DGA"               "Formalighting"    
##  [4] "XAL"               "Loupi"             "Ljusdesign"       
##  [7] "Stoane"            "UFO"               "SV"               
## [10] "Zumtobel"          "Luminii"           "corporate friends"
# Keep the label column (`...1`) and the five company columns of interest.
df_selected <- select(
  df,
  `...1`, DGA, Loupi, Ljusdesign, UFO, `corporate friends`
)

# Restrict to the first 12 rows and give the label column a readable name.
df_clean <- rename(slice(df_selected, 1:12), Variable = `...1`)

# Fill missing values with the mean of the non-missing values in the same row.
# NOTE(review): given the column order chosen above, the range DGA:UFO covers
# DGA, Loupi, Ljusdesign and UFO only -- `corporate friends` is neither part of
# the row mean nor imputed. Confirm that this is intended.
df_imputed <- df_clean %>%
  mutate(row_mean = rowMeans(pick(DGA:UFO), na.rm = TRUE)) %>%
  mutate(
    across(
      DGA:UFO,
      function(col) replace(col, is.na(col), row_mean[is.na(col)])
    )
  ) %>%
  select(-row_mean)
# Principal component analysis of the five company columns.
# Each row of `numdata` is one row of df_imputed (one `Variable`) and each
# column is a company, so `pca$x` holds one row of scores per Variable and
# `pca$rotation` holds one row of loadings per company.
numdata <- df_imputed[c("DGA", "Loupi", "Ljusdesign", "UFO", "corporate friends")]

# Standardised copy (mean 0, sd 1 per column), kept available for inspection.
numdata_scaled <- scale(numdata)

# prcomp() centres and scales the data itself. Passing the raw columns avoids
# standardising twice and leaves the actual column means / standard deviations
# in pca$center and pca$scale; the scores match those obtained from the
# pre-scaled data up to floating-point rounding.
pca <- prcomp(numdata, center = TRUE, scale. = TRUE)

# Scores: coordinates of every row on the principal components.
# NOTE(review): in the printed scores, rows 10-12 are identical to rows 7-9 --
# check the input for duplicated rows.
scores <- as.data.frame(pca$x)
scores
##           PC1         PC2        PC3         PC4         PC5
## 1  -2.4517281 -2.91370817 -0.1460718 -0.22056065  0.11259439
## 2   1.7600911 -0.40253530 -0.3179456  0.26551252  0.09499453
## 3   0.5403199 -0.94223192  0.2992002  0.57139750 -0.63933553
## 4   1.4069217 -0.34122851 -0.3543150 -0.04127012  0.47821912
## 5  -0.9019474  0.72864819  1.7376010 -0.86067912 -0.34307032
## 6  -0.9494411  0.90537412 -0.9079681  0.44153859 -0.59126457
## 7   2.2375036 -0.05288567 -0.3045231 -0.42281373 -0.07072235
## 8  -1.6557798  1.02798768 -0.9807067 -0.17202669  0.17518462
## 9  -0.2838320  0.50773878  1.1299794  0.51687106  0.33946891
## 10  2.2375036 -0.05288567 -0.3045231 -0.42281373 -0.07072235
## 11 -1.6557798  1.02798768 -0.9807067 -0.17202669  0.17518462
## 12 -0.2838320  0.50773878  1.1299794  0.51687106  0.33946891
# ggplot2 is already attached through library(tidyverse). Packages should be
# installed once, outside the analysis script, so no install.packages() call
# is made here (it was skipped anyway because the package was in use).
library(ggplot2)

# Companies (points): the PCA was run with companies as columns, so each
# company's position on PC1/PC2 is its row of the rotation (loadings) matrix.
companies <- as.data.frame(pca$rotation[, 1:2])
companies$Company <- rownames(companies)

# Variables (arrows): PC1/PC2 scores of the rows of df_imputed.
# The score matrix carries no row names (the input was a tibble), so
# rownames(vars) would only give "1", "2", ...; take the labels from the
# Variable column instead.
vars <- as.data.frame(pca$x[, 1:2])
vars$Variable <- df_imputed$Variable

# Shrink the variable coordinates by a constant factor (dividing by
# arrow_scale makes them smaller, not larger).
# NOTE(review): vars_scaled is prepared here but not drawn by the plot below.
arrow_scale <- 4
vars_scaled <- vars
vars_scaled$PC1 <- vars_scaled$PC1 / arrow_scale
vars_scaled$PC2 <- vars_scaled$PC2 / arrow_scale

# Map of the companies in the PC1/PC2 plane, with extra padding on both axes
# so the text labels are not clipped at the panel edge.
ggplot() +
  geom_point(data = companies, aes(PC1, PC2), size = 4) +
  geom_text(
    data = companies,
    aes(PC1, PC2, label = Company),
    vjust = -0.6
  ) +
  theme_minimal() +
  labs(
    title = "PCA Map of Companies",
    x = "PC1: Traditional offering ↔ Modern / differentiated offering",
    y = "PC2: Service‑oriented ↔ Technical‑oriented"
  ) +
  scale_x_continuous(expand = expansion(mult = 0.3)) +
  scale_y_continuous(expand = expansion(mult = 0.3))