Importing and cleaning data

# Point R at a CRAN mirror so that package installation works without an
# interactive mirror prompt.
options(repos = c(CRAN = "https://cran.rstudio.com/"))

# Install readxl only when it is not available yet; re-running the script
# should not download and reinstall the package every time.
if (!requireNamespace("readxl", quietly = TRUE)) {
  install.packages("readxl")
}
## 
## The downloaded binary packages are in
##  /var/folders/qv/ynxnwkw51zz9p5kmfd4gz8bh0000gn/T//Rtmp5u2XxH/downloaded_packages
library(readxl)

# Load the survey export. Data was partially cleaned in 1KA and Excel.
mydata <- read_excel("~/Program R/Strategic Management/Anketa TS Strategic Management.xlsx")

# Add a running row identifier for easier reference to individual responses.
# seq_len() is used instead of seq(1, n) so that an empty table yields an empty
# ID column rather than the two-element sequence c(1, 0).
mydata$ID <- seq_len(nrow(mydata))

# Quick look at the first rows to confirm the import worked.
head(mydata)
## # A tibble: 6 × 62
##      Q2    Q3   Q4a   Q4b   Q4c   Q4d   Q4e   Q4f   Q4g    Q6 Q7a_1 Q7b_1 Q7c_1
##   <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1     3     2    -2    -2    -2    -2    -2    -2    -2     3     4     2     3
## 2     4     1     0     1     1     1     1     0     0     3     3     2     4
## 3     4     2    -2    -2    -2    -2    -2    -2    -2     2     2     2     2
## 4     4     1     0     0     1     0     1     0     0     4     4     2     4
## 5     5     1     0     1     0     1     1     0     0     4     4     2     4
## 6     2     1     0     1     0     0     0     0     0     3     2     3     3
## # ℹ 49 more variables: Q7d_1 <dbl>, Q7e_1 <dbl>, Q7f_1 <dbl>, Q7g_1 <dbl>,
## #   Q7h_1 <dbl>, Q8a_1 <dbl>, Q8b_1 <dbl>, Q8c_1 <dbl>, Q8d_1 <dbl>,
## #   Q8e_1 <dbl>, Q8f_1 <dbl>, Q8g_1 <dbl>, Q8h_1 <dbl>, Q8i_1 <dbl>, Q9 <dbl>,
## #   Q10 <dbl>, Q11_Q12a <dbl>, Q11_Q12b <dbl>, Q11_Q12c <dbl>, Q13a_1 <dbl>,
## #   Q13b_1 <dbl>, Q13c_1 <dbl>, Q31_2a_1 <dbl>, Q31_2b_1 <dbl>, Q31_2c_1 <dbl>,
## #   Q31_2d_1 <dbl>, Q31_2e_1 <dbl>, Q31_2f_1 <dbl>, Q15 <dbl>, Q16 <dbl>,
## #   Q17 <dbl>, Q18 <dbl>, Q20 <dbl>, Q21 <dbl>, Q22 <dbl>, Q23 <dbl>, …
# Transforming variables to numeric
# The columns below are coerced one by one, in the listed order.
numeric_items <- c(
  "Q2", "Q6",
  "Q7a_1", "Q7b_1", "Q7c_1", "Q7d_1", "Q7e_1", "Q7f_1", "Q7g_1", "Q7h_1",
  "Q8a_1", "Q8b_1", "Q8c_1", "Q8d_1", "Q8e_1", "Q8f_1", "Q8g_1", "Q8h_1",
  "Q8i_1",
  "Q10",
  "Q13a_1", "Q13b_1", "Q13c_1",
  "Q31_2a_1", "Q31_2b_1", "Q31_2c_1", "Q31_2d_1", "Q31_2e_1", "Q31_2f_1",
  "Q16"
)

for (item in numeric_items) {
  mydata[[item]] <- as.numeric(mydata[[item]])
}
# Recode the categorical survey items from numeric answer codes to labelled
# factors, then print a summary of every column.

# Turn answer codes 1..k into a factor whose k-th level carries labels[k].
code_to_factor <- function(codes, labels) {
  factor(codes, levels = seq_along(labels), labels = labels)
}

# Answer scale shared by several yes/no style questions.
yes_no_unsure <- c("Yes", "No", "Not sure")

mydata$Q3 <- code_to_factor(mydata$Q3, c("Yes", "No"))

mydata$Q9 <- code_to_factor(mydata$Q9, yes_no_unsure)

mydata$Q11_Q12a <- code_to_factor(mydata$Q11_Q12a, yes_no_unsure)
mydata$Q11_Q12b <- code_to_factor(mydata$Q11_Q12b, yes_no_unsure)
mydata$Q11_Q12c <- code_to_factor(mydata$Q11_Q12c, yes_no_unsure)

mydata$Q15 <- code_to_factor(
  mydata$Q15,
  c(
    "Reels", "Posts", "Podcast, Video", "Reddit", "Bank App",
    "University", "Books", "Family", "School", "I don't like"
  )
)

mydata$Q17 <- code_to_factor(mydata$Q17, yes_no_unsure)

mydata$Q18 <- code_to_factor(
  mydata$Q18,
  c(
    "Friends", "Family", "Financial Experts", "Influencers",
    "School", "My own research", "Other"
  )
)

mydata$Q20 <- code_to_factor(
  mydata$Q20,
  c(
    "A higher return always means it’s safer.",
    "Investments with higher expected returns are usually more risky.",
    "Risk and return are unrelated.",
    "Government bonds always offer the highest returns.",
    "I’m not sure."
  )
)

mydata$Q21 <- code_to_factor(
  mydata$Q21,
  c("Buy more", "Buy same", "Buy less", "Don't know")
)

mydata$Q22 <- code_to_factor(
  mydata$Q22,
  c("Earn more", "Less risk", "No tax", "Not sure")
)

mydata$Q23 <- code_to_factor(
  mydata$Q23,
  c(
    "You’ll always earn more.",
    "It helps reduce risk by spreading your money out.",
    "You won’t have to pay taxes.",
    "Not sure."
  )
)

mydata$Q24 <- code_to_factor(mydata$Q24, c("18-20", "21-23", "24-26", "27-29"))

mydata$Q25 <- code_to_factor(
  mydata$Q25,
  c("Man", "Woman", "Dont want to answer")
)

mydata$Q26 <- code_to_factor(
  mydata$Q26,
  c("High School", "University", "Employed", "Unemployed", "Other")
)

mydata$Q27 <- code_to_factor(mydata$Q27, c("High School", "Bachelor", "Master"))

mydata$Q28 <- code_to_factor(mydata$Q28, c("Urban", "Suburban", "Rural"))

mydata$Q29 <- code_to_factor(
  mydata$Q29,
  c("Max 500", "500-1000", "1000-1500", "1500-2000", "Min 2000", "Dont say")
)

# This column already holds the text values "Yes"/"No", so it is mapped by
# value rather than by numeric code.
# NOTE(review): the label spelling "iliterate" is kept unchanged because other
# code may match on the exact level text - confirm before correcting it.
mydata$`Financialy literate` <- factor(
  mydata$`Financialy literate`,
  levels = c("Yes", "No"),
  labels = c("Financially literate", "Financially iliterate")
)

summary(mydata)
##        Q2          Q3           Q4a               Q4b         
##  Min.   :1.000   Yes:210   Min.   :-2.0000   Min.   :-2.0000  
##  1st Qu.:2.000   No :116   1st Qu.:-2.0000   1st Qu.:-2.0000  
##  Median :4.000             Median : 0.0000   Median : 0.0000  
##  Mean   :3.337             Mean   :-0.5736   Mean   :-0.3988  
##  3rd Qu.:4.000             3rd Qu.: 0.0000   3rd Qu.: 1.0000  
##  Max.   :5.000             Max.   : 1.0000   Max.   : 1.0000  
##                                                               
##       Q4c               Q4d               Q4e               Q4f         
##  Min.   :-2.0000   Min.   :-2.0000   Min.   :-2.0000   Min.   :-2.0000  
##  1st Qu.:-2.0000   1st Qu.:-2.0000   1st Qu.:-2.0000   1st Qu.:-2.0000  
##  Median : 0.0000   Median : 0.0000   Median : 0.0000   Median : 0.0000  
##  Mean   :-0.5491   Mean   :-0.3742   Mean   :-0.2178   Mean   :-0.6626  
##  3rd Qu.: 0.0000   3rd Qu.: 1.0000   3rd Qu.: 1.0000   3rd Qu.: 0.0000  
##  Max.   : 1.0000   Max.   : 1.0000   Max.   : 1.0000   Max.   : 1.0000  
##                                                                         
##       Q4g               Q6            Q7a_1           Q7b_1      
##  Min.   :-2.000   Min.   :1.000   Min.   :1.000   Min.   :1.000  
##  1st Qu.:-2.000   1st Qu.:2.000   1st Qu.:2.000   1st Qu.:2.000  
##  Median : 0.000   Median :2.500   Median :3.000   Median :3.000  
##  Mean   :-0.635   Mean   :2.758   Mean   :3.166   Mean   :3.215  
##  3rd Qu.: 0.000   3rd Qu.:4.000   3rd Qu.:4.000   3rd Qu.:4.000  
##  Max.   : 1.000   Max.   :5.000   Max.   :5.000   Max.   :5.000  
##                                                                  
##      Q7c_1           Q7d_1           Q7e_1           Q7f_1      
##  Min.   :1.000   Min.   :1.000   Min.   :1.000   Min.   :1.000  
##  1st Qu.:3.000   1st Qu.:2.000   1st Qu.:2.000   1st Qu.:2.000  
##  Median :4.000   Median :3.000   Median :3.000   Median :3.000  
##  Mean   :3.439   Mean   :2.979   Mean   :2.972   Mean   :2.813  
##  3rd Qu.:4.000   3rd Qu.:4.000   3rd Qu.:4.000   3rd Qu.:4.000  
##  Max.   :5.000   Max.   :5.000   Max.   :5.000   Max.   :5.000  
##                                                                 
##      Q7g_1           Q7h_1           Q8a_1           Q8b_1           Q8c_1     
##  Min.   :1.000   Min.   :1.000   Min.   :1.000   Min.   :1.000   Min.   :1.00  
##  1st Qu.:1.000   1st Qu.:2.000   1st Qu.:2.000   1st Qu.:4.000   1st Qu.:3.00  
##  Median :2.000   Median :3.000   Median :4.000   Median :4.000   Median :4.00  
##  Mean   :2.417   Mean   :2.942   Mean   :3.304   Mean   :4.058   Mean   :3.85  
##  3rd Qu.:3.000   3rd Qu.:4.000   3rd Qu.:4.000   3rd Qu.:5.000   3rd Qu.:5.00  
##  Max.   :5.000   Max.   :5.000   Max.   :5.000   Max.   :5.000   Max.   :5.00  
##                                                                                
##      Q8d_1           Q8e_1           Q8f_1           Q8g_1           Q8h_1     
##  Min.   :1.000   Min.   :1.000   Min.   :1.000   Min.   :1.000   Min.   :1.00  
##  1st Qu.:3.000   1st Qu.:2.000   1st Qu.:3.000   1st Qu.:3.000   1st Qu.:3.00  
##  Median :4.000   Median :3.000   Median :4.000   Median :4.000   Median :4.00  
##  Mean   :3.534   Mean   :2.586   Mean   :3.693   Mean   :3.663   Mean   :3.58  
##  3rd Qu.:4.000   3rd Qu.:3.750   3rd Qu.:4.000   3rd Qu.:5.000   3rd Qu.:4.00  
##  Max.   :5.000   Max.   :5.000   Max.   :5.000   Max.   :5.000   Max.   :5.00  
##                                                                                
##      Q8i_1              Q9           Q10             Q11_Q12a       Q11_Q12b  
##  Min.   :1.000   Yes     :254   Min.   :-2.000   Yes     :267   Yes     : 95  
##  1st Qu.:4.000   No      : 44   1st Qu.: 1.000   No      : 43   No      :202  
##  Median :4.000   Not sure: 28   Median : 2.000   Not sure: 16   Not sure: 29  
##  Mean   :3.979                  Mean   : 1.463                                
##  3rd Qu.:5.000                  3rd Qu.: 3.000                                
##  Max.   :5.000                  Max.   : 5.000                                
##                                                                               
##      Q11_Q12c       Q13a_1          Q13b_1         Q13c_1        Q31_2a_1    
##  Yes     :212   Min.   :1.000   Min.   :1.00   Min.   :1.00   Min.   :1.000  
##  No      : 87   1st Qu.:1.000   1st Qu.:1.00   1st Qu.:1.00   1st Qu.:4.000  
##  Not sure: 27   Median :3.000   Median :2.00   Median :2.00   Median :4.000  
##                 Mean   :2.564   Mean   :2.04   Mean   :2.38   Mean   :4.175  
##                 3rd Qu.:4.000   3rd Qu.:3.00   3rd Qu.:3.00   3rd Qu.:5.000  
##                 Max.   :5.000   Max.   :5.00   Max.   :5.00   Max.   :5.000  
##                                                                              
##     Q31_2b_1        Q31_2c_1        Q31_2d_1        Q31_2e_1    
##  Min.   :1.000   Min.   :1.000   Min.   :1.000   Min.   :1.000  
##  1st Qu.:3.000   1st Qu.:3.000   1st Qu.:4.000   1st Qu.:4.000  
##  Median :4.000   Median :4.000   Median :4.000   Median :4.000  
##  Mean   :3.681   Mean   :3.525   Mean   :4.012   Mean   :4.101  
##  3rd Qu.:4.000   3rd Qu.:4.000   3rd Qu.:5.000   3rd Qu.:5.000  
##  Max.   :5.000   Max.   :5.000   Max.   :5.000   Max.   :5.000  
##                                                                 
##     Q31_2f_1                 Q15          Q16              Q17     
##  Min.   :1.000   Podcast, Video:60   Min.   :1.000   Yes     : 99  
##  1st Qu.:2.000   Reels         :53   1st Qu.:2.000   No      :166  
##  Median :4.000   Bank App      :53   Median :3.000   Not sure: 61  
##  Mean   :3.261   School        :35   Mean   :3.126                 
##  3rd Qu.:4.000   Books         :27   3rd Qu.:4.000                 
##  Max.   :5.000   Posts         :26   Max.   :5.000                 
##                  (Other)       :72                                 
##                 Q18     
##  Friends          : 40  
##  Family           : 54  
##  Financial Experts:122  
##  Influencers      :  3  
##  School           :  2  
##  My own research  :105  
##  Other            :  0  
##                                                                Q20     
##  A higher return always means it’s safer.                        : 11  
##  Investments with higher expected returns are usually more risky.:280  
##  Risk and return are unrelated.                                  : 10  
##  Government bonds always offer the highest returns.              :  4  
##  I’m not sure.                                                   : 21  
##                                                                        
##                                                                        
##          Q21             Q22     
##  Buy more  :  9   Earn more:  9  
##  Buy same  :  4   Less risk:280  
##  Buy less  :300   No tax   :  4  
##  Don't know: 13   Not sure : 33  
##                                  
##                                  
##                                  
##                                                 Q23         Q24     
##  You’ll always earn more.                         : 21   18-20: 33  
##  It helps reduce risk by spreading your money out.:265   21-23:144  
##  You won’t have to pay taxes.                     :  0   24-26: 94  
##  Not sure.                                        : 40   27-29: 55  
##                                                                     
##                                                                     
##                                                                     
##                   Q25               Q26               Q27            Q28     
##  Man                :190   High School:  8   High School:118   Urban   :229  
##  Woman              :136   University :221   Bachelor   :160   Suburban: 68  
##  Dont want to answer:  0   Employed   : 91   Master     : 48   Rural   : 29  
##                            Unemployed :  6                                   
##                            Other      :  0                                   
##                                                                              
##                                                                              
##         Q29       Q20 correct      Q21 correct      Q22 correcrt   
##  Max 500  :111   Min.   :0.0000   Min.   :0.0000   Min.   :0.0000  
##  500-1000 : 61   1st Qu.:1.0000   1st Qu.:1.0000   1st Qu.:1.0000  
##  1000-1500: 47   Median :1.0000   Median :1.0000   Median :1.0000  
##  1500-2000: 51   Mean   :0.8589   Mean   :0.9202   Mean   :0.8589  
##  Min 2000 : 38   3rd Qu.:1.0000   3rd Qu.:1.0000   3rd Qu.:1.0000  
##  Dont say : 18   Max.   :1.0000   Max.   :1.0000   Max.   :1.0000  
##                                                                    
##   Q23 correct       3 correct                Financialy literate
##  Min.   :0.0000   Min.   :0.000   Financially literate :278     
##  1st Qu.:1.0000   1st Qu.:3.250   Financially iliterate: 48     
##  Median :1.0000   Median :4.000                                 
##  Mean   :0.8129   Mean   :3.451                                 
##  3rd Qu.:1.0000   3rd Qu.:4.000                                 
##  Max.   :1.0000   Max.   :4.000                                 
##                                                                 
##        ID        
##  Min.   :  1.00  
##  1st Qu.: 82.25  
##  Median :163.50  
##  Mean   :163.50  
##  3rd Qu.:244.75  
##  Max.   :326.00  
## 
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
# Mean score of each Q8 feature item (Q8a_1 to Q8i_1) per level of Q25.
# The grouping column is explicitly named `mydata$Q25`, because the rename step
# further down looks it up under exactly that name.
mydata_summary1 <- mydata %>%
  group_by(`mydata$Q25` = Q25) %>%
  summarise(
    `Smaller minimal investment` = mean(Q8a_1, na.rm = TRUE),
    `Smaller management fees` = mean(Q8b_1, na.rm = TRUE),
    `Clearer information about risk and return` = mean(Q8c_1, na.rm = TRUE),
    `Better user experience` = mean(Q8d_1, na.rm = TRUE),
    `ESG focus` = mean(Q8e_1, na.rm = TRUE),
    `Transparent investment strategy` = mean(Q8f_1, na.rm = TRUE),
    `Good customer support` = mean(Q8g_1, na.rm = TRUE),
    `Peer recommendations` = mean(Q8h_1, na.rm = TRUE),
    `Easy to understand content` = mean(Q8i_1, na.rm = TRUE),
    .groups = "drop"
  )

print(mydata_summary1)
## # A tibble: 2 × 10
##   `mydata$Q25` `Smaller minimal investment` `Smaller management fees`
##   <fct>                               <dbl>                     <dbl>
## 1 Man                                  3.06                      4.13
## 2 Woman                                3.64                      3.96
## # ℹ 7 more variables: `Clearer information about risk and return` <dbl>,
## #   `Better user experience` <dbl>, `ESG focus` <dbl>,
## #   `Transparent investment strategy` <dbl>, `Good customer support` <dbl>,
## #   `Peer recommendations` <dbl>, `Easy to understand content` <dbl>
library(tidyr)
library(ggplot2)
library(dplyr)

# Rename the grouping column to "Group" so the legend gets a readable title
mydata_summary1 <- mydata_summary1 %>%
  rename(Group = `mydata$Q25`)

# Reshape to long format: one row per (Group, feature) pair for ggplot
mydata_long1 <- mydata_summary1 %>%
  pivot_longer(cols = -Group, names_to = "variable", values_to = "mean_score")

# Grouped bar chart of the mean score per feature, one bar per group.
# The plot is stored in a variable so that ggsave() below writes exactly this
# plot instead of relying on last_plot().
plot_gender <- ggplot(
  mydata_long1,
  aes(x = variable, y = mean_score, fill = Group)
) +
  geom_col(position = "dodge") +
  theme_minimal() +
  labs(
    title = "Likelihood of investments into mutual funds based on features by gender",
    x = "Perception Attribute",
    y = "Average Score"
  ) +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

plot_gender

ggsave(
  "~/Program R/Strategic Management/Gender.png",
  plot = plot_gender,
  width = 12,
  height = 6,
  dpi = 300
)
library(dplyr)

# Mean score of each Q8 feature item (Q8a_1 to Q8i_1) per level of the
# `Financialy literate` column.
# The grouping column is explicitly named `mydata$"Financialy literate"`,
# because the rename step further down looks it up under exactly that name.
mydata_summary2 <- mydata %>%
  group_by(`mydata$"Financialy literate"` = `Financialy literate`) %>%
  summarise(
    `Smaller minimal investment` = mean(Q8a_1, na.rm = TRUE),
    `Smaller management fees` = mean(Q8b_1, na.rm = TRUE),
    `Clearer information about risk and return` = mean(Q8c_1, na.rm = TRUE),
    `Better user experience` = mean(Q8d_1, na.rm = TRUE),
    `ESG focus` = mean(Q8e_1, na.rm = TRUE),
    `Transparent investment strategy` = mean(Q8f_1, na.rm = TRUE),
    `Good customer support` = mean(Q8g_1, na.rm = TRUE),
    `Peer recommendations` = mean(Q8h_1, na.rm = TRUE),
    `Easy to understand content` = mean(Q8i_1, na.rm = TRUE),
    .groups = "drop"
  )

print(mydata_summary2)
## # A tibble: 2 × 10
##   `mydata$"Financialy literate"` Smaller minimal invest…¹ Smaller management f…²
##   <fct>                                             <dbl>                  <dbl>
## 1 Financially literate                               3.25                   4.13
## 2 Financially iliterate                              3.60                   3.65
## # ℹ abbreviated names: ¹​`Smaller minimal investment`,
## #   ²​`Smaller management fees`
## # ℹ 7 more variables: `Clearer information about risk and return` <dbl>,
## #   `Better user experience` <dbl>, `ESG focus` <dbl>,
## #   `Transparent investment strategy` <dbl>, `Good customer support` <dbl>,
## #   `Peer recommendations` <dbl>, `Easy to understand content` <dbl>
library(tidyr)
library(ggplot2)
library(dplyr)

# Rename the grouping column to "Group" so the legend gets a readable title
mydata_summary2 <- mydata_summary2 %>%
  rename(Group = `mydata$"Financialy literate"`)

# Reshape to long format: one row per (Group, feature) pair for ggplot
mydata_long2 <- mydata_summary2 %>%
  pivot_longer(cols = -Group, names_to = "variable", values_to = "mean_score")

# Grouped bar chart of the mean score per feature, one bar per group.
# The plot is stored in a variable so that ggsave() below writes exactly this
# plot instead of relying on last_plot().
plot_financial_literacy <- ggplot(
  mydata_long2,
  aes(x = variable, y = mean_score, fill = Group)
) +
  geom_col(position = "dodge") +
  theme_minimal() +
  labs(
    title = "Likelihood of investments into mutual funds based on features by financial literacy",
    x = "Perception Attribute",
    y = "Average Score"
  ) +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

plot_financial_literacy

ggsave(
  "~/Program R/Strategic Management/FinancialLiteracy.png",
  plot = plot_financial_literacy,
  width = 12,
  height = 6,
  dpi = 300
)
library(dplyr)

# Mean score of each Q8 feature item (Q8a_1 to Q8i_1) per level of Q24.
# The grouping column is explicitly named `mydata$Q24`, because the rename step
# further down looks it up under exactly that name.
mydata_summary3 <- mydata %>%
  group_by(`mydata$Q24` = Q24) %>%
  summarise(
    `Smaller minimal investment` = mean(Q8a_1, na.rm = TRUE),
    `Smaller management fees` = mean(Q8b_1, na.rm = TRUE),
    `Clearer information about risk and return` = mean(Q8c_1, na.rm = TRUE),
    `Better user experience` = mean(Q8d_1, na.rm = TRUE),
    `ESG focus` = mean(Q8e_1, na.rm = TRUE),
    `Transparent investment strategy` = mean(Q8f_1, na.rm = TRUE),
    `Good customer support` = mean(Q8g_1, na.rm = TRUE),
    `Peer recommendations` = mean(Q8h_1, na.rm = TRUE),
    `Easy to understand content` = mean(Q8i_1, na.rm = TRUE),
    .groups = "drop"
  )

print(mydata_summary3)
## # A tibble: 4 × 10
##   `mydata$Q24` `Smaller minimal investment` `Smaller management fees`
##   <fct>                               <dbl>                     <dbl>
## 1 18-20                                3.58                      4.09
## 2 21-23                                3.26                      4.22
## 3 24-26                                3.33                      3.66
## 4 27-29                                3.22                      4.29
## # ℹ 7 more variables: `Clearer information about risk and return` <dbl>,
## #   `Better user experience` <dbl>, `ESG focus` <dbl>,
## #   `Transparent investment strategy` <dbl>, `Good customer support` <dbl>,
## #   `Peer recommendations` <dbl>, `Easy to understand content` <dbl>
library(tidyr)
library(ggplot2)
library(dplyr)

# Rename the grouping column to "Group" so the legend gets a readable title
mydata_summary3 <- mydata_summary3 %>%
  rename(Group = `mydata$Q24`)

# Reshape to long format: one row per (Group, feature) pair for ggplot
mydata_long3 <- mydata_summary3 %>%
  pivot_longer(cols = -Group, names_to = "variable", values_to = "mean_score")

# Grouped bar chart of the mean score per feature, one bar per group.
# The plot is stored in a variable so that ggsave() below writes exactly this
# plot instead of relying on last_plot().
plot_age_group <- ggplot(
  mydata_long3,
  aes(x = variable, y = mean_score, fill = Group)
) +
  geom_col(position = "dodge") +
  theme_minimal() +
  labs(
    title = "Likelihood of investments into mutual funds based on features by age group",
    x = "Perception Attribute",
    y = "Average Score"
  ) +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

plot_age_group

ggsave(
  "~/Program R/Strategic Management/AgeGroup.png",
  plot = plot_age_group,
  width = 12,
  height = 6,
  dpi = 300
)
library(dplyr)

# Mean score of each Q8 feature item (Q8a_1 to Q8i_1) per level of Q27.
# The grouping column is explicitly named `mydata$Q27`, because the rename step
# further down looks it up under exactly that name.
mydata_summary4 <- mydata %>%
  group_by(`mydata$Q27` = Q27) %>%
  summarise(
    `Smaller minimal investment` = mean(Q8a_1, na.rm = TRUE),
    `Smaller management fees` = mean(Q8b_1, na.rm = TRUE),
    `Clearer information about risk and return` = mean(Q8c_1, na.rm = TRUE),
    `Better user experience` = mean(Q8d_1, na.rm = TRUE),
    `ESG focus` = mean(Q8e_1, na.rm = TRUE),
    `Transparent investment strategy` = mean(Q8f_1, na.rm = TRUE),
    `Good customer support` = mean(Q8g_1, na.rm = TRUE),
    `Peer recommendations` = mean(Q8h_1, na.rm = TRUE),
    `Easy to understand content` = mean(Q8i_1, na.rm = TRUE),
    .groups = "drop"
  )

# Print the table that was just computed (previously this printed
# mydata_summary3 by mistake).
print(mydata_summary4)
## # A tibble: 4 × 10
##   Group Smaller minimal investme…¹ Smaller management f…² Clearer information …³
##   <fct>                      <dbl>                  <dbl>                  <dbl>
## 1 18-20                       3.58                   4.09                   4.03
## 2 21-23                       3.26                   4.22                   4.09
## 3 24-26                       3.33                   3.66                   3.63
## 4 27-29                       3.22                   4.29                   3.49
## # ℹ abbreviated names: ¹​`Smaller minimal investment`,
## #   ²​`Smaller management fees`, ³​`Clearer information about risk and return`
## # ℹ 6 more variables: `Better user experience` <dbl>, `ESG focus` <dbl>,
## #   `Transparent investment strategy` <dbl>, `Good customer support` <dbl>,
## #   `Peer recommendations` <dbl>, `Easy to understand content` <dbl>
library(tidyr)
library(ggplot2)
library(dplyr)

# Rename the grouping column to "Group" so the legend gets a readable title
mydata_summary4 <- mydata_summary4 %>%
  rename(Group = `mydata$Q27`)

# Reshape to long format: one row per (Group, feature) pair for ggplot
mydata_long4 <- mydata_summary4 %>%
  pivot_longer(cols = -Group, names_to = "variable", values_to = "mean_score")

# Grouped bar chart of the mean score per feature, one bar per group.
# The plot is stored in a variable so that ggsave() below writes exactly this
# plot instead of relying on last_plot().
plot_education <- ggplot(
  mydata_long4,
  aes(x = variable, y = mean_score, fill = Group)
) +
  geom_col(position = "dodge") +
  theme_minimal() +
  labs(
    title = "Likelihood of investments into mutual funds based on features by highest education attained",
    x = "Perception Attribute",
    y = "Average Score"
  ) +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

plot_education

ggsave(
  "~/Program R/Strategic Management/Highest education.png",
  plot = plot_education,
  width = 12,
  height = 6,
  dpi = 300
)
library(dplyr)

# Mean score of each Q8 feature item (Q8a_1 to Q8i_1) per level of Q26.
# The grouping column is explicitly named `mydata$Q26`, because the rename step
# further down looks it up under exactly that name.
mydata_summary5 <- mydata %>%
  group_by(`mydata$Q26` = Q26) %>%
  summarise(
    `Smaller minimal investment` = mean(Q8a_1, na.rm = TRUE),
    `Smaller management fees` = mean(Q8b_1, na.rm = TRUE),
    `Clearer information about risk and return` = mean(Q8c_1, na.rm = TRUE),
    `Better user experience` = mean(Q8d_1, na.rm = TRUE),
    `ESG focus` = mean(Q8e_1, na.rm = TRUE),
    `Transparent investment strategy` = mean(Q8f_1, na.rm = TRUE),
    `Good customer support` = mean(Q8g_1, na.rm = TRUE),
    `Peer recommendations` = mean(Q8h_1, na.rm = TRUE),
    `Easy to understand content` = mean(Q8i_1, na.rm = TRUE),
    .groups = "drop"
  )

print(mydata_summary5)
## # A tibble: 4 × 10
##   `mydata$Q26` `Smaller minimal investment` `Smaller management fees`
##   <fct>                               <dbl>                     <dbl>
## 1 High School                          4                         4   
## 2 University                           3.28                      4.04
## 3 Employed                             3.32                      4.13
## 4 Unemployed                           3                         3.67
## # ℹ 7 more variables: `Clearer information about risk and return` <dbl>,
## #   `Better user experience` <dbl>, `ESG focus` <dbl>,
## #   `Transparent investment strategy` <dbl>, `Good customer support` <dbl>,
## #   `Peer recommendations` <dbl>, `Easy to understand content` <dbl>
library(tidyr)
library(ggplot2)
library(dplyr)

# Rename the grouping column to "Group" so the legend gets a readable title
mydata_summary5 <- mydata_summary5 %>%
  rename(Group = `mydata$Q26`)

# Reshape to long format: one row per (Group, feature) pair for ggplot
mydata_long5 <- mydata_summary5 %>%
  pivot_longer(cols = -Group, names_to = "variable", values_to = "mean_score")

# Grouped bar chart of the mean score per feature, one bar per group.
# The plot is stored in a variable so that ggsave() below writes exactly this
# plot instead of relying on last_plot().
plot_current_status <- ggplot(
  mydata_long5,
  aes(x = variable, y = mean_score, fill = Group)
) +
  geom_col(position = "dodge") +
  theme_minimal() +
  labs(
    title = "Likelihood of investments into mutual funds based on features by current status",
    x = "Perception Attribute",
    y = "Average Score"
  ) +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

plot_current_status

ggsave(
  "~/Program R/Strategic Management/CurrentStatus.png",
  plot = plot_current_status,
  width = 12,
  height = 6,
  dpi = 300
)
library(dplyr)

# Mean score of each Q8 feature item (Q8a_1 to Q8i_1) per level of Q28.
# The grouping column is explicitly named `mydata$Q28`, because the rename step
# further down looks it up under exactly that name.
mydata_summary6 <- mydata %>%
  group_by(`mydata$Q28` = Q28) %>%
  summarise(
    `Smaller minimal investment` = mean(Q8a_1, na.rm = TRUE),
    `Smaller management fees` = mean(Q8b_1, na.rm = TRUE),
    `Clearer information about risk and return` = mean(Q8c_1, na.rm = TRUE),
    `Better user experience` = mean(Q8d_1, na.rm = TRUE),
    `ESG focus` = mean(Q8e_1, na.rm = TRUE),
    `Transparent investment strategy` = mean(Q8f_1, na.rm = TRUE),
    `Good customer support` = mean(Q8g_1, na.rm = TRUE),
    `Peer recommendations` = mean(Q8h_1, na.rm = TRUE),
    `Easy to understand content` = mean(Q8i_1, na.rm = TRUE),
    .groups = "drop"
  )

print(mydata_summary6)
## # A tibble: 3 × 10
##   `mydata$Q28` `Smaller minimal investment` `Smaller management fees`
##   <fct>                               <dbl>                     <dbl>
## 1 Urban                                3.36                      4.07
## 2 Suburban                             2.91                      4   
## 3 Rural                                3.79                      4.07
## # ℹ 7 more variables: `Clearer information about risk and return` <dbl>,
## #   `Better user experience` <dbl>, `ESG focus` <dbl>,
## #   `Transparent investment strategy` <dbl>, `Good customer support` <dbl>,
## #   `Peer recommendations` <dbl>, `Easy to understand content` <dbl>
library(tidyr)
library(ggplot2)
library(dplyr)

# Rename the grouping column to "Group" so the legend gets a readable title
mydata_summary6 <- mydata_summary6 %>%
  rename(Group = `mydata$Q28`)

# Reshape to long format: one row per (Group, feature) pair for ggplot
mydata_long6 <- mydata_summary6 %>%
  pivot_longer(cols = -Group, names_to = "variable", values_to = "mean_score")

# Grouped bar chart of the mean score per feature, one bar per group.
# The plot is stored in a variable so that ggsave() below writes exactly this
# plot instead of relying on last_plot().
plot_area_of_living <- ggplot(
  mydata_long6,
  aes(x = variable, y = mean_score, fill = Group)
) +
  geom_col(position = "dodge") +
  theme_minimal() +
  labs(
    title = "Likelihood of investments into mutual funds based on features by area of living",
    x = "Perception Attribute",
    y = "Average Score"
  ) +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

plot_area_of_living

ggsave(
  "~/Program R/Strategic Management/AreaOfLiving.png",
  plot = plot_area_of_living,
  width = 12,
  height = 6,
  dpi = 300
)
library(dplyr)

# Mean score of each Q8 feature item (Q8a_1 to Q8i_1) per level of Q29.
# The grouping column is explicitly named `mydata$Q29`, because the rename step
# further down looks it up under exactly that name.
mydata_summary7 <- mydata %>%
  group_by(`mydata$Q29` = Q29) %>%
  summarise(
    `Smaller minimal investment` = mean(Q8a_1, na.rm = TRUE),
    `Smaller management fees` = mean(Q8b_1, na.rm = TRUE),
    `Clearer information about risk and return` = mean(Q8c_1, na.rm = TRUE),
    `Better user experience` = mean(Q8d_1, na.rm = TRUE),
    `ESG focus` = mean(Q8e_1, na.rm = TRUE),
    `Transparent investment strategy` = mean(Q8f_1, na.rm = TRUE),
    `Good customer support` = mean(Q8g_1, na.rm = TRUE),
    `Peer recommendations` = mean(Q8h_1, na.rm = TRUE),
    `Easy to understand content` = mean(Q8i_1, na.rm = TRUE),
    .groups = "drop"
  )

print(mydata_summary7)
## # A tibble: 6 × 10
##   `mydata$Q29` `Smaller minimal investment` `Smaller management fees`
##   <fct>                               <dbl>                     <dbl>
## 1 Max 500                              3.38                      4.11
## 2 500-1000                             3.10                      4.15
## 3 1000-1500                            3.47                      3.98
## 4 1500-2000                            3.04                      4.02
## 5 Min 2000                             3.45                      4   
## 6 Dont say                             3.56                      3.89
## # ℹ 7 more variables: `Clearer information about risk and return` <dbl>,
## #   `Better user experience` <dbl>, `ESG focus` <dbl>,
## #   `Transparent investment strategy` <dbl>, `Good customer support` <dbl>,
## #   `Peer recommendations` <dbl>, `Easy to understand content` <dbl>
library(tidyr)
library(ggplot2)
library(dplyr)

# Rename the grouping column to "Group" so the legend gets a readable title
mydata_summary7 <- mydata_summary7 %>%
  rename(Group = `mydata$Q29`)

# Reshape to long format: one row per (Group, feature) pair for ggplot
mydata_long7 <- mydata_summary7 %>%
  pivot_longer(cols = -Group, names_to = "variable", values_to = "mean_score")

# Grouped bar chart of the mean score per feature, one bar per group.
# The plot is stored in a variable so that ggsave() below writes exactly this
# plot instead of relying on last_plot().
plot_net_income <- ggplot(
  mydata_long7,
  aes(x = variable, y = mean_score, fill = Group)
) +
  geom_col(position = "dodge") +
  theme_minimal() +
  labs(
    title = "Likelihood of investments into mutual funds based on features by average net monthly income",
    x = "Perception Attribute",
    y = "Average Score"
  ) +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

plot_net_income

ggsave(
  "~/Program R/Strategic Management/NetIncome.png",
  plot = plot_net_income,
  width = 12,
  height = 6,
  dpi = 300
)