Practical Task 6

library(readr)
library(ggplot2)
library(factoextra)
## Warning: package 'factoextra' was built under R version 4.3.2
## Welcome! Want to learn more? See two factoextra-related books at https://goo.gl/ve3WBa
library(tidyr)
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(readr)

# Load the survey responses from the CSV export.
# NOTE(review): the path is absolute and machine-specific.
Motivation <- read_csv(file = "c:/users/dell/downloads/Motivation.csv")
## Rows: 665 Columns: 14
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (14): job_market, job_areas, income, requirement, interest, broader_hori...
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Per-column descriptive statistics (min, quartiles, median, mean, max)
print(summary(Motivation))
##    job_market      job_areas        income       requirement       interest    
##  Min.   :1.000   Min.   :1.00   Min.   :1.000   Min.   :1.000   Min.   :1.000  
##  1st Qu.:1.000   1st Qu.:1.00   1st Qu.:2.000   1st Qu.:1.000   1st Qu.:1.000  
##  Median :2.000   Median :2.00   Median :2.000   Median :2.000   Median :2.000  
##  Mean   :1.738   Mean   :1.97   Mean   :2.463   Mean   :2.483   Mean   :1.761  
##  3rd Qu.:2.000   3rd Qu.:2.00   3rd Qu.:3.000   3rd Qu.:3.000   3rd Qu.:2.000  
##  Max.   :5.000   Max.   :5.00   Max.   :5.000   Max.   :5.000   Max.   :5.000  
##  broader_horizon specializations further_education    friends     
##  Min.   :1.000   Min.   :1.000   Min.   :1.000     Min.   :1.000  
##  1st Qu.:1.000   1st Qu.:2.000   1st Qu.:3.000     1st Qu.:4.000  
##  Median :2.000   Median :2.000   Median :5.000     Median :5.000  
##  Mean   :1.949   Mean   :2.552   Mean   :3.835     Mean   :4.415  
##  3rd Qu.:3.000   3rd Qu.:3.000   3rd Qu.:5.000     3rd Qu.:5.000  
##  Max.   :5.000   Max.   :5.000   Max.   :5.000     Max.   :5.000  
##     no_idea          family       student_life   academic_title 
##  Min.   :1.000   Min.   :1.000   Min.   :1.000   Min.   :1.000  
##  1st Qu.:3.000   1st Qu.:3.000   1st Qu.:3.000   1st Qu.:2.000  
##  Median :5.000   Median :5.000   Median :4.000   Median :3.000  
##  Mean   :4.108   Mean   :4.138   Mean   :3.471   Mean   :3.033  
##  3rd Qu.:5.000   3rd Qu.:5.000   3rd Qu.:5.000   3rd Qu.:4.000  
##  Max.   :5.000   Max.   :5.000   Max.   :5.000   Max.   :5.000  
##  WU_reputation  
##  Min.   :1.000  
##  1st Qu.:2.000  
##  Median :2.000  
##  Mean   :2.525  
##  3rd Qu.:3.000  
##  Max.   :5.000

Checking for correlations between variables

# Pairwise Pearson correlations between all columns of the dataset
correlation_matrix <- cor(Motivation, method = "pearson")
print(correlation_matrix)
##                     job_market   job_areas      income requirement    interest
## job_market         1.000000000  0.41019350  0.45075453  0.30090409  0.21456939
## job_areas          0.410193504  1.00000000  0.37712136  0.17692839  0.12077740
## income             0.450754529  0.37712136  1.00000000  0.21210709 -0.03705413
## requirement        0.300904092  0.17692839  0.21210709  1.00000000  0.13246736
## interest           0.214569391  0.12077740 -0.03705413  0.13246736  1.00000000
## broader_horizon    0.147702597  0.15639767  0.04357249  0.06833456  0.33236720
## specializations    0.156477928  0.34510943  0.21255949  0.11011069  0.18369769
## further_education  0.001418820 -0.03433017  0.06735935 -0.03705719  0.02709140
## friends           -0.032796056  0.01035598  0.13050858 -0.01684908 -0.17780082
## no_idea           -0.189122973 -0.15537733  0.06202983 -0.13260813 -0.42098928
## family             0.013953380  0.02969020  0.08268655  0.17627173 -0.11433691
## student_life       0.002563345  0.01451251  0.07408213  0.11627585 -0.03642876
## academic_title     0.187793060  0.20229699  0.30321732  0.18749622 -0.06199301
## WU_reputation      0.259231578  0.39651851  0.23495616  0.17397241  0.06511413
##                   broader_horizon specializations further_education     friends
## job_market             0.14770260      0.15647793        0.00141882 -0.03279606
## job_areas              0.15639767      0.34510943       -0.03433017  0.01035598
## income                 0.04357249      0.21255949        0.06735935  0.13050858
## requirement            0.06833456      0.11011069       -0.03705719 -0.01684908
## interest               0.33236720      0.18369769        0.02709140 -0.17780082
## broader_horizon        1.00000000      0.19778885        0.05867910 -0.06653369
## specializations        0.19778885      1.00000000        0.09747622  0.09722971
## further_education      0.05867910      0.09747622        1.00000000  0.05526779
## friends               -0.06653369      0.09722971        0.05526779  1.00000000
## no_idea               -0.24173968     -0.14826377       -0.05736411  0.20441328
## family                -0.08354939      0.01279697       -0.05694739  0.09448856
## student_life           0.09707589      0.06030416       -0.10569963  0.18483861
## academic_title         0.01364916      0.13018098       -0.07528844  0.06244046
## WU_reputation          0.08664678      0.35531435        0.02668760  0.10120262
##                       no_idea      family student_life academic_title
## job_market        -0.18912297  0.01395338  0.002563345     0.18779306
## job_areas         -0.15537733  0.02969020  0.014512506     0.20229699
## income             0.06202983  0.08268655  0.074082132     0.30321732
## requirement       -0.13260813  0.17627173  0.116275845     0.18749622
## interest          -0.42098928 -0.11433691 -0.036428757    -0.06199301
## broader_horizon   -0.24173968 -0.08354939  0.097075893     0.01364916
## specializations   -0.14826377  0.01279697  0.060304157     0.13018098
## further_education -0.05736411 -0.05694739 -0.105699629    -0.07528844
## friends            0.20441328  0.09448856  0.184838611     0.06244046
## no_idea            1.00000000  0.12804234  0.187318684     0.02177788
## family             0.12804234  1.00000000  0.178383376     0.38990326
## student_life       0.18731868  0.17838338  1.000000000     0.10549284
## academic_title     0.02177788  0.38990326  0.105492839     1.00000000
## WU_reputation     -0.03074069  0.11528328  0.116886057     0.28061359
##                   WU_reputation
## job_market           0.25923158
## job_areas            0.39651851
## income               0.23495616
## requirement          0.17397241
## interest             0.06511413
## broader_horizon      0.08664678
## specializations      0.35531435
## further_education    0.02668760
## friends              0.10120262
## no_idea             -0.03074069
## family               0.11528328
## student_life         0.11688606
## academic_title       0.28061359
## WU_reputation        1.00000000

This correlation table provides insights into the relationships between various variables in the dataset. The values in the table range from -1 to 1, where:

1 indicates a perfect positive correlation: as one variable increases, the other increases in exact linear proportion. -1 indicates a perfect negative correlation: as one variable increases, the other decreases in exact linear proportion. 0 indicates no linear correlation; the variables may still be related in a non-linear way, so a value of 0 does not by itself establish independence. Here’s a breakdown of the interpretation:

Positive Correlations:

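The strongest positive correlations in the table are between “job_market” and “income” (0.4508) and between “job_market” and “job_areas” (0.4102): students who score high on one job-related motive tend to score high on the others as well. The variables “job_areas” and “WU_reputation” exhibit a similarly moderate positive correlation of 0.3965. This implies that students valuing a diverse range of job areas are also likely to value the university’s reputation.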

Another positive correlation of 0.2592 is observed between “job_market” and “WU_reputation,” suggesting that individuals motivated by high job market opportunities also tend to value the university’s reputation.

Negative Correlations:

Notably, “interest” and “no_idea” display the most pronounced negative correlation in the table, -0.4210. This suggests that students strongly interested in Business and Administration are less likely to respond with “no idea” as a motive.

Other Interesting Correlations:

“Interest” and “broader_horizon” show a positive correlation of 0.3324, indicating that those interested in Business and Administration are also likely to have a broader interest in learning new things.

A positive correlation of 0.3451 is observed between “specializations” and “job_areas,” suggesting that students valuing diverse job areas are also likely to appreciate diversity in specialization possibilities.

Weak Correlations:

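Several variables exhibit only weak correlations with the rest, indicating little linear association. For example, “further_education” has no correlation larger than about 0.11 in absolute value with any other variable.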

Important to remember:

It’s crucial to emphasize that correlation does not imply causation. While the correlation values provide insights into variable relationships, they do not establish a cause-and-effect relationship. For example, a positive correlation does not necessarily imply that one variable causes the other to change.

Preparing the data for clustering

# Keep only the survey items that enter the cluster analysis
columns_for_clustering <- Motivation[c(
  "job_market", "job_areas", "income", "requirement", "interest",
  "broader_horizon", "specializations", "further_education",
  "friends", "no_idea", "family", "student_life",
  "academic_title", "WU_reputation"
)]

# Center each item to mean 0 and rescale to standard deviation 1, so that
# no single item dominates the distances k-means works with
scaled_data <- scale(columns_for_clustering, center = TRUE, scale = TRUE)

Determining the optimal number of clusters using Elbow method or Silhouette method

From the plot, we can identify the ‘elbow’ point where the decrease in the within-cluster sum of squares (WSS) slows down as more clusters are added. Based on this elbow, we choose 4 as the number of clusters.

Performing k-means clustering with the determined number of clusters

# Number of clusters chosen from the elbow plot
k <- 4

# k-means starts from randomly chosen centers. Fix the RNG seed so that the
# partition and, in particular, the arbitrary cluster numbering are the same
# on every run; otherwise the written interpretation of "Cluster 1..4" can
# stop matching the printed centers after a re-run.
set.seed(123)

# Perform k-means clustering on the standardized items; nstart = 25 tries
# 25 random sets of starting centers and keeps the best solution
kmeans_result <- kmeans(scaled_data, centers = k, nstart = 25)

# Add the cluster assignment to the original dataset
Motivation$cluster <- as.factor(kmeans_result$cluster)

# Extract the coordinates of the cluster centers (same standardized units
# as scaled_data, one row per cluster)
cluster_centers <- kmeans_result$centers

# Print the cluster centers
print(cluster_centers)
##   job_market  job_areas     income requirement   interest broader_horizon
## 1  0.4652318  0.3684486 -0.1713572  0.31817653  1.2390525     0.852597029
## 2  0.7082947  0.7608538  1.0245274  0.31848900 -0.2259343     0.003551109
## 3 -0.4195476 -0.4898367 -0.4221918 -0.41412201 -0.2401925    -0.255887691
## 4 -0.4984379 -0.3806587 -0.3650127  0.02522093 -0.3226476    -0.261669282
##   specializations further_education    friends    no_idea     family
## 1       0.4181543         0.1210440 -0.5503061 -1.5098520 -0.5098258
## 2       0.5082697         0.1315514  0.3366540  0.2803297  0.3826915
## 3      -0.3962963         0.4028866 -0.1111568  0.3667372 -0.3802831
## 4      -0.2807600        -0.8224256  0.2061931  0.2792791  0.5204004
##   student_life academic_title WU_reputation
## 1   -0.3443948     -0.2042589    0.18901139
## 2    0.2155850      0.6789638    0.69943229
## 3   -0.2977632     -0.7272117   -0.57960042
## 4    0.4579315      0.4806511   -0.04949444
Note: the cluster numbers below refer to the rows of the cluster-center table printed above. The centers are standardized (z-score) values, so “high” or “positive” means above the sample average and “negative” means below it. The readings also assume that a higher score expresses stronger agreement with a motive; this should be confirmed against the questionnaire coding, because the interpretations reverse if 1 denotes the strongest agreement.

Cluster 2:
Key Characteristics:

High job_market: Indicates a strong inclination towards choosing WU Vienna University of Economics and Business due to favorable job market opportunities. High job_areas: Demonstrates a keen interest in a diverse range of potential job areas. High income: Signifies a significant preference for higher income as a motivating factor. Positive requirement: Suggests that considerations for future job requirements are important. Slightly negative interest: Implies a comparatively lower interest in Business and Administration. Positive but close to zero broader_horizon: Indicates a neutral stance on learning something new or broadening horizons.

Cluster 3:
Below-Average Factors Without a Dominant Motive:

Most centers lie moderately below zero (for example academic_title at -0.73 and WU_reputation at -0.58), while further_education (0.40) and no_idea (0.37) are above average. No single motive stands out strongly, so this cluster lacks a distinct preference for any specific motive for studying at WU Vienna University of Economics and Business.

Cluster 1:
Positive Factors with a Focus on Personal Development:

Positive job_market: Shows a preference for studying at WU due to favorable job market opportunities. Positive job_areas: Suggests an interest in a variety of potential job areas. Negative income: Indicates a lower preference for higher income as a motive. Positive requirement: Highlights the importance given to future job requirements. Very high interest: Points to a strong interest in Business and Administration. Very high broader_horizon: Indicates a strong interest in learning something new or broadening horizons. Positive specializations: Reflects a preference for diversity in specialization possibilities.

Cluster 4:
Mixed Preferences:

Slightly negative job_market: Indicates a relatively lower preference for high job market opportunities. Negative job_areas: Points to a lower interest in a variety of potential job areas. Negative income: Suggests a lower preference for higher income. Positive but close to zero requirement: Indicates a roughly average stance on future job requirements. Slightly negative interest: Suggests a relatively lower interest in Business and Administration. Negative broader_horizon: Indicates a lower interest in learning something new or broadening horizons.