---
title: "Bachelor Thesis"
author: "Anna Themistokleous"
date: "2024-05-23"
output:
  html_document: default
  pdf_document: default
---

# Read the study data set from the local CSV export
library(readr)
motivation <- read_csv(file = "~/Downloads/motivation.csv")

## Rows: 14 Columns: 30
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr  (6): Gender, Afdeling_coschap, Previous_Ed, Failed, relevance_coschap, ...
## dbl (24): User_ID, Intrinsic Motivation_Med, Identified Regulation_Med, Intr...
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

library(tidyverse)

## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ purrr     1.0.2
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ ggplot2   3.5.1     ✔ tibble    3.2.1
## ✔ lubridate 1.9.3     ✔ tidyr     1.3.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors

# Load necessary libraries
library(dplyr)
library(tidyr)
library(ggplot2)

# List the variables available in the data set
colnames(motivation)

##  [1] "User_ID"                    "Intrinsic Motivation_Med"  
##  [3] "Identified Regulation_Med"  "Introjected Regulation_Med"
##  [5] "External Regulation_Med"    "Amotivation_Med"           
##  [7] "AM_Med"                     "CM_Med"                    
##  [9] "RAM_Med"                    "Intrinsic Motivation_KNO"  
## [11] "Identified Regulation KNO"  "Introjected Regulation_KNO"
## [13] "External Regulation_KNO"    "Amotivation_KNO"           
## [15] "AM_KNO"                     "CM_KNO"                    
## [17] "RAM_KNO"                    "Level_Completed"           
## [19] "Questions_Attemped"         "Gender"                    
## [21] "Block"                      "Afdeling_coschap"          
## [23] "Previous_Ed"                "Failed"                    
## [25] "relevance_coschap"          "relevance_career"          
## [27] "Both"                       "Questionaire_only"         
## [29] "Questions_only"             "None"

# Count, for each participation category, how many rows are flagged with 1
# (missing flags are ignored)
counts <- motivation %>%
  summarise(
    Questionnaire = sum(Questionaire_only == 1, na.rm = TRUE),
    Both = sum(Both == 1, na.rm = TRUE),
    Questions = sum(Questions_only == 1, na.rm = TRUE),
    None = sum(None == 1, na.rm = TRUE)
  )

# Reshape to one row per category so the counts can be mapped to aesthetics
counts_long <- counts %>%
  pivot_longer(cols = everything(), names_to = "Category", values_to = "Count")

# Fill colour per category
custom_colors <- c("Questionnaire" = "#FFB6C1",   # Baby Pink
                   "Questions" = "#89CFF0",       # Baby Blue
                   "None" = "#98FB98",            # Baby Green
                   "Both" = "#FFFFE0")            # Light Yellow

# Bar chart of the pre-computed counts. geom_col() is the idiomatic form of
# geom_bar(stat = "identity"); no dodging is requested because every category
# has exactly one bar.
ggplot(counts_long, aes(x = Category, y = Count, fill = Category)) +
  geom_col() +
  scale_fill_manual(values = custom_colors) +
  labs(title = "Participant Response Distribution", x = "Category", y = "Count") +
  theme_minimal()

# Plotting library
library(ggplot2)

# Bar chart of the number of participants per gender; the values stored in
# the data ("vrouw", "man") are shown with English axis labels
ggplot(data = motivation, mapping = aes(x = Gender, fill = Gender)) +
  geom_bar() +
  scale_x_discrete(labels = c("vrouw" = "Female", "man" = "Male")) +
  scale_fill_manual(values = c("vrouw" = "pink", "man" = "#89CFF0")) +
  labs(title = "Number of Women and Men", x = "Gender", y = "Count") +
  theme_minimal()

library(scales)

## 
## Attaching package: 'scales'

## The following object is masked from 'package:purrr':
## 
##     discard

## The following object is masked from 'package:readr':
## 
##     col_factor

# Scatter plot of RAM_Med against RAM_KNO. Every row gets its own colour
# (via its row name); the legend is hidden and both axes span -5 to 15.
ggplot(
  data = motivation,
  mapping = aes(
    x = RAM_Med,
    y = RAM_KNO,
    color = as.factor(row.names(motivation))
  )
) +
  geom_point(size = 5) +
  scale_color_discrete() +
  scale_x_continuous(limits = c(-5, 15)) +
  scale_y_continuous(limits = c(-5, 15)) +
  labs(
    title = "Scatterplot of RAM Medicine vs RAM KNO",
    x = "RAM Medicine",
    y = "RAM KNO"
  ) +
  theme_minimal() +
  theme(legend.position = "none")

# Check normality for RAM_Med with a Shapiro-Wilk test.
# The call is written on the raw column because the printed "data:" label
# echoes the expression passed to shapiro.test().
shapiro.test(motivation$RAM_Med)

## 
##  Shapiro-Wilk normality test
## 
## data:  motivation$RAM_Med
## W = 0.94022, p-value = 0.5842

# Check normality for RAM_KNO with a Shapiro-Wilk test.
# The call is written on the raw column because the printed "data:" label
# echoes the expression passed to shapiro.test().
shapiro.test(motivation$RAM_KNO)

## 
##  Shapiro-Wilk normality test
## 
## data:  motivation$RAM_KNO
## W = 0.89273, p-value = 0.2481

Answer: The Shapiro-Wilk tests do not reject normality for either RAM_Med (p = 0.5842) or RAM_KNO (p = 0.2481), as both p-values are greater than 0.05. There is therefore no evidence that either variable deviates from a normal distribution, which suggests that Pearson’s correlation coefficient can be used to assess the correlation between RAM_Med and RAM_KNO.

# Keep the rows in which both RAM scores are present. Calling na.omit() on the
# whole data frame would also discard participants who merely have a missing
# value in an unrelated column, shrinking the sample behind the correlation.
# If the retained row count changes, re-run to refresh the printed results.
motivation_clean <- na.omit(motivation[, c("RAM_Med", "RAM_KNO")])

# Calculate Pearson's correlation
correlation_pearson_clean <- cor(motivation_clean$RAM_Med, motivation_clean$RAM_KNO, method = "pearson")
print(correlation_pearson_clean)

## [1] -0.0809967

# Two-sided significance test of the Pearson correlation on the cleaned rows
test_pearson_clean <- cor.test(
  x = motivation_clean$RAM_Med,
  y = motivation_clean$RAM_KNO,
  method = "pearson"
)
print(test_pearson_clean)

## 
##  Pearson's product-moment correlation
## 
## data:  motivation_clean$RAM_Med and motivation_clean$RAM_KNO
## t = -0.19905, df = 6, p-value = 0.8488
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.7432479  0.6614280
## sample estimates:
##        cor 
## -0.0809967

correlation coefficient: The Pearson correlation coefficient is approximately -0.081. This value is very close to 0, indicating a very weak negative linear relationship between RAM_Med and RAM_KNO

significance test: - The t-value is -0.19905. - The degrees of freedom (df) are 6. - The p-value is 0.8488. - The p-value of 0.8488 is much greater than the common significance level of 0.05. This indicates that the correlation is not statistically significant.

There is no significant correlation/linear relationship between RAM_Med and RAM_KNO

# Label each row by whether its Afdeling_coschap value is "KNO"
motivation$Group <- ifelse(motivation$Afdeling_coschap == "KNO", "KNO", "non-KNO")

# Bar chart of group sizes. The fill colour is mapped to Group and assigned by
# name: passing a bare colour vector to geom_bar() only works while its length
# equals the number of bars, and ties colours to bar order, not to the group.
ggplot(motivation, aes(x = Group, fill = Group)) +
  geom_bar(show.legend = FALSE) +
  scale_fill_manual(values = c("KNO" = "yellow", "non-KNO" = "pink")) +
  labs(x = "Group", y = "Count", title = "Distribution of KNO and non-KNO") +
  theme_minimal()

# (Re)derive the KNO / non-KNO grouping used on the x axis
motivation[["Group"]] <- ifelse(
  motivation[["Afdeling_coschap"]] == "KNO",
  "KNO",
  "non-KNO"
)

# Proportion of each relevance_coschap answer within each group
ggplot(data = motivation, mapping = aes(x = Group, fill = relevance_coschap)) +
  geom_bar(position = "fill") +
  theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
  labs(
    title = "Distribution of relevance by afdeling coschap",
    x = "Group",
    y = "Proportion",
    fill = "relevance coschap"
  )

# Install ggmosaic only when it is missing, so that re-running the analysis
# does not download the package every time
if (!requireNamespace("ggmosaic", quietly = TRUE)) {
  install.packages("ggmosaic", repos = "https://cloud.r-project.org/")
}

## 
## The downloaded binary packages are in
##  /var/folders/3g/ln4t2cvs3250dv7p12p13c_c0000gn/T//RtmpyDG1vC/downloaded_packages

library(ggmosaic)
library(ggplot2)

# Treat department and coschap relevance as categorical variables
motivation[["Afdeling_coschap"]] <- as.factor(motivation[["Afdeling_coschap"]])
motivation[["relevance_coschap"]] <- as.factor(motivation[["relevance_coschap"]])

# Mosaic plot of relevance_coschap within each Afdeling_coschap
ggplot(data = motivation) +
  geom_mosaic(
    mapping = aes(x = product(Afdeling_coschap), fill = relevance_coschap),
    na.rm = TRUE
  ) +
  theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
  labs(x = "Afdeling_coschap", fill = "Relevance_coschap")

# Relevance answers per department, shown as proportions within each bar
ggplot(
  data = motivation,
  mapping = aes(x = Afdeling_coschap, fill = relevance_coschap)
) +
  geom_bar(position = "fill") +
  theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
  labs(x = "Afdeling_coschap", y = "Proportion", fill = "Relevance_coschap")

# Same breakdown, shown as stacked counts
ggplot(
  data = motivation,
  mapping = aes(x = Afdeling_coschap, fill = relevance_coschap)
) +
  geom_bar() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
  labs(x = "Afdeling_coschap", y = "Count", fill = "Relevance_coschap")

library(ggplot2)

# Keep only the two plotted columns and drop rows where either one is missing
motivation_clean <- na.omit(motivation[, c("Afdeling_coschap", "relevance_coschap")])

# Proportions of relevance answers per department (complete rows only)
ggplot(
  data = motivation_clean,
  mapping = aes(x = Afdeling_coschap, fill = relevance_coschap)
) +
  geom_bar(position = "fill") +
  theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
  labs(
    title = "Distribution of relevance by afdeling coschap",
    x = "Afdeling_coschap",
    y = "Proportion",
    fill = "Relevance_coschap"
  )

# Counts of relevance answers per department (complete rows only)
ggplot(
  data = motivation_clean,
  mapping = aes(x = Afdeling_coschap, fill = relevance_coschap)
) +
  geom_bar() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
  labs(
    title = "Distribution of relevance by afdeling coschap",
    x = "Afdeling_coschap",
    y = "Count",
    fill = "Relevance_coschap"
  )

# RAM_KNO by relevance_coschap answer: box plot
ggplot(data = motivation, mapping = aes(x = relevance_coschap, y = RAM_KNO)) +
  geom_boxplot() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
  labs(
    title = "Effect of Relevance Coschap on Motivation to Study KNO",
    x = "Relevance Coschap",
    y = "Motivation to Study KNO (RAM)"
  )

# RAM_KNO by relevance_coschap answer: violin plot
ggplot(data = motivation, mapping = aes(x = relevance_coschap, y = RAM_KNO)) +
  geom_violin() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
  labs(
    title = "Effect of Relevance Coschap on Motivation to Study KNO",
    x = "Relevance Coschap",
    y = "Motivation to Study KNO (RAM)"
  )

# RAM_KNO by relevance_coschap answer: untrimmed violins with a narrow white
# box plot drawn on top of each one
ggplot(data = motivation, mapping = aes(x = relevance_coschap, y = RAM_KNO)) +
  geom_violin(trim = FALSE) +
  geom_boxplot(width = 0.1, fill = "white") +
  theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
  labs(
    title = "Effect of Relevance Coschap on Motivation to Study KNO",
    x = "Relevance Coschap",
    y = "Motivation to Study KNO (RAM)"
  )

library(ggplot2)
motivation[["relevance_coschap"]] <- as.factor(motivation[["relevance_coschap"]])

# AM_KNO per relevance_coschap answer, y axis limited to 0-7
p1 <- ggplot(data = motivation, mapping = aes(x = relevance_coschap, y = AM_KNO)) +
  geom_boxplot() +
  scale_y_continuous(limits = c(0, 7)) +
  theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
  labs(
    title = "Effect of Relevance Coschap on AM_KNO",
    x = "Relevance Coschap",
    y = "Autonomous Motivation for KNO"
  )

# CM_KNO per relevance_coschap answer, y axis limited to 0-7
p2 <- ggplot(data = motivation, mapping = aes(x = relevance_coschap, y = CM_KNO)) +
  geom_boxplot() +
  scale_y_continuous(limits = c(0, 7)) +
  theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
  labs(
    title = "Effect of Relevance Coschap on CM_KNO",
    x = "Relevance Coschap",
    y = "Controlled Motivation for KNO"
  )

# gridExtra is used to display plots side by side; install it only when it is
# missing so that re-running the analysis does not download it every time
if (!requireNamespace("gridExtra", quietly = TRUE)) {
  install.packages("gridExtra", repos = "https://cloud.r-project.org/")
}

## 
## The downloaded binary packages are in
##  /var/folders/3g/ln4t2cvs3250dv7p12p13c_c0000gn/T//RtmpyDG1vC/downloaded_packages

library(gridExtra)

## 
## Attaching package: 'gridExtra'

## The following object is masked from 'package:dplyr':
## 
##     combine

# Show the two box plots next to each other
gridExtra::grid.arrange(p1, p2, ncol = 2)

library(ggplot2)
motivation[["relevance_career"]] <- as.factor(motivation[["relevance_career"]])

# AM_KNO per relevance_career answer, y axis limited to 0-7
p1 <- ggplot(data = motivation, mapping = aes(x = relevance_career, y = AM_KNO)) +
  geom_boxplot() +
  scale_y_continuous(limits = c(0, 7)) +
  theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
  labs(
    title = "Effect of Relevance Career on AM_KNO",
    x = "Relevance Career",
    y = "Autonomous Motivation for KNO"
  )

# CM_KNO per relevance_career answer, y axis limited to 0-7
p2 <- ggplot(data = motivation, mapping = aes(x = relevance_career, y = CM_KNO)) +
  geom_boxplot() +
  scale_y_continuous(limits = c(0, 7)) +
  theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
  labs(
    title = "Effect of Relevance Career on CM_KNO",
    x = "Relevance Career",
    y = "Controlled Motivation for KNO"
  )

# Show the two box plots next to each other
library(gridExtra)
grid.arrange(p1, p2, ncol = 2)

# Install ggbeeswarm only when it is missing, so that re-running the analysis
# does not download the package every time
if (!requireNamespace("ggbeeswarm", quietly = TRUE)) {
  install.packages("ggbeeswarm", repos = "https://cloud.r-project.org/")
}

## 
## The downloaded binary packages are in
##  /var/folders/3g/ln4t2cvs3250dv7p12p13c_c0000gn/T//RtmpyDG1vC/downloaded_packages

library(ggbeeswarm)
library(ggplot2)
library(gridExtra)

# Row index as a factor (used to colour each point) and categorical predictors
motivation[["RowID"]] <- as.factor(seq_len(nrow(motivation)))
motivation[["relevance_coschap"]] <- as.factor(motivation[["relevance_coschap"]])
motivation[["relevance_career"]] <- as.factor(motivation[["relevance_career"]])

# Quasirandom (bee swarm style) points: AM_KNO per relevance_coschap answer
p1 <- ggplot(
  data = motivation,
  mapping = aes(x = relevance_coschap, y = AM_KNO, color = RowID)
) +
  geom_quasirandom(width = 0.2) +
  scale_y_continuous(limits = c(0, 7)) +
  theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
  labs(
    title = "Effect of Relevance Coschap on AM_KNO",
    x = "Relevance Coschap",
    y = "Autonomous Motivation for KNO",
    color = "User ID"
  )

# Quasirandom (bee swarm style) points: CM_KNO per relevance_coschap answer
p2 <- ggplot(
  data = motivation,
  mapping = aes(x = relevance_coschap, y = CM_KNO, color = RowID)
) +
  geom_quasirandom(width = 0.2) +
  scale_y_continuous(limits = c(0, 7)) +
  theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
  labs(
    title = "Effect of Relevance Coschap on CM_KNO",
    x = "Relevance Coschap",
    y = "Controlled Motivation for KNO",
    color = "User ID"
  )

# Show both panels next to each other
grid.arrange(p1, p2, ncol = 2)

# Quasirandom (bee swarm style) points: AM_KNO per relevance_career answer
p3 <- ggplot(
  data = motivation,
  mapping = aes(x = relevance_career, y = AM_KNO, color = RowID)
) +
  geom_quasirandom(width = 0.2) +
  scale_y_continuous(limits = c(0, 7)) +
  theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
  labs(
    title = "Effect of Relevance Career on AM_KNO",
    x = "Relevance Career",
    y = "Autonomous Motivation for KNO",
    color = "User ID"
  )

# Quasirandom (bee swarm style) points: CM_KNO per relevance_career answer
p4 <- ggplot(
  data = motivation,
  mapping = aes(x = relevance_career, y = CM_KNO, color = RowID)
) +
  geom_quasirandom(width = 0.2) +
  scale_y_continuous(limits = c(0, 7)) +
  theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
  labs(
    title = "Effect of Relevance Career on CM_KNO",
    x = "Relevance Career",
    y = "Controlled Motivation for KNO",
    color = "User ID"
  )

# Show both panels next to each other
grid.arrange(p3, p4, ncol = 2)

library(ggbeeswarm)
library(ggplot2)
library(gridExtra)

# Row index as a factor (used to colour each point) and categorical predictors
motivation[["RowID"]] <- as.factor(seq_len(nrow(motivation)))
motivation[["relevance_coschap"]] <- as.factor(motivation[["relevance_coschap"]])
motivation[["relevance_career"]] <- as.factor(motivation[["relevance_career"]])

# Quasirandom (bee swarm style) points: RAM_Med per relevance_coschap answer
p1 <- ggplot(
  data = motivation,
  mapping = aes(x = relevance_coschap, y = RAM_Med, color = RowID)
) +
  geom_quasirandom(width = 0.2) +
  theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
  labs(
    title = "Effect of Relevance Coschap on RAM_Med",
    x = "Relevance Coschap",
    y = "RAM Med",
    color = "User ID"
  )

# Quasirandom (bee swarm style) points: RAM_KNO per relevance_coschap answer
p2 <- ggplot(
  data = motivation,
  mapping = aes(x = relevance_coschap, y = RAM_KNO, color = RowID)
) +
  geom_quasirandom(width = 0.2) +
  theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
  labs(
    title = "Effect of Relevance Coschap on RAM KNO",
    x = "Relevance Coschap",
    y = "RAM for KNO",
    color = "User ID"
  )

# Show both panels next to each other
grid.arrange(p1, p2, ncol = 2)

## Warning: Removed 5 rows containing missing values or values outside the scale range
## (`position_quasirandom()`).

## Warning: Removed 6 rows containing missing values or values outside the scale range
## (`position_quasirandom()`).

# Bee swarm plot for RAM_Med with relevance_career
p3 <- ggplot(data = motivation, aes(x = relevance_career, y = RAM_Med, color = RowID)) +
  geom_quasirandom(width = 0.2) +
  labs(
    title = "Effect of Relevance Career on RAM Med",
    x = "Relevance Career",
    y = "RAM for Med",
    color = "User ID"
  ) +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

# Bee swarm plot for RAM_KNO with relevance_career.
# The title names RAM KNO, matching the plotted y variable (it previously said
# CM_KNO, a leftover from the CM_KNO plot).
p4 <- ggplot(data = motivation, aes(x = relevance_career, y = RAM_KNO, color = RowID)) +
  geom_quasirandom(width = 0.2) +
  labs(
    title = "Effect of Relevance Career on RAM KNO",
    x = "Relevance Career",
    y = "RAM for KNO",
    color = "User ID"
  ) +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

# Display plots side by side
grid.arrange(p3, p4, ncol = 2)

## Warning: Removed 5 rows containing missing values or values outside the scale range
## (`position_quasirandom()`).
## Removed 6 rows containing missing values or values outside the scale range
## (`position_quasirandom()`).

library(ggbeeswarm)
library(ggplot2)
library(gridExtra)

# Row index as a factor (used to colour each point) and categorical predictors
motivation$RowID <- as.factor(seq_len(nrow(motivation)))
motivation$relevance_coschap <- as.factor(motivation$relevance_coschap)
motivation$relevance_career <- as.factor(motivation$relevance_career)

# Bee swarm plot for Questions_Attemped with relevance_coschap
# (title fixed: the word "on" was missing)
p1 <- ggplot(data = motivation, aes(x = relevance_coschap, y = Questions_Attemped, color = RowID)) +
  geom_quasirandom(width = 0.2) +
  labs(
    title = "Effect of Relevance Coschap on App Engagement",
    x = "Relevance Coschap",
    y = "Questions Attempted",
    color = "User ID"
  ) +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

# Bee swarm plot for Level_Completed with relevance_coschap
p2 <- ggplot(data = motivation, aes(x = relevance_coschap, y = Level_Completed, color = RowID)) +
  geom_quasirandom(width = 0.2) +
  labs(
    title = "Effect of Relevance Coschap on App Engagement",
    x = "Relevance Coschap",
    y = "Levels Completed",
    color = "User ID"
  ) +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

# Display plots side by side
grid.arrange(p1, p2, ncol = 2)

# Quasirandom (bee swarm style) points: Questions_Attemped per
# relevance_career answer
p3 <- ggplot(
  data = motivation,
  mapping = aes(x = relevance_career, y = Questions_Attemped, color = RowID)
) +
  geom_quasirandom(width = 0.2) +
  theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
  labs(
    title = "Effect of Relevance Career on App Engagement",
    x = "Relevance Career",
    y = "Questions Attempted",
    color = "User ID"
  )

# Quasirandom (bee swarm style) points: Level_Completed per
# relevance_career answer
p4 <- ggplot(
  data = motivation,
  mapping = aes(x = relevance_career, y = Level_Completed, color = RowID)
) +
  geom_quasirandom(width = 0.2) +
  theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
  labs(
    title = "Effect of Relevance Career on App Engagement",
    x = "Relevance Career",
    y = "Levels Completed",
    color = "User ID"
  )

# Show both panels next to each other
grid.arrange(p3, p4, ncol = 2)

# Install forcats only when it is missing, so that re-running the analysis
# does not download the package every time
if (!requireNamespace("forcats", quietly = TRUE)) {
  install.packages("forcats", repos = "https://cloud.r-project.org/")
}

## 
## The downloaded binary packages are in
##  /var/folders/3g/ln4t2cvs3250dv7p12p13c_c0000gn/T//RtmpyDG1vC/downloaded_packages

# Load the necessary libraries
library(dplyr)
library(forcats)

# Modelling subset: the six variables used below, complete rows only
motivation_cool <- na.omit(motivation[, c("relevance_coschap", "RAM_Med", "RAM_KNO", "relevance_career","Questions_Attemped", "Level_Completed")])

# Merge the two negative and the two positive answers of both relevance
# questions into "No" and "Yes"; any other answer keeps its own level
motivation_cool <- motivation_cool %>%
  mutate(across(
    c(relevance_coschap, relevance_career),
    function(answers) {
      fct_collapse(
        answers,
        "No" = c("Niet", "Waarschijnlijk niet"),
        "Yes" = c("Waarschijnlijk wel", "Zeker wel")
      )
    }
  ))

# Check how many rows fall in each collapsed category
table(motivation_cool$relevance_coschap)

## 
## Geen idee/neutraal                 No                Yes 
##                  1                  4                  3

# Number of rows per collapsed relevance_career category
table(motivation_cool$relevance_career)

## 
## Geen idee/neutraal                 No                Yes 
##                  2                  3                  3

# Linear model of Level_Completed on both RAM scores and both collapsed
# relevance factors
model_level <- lm(
  formula = Level_Completed ~ RAM_Med + RAM_KNO + relevance_coschap + relevance_career,
  data = motivation_cool
)
summary(model_level)

## 
## Call:
## lm(formula = Level_Completed ~ RAM_Med + RAM_KNO + relevance_coschap + 
##     relevance_career, data = motivation_cool)
## 
## Residuals:
##          1          2          3          4          5          6          7 
## -2.031e+00  3.771e-01 -4.577e-01 -2.031e+00  2.489e+00  1.636e-15  2.031e+00 
##          8 
## -3.771e-01 
## 
## Coefficients:
##                      Estimate Std. Error t value Pr(>|t|)
## (Intercept)          -15.8854    14.9195  -1.065    0.480
## RAM_Med                0.8007     0.9092   0.881    0.540
## RAM_KNO                0.8273     0.7466   1.108    0.467
## relevance_coschapNo    7.6523     6.4116   1.194    0.444
## relevance_coschapYes   0.9407     6.3178   0.149    0.906
## relevance_careerNo    11.6306     8.7332   1.332    0.410
## relevance_careerYes    3.6319     6.8596   0.529    0.690
## 
## Residual standard error: 4.366 on 1 degrees of freedom
## Multiple R-squared:  0.7879, Adjusted R-squared:  -0.4849 
## F-statistic: 0.619 on 6 and 1 DF,  p-value: 0.7492

# Load necessary libraries
library(ggplot2)
library(car)

## Loading required package: carData

## 
## Attaching package: 'car'

## The following object is masked from 'package:dplyr':
## 
##     recode

## The following object is masked from 'package:purrr':
## 
##     some

library(coefplot)

# Refit the four-predictor model of Level_Completed used by the plots below
model_level <- lm(
  formula = Level_Completed ~ RAM_Med + RAM_KNO + relevance_coschap + relevance_career,
  data = motivation_cool
)

# Set the CRAN mirror used by any installation below
options(repos = c(CRAN = "https://cloud.r-project.org/"))

# Install the 'car' package only when it is missing, so that re-running the
# analysis does not download it every time
if (!requireNamespace("car", quietly = TRUE)) {
  install.packages("car")
}

## 
## The downloaded binary packages are in
##  /var/folders/3g/ln4t2cvs3250dv7p12p13c_c0000gn/T//RtmpyDG1vC/downloaded_packages

# Repeated install request for 'car': guarded so that it does nothing when
# the package is already available
if (!requireNamespace("car", quietly = TRUE)) {
  install.packages("car")
}

## 
## The downloaded binary packages are in
##  /var/folders/3g/ln4t2cvs3250dv7p12p13c_c0000gn/T//RtmpyDG1vC/downloaded_packages

# Install 'coefplot' only when it is missing, so that re-running the analysis
# does not download it every time
if (!requireNamespace("coefplot", quietly = TRUE)) {
  install.packages("coefplot")
}

## 
## The downloaded binary packages are in
##  /var/folders/3g/ln4t2cvs3250dv7p12p13c_c0000gn/T//RtmpyDG1vC/downloaded_packages

library(ggplot2)
library(car)
library(coefplot)

# Diagnostic plots in a 2 x 2 grid. par() returns the previous settings, which
# are restored afterwards so the grid layout does not leak into later
# base-graphics plots.
old_par <- par(mfrow = c(2, 2))
plot(model_level)
par(old_par)

# Partial regression plots
avPlots(model_level)

# Coefficient plot
coefplot(model_level, intercept = FALSE)

Coefficients: - Intercept: The intercept is -15.8854. It represents the estimated value of Level_Completed when RAM_Med and RAM_KNO are zero and both relevance variables are at their reference level (“Geen idee/neutraal”). - RAM_Med: For a one-unit increase in RAM_Med, Level_Completed is estimated to increase by 0.8007 units. - RAM_KNO: For a one-unit increase in RAM_KNO, Level_Completed is estimated to increase by 0.8273 units. - relevance_coschapNo: Compared to the reference level “Geen idee/neutraal”, when relevance_coschap is “No”, Level_Completed is estimated to increase by 7.6523 units. - relevance_coschapYes: This coefficient (0.9407, also relative to “Geen idee/neutraal”) is not statistically significant (p-value > 0.05), so we cannot conclude that there is a significant effect of relevance_coschap being “Yes” on Level_Completed. - relevance_careerNo: Compared to the reference level “Geen idee/neutraal”, when relevance_career is “No”, Level_Completed is estimated to increase by 11.6306 units. - relevance_careerYes: This coefficient (3.6319, also relative to “Geen idee/neutraal”) is not statistically significant (p-value > 0.05), so we cannot conclude that there is a significant effect of relevance_career being “Yes” on Level_Completed. Note that none of the coefficients in this model reaches significance at the 0.05 level.

Multiple R squared: 0.7879, indicating that approximately 78.79% of the variance in Level_Completed is explained by the predictors.

Adjusted R squared: This is the R-squared value adjusted for the number of predictors in the model. It penalizes the addition of unnecessary predictors. In this case, it is -0.4849, which suggests that the model may be overfitting or that some predictors are not contributing meaningfully to explaining the variance in Level_Completed.

F statistic and p-value: This tests whether the overall regression model is statistically significant. In this case, the F-statistic is 0.619 with a p-value of 0.7492, suggesting that the overall model is not statistically significant at the conventional significance level of 0.05.

Partial Regression Plot: Linearity Assumption: the relationship between each predictor variable and the response variable is adequately represented by a straight-line model, even after accounting for the effects of other predictors in the model

Model Validity: The linear relationship observed in the partial regression plots suggests that the model adequately fits the data. It indicates that the model captures the essential features of the relationship between the predictors and the response variable.

Interpretability: The straight-line pattern enhances the interpretability of the model. It implies that the effects of predictor variables on the response variable are consistent and linear, making it easier to understand and interpret the model coefficients.

Predictive Power: The linear relationship observed in the partial regression plots implies that the model’s predictive capabilities are reliable. It suggests that the model can accurately predict the response variable based on the values of the predictor variables, within the range of observed data.

Coefficient Plot: Magnitude: Higher absolute values suggest a stronger effect of the predictor variable on the response variable. Confidence Intervals: If the confidence interval includes zero, it indicates that the coefficient estimate is not statistically significant, and the predictor variable may not have a significant effect on the response variable.

Assessing the model: If the confidence intervals for certain coefficients are wide or include zero, it may indicate uncertainty in the estimation process or lack of precision in estimating the true population coefficients.

# Linear model of Questions_Attemped on both RAM scores and both collapsed
# relevance factors
model_questions <- lm(
  formula = Questions_Attemped ~ RAM_Med + RAM_KNO + relevance_coschap + relevance_career,
  data = motivation_cool
)
summary(model_questions)

## 
## Call:
## lm(formula = Questions_Attemped ~ RAM_Med + RAM_KNO + relevance_coschap + 
##     relevance_career, data = motivation_cool)
## 
## Residuals:
##          1          2          3          4          5          6          7 
## -5.002e+01  9.285e+00 -1.127e+01 -5.002e+01  6.129e+01  2.862e-14  5.002e+01 
##          8 
## -9.285e+00 
## 
## Coefficients:
##                      Estimate Std. Error t value Pr(>|t|)
## (Intercept)          -449.573    367.392  -1.224    0.436
## RAM_Med                25.063     22.389   1.119    0.464
## RAM_KNO                25.812     18.385   1.404    0.394
## relevance_coschapNo   189.489    157.885   1.200    0.442
## relevance_coschapYes    3.431    155.575   0.022    0.986
## relevance_careerNo    316.184    215.055   1.470    0.380
## relevance_careerYes    88.992    168.919   0.527    0.691
## 
## Residual standard error: 107.5 on 1 degrees of freedom
## Multiple R-squared:  0.8274, Adjusted R-squared:  -0.2085 
## F-statistic: 0.7987 on 6 and 1 DF,  p-value: 0.694

library(ggplot2)
library(car)
library(coefplot)

# Diagnostic plots in a 2 x 2 grid. par() returns the previous settings, which
# are restored afterwards so the grid layout does not leak into later
# base-graphics plots.
old_par <- par(mfrow = c(2, 2))
plot(model_questions)
par(old_par)

# Partial regression plots
avPlots(model_questions)

# Coefficient plot
coefplot(model_questions, intercept = FALSE)

Insufficient evidence to suggest that any predictor has an effect on Questions Attempted. Approximately 82.74% of the variance in Questions_Attempted is explained by the predictor variables in the model. The adjusted R squared is -0.2085, which is negative. This suggests that the model may be overfitting the data or that the predictors in the model do not improve the model’s fit compared to a model with no predictors.

# Four-predictor model of Level_Completed
model_level <- lm(
  formula = Level_Completed ~ RAM_Med + RAM_KNO + relevance_coschap + relevance_career,
  data = motivation_cool
)

# Variance inflation factors of its predictors, computed and printed
vif_values <- vif(model_level)
print(vif_values)

##                        GVIF Df GVIF^(1/(2*Df))
## RAM_Med            3.643833  1        1.908883
## RAM_KNO            3.535269  1        1.880231
## relevance_coschap  5.229461  2        1.512217
## relevance_career  11.977425  2        1.860334

# Four-predictor model of Questions_Attemped
model_questions <- lm(
  formula = Questions_Attemped ~ RAM_Med + RAM_KNO + relevance_coschap + relevance_career,
  data = motivation_cool
)

# Variance inflation factors of its predictors, computed and printed
vif_values <- vif(model_questions)
print(vif_values)

##                        GVIF Df GVIF^(1/(2*Df))
## RAM_Med            3.643833  1        1.908883
## RAM_KNO            3.535269  1        1.880231
## relevance_coschap  5.229461  2        1.512217
## relevance_career  11.977425  2        1.860334

Interpretation of VIF: VIF = 1: no correlation between the predictor and the other predictors. 1 < VIF < 5: moderate correlation. VIF > 5 (or > 10, depending on the threshold you use): high correlation, indicating multicollinearity.

RAM_Med and RAM_KNO have moderate multicollinearity. The GVIF^(1/(2*Df)) values are approximately 1.9, suggesting that multicollinearity is not severe for these variables.

Relevance Coschap: moderate multicollinearity. The GVIF^(1/(2*Df)) value is around 1.51, suggesting some level of multicollinearity but not severe.

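Relevance Career: strong multicollinearity according to the raw GVIF, which is approximately 11.98 and exceeds the threshold of 10. The GVIF^(1/(2*Df)) value, which adjusts for the two degrees of freedom of this factor, is approximately 1.86 and therefore in the same range as the other predictors. Multicollinearity is present for this variable and should be addressed.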

(Multicollinearity refers to a situation in which two or more predictor variables in a regression model are highly correlated with each other. )

# Load the necessary libraries
library(dplyr)
library(forcats)

# Rebuild the modelling subset: the six variables used below, complete rows only
motivation_cool <- na.omit(motivation[, c("relevance_coschap", "RAM_Med", "RAM_KNO", "relevance_career","Questions_Attemped", "Level_Completed")])

# Merge the two negative and the two positive answers of both relevance
# questions into "No" and "Yes"; any other answer keeps its own level
motivation_cool <- motivation_cool %>%
  mutate(across(
    c(relevance_coschap, relevance_career),
    function(answers) {
      fct_collapse(
        answers,
        "No" = c("Niet", "Waarschijnlijk niet"),
        "Yes" = c("Waarschijnlijk wel", "Zeker wel")
      )
    }
  ))


# Reduced model of Level_Completed: relevance_career is left out
model_level <- lm(
  formula = Level_Completed ~ RAM_Med + RAM_KNO + relevance_coschap,
  data = motivation_cool
)
summary(model_level)

## 
## Call:
## lm(formula = Level_Completed ~ RAM_Med + RAM_KNO + relevance_coschap, 
##     data = motivation_cool)
## 
## Residuals:
##          1          2          3          4          5          6          7 
##  6.896e-02  3.030e+00 -1.594e+00 -1.862e+00  1.525e+00 -5.471e-16  3.868e+00 
##          8 
## -5.036e+00 
## 
## Coefficients:
##                      Estimate Std. Error t value Pr(>|t|)
## (Intercept)           0.83724    6.23916   0.134    0.902
## RAM_Med              -0.08303    0.54485  -0.152    0.889
## RAM_KNO               0.06654    0.46867   0.142    0.896
## relevance_coschapNo   4.84781    5.74113   0.844    0.460
## relevance_coschapYes  1.97732    5.83595   0.339    0.757
## 
## Residual standard error: 4.391 on 3 degrees of freedom
## Multiple R-squared:  0.3564, Adjusted R-squared:  -0.5017 
## F-statistic: 0.4153 on 4 and 3 DF,  p-value: 0.7923

# Reduced model of Questions_Attemped: relevance_career is left out
model_questions <- lm(
  formula = Questions_Attemped ~ RAM_Med + RAM_KNO + relevance_coschap,
  data = motivation_cool
)
summary(model_questions)

## 
## Call:
## lm(formula = Questions_Attemped ~ RAM_Med + RAM_KNO + relevance_coschap, 
##     data = motivation_cool)
## 
## Residuals:
##          1          2          3          4          5          6          7 
##  1.178e+01  8.431e+01 -4.783e+01 -4.804e+01  3.604e+01 -3.058e-14  9.915e+01 
##          8 
## -1.354e+02 
## 
## Coefficients:
##                      Estimate Std. Error t value Pr(>|t|)
## (Intercept)            0.8236   166.7439   0.005    0.996
## RAM_Med                1.3062    14.5614   0.090    0.934
## RAM_KNO                4.3359    12.5254   0.346    0.752
## relevance_coschapNo  115.7817   153.4339   0.755    0.505
## relevance_coschapYes  27.8012   155.9680   0.178    0.870
## 
## Residual standard error: 117.4 on 3 degrees of freedom
## Multiple R-squared:  0.3831, Adjusted R-squared:  -0.4395 
## F-statistic: 0.4657 on 4 and 3 DF,  p-value: 0.763

# Single-predictor model: Level_Completed on RAM_KNO only.
# Note: this reassigns model_level, replacing the model fitted earlier.
model_level <- lm(Level_Completed ~ RAM_KNO, data = motivation_cool)
# Print residuals, coefficient table and overall fit statistics
summary(model_level)

## 
## Call:
## lm(formula = Level_Completed ~ RAM_KNO, data = motivation_cool)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -3.1855 -2.1506 -0.8909  0.5113  6.6307 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)
## (Intercept)   2.7191     1.4879   1.828    0.117
## RAM_KNO       0.2915     0.3312   0.880    0.413
## 
## Residual standard error: 3.642 on 6 degrees of freedom
## Multiple R-squared:  0.1143, Adjusted R-squared:  -0.03328 
## F-statistic: 0.7746 on 1 and 6 DF,  p-value: 0.4127

# Single-predictor model: Questions_Attemped on RAM_KNO only.
# Note: this reassigns model_questions, replacing the model fitted earlier.
model_questions <- lm(Questions_Attemped~ RAM_KNO, data = motivation_cool)
# Print residuals, coefficient table and overall fit statistics
summary(model_questions)

## 
## Call:
## lm(formula = Questions_Attemped ~ RAM_KNO, data = motivation_cool)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -84.85 -50.57 -33.33  20.42 160.19 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)
## (Intercept)   69.400     39.355   1.763    0.128
## RAM_KNO        9.656      8.761   1.102    0.313
## 
## Residual standard error: 96.34 on 6 degrees of freedom
## Multiple R-squared:  0.1683, Adjusted R-squared:  0.02974 
## F-statistic: 1.215 on 1 and 6 DF,  p-value: 0.3127

# Two-predictor model: Level_Completed on RAM_Med and RAM_KNO
# (relevance_coschap left out). Reassigns model_level once more.
model_level <- lm(Level_Completed ~ RAM_Med + RAM_KNO, data = motivation_cool)
# Print residuals, coefficient table and overall fit statistics
summary(model_level)

## 
## Call:
## lm(formula = Level_Completed ~ RAM_Med + RAM_KNO, data = motivation_cool)
## 
## Residuals:
##       1       2       3       4       5       6       7       8 
## -0.4057  4.3195 -1.7559 -1.9753 -0.5722 -2.2751  5.9072 -3.2424 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)
## (Intercept)   4.4863     4.3815   1.024    0.353
## RAM_Med      -0.1857     0.4287  -0.433    0.683
## RAM_KNO       0.2790     0.3574   0.781    0.470
## 
## Residual standard error: 3.917 on 5 degrees of freedom
## Multiple R-squared:  0.1464, Adjusted R-squared:  -0.1951 
## F-statistic: 0.4287 on 2 and 5 DF,  p-value: 0.6732

# Two-predictor model: Questions_Attemped on RAM_Med and RAM_KNO
# (relevance_coschap left out). Reassigns model_questions once more.
model_questions <- lm(Questions_Attemped ~ RAM_Med + RAM_KNO, data = motivation_cool)
# Print residuals, coefficient table and overall fit statistics
summary(model_questions)

## 
## Call:
## lm(formula = Questions_Attemped ~ RAM_Med + RAM_KNO, data = motivation_cool)
## 
## Residuals:
##      1      2      3      4      5      6      7      8 
## -10.59 127.72 -56.40 -50.40 -26.60 -48.53 150.42 -85.62 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)
## (Intercept)   93.244    117.493   0.794    0.463
## RAM_Med       -2.506     11.496  -0.218    0.836
## RAM_KNO        9.486      9.584   0.990    0.368
## 
## Residual standard error: 105 on 5 degrees of freedom
## Multiple R-squared:  0.1762, Adjusted R-squared:  -0.1534 
## F-statistic: 0.5346 on 2 and 5 DF,  p-value: 0.616

# Regress Level_Completed on the two relevance factors
# (relevance_coschap and relevance_career) in motivation_cool.
# Each factor contributes a "No" and a "Yes" coefficient against its
# reference level, leaving 3 residual degrees of freedom (see the output).
model_relevance_level <- lm(Level_Completed ~ relevance_coschap + relevance_career, data = motivation_cool)
# Print residuals, coefficient table and overall fit statistics
summary(model_relevance_level)

## 
## Call:
## lm(formula = Level_Completed ~ relevance_coschap + relevance_career, 
##     data = motivation_cool)
## 
## Residuals:
##          1          2          3          4          5          6          7 
## -1.429e+00  4.000e+00 -1.286e+00 -1.429e+00  2.714e+00  1.911e-15  1.429e+00 
##          8 
## -4.000e+00 
## 
## Coefficients:
##                      Estimate Std. Error t value Pr(>|t|)
## (Intercept)            -3.571      5.899  -0.605    0.588
## relevance_coschapNo     7.571      5.190   1.459    0.241
## relevance_coschapYes    3.429      5.190   0.661    0.556
## relevance_careerNo      3.571      4.368   0.818    0.473
## relevance_careerYes     1.429      4.368   0.327    0.765
## 
## Residual standard error: 3.964 on 3 degrees of freedom
## Multiple R-squared:  0.4755, Adjusted R-squared:  -0.2239 
## F-statistic: 0.6798 on 4 and 3 DF,  p-value: 0.6508

# Regress Questions_Attemped on the two relevance factors
# (relevance_coschap and relevance_career) in motivation_cool.
model_relevance_questions <- lm(Questions_Attemped ~ relevance_coschap + relevance_career, data = motivation_cool)
# Print residuals, coefficient table and overall fit statistics
summary(model_relevance_questions)

## 
## Call:
## lm(formula = Questions_Attemped ~ relevance_coschap + relevance_career, 
##     data = motivation_cool)
## 
## Residuals:
##          1          2          3          4          5          6          7 
## -3.129e+01  1.225e+02 -3.686e+01 -3.129e+01  6.814e+01  5.049e-14  3.129e+01 
##          8 
## -1.225e+02 
## 
## Coefficients:
##                      Estimate Std. Error t value Pr(>|t|)
## (Intercept)            -64.21     169.56  -0.379    0.730
## relevance_coschapNo    186.71     149.19   1.252    0.299
## relevance_coschapYes    81.29     149.19   0.545    0.624
## relevance_careerNo      64.21     125.56   0.511    0.644
## relevance_careerYes     19.79     125.56   0.158    0.885
## 
## Residual standard error: 113.9 on 3 degrees of freedom
## Multiple R-squared:  0.4183, Adjusted R-squared:  -0.3572 
## F-statistic: 0.5394 on 4 and 3 DF,  p-value: 0.722

# Regress RAM_Med on the two relevance factors
# (relevance_coschap and relevance_career) in motivation_cool.
model_ram_med <- lm(RAM_Med ~ relevance_coschap + relevance_career, data = motivation_cool)
# Print residuals, coefficient table and overall fit statistics
summary(model_ram_med)

## 
## Call:
## lm(formula = RAM_Med ~ relevance_coschap + relevance_career, 
##     data = motivation_cool)
## 
## Residuals:
##          1          2          3          4          5          6          7 
## -8.857e-01  2.200e+00  3.143e+00 -8.857e-01 -2.257e+00  1.838e-15  8.857e-01 
##          8 
## -2.200e+00 
## 
## Coefficients:
##                      Estimate Std. Error t value Pr(>|t|)  
## (Intercept)            14.486      4.465   3.245   0.0477 *
## relevance_coschapNo    -3.186      3.928  -0.811   0.4768  
## relevance_coschapYes    2.886      3.928   0.735   0.5158  
## relevance_careerNo     -6.486      3.306  -1.962   0.1446  
## relevance_careerYes    -5.814      3.306  -1.759   0.1769  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 3 on 3 degrees of freedom
## Multiple R-squared:  0.6787, Adjusted R-squared:  0.2502 
## F-statistic: 1.584 on 4 and 3 DF,  p-value: 0.3676

# Regress RAM_KNO on the two relevance factors
# (relevance_coschap and relevance_career) in motivation_cool.
model_ram_kno <- lm(RAM_KNO ~ relevance_coschap + relevance_career, data = motivation_cool)
# Print residuals, coefficient table and overall fit statistics
summary(model_ram_kno)

## 
## Call:
## lm(formula = RAM_KNO ~ relevance_coschap + relevance_career, 
##     data = motivation_cool)
## 
## Residuals:
##          1          2          3          4          5          6          7 
##  1.586e+00  2.250e+00 -4.043e+00  1.586e+00  2.457e+00 -1.979e-15 -1.586e+00 
##          8 
## -2.250e+00 
## 
## Coefficients:
##                      Estimate Std. Error t value Pr(>|t|)
## (Intercept)            0.8643     5.4370   0.159    0.884
## relevance_coschapNo    2.9857     4.7839   0.624    0.577
## relevance_coschapYes   0.2143     4.7839   0.045    0.967
## relevance_careerNo    -3.4643     4.0263  -0.860    0.453
## relevance_careerYes    2.9643     4.0263   0.736    0.515
## 
## Residual standard error: 3.654 on 3 degrees of freedom
## Multiple R-squared:  0.6688, Adjusted R-squared:  0.2272 
## F-statistic: 1.514 on 4 and 3 DF,  p-value: 0.3818

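None of the predictors in the regression models above is statistically significant at the 0.05 level; only the intercept of the RAM_Med model has p < 0.05 (p = 0.0477).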

# Load necessary libraries
library(ggplot2)

# Amotivation for KNO against the number of levels completed;
# one point per row of `motivation`, coloured by RowID
ggplot(
  data = motivation,
  mapping = aes(Amotivation_KNO, Level_Completed, colour = RowID)
) +
  geom_point() +
  xlab("Amotivation for KNO") +
  ylab("Level Completed") +
  labs(title = "Scatter Plot of Amotivation for KNO vs. Level Completed")

# Amotivation for KNO against the number of questions attempted;
# same layout as the plot above
ggplot(
  data = motivation,
  mapping = aes(Amotivation_KNO, Questions_Attemped, colour = RowID)
) +
  geom_point() +
  xlab("Amotivation for KNO") +
  ylab("Questions Attempted") +
  labs(title = "Scatter Plot of Amotivation for KNO vs. Questions Attempted")

library(dplyr)


# Simple linear regression of Questions_Attemped on Amotivation_KNO using the
# full `motivation` data; rows with missing values are dropped by lm()
# (the summary reports 6 observations deleted due to missingness).
# NOTE(review): cbind() wraps a single response here; the printed summary is
# that of an ordinary single-response fit, so the plain formula
# `Questions_Attemped ~ Amotivation_KNO` would presumably suffice - confirm.
model <- lm(cbind(Questions_Attemped) ~ Amotivation_KNO, data = motivation)

# View the summary of the model
summary(model)

## 
## Call:
## lm(formula = cbind(Questions_Attemped) ~ Amotivation_KNO, data = motivation)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -88.44 -58.60 -41.58  62.15 150.92 
## 
## Coefficients:
##                 Estimate Std. Error t value Pr(>|t|)
## (Intercept)       168.01      87.04   1.930    0.102
## Amotivation_KNO   -29.57      30.69  -0.963    0.373
## 
## Residual standard error: 98.31 on 6 degrees of freedom
##   (6 observations deleted due to missingness)
## Multiple R-squared:  0.134,  Adjusted R-squared:  -0.01036 
## F-statistic: 0.9282 on 1 and 6 DF,  p-value: 0.3725

There is insufficient evidence (p = 0.373) to support the hypothesis that the degree of Amotivation_KNO influences the number of questions attempted.

library(dplyr)

model <- lm(cbind(Level_Completed) ~ Amotivation_KNO, data = motivation)

# View the summary of the model
summary(model)

There is insufficient evidence to support the hypothesis that the degree of Amotivation_KNO influences the level completed.

# Set the working directory to the "bachelor project" folder on the Desktop
# (the path is relative to the home directory via "~").
# NOTE(review): setwd() ties the document to one machine's folder layout;
# consider project-relative paths instead.
setwd("~/Desktop/bachelor project")
# Verify the working directory
getwd()

## [1] "/Users/annathemistokleous/Desktop/bachelor project"

library(dplyr)

# Subset the dataset for female and male students
female_data <- filter(motivation, Gender == "vrouw")
male_data <- filter(motivation, Gender == "man")

# One-sided t-test for AM_Med. The plots in this file label AM_* as autonomous
# motivation (not intrinsic motivation, which has its own column).
# alternative = "greater" tests whether the female mean exceeds the male mean.
t_test_AM_Med <- t.test(female_data$AM_Med, male_data$AM_Med, alternative = "greater")

# One-sided t-test for CM_Med. The plots in this file label CM_* as controlled
# motivation. alternative = "less" tests whether the female mean is below the
# male mean.
t_test_CM_Med <- t.test(female_data$CM_Med, male_data$CM_Med, alternative = "less")

# Print the results
print(t_test_AM_Med)

## 
##  Welch Two Sample t-test
## 
## data:  female_data$AM_Med and male_data$AM_Med
## t = -2.2773, df = 6, p-value = 0.9685
## alternative hypothesis: true difference in means is greater than 0
## 95 percent confidence interval:
##  -1.231099       Inf
## sample estimates:
## mean of x mean of y 
##  5.685714  6.350000

# Show the one-sided test result for CM_Med
print(t_test_CM_Med)

## 
##  Welch Two Sample t-test
## 
## data:  female_data$CM_Med and male_data$CM_Med
## t = 0.37557, df = 1.2435, p-value = 0.6194
## alternative hypothesis: true difference in means is less than 0
## 95 percent confidence interval:
##      -Inf 3.533005
## sample estimates:
## mean of x mean of y 
##  2.742857  2.475000

For the first test, the p-value (0.9685) is greater than the significance level (typically 0.05), so we fail to reject the null hypothesis. Because the test is one-sided (alternative = "greater"), this means there is insufficient evidence to conclude that the mean level of autonomous motivation (AM_Med) is higher for female than for male students. The one-sided confidence interval for the true difference in means (-1.231099 to Inf) also contains zero, which is consistent with this conclusion.

Additionally, the sample means are as follows:

Mean of AM_Med for female students: 5.685714

Mean of AM_Med for male students: 6.35

These results indicate that, on average, male students have slightly higher levels of autonomous motivation than female students. The one-sided test above only asks whether female students score higher, so it provides no evidence about a difference in this opposite direction.

For the second test, the p-value (0.6194) is greater than the significance level (typically 0.05), so we fail to reject the null hypothesis. Because the test is one-sided (alternative = "less"), this means there is insufficient evidence to conclude that the mean level of controlled motivation (CM_Med) is lower for female than for male students. The one-sided confidence interval for the true difference in means (-Inf to 3.533005) contains zero, which is consistent with this conclusion.

Additionally, the sample means are as follows:

Mean of CM_Med for female students: 2.742857

Mean of CM_Med for male students: 2.475

These results indicate that, on average, female students have slightly higher levels of controlled motivation than male students, but the difference is not statistically significant.

# Subset the data for female and male students.
# %in% never returns NA, so rows with a missing Gender are dropped instead of
# being carried along as all-NA rows (which `==` inside `[` would produce).
female_data <- motivation[motivation$Gender %in% "vrouw", ]
male_data <- motivation[motivation$Gender %in% "man", ]

# Perform the two-sided independent samples t-test on AM_Med
t_test_result <- t.test(female_data$AM_Med, male_data$AM_Med, alternative = "two.sided")

# Display the results
t_test_result

## 
##  Welch Two Sample t-test
## 
## data:  female_data$AM_Med and male_data$AM_Med
## t = -2.2773, df = 6, p-value = 0.06303
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -1.3780337  0.0494623
## sample estimates:
## mean of x mean of y 
##  5.685714  6.350000

Since the p-value (0.06303) is greater than the typical significance level of 0.05, we fail to reject the null hypothesis. There is insufficient evidence to conclude that there is a statistically significant difference in autonomous motivation (AM_Med) between male and female students at the 0.05 significance level.

However, it’s worth noting that the p-value is relatively close to 0.05, indicating a potential trend towards significance.

Null hypothesis: there is no difference in mean autonomous motivation for KNO (AM_KNO) between males and females.

Alternative hypothesis: there is a difference in mean autonomous motivation for KNO (AM_KNO) between males and females.

# Subset the data for female and male students.
# %in% never returns NA, so rows with a missing Gender are dropped instead of
# being carried along as all-NA rows (which `==` inside `[` would produce).
female_data <- motivation[motivation$Gender %in% "vrouw", ]
male_data <- motivation[motivation$Gender %in% "man", ]

# Perform the two-sided independent samples t-test on AM_KNO
t_test_result <- t.test(female_data$AM_KNO, male_data$AM_KNO, alternative = "two.sided")

# Display the results
t_test_result

## 
##  Welch Two Sample t-test
## 
## data:  female_data$AM_KNO and male_data$AM_KNO
## t = -1.8235, df = 4.8717, p-value = 0.1294
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -1.8762098  0.3262098
## sample estimates:
## mean of x mean of y 
##     3.875     4.650

Since the p-value (0.1294) is greater than the typical significance level of 0.05, we fail to reject the null hypothesis. There is insufficient evidence to conclude that there is a statistically significant difference in autonomous motivation for KNO (AM_KNO) between male and female students at the 0.05 significance level.

# One-sided independent samples t-test for AM_KNO (autonomous motivation for
# KNO): alternative = "greater" tests whether the female mean exceeds the
# male mean.
t_test_AM_KNO <- t.test(female_data$AM_KNO, male_data$AM_KNO, alternative = "greater")

# One-sided independent samples t-test for CM_KNO (controlled motivation for
# KNO): alternative = "less" tests whether the female mean is below the
# male mean.
t_test_CM_KNO <- t.test(female_data$CM_KNO, male_data$CM_KNO, alternative = "less")

# Print the KNO results. This chunk previously printed t_test_AM_Med and
# t_test_CM_Med, so the two KNO tests computed above were never shown.
print(t_test_AM_KNO)

print(t_test_CM_KNO)

# NOTE: re-knit the document to regenerate the output of the two KNO tests;
# the output formerly shown here was a repeat of the AM_Med / CM_Med results.

WE FAIL TO REJECT THE NULL HYPOTHESIS

# Load the necessary libraries
library(dplyr)
library(forcats)

# Keep the two columns of interest, drop incomplete rows, then recode:
#   relevance_coschap -> "No" / "Yes" (other answers keep their own level)
#   Afdeling_coschap  -> "KNO" versus "NON-KNO"
motivation_clean <- motivation[, c("Afdeling_coschap", "relevance_coschap")] %>%
  na.omit() %>%
  mutate(
    relevance_coschap = fct_collapse(
      relevance_coschap,
      "No" = "Waarschijnlijk niet",
      "Yes" = c("Waarschijnlijk wel", "Zeker wel")
    ),
    Afdeling_coschap = fct_collapse(
      Afdeling_coschap,
      "KNO" = "KNO",
      "NON-KNO" = c(
        "Plastische Chirugie", "Chirurgie", "nog niet bekend",
        "Weet ik nog niet", "Radiotherapie", "IC"
      )
    )
  )

# Stacked bars scaled to proportions: share of each relevance answer per group
ggplot(motivation_clean, aes(Afdeling_coschap, fill = relevance_coschap)) +
  geom_bar(position = "fill") +
  scale_fill_manual(
    values = c(
      "No" = "lightblue",
      "Yes" = "lightpink",
      "Geen idee/neutraal" = "lightgreen"
    )
  ) +
  labs(
    title = "Distribution of relevance by afdeling coschap",
    x = "Afdeling_Coschap",
    y = "Proportion",
    fill = "relevance coschap"
  ) +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

library(ggplot2)
library(dplyr)
library(forcats)  # Ensure you load the forcats library for fct_collapse

# Keep the two columns of interest, drop incomplete rows, then recode:
#   relevance_coschap -> "No" / "Yes" (other answers keep their own level)
#   Afdeling_coschap  -> the two "not yet known" answers merged into one level
motivation_clean <- motivation[, c("Afdeling_coschap", "relevance_coschap")] %>%
  na.omit() %>%
  mutate(
    relevance_coschap = fct_collapse(
      relevance_coschap,
      "No" = "Waarschijnlijk niet",
      "Yes" = c("Waarschijnlijk wel", "Zeker wel")
    ),
    Afdeling_coschap = fct_collapse(
      Afdeling_coschap,
      "Weet ik nog niet" = c("Weet ik nog niet", "nog niet bekend")
    )
  )

# Bars scaled to proportions within each department
ggplot(motivation_clean, aes(Afdeling_coschap, fill = relevance_coschap)) +
  geom_bar(position = "fill") +
  scale_fill_manual(
    values = c(
      "No" = "lightblue",
      "Yes" = "lightpink",
      "Geen idee/neutraal" = "lightgreen"
    )
  ) +
  labs(
    title = "Distribution of relevance by afdeling coschap",
    x = "Afdeling_coschap",
    y = "Proportion",
    fill = "Relevance_coschap"
  ) +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

# Same bars as raw counts (default stacking)
ggplot(motivation_clean, aes(Afdeling_coschap, fill = relevance_coschap)) +
  geom_bar() +
  scale_fill_manual(
    values = c(
      "No" = "lightblue",
      "Yes" = "lightpink",
      "Geen idee/neutraal" = "lightgreen"
    )
  ) +
  labs(
    title = "Distribution of relevance by afdeling coschap",
    x = "Afdeling_coschap",
    y = "Count",
    fill = "Relevance_coschap"
  ) +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

library(ggbeeswarm)
library(ggplot2)
library(gridExtra)
library(dplyr)
library(forcats)

# Keep the analysis columns and drop every row with an NA in any of them
motivation_yolo <- na.omit(
  motivation[c(
    "relevance_coschap", "RAM_Med", "RAM_KNO", "relevance_career",
    "Questions_Attemped", "Level_Completed", "AM_Med", "AM_KNO",
    "CM_Med", "CM_KNO", "User_ID"
  )]
)

# Collapse both relevance answers into "No" / "Yes" factors (other answers
# keep their own level) and replace User_ID by a running row number stored
# as a factor
motivation_yolo <- motivation_yolo %>%
  mutate(
    relevance_coschap = as.factor(
      fct_collapse(
        relevance_coschap,
        "No" = "Waarschijnlijk niet",
        "Yes" = c("Waarschijnlijk wel", "Zeker wel")
      )
    ),
    relevance_career = as.factor(
      fct_collapse(
        relevance_career,
        "No" = "Waarschijnlijk niet",
        "Yes" = c("Waarschijnlijk wel", "Zeker wel")
      )
    ),
    User_ID = as.factor(seq_len(n()))
  )

# Build one bee swarm plot of a motivation score against a relevance factor.
#
# data     : data frame holding the columns named below plus User_ID
# x_var    : name (string) of the column shown on the x axis
# y_var    : name (string) of the column shown on the y axis
# title    : plot title
# x_lab    : x axis label
# y_lab    : y axis label
# y_limits : optional c(lower, upper) for the y axis; NULL keeps the default
#
# Returns the ggplot object. Points are coloured by User_ID and the x axis
# labels are rotated by 45 degrees.
make_beeswarm <- function(data, x_var, y_var, title, x_lab, y_lab,
                          y_limits = NULL) {
  p <- ggplot(
    data = data,
    aes(x = .data[[x_var]], y = .data[[y_var]], color = User_ID)
  ) +
    geom_quasirandom(width = 0.2) +
    labs(title = title, x = x_lab, y = y_lab, color = "User ID") +
    theme(axis.text.x = element_text(angle = 45, hjust = 1))

  if (!is.null(y_limits)) {
    p <- p + ylim(y_limits[1], y_limits[2])
  }
  p
}

# AM_KNO and CM_KNO against relevance_coschap (fixed 0-7 y axis)
p1 <- make_beeswarm(
  motivation_yolo, "relevance_coschap", "AM_KNO",
  title = "Effect of Relevance Coschap on AM_KNO",
  x_lab = "Relevance Coschap",
  y_lab = "Autonomous Motivation for KNO",
  y_limits = c(0, 7)
)
p2 <- make_beeswarm(
  motivation_yolo, "relevance_coschap", "CM_KNO",
  title = "Effect of Relevance Coschap on CM_KNO",
  x_lab = "Relevance Coschap",
  y_lab = "Controlled Motivation for KNO",
  y_limits = c(0, 7)
)

# Display plots side by side
grid.arrange(p1, p2, ncol = 2)

# AM_KNO and CM_KNO against relevance_career (fixed 0-7 y axis)
p3 <- make_beeswarm(
  motivation_yolo, "relevance_career", "AM_KNO",
  title = "Effect of Relevance Career on AM_KNO",
  x_lab = "Relevance Career",
  y_lab = "Autonomous Motivation for KNO",
  y_limits = c(0, 7)
)
p4 <- make_beeswarm(
  motivation_yolo, "relevance_career", "CM_KNO",
  title = "Effect of Relevance Career on CM_KNO",
  x_lab = "Relevance Career",
  y_lab = "Controlled Motivation for KNO",
  y_limits = c(0, 7)
)

# Display plots side by side
grid.arrange(p3, p4, ncol = 2)

# RAM_Med and RAM_KNO against relevance_coschap (default y axis)
p5 <- make_beeswarm(
  motivation_yolo, "relevance_coschap", "RAM_Med",
  title = "Effect of Relevance Coschap on RAM_Med",
  x_lab = "Relevance Coschap",
  y_lab = "RAM Med"
)
p6 <- make_beeswarm(
  motivation_yolo, "relevance_coschap", "RAM_KNO",
  title = "Effect of Relevance Coschap on RAM KNO",
  x_lab = "Relevance Coschap",
  y_lab = "RAM for KNO"
)

# Display plots side by side
grid.arrange(p5, p6, ncol = 2)

# RAM_Med and RAM_KNO against relevance_career (default y axis)
p7 <- make_beeswarm(
  motivation_yolo, "relevance_career", "RAM_Med",
  title = "Effect of Relevance Career on RAM Med",
  x_lab = "Relevance Career",
  y_lab = "RAM for Med"
)
# The title previously read "... on CM_KNO" although the plot shows RAM_KNO
p8 <- make_beeswarm(
  motivation_yolo, "relevance_career", "RAM_KNO",
  title = "Effect of Relevance Career on RAM KNO",
  x_lab = "Relevance Career",
  y_lab = "RAM for KNO"
)

# Display plots side by side
grid.arrange(p7, p8, ncol = 2)

# Keep only the rows where both RAM scores are present
motivation_clean2 <- filter(motivation, !is.na(RAM_Med), !is.na(RAM_KNO))

# Pearson correlation coefficient between RAM_Med and RAM_KNO
correlation_pearson_clean <- with(
  motivation_clean2,
  cor(RAM_Med, RAM_KNO, method = "pearson")
)
print(correlation_pearson_clean)

## [1] -0.0809967

# Test whether the Pearson correlation between RAM_Med and RAM_KNO differs
# from zero (the output reports a two-sided alternative) and print the
# statistic, p-value and 95% confidence interval
test_pearson_clean <- cor.test(motivation_clean2$RAM_Med, motivation_clean2$RAM_KNO, method = "pearson")
print(test_pearson_clean)

## 
##  Pearson's product-moment correlation
## 
## data:  motivation_clean2$RAM_Med and motivation_clean2$RAM_KNO
## t = -0.19905, df = 6, p-value = 0.8488
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.7432479  0.6614280
## sample estimates:
##        cor 
## -0.0809967

# Install tidyverse and Hmisc only when they are missing, so that knitting
# the document does not download and reinstall both packages on every run.
for (pkg in c("tidyverse", "Hmisc")) {
  if (!requireNamespace(pkg, quietly = TRUE)) {
    install.packages(pkg, repos = "https://cloud.r-project.org/")
  }
}

# Load libraries
library(tidyverse)
library(Hmisc)

## 
## Attaching package: 'Hmisc'

## The following objects are masked from 'package:dplyr':
## 
##     src, summarize

## The following objects are masked from 'package:base':
## 
##     format.pval, units

# Keep only rows with Amotivation_KNO and both usage measures present
motivation_clean3 <- filter(
  motivation,
  !is.na(Amotivation_KNO),
  !is.na(Level_Completed),
  !is.na(Questions_Attemped)
)

# Spearman rank correlation between Amotivation_KNO and Level_Completed
spearman_corr_level <- with(
  motivation_clean3,
  cor(Amotivation_KNO, Level_Completed, method = "spearman")
)
print(
  paste(
    "Spearman's correlation between Amotivation_KNO and Level_Completed:",
    spearman_corr_level
  )
)

## [1] "Spearman's correlation between Amotivation_KNO and Level_Completed: -0.56531554109761"

# Spearman rank correlation between Amotivation_KNO and Questions_Attemped
spearman_corr_questions <- with(
  motivation_clean3,
  cor(Amotivation_KNO, Questions_Attemped, method = "spearman")
)
print(
  paste(
    "Spearman's correlation between Amotivation_KNO and Questions_Attemped:",
    spearman_corr_questions
  )
)

## [1] "Spearman's correlation between Amotivation_KNO and Questions_Attemped: -0.481518178366675"

# Test whether Spearman's rho between Amotivation_KNO and Level_Completed
# differs from zero (the output reports a two-sided alternative), then print
# the S statistic, p-value and rho estimate
test_spearman_level <- cor.test(motivation_clean3$Amotivation_KNO, motivation_clean3$Level_Completed, method = "spearman")
print(test_spearman_level)

## 
##  Spearman's rank correlation rho
## 
## data:  motivation_clean3$Amotivation_KNO and motivation_clean3$Level_Completed
## S = 131.49, p-value = 0.1442
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
##        rho 
## -0.5653155

# Test whether Spearman's rho between Amotivation_KNO and Questions_Attemped
# differs from zero (the output reports a two-sided alternative), then print
# the S statistic, p-value and rho estimate
test_spearman_questions <- cor.test(motivation_clean3$Amotivation_KNO, motivation_clean3$Questions_Attemped, method = "spearman")
print(test_spearman_questions)

## 
##  Spearman's rank correlation rho
## 
## data:  motivation_clean3$Amotivation_KNO and motivation_clean3$Questions_Attemped
## S = 124.45, p-value = 0.227
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
##        rho 
## -0.4815182

Between Amotivation_KNO and Level_Completed:

Spearman’s correlation coefficient (rho) is approximately -0.5653. This indicates a moderate negative monotonic association between Amotivation_KNO and Level_Completed in this sample: as Amotivation_KNO increases, Level_Completed tends to decrease, and vice versa.

Between Amotivation_KNO and Questions_Attemped:

Spearman’s correlation coefficient (rho) is approximately -0.4815. This indicates a moderate negative monotonic association between Amotivation_KNO and Questions_Attemped in this sample: as Amotivation_KNO increases, the number of Questions_Attemped tends to decrease, and vice versa.

Significance Tests:

For Amotivation_KNO and Level_Completed:

The p-value associated with the correlation coefficient is 0.1442. Because this is greater than the conventional significance level of 0.05, we fail to reject the null hypothesis: the observed correlation coefficient is not statistically significant at the 0.05 level. There is no strong evidence of a monotonic relationship between Amotivation_KNO and Level_Completed.

For Amotivation_KNO and Questions_Attemped:

The p-value associated with the correlation coefficient is 0.227. As in the previous test, the p-value is greater than 0.05, so we fail to reject the null hypothesis: the observed correlation coefficient is not statistically significant at the 0.05 level. There is no strong evidence of a monotonic relationship between Amotivation_KNO and Questions_Attemped.

# Remove NA values: na.omit() on the whole data frame drops every row that
# has an NA in any column, not only in the columns correlated below
motivation_clean4 <- na.omit(motivation)

# Pearson correlation test between Level_Completed and RAM_KNO
correlation_levels_ram <- cor.test(motivation_clean4$Level_Completed, motivation_clean4$RAM_KNO, method = "pearson")

# Pearson correlation test between Questions_Attemped and RAM_KNO
correlation_questions_ram <- cor.test(motivation_clean4$Questions_Attemped, motivation_clean4$RAM_KNO, method = "pearson")

# Print a heading for the first correlation test
cat("Correlation test between Levels_Completed and RAM_KNO:\n")

## Correlation test between Levels_Completed and RAM_KNO:

# Show the full test result for Level_Completed vs RAM_KNO
print(correlation_levels_ram)

## 
##  Pearson's product-moment correlation
## 
## data:  motivation_clean4$Level_Completed and motivation_clean4$RAM_KNO
## t = 0.8801, df = 6, p-value = 0.4127
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.4811940  0.8421461
## sample estimates:
##       cor 
## 0.3381354

# Report whether the Level_Completed / RAM_KNO correlation reaches p < 0.05
cat(
  if (correlation_levels_ram$p.value < 0.05) {
    "Correlation is statistically significant at the 0.05 level.\n"
  } else {
    "Correlation is not statistically significant at the 0.05 level.\n"
  }
)

## Correlation is not statistically significant at the 0.05 level.

# Print a heading (preceded by a blank line) for the second correlation test
cat("\nCorrelation test between Questions_Attempted and RAM_KNO:\n")

## 
## Correlation test between Questions_Attempted and RAM_KNO:

# Show the full test result for Questions_Attemped vs RAM_KNO
print(correlation_questions_ram)

## 
##  Pearson's product-moment correlation
## 
## data:  motivation_clean4$Questions_Attemped and motivation_clean4$RAM_KNO
## t = 1.1021, df = 6, p-value = 0.3127
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.4140995  0.8649056
## sample estimates:
##       cor 
## 0.4103013

# Report whether the Questions_Attemped / RAM_KNO correlation reaches p < 0.05
cat(
  if (correlation_questions_ram$p.value < 0.05) {
    "Correlation is statistically significant at the 0.05 level.\n"
  } else {
    "Correlation is not statistically significant at the 0.05 level.\n"
  }
)

## Correlation is not statistically significant at the 0.05 level.

# Record the R version used to produce this report
R.version.string

## [1] "R version 4.3.1 (2023-06-16)"