library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ ggplot2   3.5.1     ✔ tibble    3.2.1
## ✔ lubridate 1.9.3     ✔ tidyr     1.3.1
## ✔ purrr     1.0.2     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(ggplot2)
library(readr)
# Load the motivation survey export; column types are guessed by readr.
motivatie <- readr::read_csv(file = "data/motivatie.csv")
## Rows: 9 Columns: 30
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr  (6): Gender, Afdeling_coschap, Previous_Ed, Failed, relevance_coschap, ...
## dbl (24): User ID, Intrinsic Motivation_Med, Identified Regulation_Med, Intr...
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

# Derive a two-level Group label from the department column: rows whose
# Afdeling_coschap equals "KNO" keep that label, all other non-missing
# departments are pooled as "non-KNO" (a missing department stays NA).
motivatie[["Group"]] <- with(
  motivatie,
  ifelse(Afdeling_coschap == "KNO", "KNO", "non-KNO")
)


# Bar chart of the number of participants per Group.
# The fill colour is mapped to Group and resolved through a named palette
# rather than passing a length-2 vector to geom_bar(): a fixed-length `fill`
# only works while exactly two bars are drawn and makes ggplot2 error
# otherwise (e.g. a single group present, or rows with a missing Group).
# The legend is hidden because the x axis already labels the bars.
ggplot(motivatie, aes(x = Group, fill = Group)) +
  geom_bar(show.legend = FALSE) +
  scale_fill_manual(
    values = c("KNO" = "lightgreen", "non-KNO" = "lightcoral"),
    na.value = "grey70"
  ) +
  labs(x = "Group", y = "Count", title = "Distribution of KNO and non-KNO") +
  theme_minimal()

# Load necessary libraries
library(dplyr)
library(tidyr)
library(ggplot2)

# Print the column names of the data set for reference
colnames(motivatie)
##  [1] "User ID"                    "Intrinsic Motivation_Med"  
##  [3] "Identified Regulation_Med"  "Introjected Regulation_Med"
##  [5] "External Regulation_Med"    "Amotivation_Med"           
##  [7] "AM_Med"                     "CM_Med"                    
##  [9] "RAM_Med"                    "Intrinsic Motivation_KNO"  
## [11] "Identified Regulation KNO"  "Introjected Regulation_KNO"
## [13] "External Regulation_KNO"    "Amotivation_KNO"           
## [15] "AM_KNO"                     "CM_KNO"                    
## [17] "RAM_KNO"                    "Level_Completed"           
## [19] "Questions_Attemped"         "Gender"                    
## [21] "Block"                      "Afdeling_coschap"          
## [23] "Previous_Ed"                "Failed"                    
## [25] "relevance_coschap"          "relevance_career"          
## [27] "Both"                       "Questionaire_only"         
## [29] "Questions_only"             "None"                      
## [31] "Group"
# Tally, per response category, how many rows carry the value 1.
# Missing values are ignored; the renames inside c() give the result columns
# their display names (Questionaire_only -> Questionnaire,
# Questions_only -> Questions).
counts <- motivatie %>%
  summarise(
    across(
      c(
        Questionnaire = Questionaire_only,
        Both,
        Questions = Questions_only,
        None
      ),
      function(flag) sum(flag == 1, na.rm = TRUE)
    )
  )

# Reshape the one-row summary into long form (one row per category, with the
# category name in `Category` and its tally in `Count`) for plotting.
counts_long <- pivot_longer(
  counts,
  cols = everything(),
  names_to = "Category",
  values_to = "Count"
)

# Fill colour for each response category, keyed by category name so that
# scale_fill_manual() matches colours by name rather than by position.
custom_colors <- c(
  "Questionnaire" = "#FFB6C1",  # light pink
  "Questions" = "#89CFF0",      # baby blue
  "None" = "#98FB98",           # pale green
  "Both" = "#FFFFE0"            # light yellow
)

# Bar chart of the pre-computed counts, one bar per category.
# geom_col() draws the supplied y values as-is and is the idiomatic form of
# geom_bar(stat = "identity"). The former position_dodge(width = 10) was
# dropped: every x position holds a single bar, so there is nothing to dodge.
ggplot(counts_long, aes(x = Category, y = Count, fill = Category)) +
  geom_col() +
  scale_fill_manual(values = custom_colors) +
  labs(
    title = "Participant Response Distribution",
    x = "Category",
    y = "Count"
  ) +
  theme_minimal()

# Scatter plot of RAM_KNO against RAM_Med with a fitted straight line
ggplot(data = motivatie, mapping = aes(x = RAM_Med, y = RAM_KNO)) +
  geom_point() +
  # Linear-model fit drawn in red; the confidence ribbon is switched off
  geom_smooth(method = "lm", se = FALSE, color = "red") +
  ggtitle("Relationship between RAM_Med and RAM_KNO") +
  xlab("RAM_Med") +
  ylab("RAM_KNO") +
  theme_minimal()
## `geom_smooth()` using formula = 'y ~ x'

# Calculate the Pearson correlation between RAM_Med and RAM_KNO.
# With the default use = "everything", cor() returns NA as soon as either
# vector contains a missing value, which is what this call used to print
# (RAM_KNO is missing for at least one participant). Restrict the computation
# to rows where both scores are present; "pairwise.complete.obs" yields NA
# rather than an error if no such row exists.
correlation <- cor(
  motivatie$RAM_Med,
  motivatie$RAM_KNO,
  use = "pairwise.complete.obs"
)
correlation
# Fit a simple linear regression of RAM_KNO on RAM_Med.
# lm() silently drops rows with a missing value in either variable.
lm_model <- lm(RAM_KNO ~ RAM_Med, data = motivatie)

# Plot the observed data with the fitted regression line.
# The line is re-estimated by geom_smooth(method = "lm") from the same two
# columns. Labels use the exact column spelling "RAM_Med" (previously
# "RAM_med") so they agree with the data and with the earlier scatter plot.
ggplot(motivatie, aes(x = RAM_Med, y = RAM_KNO)) +
  geom_point() +  # Observed data points
  geom_smooth(method = "lm", se = FALSE, color = "red") +  # Regression line
  labs(title = "Simple Linear Regression: RAM_Med vs RAM_KNO",
       x = "RAM_Med", y = "RAM_KNO") +
  theme_minimal()
## `geom_smooth()` using formula = 'y ~ x'

# Load necessary libraries
library(ggplot2)
library(dplyr)

# Regress each RAM score on the same two predictors, Questions_Attemped and
# Level_Completed.
# NOTE(review): the recorded summaries show only 4-5 complete rows, leaving
# 1-2 residual degrees of freedom, so these fits are descriptive at best.
lm_model_kno <- lm(
  formula = RAM_KNO ~ Questions_Attemped + Level_Completed,
  data = motivatie
)
lm_model_med <- lm(
  formula = RAM_Med ~ Questions_Attemped + Level_Completed,
  data = motivatie
)

# Print coefficients and fit statistics for the RAM_KNO model
summary(object = lm_model_kno)
## 
## Call:
## lm(formula = RAM_KNO ~ Questions_Attemped + Level_Completed, 
##     data = motivatie)
## 
## Residuals:
##       1       6       7       8 
## -3.6301 -0.8681  0.9470  3.5512 
## 
## Coefficients:
##                    Estimate Std. Error t value Pr(>|t|)
## (Intercept)         -0.9470     5.1519  -0.184    0.884
## Questions_Attemped  -0.1599     0.4669  -0.342    0.790
## Level_Completed      5.8848    14.4733   0.407    0.754
## 
## Residual standard error: 5.238 on 1 degrees of freedom
##   (5 observations deleted due to missingness)
## Multiple R-squared:  0.5538, Adjusted R-squared:  -0.3387 
## F-statistic: 0.6204 on 2 and 1 DF,  p-value: 0.668
# Print coefficients and fit statistics for the RAM_Med model
summary(object = lm_model_med)
## 
## Call:
## lm(formula = RAM_Med ~ Questions_Attemped + Level_Completed, 
##     data = motivatie)
## 
## Residuals:
##       1       2       6       7       8 
## -0.6306  2.1863  2.1543  1.9230 -5.6330 
## 
## Coefficients:
##                    Estimate Std. Error t value Pr(>|t|)  
## (Intercept)         12.7770     4.1920   3.048   0.0929 .
## Questions_Attemped   0.1590     0.1789   0.889   0.4679  
## Level_Completed     -5.0477     5.6473  -0.894   0.4657  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 4.756 on 2 degrees of freedom
##   (4 observations deleted due to missingness)
## Multiple R-squared:  0.2856, Adjusted R-squared:  -0.4287 
## F-statistic: 0.3998 on 2 and 2 DF,  p-value: 0.7144