library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.5
## ✔ forcats 1.0.0 ✔ stringr 1.5.1
## ✔ ggplot2 3.5.1 ✔ tibble 3.2.1
## ✔ lubridate 1.9.3 ✔ tidyr 1.3.1
## ✔ purrr 1.0.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(ggplot2)
library(readr)
# Read the motivatie CSV into a tibble; readr guesses the column types.
motivatie <- readr::read_csv(file = "data/motivatie.csv")
## Rows: 9 Columns: 30
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (6): Gender, Afdeling_coschap, Previous_Ed, Failed, relevance_coschap, ...
## dbl (24): User ID, Intrinsic Motivation_Med, Identified Regulation_Med, Intr...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

# Label each row "KNO" when Afdeling_coschap equals "KNO" and "non-KNO"
# otherwise. A missing Afdeling_coschap yields a missing Group.
motivatie$Group <- ifelse(motivatie$Afdeling_coschap == "KNO", "KNO", "non-KNO")

# Bar chart of how many rows fall in each group.
# The fill is mapped to Group through a named manual scale instead of being
# passed as a fixed two-element constant: a constant vector only works when
# exactly two bars are drawn and makes ggplot2 fail as soon as a group is
# absent or an NA bar appears. The legend is hidden because it would only
# repeat the x axis.
ggplot(motivatie, aes(x = Group, fill = Group)) +
  geom_bar(show.legend = FALSE) +
  scale_fill_manual(
    values = c("KNO" = "lightgreen", "non-KNO" = "lightcoral")
  ) +
  labs(x = "Group", y = "Count", title = "Distribution of KNO and non-KNO") +
  theme_minimal()

# Load necessary libraries
library(dplyr)
library(tidyr)
library(ggplot2)
# List every column that is available in the data set
colnames(motivatie)
## [1] "User ID" "Intrinsic Motivation_Med"
## [3] "Identified Regulation_Med" "Introjected Regulation_Med"
## [5] "External Regulation_Med" "Amotivation_Med"
## [7] "AM_Med" "CM_Med"
## [9] "RAM_Med" "Intrinsic Motivation_KNO"
## [11] "Identified Regulation KNO" "Introjected Regulation_KNO"
## [13] "External Regulation_KNO" "Amotivation_KNO"
## [15] "AM_KNO" "CM_KNO"
## [17] "RAM_KNO" "Level_Completed"
## [19] "Questions_Attemped" "Gender"
## [21] "Block" "Afdeling_coschap"
## [23] "Previous_Ed" "Failed"
## [25] "relevance_coschap" "relevance_career"
## [27] "Both" "Questionaire_only"
## [29] "Questions_only" "None"
## [31] "Group"
# Tally, per response category, how many rows have the indicator equal to 1.
# Missing indicators are skipped so they cannot turn a total into NA.
count_flagged <- function(flag) sum(flag == 1, na.rm = TRUE)
counts <- summarise(
  motivatie,
  Questionnaire = count_flagged(Questionaire_only),
  Both = count_flagged(Both),
  Questions = count_flagged(Questions_only),
  None = count_flagged(None)
)
# Reshape the one-row summary into one row per category (Category, Count),
# which is the layout ggplot2 needs for a bar per category
counts_long <- pivot_longer(
  counts,
  cols = everything(),
  names_to = "Category",
  values_to = "Count"
)
# Pastel fill colour for each response category, keyed by category name
custom_colors <- c(
  Questionnaire = "#FFB6C1", # pink
  Questions = "#89CFF0", # blue
  None = "#98FB98", # green
  Both = "#FFFFE0" # light yellow
)
# Bar chart of the per-category totals, coloured with the custom palette.
# The counts are already aggregated, so geom_col() draws them as given
# (it is the dedicated form of geom_bar(stat = "identity")). Every category
# sits at its own x position, so there is nothing to dodge and no position
# adjustment is needed.
ggplot(counts_long, aes(x = Category, y = Count, fill = Category)) +
  geom_col() +
  scale_fill_manual(values = custom_colors) +
  labs(
    title = "Participant Response Distribution",
    x = "Category",
    y = "Count"
  ) +
  theme_minimal()

# Scatter of RAM_KNO against RAM_Med with a least-squares line overlaid
ggplot(data = motivatie, mapping = aes(x = RAM_Med, y = RAM_KNO)) +
  geom_point() +
  # Straight-line fit in red; the confidence ribbon is switched off
  geom_smooth(method = "lm", colour = "red", se = FALSE) +
  ggtitle("Relationship between RAM_Med and RAM_KNO") +
  xlab("RAM_Med") +
  ylab("RAM_KNO") +
  theme_minimal()
## `geom_smooth()` using formula = 'y ~ x'

# Pearson correlation between RAM_Med and RAM_KNO.
# At least one of the two columns contains missing values, and by default
# cor() returns NA as soon as any input value is missing (the earlier run of
# this script printed NA here). `use = "pairwise.complete.obs"` restricts the
# calculation to the rows where both scores are present.
correlation <- cor(
  motivatie$RAM_Med,
  motivatie$RAM_KNO,
  use = "pairwise.complete.obs"
)
correlation
# Fit RAM_KNO as a linear function of RAM_Med
lm_model <- lm(formula = RAM_KNO ~ RAM_Med, data = motivatie)
# Show the raw observations together with a fitted straight line
ggplot(data = motivatie, mapping = aes(x = RAM_Med, y = RAM_KNO)) +
  geom_point() +
  geom_smooth(method = "lm", colour = "red", se = FALSE) +
  labs(
    x = "RAM_med",
    y = "RAM_KNO",
    title = "Simple Linear Regression: RAM_med vs RAM_KNO"
  ) +
  theme_minimal()
## `geom_smooth()` using formula = 'y ~ x'

# Load necessary libraries
library(ggplot2)
library(dplyr)
# Regress each RAM score on Questions_Attemped and Level_Completed:
# one model with RAM_KNO as the response, one with RAM_Med
lm_model_kno <- lm(
  formula = RAM_KNO ~ Questions_Attemped + Level_Completed,
  data = motivatie
)
lm_model_med <- lm(
  formula = RAM_Med ~ Questions_Attemped + Level_Completed,
  data = motivatie
)
# Coefficient table and fit statistics for the RAM_KNO model
summary(object = lm_model_kno)
##
## Call:
## lm(formula = RAM_KNO ~ Questions_Attemped + Level_Completed,
## data = motivatie)
##
## Residuals:
## 1 6 7 8
## -3.6301 -0.8681 0.9470 3.5512
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.9470 5.1519 -0.184 0.884
## Questions_Attemped -0.1599 0.4669 -0.342 0.790
## Level_Completed 5.8848 14.4733 0.407 0.754
##
## Residual standard error: 5.238 on 1 degrees of freedom
## (5 observations deleted due to missingness)
## Multiple R-squared: 0.5538, Adjusted R-squared: -0.3387
## F-statistic: 0.6204 on 2 and 1 DF, p-value: 0.668
# Coefficient table and fit statistics for the RAM_Med model
summary(object = lm_model_med)
##
## Call:
## lm(formula = RAM_Med ~ Questions_Attemped + Level_Completed,
## data = motivatie)
##
## Residuals:
## 1 2 6 7 8
## -0.6306 2.1863 2.1543 1.9230 -5.6330
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 12.7770 4.1920 3.048 0.0929 .
## Questions_Attemped 0.1590 0.1789 0.889 0.4679
## Level_Completed -5.0477 5.6473 -0.894 0.4657
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 4.756 on 2 degrees of freedom
## (4 observations deleted due to missingness)
## Multiple R-squared: 0.2856, Adjusted R-squared: -0.4287
## F-statistic: 0.3998 on 2 and 2 DF, p-value: 0.7144