library(readr)
# Load the data set with readr::read_csv(); every later step reads `data`.
# NOTE(review): the path is absolute and machine-specific (a Windows user
# desktop), so the script only runs where this exact location exists.
data <- read_csv("C:/Users/Admins/Desktop/R/data/my_data.csv")
# Conduct normality testing using the Shapiro-Wilk test, one column per call.
# The `##` lines under each call are console output pasted from an earlier
# run; every p-value shown is below 0.05, i.e. each test rejects normality.
# NOTE(review): `Participants` looks like a row identifier and `Group` like a
# categorical code (see the describe() output further down) - confirm that a
# normality test is meaningful for those two columns.
shapiro.test(data$Participants)
##
## Shapiro-Wilk normality test
##
## data: data$Participants
## W = 0.95472, p-value = 0.001722
shapiro.test(data$Group)
##
## Shapiro-Wilk normality test
##
## data: data$Group
## W = 0.777, p-value = 5.164e-11
shapiro.test(data$`Dependent variable`)
##
## Shapiro-Wilk normality test
##
## data: data$`Dependent variable`
## W = 0.92295, p-value = 2.026e-05
shapiro.test(data$`Range of group-low`)
##
## Shapiro-Wilk normality test
##
## data: data$`Range of group-low`
## W = 0.80632, p-value = 3.89e-10
shapiro.test(data$`Range of group-high`)
##
## Shapiro-Wilk normality test
##
## data: data$`Range of group-high`
## W = 0.64398, p-value = 3.194e-14
# Present descriptive statistics
# psych::describe() prints one row per column with n, mean, sd, median,
# trimmed mean, mad, min, max, range, skew, kurtosis and se (pasted below).
library(psych)
describe(data)
## vars n mean sd median trimmed mad min max range
## Participants 1 100 50.50 29.01 50.5 50.50 37.06 1 100 99
## Group 2 100 1.25 0.77 1.0 1.31 1.48 0 2 2
## Dependent variable 3 100 17.11 3.05 16.5 16.86 2.22 12 31 19
## Range of group-low 4 100 13.17 1.81 13.0 13.31 2.97 10 17 7
## Range of group-high 5 100 20.75 2.31 20.0 20.54 1.48 19 35 16
## skew kurtosis se
## Participants 0.00 -1.24 2.90
## Group -0.45 -1.21 0.08
## Dependent variable 1.13 2.77 0.31
## Range of group-low -0.57 -0.64 0.18
## Range of group-high 2.47 11.92 0.23
# Inferential statistics
# NOTE(review): summary() on the data frame prints only the per-column
# min, quartiles, median, mean and max (pasted below); these are descriptive
# statistics, so the heading above may be mislabelled - confirm intent.
summary(data)
## Participants Group Dependent variable Range of group-low
## Min. : 1.00 Min. :0.00 Min. :12.00 Min. :10.00
## 1st Qu.: 25.75 1st Qu.:1.00 1st Qu.:15.00 1st Qu.:13.00
## Median : 50.50 Median :1.00 Median :16.50 Median :13.00
## Mean : 50.50 Mean :1.25 Mean :17.11 Mean :13.17
## 3rd Qu.: 75.25 3rd Qu.:2.00 3rd Qu.:19.00 3rd Qu.:15.00
## Max. :100.00 Max. :2.00 Max. :31.00 Max. :17.00
## Range of group-high
## Min. :19.00
## 1st Qu.:19.00
## Median :20.00
## Mean :20.75
## 3rd Qu.:23.00
## Max. :35.00
# Create a scatter plot with all columns and add a legend.
# Each column is drawn against its row index (no x values are supplied), so
# the x axis is the observation number.
# Column name -> point colour. Order matters: the first entry opens the plot,
# the remaining ones are overlaid, and the legend follows the same order.
series_cols <- c(
  "Participants" = "lightblue",
  "Group" = "pink",
  "Dependent variable" = "lightgreen",
  "Range of group-low" = "lightyellow",
  "Range of group-high" = "lavender"
)
par(mfrow = c(1, 1))
# Base graphics fixes the axis limits when plot() is called and never rescales
# for later points() calls, so take the y range over *all* series up front;
# otherwise values outside the first column's range are silently clipped.
y_limits <- range(unlist(data[names(series_cols)]), na.rm = TRUE, finite = TRUE)
plot(
  data[[names(series_cols)[1]]],
  col = series_cols[[1]], pch = 16, ylim = y_limits,
  main = "Scatter Plot of individual observations",
  xlab = "Participants", ylab = "Value"
)
for (series_name in names(series_cols)[-1]) {
  points(data[[series_name]], col = series_cols[[series_name]], pch = 16)
}
legend(
  "topright", names(series_cols),
  fill = unname(series_cols), cex = 0.8
)
# Hypothesis 1
# Split the dependent variable into one numeric vector per level of Group.
split_data <- split(data$`Dependent variable`, data$Group)
# Conduct a t-test for each group and print its t statistic and p-value.
# NOTE(review): t.test() with a single sample and no `mu` tests H0: mean = 0.
# That is a one-sample test per group, not a comparison between groups -
# confirm this is what Hypothesis 1 requires.
# seq_along() instead of 1:length(): with zero groups 1:0 yields c(1, 0) and
# the loop body would index out of bounds.
for (i in seq_along(split_data)) {
  group <- names(split_data)[i]
  results <- t.test(split_data[[i]])
  print(paste0(
    "Group ", group, ": t = ", results$statistic,
    ", p-value = ", results$p.value
  ))
}
## [1] "Group 0: t = 18.3812698474582, p-value = 1.46851000199435e-13"
## [1] "Group 1: t = 42.0159178097729, p-value = 6.84140950731061e-31"
## [1] "Group 2: t = 57.6129397797351, p-value = 4.44361152754593e-43"
# Hypothesis 2
# Simple linear regression: `Dependent variable` is the response and
# `Range of group-high` the single predictor, both taken from `data`.
model <- lm(
  formula = `Dependent variable` ~ `Range of group-high`,
  data = data
)
# Print the fitted model's residual quantiles, coefficient table,
# R-squared values and overall F-test.
summary(model)
##
## Call:
## lm(formula = `Dependent variable` ~ `Range of group-high`, data = data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -4.4522 -1.5751 -0.4522 1.6400 3.9166
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -1.0892 2.0814 -0.523 0.602
## `Range of group-high` 0.8771 0.0997 8.797 4.91e-14 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.293 on 98 degrees of freedom
## Multiple R-squared: 0.4413, Adjusted R-squared: 0.4355
## F-statistic: 77.39 on 1 and 98 DF, p-value: 4.911e-14