Data Import

library(readr)
# Load the study data set from CSV into `data` (read_csv comes from readr).
# NOTE(review): this is an absolute, machine-specific Windows path, so the
# script only runs where that exact file exists -- consider a project-relative
# path.
data <- read_csv("C:/Users/Admins/Desktop/R/data/my_data.csv")

Normality testing

# Conduct normality testing using the Shapiro-Wilk test, one call per column.
# The "##" lines after each call are that call's printed result (W statistic
# and p-value).
# NOTE(review): per the descriptive statistics further down, Participants
# spans 1..100 with n = 100 (looks like a row ID) and Group only takes values
# in 0..2 (looks like a category code) -- a normality test is presumably not
# meaningful for those two columns; confirm before reporting them.
shapiro.test(data$Participants)
## 
##  Shapiro-Wilk normality test
## 
## data:  data$Participants
## W = 0.95472, p-value = 0.001722
shapiro.test(data$Group)
## 
##  Shapiro-Wilk normality test
## 
## data:  data$Group
## W = 0.777, p-value = 5.164e-11
shapiro.test(data$`Dependent variable`)
## 
##  Shapiro-Wilk normality test
## 
## data:  data$`Dependent variable`
## W = 0.92295, p-value = 2.026e-05
shapiro.test(data$`Range of group-low`)
## 
##  Shapiro-Wilk normality test
## 
## data:  data$`Range of group-low`
## W = 0.80632, p-value = 3.89e-10
shapiro.test(data$`Range of group-high`)
## 
##  Shapiro-Wilk normality test
## 
## data:  data$`Range of group-high`
## W = 0.64398, p-value = 3.194e-14

Descriptive statistics

# Present descriptive statistics for every column of `data`.
# psych::describe() prints one row per column with n, mean, sd, median,
# trimmed mean, mad, min, max, range, skew, kurtosis and standard error.
library(psych)
describe(data)
##                     vars   n  mean    sd median trimmed   mad min max range
## Participants           1 100 50.50 29.01   50.5   50.50 37.06   1 100    99
## Group                  2 100  1.25  0.77    1.0    1.31  1.48   0   2     2
## Dependent variable     3 100 17.11  3.05   16.5   16.86  2.22  12  31    19
## Range of group-low     4 100 13.17  1.81   13.0   13.31  2.97  10  17     7
## Range of group-high    5 100 20.75  2.31   20.0   20.54  1.48  19  35    16
##                      skew kurtosis   se
## Participants         0.00    -1.24 2.90
## Group               -0.45    -1.21 0.08
## Dependent variable   1.13     2.77 0.31
## Range of group-low  -0.57    -0.64 0.18
## Range of group-high  2.47    11.92 0.23

# Inferential statistics

# Print min, 1st quartile, median, mean, 3rd quartile and max for each column
summary(data)
##   Participants        Group      Dependent variable Range of group-low
##  Min.   :  1.00   Min.   :0.00   Min.   :12.00      Min.   :10.00     
##  1st Qu.: 25.75   1st Qu.:1.00   1st Qu.:15.00      1st Qu.:13.00     
##  Median : 50.50   Median :1.00   Median :16.50      Median :13.00     
##  Mean   : 50.50   Mean   :1.25   Mean   :17.11      Mean   :13.17     
##  3rd Qu.: 75.25   3rd Qu.:2.00   3rd Qu.:19.00      3rd Qu.:15.00     
##  Max.   :100.00   Max.   :2.00   Max.   :31.00      Max.   :17.00     
##  Range of group-high
##  Min.   :19.00      
##  1st Qu.:19.00      
##  Median :20.00      
##  Mean   :20.75      
##  3rd Qu.:23.00      
##  Max.   :35.00
# Overlay every column on a single index plot (value against row position),
# one colour per column, and add a legend mapping colours to column names.
par(mfrow = c(1, 1))

# Column name -> point colour, in drawing order
series_cols <- c(
  "Participants" = "lightblue",
  "Group" = "pink",
  "Dependent variable" = "lightgreen",
  "Range of group-low" = "lightyellow",
  "Range of group-high" = "lavender"
)
series_names <- names(series_cols)

# The first column opens the plot and therefore fixes the axis limits
plot(
  data[[series_names[1]]],
  col = series_cols[[1]], pch = 16,
  main = "Scatter Plot of individual observations",
  xlab = "Participants", ylab = "Value"
)

# Remaining columns are layered on top of the existing axes
for (nm in series_names[-1]) {
  points(data[[nm]], col = series_cols[[nm]], pch = 16)
}

legend(
  "topright",
  legend = series_names,
  fill = unname(series_cols),
  cex = 0.8
)

Hypothesis 1

# Split the dependent variable into one numeric vector per Group level
split_data <- split(data$`Dependent variable`, data$Group)

# Conduct a one-sample t-test for each group and print its t statistic and
# p-value. With a single sample and default arguments, t.test() tests
# H0: mean = 0, so each line says whether that group's mean differs from zero;
# it does not compare the groups with each other.
# seq_along() is used instead of 1:length() so that an empty split yields no
# iterations rather than looping over c(1, 0) and failing on split_data[[1]].
for (i in seq_along(split_data)) {
  group <- names(split_data)[i]
  results <- t.test(split_data[[i]])
  print(paste0("Group ", group, ": t = ", results$statistic, ", p-value = ", results$p.value))
}
## [1] "Group 0: t = 18.3812698474582, p-value = 1.46851000199435e-13"
## [1] "Group 1: t = 42.0159178097729, p-value = 6.84140950731061e-31"
## [1] "Group 2: t = 57.6129397797351, p-value = 4.44361152754593e-43"

Hypothesis 2

# Fit a linear regression model of `Dependent variable` on
# `Range of group-high` (one predictor plus an intercept)
model <- lm(`Dependent variable` ~ `Range of group-high`, data = data)

# Get the summary of the model: residual quantiles, coefficient table,
# residual standard error, R-squared and the overall F-test
summary(model)
## 
## Call:
## lm(formula = `Dependent variable` ~ `Range of group-high`, data = data)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -4.4522 -1.5751 -0.4522  1.6400  3.9166 
## 
## Coefficients:
##                       Estimate Std. Error t value Pr(>|t|)    
## (Intercept)            -1.0892     2.0814  -0.523    0.602    
## `Range of group-high`   0.8771     0.0997   8.797 4.91e-14 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 2.293 on 98 degrees of freedom
## Multiple R-squared:  0.4413, Adjusted R-squared:  0.4355 
## F-statistic: 77.39 on 1 and 98 DF,  p-value: 4.911e-14