The dataset provided for analysis originates from a pain-induction experiment conducted on a sample of participants. The participants were subjected to various psychological and pain-related assessments, alongside tests to measure their pain sensitivity using both thermal and pressure-based stimuli. The dataset consists of responses to these assessments and tests, with labels corresponding to the variables available in the SPSS data file.
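# Loading necessary libraries (NOTE(review): the ggplot() calls further down need ggplot2, which is not in this list - confirm it is attached elsewhere)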
#library(foreign)
#library(DataExplorer)
#library(dplyr)
#library(cowplot)
# Reading the SPSS data
#data <- read.spss("C:/Users/Amit/Downloads/data_intro_R_testL (1).sav", to.data.frame = TRUE)
# Making a copy of the original data set
#data_modified1 <- data
# Replacing "לא ידוע" with NA under 'maritalstatus' column in the copy
#data_modified1$maritalstatus <- ifelse(grepl("לא ידוע", data$maritalstatus), NA, data$maritalstatus)
# Replacing "אחר" with NA under 'religion' column in the copy
#data_modified1$religion <- ifelse(grepl("אחר", data$religion), NA, data$religion)
# Removing extra spaces from values
#data_modified1 <- data_modified1 %>%
# mutate(
# maritalstatus = trimws(maritalstatus),
# origin = trimws(origin),
# religion = trimws(religion)
# )
# Recoding values in the maritalstatus column
#data_modified1 <- data_modified1 %>%
# mutate(maritalstatus = case_when(
# maritalstatus == 'רווק' ~ "single",
# maritalstatus == 'נשוי' ~ "married",
# maritalstatus == 'גרוש' ~ "divorced",
# TRUE ~ maritalstatus # Keep NA values unchanged
# ))
# Recoding values in the origin column
#data_modified1 <- data_modified1 %>%
# mutate(origin = case_when(
# origin == 'ישראל' ~ "israel",
# origin == 'רוסיה' ~ "russia",
# origin == 'אתיופיה' ~ "ethiopia",
# TRUE ~ origin # Keep NA values unchanged
# ))
# Recoding values in the religion column
#data_modified1 <- data_modified1 %>%
# mutate(religion = case_when(
# religion == 'מוסלמי' ~ "muslem",
# religion == 'יהודי' ~ "jewish",
# religion == 'נוצרי' ~ "christian",
# religion == 'דרוזי' ~ "druze",
# TRUE ~ religion # Keep NA values unchanged
# ))
# Removing rows with any NA value within a column
#data_modified1 <- na.omit(data_modified1)
# DataExplorer overview of the cleaned sample
plot_intro(data = data_modified1)
# Structure of the cleaned data set
plot_str(data = data_modified1)
# Histograms of the numeric variables
plot_histogram(data = data_modified1)
# Bar plots of the categorical variables
# NOTE(review): this call reads data_subset, which is not created in this
# section, while every other overview plot reads data_modified1 - confirm.
plot_bar(data = data_subset)
# Correlation plot
plot_correlation(data = data_modified1)
# Box plots split by gender
plot_boxplot(data = data_modified1, by = "gendermale1")
# Age: quartiles and mean
summary(data_modified1[["age"]])
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 18.50 23.00 24.00 28.20 29.75 67.00
# Number of participants per gender
table(data_modified1[["gendermale1"]])
##
## male famale
## 54 56
# Years of education: quartiles and mean
summary(data_modified1[["educationyear"]])
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 3.00 14.00 15.00 14.76 16.00 21.00
# Number of participants per marital status
table(data_modified1[["maritalstatus"]])
##
## divorced married single
## 3 27 80
# Number of participants per origin
table(data_modified1[["origin"]])
##
## ethiopia israel russia
## 6 90 14
# Number of participants per religion
table(data_modified1[["religion"]])
##
## christian druze jewish muslem
## 6 4 39 61
# Share (in percent) of participants whose pain3month value equals 1
100 * mean(data_modified1[["pain3month"]] == 1)
## [1] 36.36364
# Pain60 measurements: thermal first, then pressure
summary(data_modified1[["pain60heat"]])
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 42.00 45.00 46.25 46.07 47.50 49.00
summary(data_modified1[["pain60press"]])
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 200.0 400.0 550.0 495.7 600.0 700.0
# painweek is stored as character, so summary() only reports its length,
# class and mode rather than numeric statistics (see the output below)
summary(data_modified1[["painweek"]])
## Length Class Mode
## 110 character character
# Questionnaire sum scores
summary(data_modified1[["PCSsum"]])
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.00 11.00 20.00 20.71 29.75 53.00
summary(data_modified1[["BSIsum"]])
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.0 4.0 6.0 7.2 10.0 27.0
summary(data_modified1[["GHQ"]])
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 9.00 17.25 20.00 21.25 26.00 37.00
summary(data_modified1[["SEsum"]])
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 10.00 29.00 32.00 31.85 35.75 40.00
# Scatter plot of Frequency against Pain_Level from pain_level_df,
# with a centred title
ggplot(data = pain_level_df, mapping = aes(Pain_Level, Frequency)) +
  geom_point(size = 5, color = "skyblue") +
  ggtitle("Distribution of Pain60 (Thermal) for Women with Pain for at Least 3 Months") +
  xlab("Pain Level") +
  ylab("Frequency") +
  theme_minimal() +
  theme(plot.title = element_text(hjust = 0.5))
# Draw a tile heatmap of a melted correlation matrix.
#
# melted_cor: data frame with the columns Var1, Var2 and value that the
#   aes() mapping below reads (x, y and fill respectively).
# Returns the ggplot object; calling the function at top level prints it.
plot_pearson_heatmap <- function(melted_cor) {
  ggplot(data = melted_cor, aes(x = Var1, y = Var2, fill = value)) +
    geom_tile(color = "white") +
    scale_fill_gradient2(
      low = "blue", mid = "white", high = "red",
      midpoint = 0,
      # Spelled out in full: the previous `limit =` only reached the scale's
      # `limits` argument through R's partial argument matching.
      limits = c(-1, 1),
      space = "Lab",
      name = "Pearson\nCorrelation"
    ) +
    theme_minimal() +
    # Rotate the x labels so long variable names do not overlap
    theme(axis.text.x = element_text(angle = 45, vjust = 1, size = 10, hjust = 1)) +
    # Square tiles
    coord_fixed()
}

# Plot heatmap
plot_pearson_heatmap(melted_correlation)
# Plot heatmap
plot_pearson_heatmap(melted_correlation_2)
# Printing the comparison results
# Each cat() call prints one label followed by one precomputed statistic; the
# `##` line after each call is the recorded console output.
# The mean_*, sd_* and median_* objects are not created in this section.
cat("Comparison of summary statistics:\n")
## Comparison of summary statistics:
# Means
cat("Mean Testpainheat:", mean_Testpainheat, "\n")
## Mean Testpainheat: 47.02121
cat("Mean tonicpres:", mean_tonicpres, "\n")
## Mean tonicpres: 47.15152
# Standard deviations
cat("Standard deviation Testpainheat:", sd_Testpainheat, "\n")
## Standard deviation Testpainheat: 15.98612
cat("Standard deviation tonicpres:", sd_tonicpres, "\n")
## Standard deviation tonicpres: 21.21996
# Medians
cat("Median Testpainheat:", median_Testpainheat, "\n")
## Median Testpainheat: 49.83333
cat("Median tonicpres:", median_tonicpres, "\n")
## Median tonicpres: 50
# Side-by-side boxplots of summary_Testpainheat and summary_tonicpres
boxplot(
  summary_Testpainheat,
  summary_tonicpres,
  names = c("Testpainheat2/9/19", "tonicpres2/9/19"),
  main = "Comparison of Testpainheat2/9/19 and tonicpres2/9/19",
  ylab = "Mean Value",
  col = c("lightblue", "lightgreen")
)
# Mark each group's mean with a filled red point on top of its box
points(
  x = c(1, 2),
  y = c(mean_Testpainheat, mean_tonicpres),
  pch = 19,
  col = "red"
)
# Legend explaining the red point
legend(x = "topright", legend = "Mean", pch = 19, col = "red")
# Scatter plot of the two Pain60 measurements, coloured by gender, restricted
# to rows whose pain60heat is at most 100.
# which() keeps only the rows where the comparison is TRUE. Indexing with the
# bare logical vector would turn every NA in pain60heat into an all-NA row,
# which ggplot would then have to drop and which can add an "NA" colour level.
ggplot(
  data[which(data$pain60heat <= 100), ],
  aes(x = pain60heat, y = pain60press, color = gendermale1)
) +
  geom_point() +
  labs(x = "Pain60 (Thermal)", y = "Pain60 (Pressure)", color = "Gender") +
  ggtitle("Association between Pain60 Measurements by Gender") +
  theme_minimal() +
  # Fix the visible x range without dropping any further points
  coord_cartesian(xlim = c(0, 100))
#Q6.1
# Violin plot of pain60heat per gender, drawn from cleaned_data
ggplot(
  data = cleaned_data,
  mapping = aes(gendermale1, pain60heat, fill = gendermale1)
) +
  geom_violin() +
  xlab("Gender") +
  ylab("Pain60 Heat Measurement") +
  labs(fill = "Gender") +
  theme_minimal()
#Q6.2
# Faceted plot: one panel per pain60press value, raw pain60heat points per
# gender plus the group mean (white diamond) and a mean_cl_normal error bar
# NOTE(review): the x-axis label describes a 1/0 coding - confirm it matches
# how gendermale1 is stored in `data`.
ggplot(
  data,
  aes(x = gendermale1, y = pain60heat, color = factor(pain60press))
) +
  geom_point() +
  stat_summary(
    geom = "point", fun = mean,
    shape = 23, size = 4, fill = "white"
  ) +
  stat_summary(
    geom = "errorbar", fun.data = "mean_cl_normal",
    width = 0.2
  ) +
  facet_wrap(facets = ~ pain60press, scales = "free") +
  xlab("Gender (1 = Male, 0 = Female)") +
  ylab("Pain60 Heat") +
  labs(color = "Pain60 Press")
# Performing Tukey's HSD test for Pain60 Heat
# Fits a one-way ANOVA of pain60heat on gendermale1 and passes it to TukeyHSD().
# The columns are written as data$..., so the printed result labels the factor
# as `data$gendermale1` (visible in the recorded output below).
# NOTE(review): this reads `data`, not data_modified1 - confirm that is intended.
tukey_heat <- TukeyHSD(aov(data$pain60heat ~ data$gendermale1))
print(tukey_heat)
## Tukey multiple comparisons of means
## 95% family-wise confidence level
##
## Fit: aov(formula = data$pain60heat ~ data$gendermale1)
##
## $`data$gendermale1`
## diff lwr upr p adj
## famale-male 5.196162 -6.642163 17.03449 0.3869004
# Performing Tukey's HSD test for Pain60 Press
# Fits a one-way ANOVA of pain60press on gendermale1 and passes it to TukeyHSD().
# The columns are written as data$..., so the printed result labels the factor
# as `data$gendermale1` (visible in the recorded output below).
# NOTE(review): this reads `data`, not data_modified1 - confirm that is intended.
tukey_press <- TukeyHSD(aov(data$pain60press ~ data$gendermale1))
print(tukey_press)
## Tukey multiple comparisons of means
## 95% family-wise confidence level
##
## Fit: aov(formula = data$pain60press ~ data$gendermale1)
##
## $`data$gendermale1`
## diff lwr upr p adj
## famale-male -98.01173 -135.3803 -60.64314 8e-07
#Q6.3
# Summary of the model
# logit_model is fitted outside this section. Per the Call line in the output
# below it is glm(pain3month ~ ., family = binomial, data = data_modified1),
# so every other column - including ID and painweek - enters as a predictor.
# NOTE(review): the standard errors in the thousands and the 18 Fisher scoring
# iterations in the output suggest (quasi-)separation - interpret with care.
summary(logit_model)
##
## Call:
## glm(formula = pain3month ~ ., family = binomial, data = data_modified1)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -4.775e+01 1.121e+04 -0.004 0.99660
## ID 1.231e-02 6.364e-03 1.935 0.05301 .
## age -4.417e-02 6.545e-02 -0.675 0.49974
## gendermale1famale 1.258e+00 1.360e+00 0.925 0.35504
## educationyear -1.824e-01 1.728e-01 -1.056 0.29119
## maritalstatusmarried 1.138e+01 3.147e+03 0.004 0.99712
## maritalstatussingle 8.031e+00 3.147e+03 0.003 0.99796
## originisrael -2.044e+00 2.576e+00 -0.794 0.42746
## originrussia -5.104e+00 3.295e+00 -1.549 0.12141
## religiondruze -5.893e+00 5.054e+00 -1.166 0.24359
## religionjewish 4.718e-01 2.218e+00 0.213 0.83153
## religionmuslem -4.004e+00 2.743e+00 -1.460 0.14436
## arab1לא יהודי 2.668e+00 1.555e+00 1.716 0.08612 .
## work1 -2.154e+00 1.354e+00 -1.591 0.11159
## painweek0 1.221e+01 1.075e+04 0.001 0.99909
## painweek10 -9.856e+00 1.258e+04 -0.001 0.99938
## painweek18 3.834e+01 1.521e+04 0.003 0.99799
## painweek2 3.398e+01 1.521e+04 0.002 0.99822
## painweek20 1.807e+01 1.075e+04 0.002 0.99866
## painweek25 2.821e+01 1.521e+04 0.002 0.99852
## painweek30 1.259e+01 1.075e+04 0.001 0.99907
## painweek35 3.307e+01 1.521e+04 0.002 0.99827
## painweek40 3.903e+01 1.521e+04 0.003 0.99795
## painweek45 3.514e+01 1.521e+04 0.002 0.99816
## painweek48 3.300e+01 1.521e+04 0.002 0.99827
## painweek50 1.470e+01 1.075e+04 0.001 0.99891
## painweek60 4.671e+01 1.169e+04 0.004 0.99681
## painweek70 1.347e+01 1.075e+04 0.001 0.99900
## painweek75 2.434e+01 1.521e+04 0.002 0.99872
## painweekלא -8.354e+00 1.259e+04 -0.001 0.99947
## pain60heat 5.873e-01 3.818e-01 1.538 0.12402
## pain60press -2.065e-03 7.769e-03 -0.266 0.79038
## Testpainheat2 1.099e-01 5.116e-02 2.149 0.03164 *
## Testpainheat9 -2.062e-02 3.966e-02 -0.520 0.60318
## Testpainheat19 -8.987e-02 5.514e-02 -1.630 0.10310
## tonicpres2 -4.655e-02 4.187e-02 -1.112 0.26620
## tonicpres9 4.208e-02 5.852e-02 0.719 0.47205
## tonicpres19 -2.306e-02 4.182e-02 -0.552 0.58126
## PCSsum -1.517e-02 4.524e-02 -0.335 0.73730
## BSIsum 3.654e-01 1.396e-01 2.617 0.00886 **
## GHQ -5.092e-03 6.646e-02 -0.077 0.93893
## SEsum -5.423e-02 1.090e-01 -0.498 0.61882
## PSQsum 3.650e-02 2.280e-02 1.600 0.10949
## PSQIsum -9.366e-02 1.613e-01 -0.581 0.56142
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 144.206 on 109 degrees of freedom
## Residual deviance: 57.527 on 66 degrees of freedom
## AIC: 145.53
##
## Number of Fisher Scoring iterations: 18
# pain3month against BSIsum: raw points plus a fitted binomial-GLM curve
# (no confidence band)
ggplot(data = data, mapping = aes(BSIsum, pain3month)) +
  geom_point() +
  geom_smooth(
    method = "glm",
    method.args = list(family = "binomial"),
    se = FALSE
  ) +
  labs(
    title = "Relationship between BSI Score and Chronic Pain",
    x = "Brief Symptom Inventory Score",
    y = "Chronic Pain (1 = Yes, 0 = No)"
  ) +
  theme_minimal()
## `geom_smooth()` using formula = 'y ~ x'
## Warning: Removed 2 rows containing non-finite values (`stat_smooth()`).
## Warning: Removed 2 rows containing missing values (`geom_point()`).
#Q6.4
# Interaction plot: mean pain60heat per gender, one trace per pain3month level.
# The summary function drops missing values: a bare `mean` returns NA for any
# gender x chronic-pain cell containing a missing pain60heat value, and that
# cell would then be missing from the plot.
interaction.plot(
  x.factor = data$gendermale1,
  trace.factor = data$pain3month,
  response = data$pain60heat,
  fun = function(x) mean(x, na.rm = TRUE),
  type = "b",
  legend = TRUE,
  xlab = "Gender",
  ylab = "Mean Pain60 Heat",
  trace.label = "Chronic Pain"
)
# Model summary printed again (same logit_model object as under Q6.3); the
# interpretation comments after the output refer to this coefficient table
summary(logit_model)
##
## Call:
## glm(formula = pain3month ~ ., family = binomial, data = data_modified1)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -4.775e+01 1.121e+04 -0.004 0.99660
## ID 1.231e-02 6.364e-03 1.935 0.05301 .
## age -4.417e-02 6.545e-02 -0.675 0.49974
## gendermale1famale 1.258e+00 1.360e+00 0.925 0.35504
## educationyear -1.824e-01 1.728e-01 -1.056 0.29119
## maritalstatusmarried 1.138e+01 3.147e+03 0.004 0.99712
## maritalstatussingle 8.031e+00 3.147e+03 0.003 0.99796
## originisrael -2.044e+00 2.576e+00 -0.794 0.42746
## originrussia -5.104e+00 3.295e+00 -1.549 0.12141
## religiondruze -5.893e+00 5.054e+00 -1.166 0.24359
## religionjewish 4.718e-01 2.218e+00 0.213 0.83153
## religionmuslem -4.004e+00 2.743e+00 -1.460 0.14436
## arab1לא יהודי 2.668e+00 1.555e+00 1.716 0.08612 .
## work1 -2.154e+00 1.354e+00 -1.591 0.11159
## painweek0 1.221e+01 1.075e+04 0.001 0.99909
## painweek10 -9.856e+00 1.258e+04 -0.001 0.99938
## painweek18 3.834e+01 1.521e+04 0.003 0.99799
## painweek2 3.398e+01 1.521e+04 0.002 0.99822
## painweek20 1.807e+01 1.075e+04 0.002 0.99866
## painweek25 2.821e+01 1.521e+04 0.002 0.99852
## painweek30 1.259e+01 1.075e+04 0.001 0.99907
## painweek35 3.307e+01 1.521e+04 0.002 0.99827
## painweek40 3.903e+01 1.521e+04 0.003 0.99795
## painweek45 3.514e+01 1.521e+04 0.002 0.99816
## painweek48 3.300e+01 1.521e+04 0.002 0.99827
## painweek50 1.470e+01 1.075e+04 0.001 0.99891
## painweek60 4.671e+01 1.169e+04 0.004 0.99681
## painweek70 1.347e+01 1.075e+04 0.001 0.99900
## painweek75 2.434e+01 1.521e+04 0.002 0.99872
## painweekלא -8.354e+00 1.259e+04 -0.001 0.99947
## pain60heat 5.873e-01 3.818e-01 1.538 0.12402
## pain60press -2.065e-03 7.769e-03 -0.266 0.79038
## Testpainheat2 1.099e-01 5.116e-02 2.149 0.03164 *
## Testpainheat9 -2.062e-02 3.966e-02 -0.520 0.60318
## Testpainheat19 -8.987e-02 5.514e-02 -1.630 0.10310
## tonicpres2 -4.655e-02 4.187e-02 -1.112 0.26620
## tonicpres9 4.208e-02 5.852e-02 0.719 0.47205
## tonicpres19 -2.306e-02 4.182e-02 -0.552 0.58126
## PCSsum -1.517e-02 4.524e-02 -0.335 0.73730
## BSIsum 3.654e-01 1.396e-01 2.617 0.00886 **
## GHQ -5.092e-03 6.646e-02 -0.077 0.93893
## SEsum -5.423e-02 1.090e-01 -0.498 0.61882
## PSQsum 3.650e-02 2.280e-02 1.600 0.10949
## PSQIsum -9.366e-02 1.613e-01 -0.581 0.56142
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 144.206 on 109 degrees of freedom
## Residual deviance: 57.527 on 66 degrees of freedom
## AIC: 145.53
##
## Number of Fisher Scoring iterations: 18
# Intercept: The intercept represents the log odds of chronic pain when all predictor variables are set to zero. In this case, it is not meaningful on its own but provides a baseline for comparison.
# ID: The coefficient for ID is not statistically significant at the 0.05 level (p = 0.053), suggesting that ID does not have a significant impact on the likelihood of chronic pain.
# Age: The coefficient for age is not statistically significant (p = 0.500), indicating that age does not have a significant association with chronic pain.
# Gender (Female): The coefficient for gender (Female) is not statistically significant (p = 0.355), suggesting that gender may not have a significant impact on the likelihood of chronic pain compared to males.
# Education Year: The coefficient for education year is not statistically significant (p = 0.291), indicating that education year may not have a significant association with chronic pain.
# Marital Status: The coefficients for different marital statuses are not statistically significant, suggesting that marital status may not have a significant impact on chronic pain.
# Origin: The coefficients for different origins (Israel and Russia) are not statistically significant, indicating that origin may not be associated with chronic pain.
# Religion: The coefficients for different religions are not statistically significant, suggesting that religion may not have a significant impact on chronic pain.
# Arab (Non-Jewish): The coefficient for Arab (Non-Jewish) is not statistically significant at the 0.05 level (p = 0.086), indicating that Arab ethnicity may not have a significant association with chronic pain compared to Jewish ethnicity.
# Work: The coefficient for work is not statistically significant (p = 0.112), suggesting that employment status may not have a significant impact on chronic pain.
# Pain Week: The coefficients for different pain week categories are not statistically significant, indicating that pain level during the week may not have a significant association with chronic pain.
# Pain 60 Heat: The coefficient for Pain 60 Heat is not statistically significant (p = 0.124), suggesting that pain level corresponding to thermal stimuli may not have a significant association with chronic pain.
# Pain 60 Press: The coefficient for Pain 60 Press is not statistically significant (p = 0.790), indicating that pain level corresponding to pressure-related stimuli may not have a significant association with chronic pain.
# Test Pain Heat 2: The coefficient for Test Pain Heat 2 is positive and statistically significant (p = 0.032), suggesting that higher values are associated with increased odds of chronic pain, although further investigation may be needed.
# Test Pain Heat 9 and 19: The coefficients for Test Pain Heat 9 and 19 are not statistically significant, indicating that pain level during specific tests may not be associated with chronic pain.
# Tonic Pres 2, 9, and 19: The coefficients for Tonic Pres 2, 9, and 19 are not statistically significant, suggesting that pain level during tonic pressure tests may not have a significant association with chronic pain.
# PCSsum: The coefficient for PCSsum is not statistically significant (p = 0.737), indicating that pain catastrophizing score may not have a significant association with chronic pain.
# BSIsum: The coefficient for BSIsum is statistically significant (p = 0.009), suggesting that higher scores on the Brief Symptom Inventory (BSI) are associated with increased odds of chronic pain.
# GHQ: The coefficient for GHQ is not statistically significant (p = 0.939), indicating that General Health Questionnaire score may not have a significant association with chronic pain.
# SEsum: The coefficient for SEsum is not statistically significant (p = 0.619), suggesting that self-efficacy score may not have a significant association with chronic pain.
# PSQsum and PSQIsum: The coefficients for PSQsum and PSQIsum are not statistically significant, indicating that sleep quality and sleep disturbance may not have a significant association with chronic pain.
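# Overall, the model identifies two statistically significant predictors of chronic pain at the 0.05 level: BSIsum (Brief Symptom Inventory score, p = 0.009) and Testpainheat2 (p = 0.032), both with positive coefficients, i.e. higher values are associated with increased odds of chronic pain. The other variables in the model do not show a significant association with chronic pain in these data. The model estimates 44 coefficients from 110 observations and reports very large standard errors for the marital-status and pain-week terms, so these results should be read with caution.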
# Base-graphics heatmap of correlation_matrix, treated as symmetric and drawn
# without row or column dendrograms (Rowv/Colv set to NA)
heatmap(
  x = correlation_matrix,
  Rowv = NA,
  Colv = NA,
  symm = TRUE,
  main = "Correlation Matrix Heatmap",
  margins = c(5, 5)
)
The dataset analyzed in this study originates from a pain-induction experiment involving participants who underwent various psychological and pain-related assessments. The sample consisted of individuals who completed questionnaires and underwent pain sensitivity testing using thermal and pressure-based stimuli. The demographic characteristics of the sample included participants of different genders, marital statuses, origins, religions, and employment statuses. Women made up a slight majority of the analyzed sample (56 of 110); most participants were single (80) and of Israeli origin (90), and Muslim (61) and Jewish (39) participants formed the largest religious groups. The sample exhibited a range of educational backgrounds (3 to 21 years of education, median 15), and roughly 36% of participants reported pain lasting at least three months.
Understanding pain perception and its correlates is crucial for developing effective pain management strategies. The findings of this study contribute to the existing literature by elucidating the complex interplay between psychological factors and pain sensitivity. Future research could delve deeper into the mechanisms underlying these associations, potentially informing personalized interventions for chronic pain management.