DATA_Curiosity

Author

Leonardo Garcia

library(readr)
# Import the form-response export. The Timestamp column is skipped at read
# time; all other columns are kept.
Format_data <- read_csv(
  file = "20260427-DataPsychProject - Form Responses 1.csv",
  col_types = cols(Timestamp = col_skip())
)

New names:
• `What is the reason for your level of Interest?` -> `What is the reason for
  your level of Interest?...5`
• `What is the reason for your level of Interest?` -> `What is the reason for
  your level of Interest?...7`
• `What is the reason for your level of Interest?` -> `What is the reason for
  your level of Interest?...9`
• `What is the reason for your level of Interest?` -> `What is the reason for
  your level of Interest?...11`
• `What is the reason for your level of Interest?` -> `What is the reason for
  your level of Interest?...13`

# Preview the first six rows of the raw responses.
head(Format_data, n = 6L)

# A tibble: 6 × 13
  `What is your gender?` What is your age? (Whole numbe…¹ Question 1: Would yo…²
  <chr>                                             <dbl> <chr>                 
1 Man                                                  37 2)No                  
2 Man                                                  55 1)Absolutely no       
3 Woman                                                34 5) Absolutely yes     
4 Man                                                  18 4)Yes                 
5 Woman                                                19 5) Absolutely yes     
6 Man                                                  67 1)Absolutely no       
# ℹ abbreviated names: ¹`What is your age? (Whole number only)`,
#   ²`Question 1: Would you ever go sky diving?`
# ℹ 10 more variables:
#   `What is the reason for your level of  Interest?...5` <chr>,
#   `Question 2: Would you ever go hiking?` <chr>,
#   `What is the reason for your level of  Interest?...7` <chr>,
#   `Question 3: Would you ever learn to crochet?` <chr>, …

# Recode the multiple-choice answers to their numeric codes.
#
# Every answer option starts with its code followed by ")" (for example
# "2)No", "5) Absolutely yes", "6) Doing it for the thrill"). Matching each
# label verbatim is brittle: a stray space or a different apostrophe character
# makes a label fail to match, and the leftover text then becomes NA in
# as.numeric(). Parsing the leading code directly avoids that.
#
# x: one column of the response table (character labels, or already numeric).
# Returns a double vector of the same length. Text that does not start with
# "<digits>)" is handed to as.numeric() unchanged, so anything unparseable
# still produces NA together with a coercion warning instead of being hidden.
parse_choice_code <- function(x) {
  if (is.numeric(x)) {
    return(as.numeric(x))
  }
  labels <- trimws(as.character(x))
  codes <- sub("^([0-9]+)[[:space:]]*\\).*$", "\\1", labels)
  as.numeric(codes)
}

# Columns 2-13 are converted to numeric; column 1 (gender) is left as text.
Format_data[, 2:13] <- lapply(Format_data[, 2:13], parse_choice_code)

Warning in FUN(X[[i]], ...): NAs introduced by coercion
Warning in FUN(X[[i]], ...): NAs introduced by coercion
Warning in FUN(X[[i]], ...): NAs introduced by coercion
Warning in FUN(X[[i]], ...): NAs introduced by coercion

# Replace the long question texts with short column names, in column order.
# Qn is the answer to question n; QnA is the follow-up "reason for your level
# of interest" item that comes right after it.
Format_data <- setNames(
  Format_data,
  c(
    "Gender", "Age",
    "Q1", "Q1A",
    "Q2", "Q2A",
    "Q3", "Q3A",
    "Q4", "Q4A",
    "Q5", "Q5A",
    "Openness"
  )
)

# Preview the first six rows after recoding and renaming.
head(Format_data, n = 6L)

# A tibble: 6 × 13
  Gender   Age    Q1   Q1A    Q2   Q2A    Q3   Q3A    Q4   Q4A    Q5   Q5A
  <chr>  <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 Man       37     2     1     5     5     3     2     4     5     3     4
2 Man       55     1     1     1     1     5     5     2     2     1     1
3 Woman     34     5     4     5     6     4     5     4     5     4     5
4 Man       18     4     5     5     5     1     1     2     1     2     1
5 Woman     19     5     6     5     5     2     1     5     5     5     5
6 Man       67     1     1     2     2     2     1     2     2     2     1
# ℹ 1 more variable: Openness <dbl>

# Composite scores: the per-respondent mean of items Q1-Q5 (Curiosity) and of
# the follow-up items Q1A-Q5A (Openness). Missing answers are skipped.
# NOTE(review): the second assignment replaces the existing Openness column
# (the 13th survey column) -- confirm that discarding it is intended.
Format_data[["Curiosity"]] <- rowMeans(
  Format_data[, paste0("Q", 1:5)],
  na.rm = TRUE
)
Format_data[["Openness"]] <- rowMeans(
  Format_data[, paste0("Q", 1:5, "A")],
  na.rm = TRUE
)
# One-column table holding only Age.
Age <- Format_data[, "Age"]

# Draw the three distributions side by side in a 1 x 3 panel layout.
# par() returns the settings it replaces; they are restored at the end so
# that later plots are not drawn into the leftover three-panel layout.
old_par <- par(mfrow = c(1, 3))
hist(Format_data$Curiosity,
     main = "Distribution of Curiosity",
     xlab = "Curiosity Level",
     xlim = c(1, 5),
     col = blues9)
hist(Format_data$Openness,
     main = "Distribution of Openness",
     xlab = "Openness Level",
     col = blues9)
hist(Format_data$Age,
     main = "Distribution of Age",
     xlab = "Age",
     col = blues9)
par(old_par)

# Histogram of the Curiosity composite, with the x axis fixed to 1-5.
hist(
  x = Format_data[["Curiosity"]],
  main = "Distribution of Curiosity",
  xlab = "Curiosity Level",
  xlim = c(1, 5),
  col = "lightblue"
)

# Summary statistics of the Curiosity composite.
summary(object = Format_data[["Curiosity"]])

   Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
    1.8     2.7     3.3     3.3     4.1     4.4

# Standard deviation of the Curiosity composite, ignoring missing values.
sd(x = Format_data[["Curiosity"]], na.rm = TRUE)

[1] 0.9283416

# Smallest and largest Curiosity composite, ignoring missing values.
range(Format_data[["Curiosity"]], na.rm = TRUE)

[1] 1.8 4.4

# Histogram of the Openness composite.
hist(
  x = Format_data[["Openness"]],
  main = "Distribution of Openness",
  xlab = "Openness Level",
  col = blues9
)

# Summary statistics of respondent age.
summary(object = Format_data[["Age"]])

   Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
  18.00   18.00   19.00   28.75   34.75   67.00

# Standard deviation of respondent age, ignoring missing values.
sd(x = Format_data[["Age"]], na.rm = TRUE)

[1] 16.59203

# Youngest and oldest respondent age, ignoring missing values.
range(Format_data[["Age"]], na.rm = TRUE)

[1] 18 67

# Histogram of respondent age.
hist(
  x = Format_data[["Age"]],
  main = "Distribution of Age",
  xlab = "Age",
  col = blues9
)

# Summary statistics of the Openness composite.
summary(object = Format_data[["Openness"]])

   Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
   1.00    2.00    2.80    2.95    4.10    5.00

# Standard deviation of the Openness composite, ignoring missing values.
sd(x = Format_data[["Openness"]], na.rm = TRUE)

[1] 1.326992

# Smallest and largest Openness composite, ignoring missing values.
range(Format_data[["Openness"]], na.rm = TRUE)

[1] 1 5

# Simple linear regression of the Curiosity composite on age.
# Variables are named in the formula and looked up through `data =`, which
# keeps the coefficient label short ("Age") and lets predict() work with
# `newdata`. The fitted values are the same as with Format_data$... terms.
mod1 <- lm(Curiosity ~ Age, data = Format_data)
summary(mod1)


Call:
lm(formula = Format_data$Curiosity ~ Format_data$Age)

Residuals:
     Min       1Q   Median       3Q      Max 
-1.25748 -0.47638 -0.04276  0.47470  1.27458 

Coefficients:
                Estimate Std. Error t value Pr(>|t|)    
(Intercept)      4.25604    0.46739   9.106 3.72e-06 ***
Format_data$Age -0.03325    0.01423  -2.337   0.0416 *  
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 0.783 on 10 degrees of freedom
Multiple R-squared:  0.3532,    Adjusted R-squared:  0.2886 
F-statistic: 5.461 on 1 and 10 DF,  p-value: 0.04155

# Simple linear regression of the Curiosity composite on the Openness
# composite.
# Variables are named in the formula and looked up through `data =`, which
# keeps the coefficient label short ("Openness") and lets predict() work with
# `newdata`. The fitted values are the same as with Format_data$... terms.
mod2 <- lm(Curiosity ~ Openness, data = Format_data)
summary(mod2)


Call:
lm(formula = Format_data$Curiosity ~ Format_data$Openness)

Residuals:
    Min      1Q  Median      3Q     Max 
-0.7929 -0.3432 -0.1004  0.2459  1.2071 

Coefficients:
                     Estimate Std. Error t value Pr(>|t|)   
(Intercept)            1.7252     0.4593   3.756  0.00374 **
Format_data$Openness   0.5338     0.1430   3.733  0.00389 **
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 0.6293 on 10 degrees of freedom
Multiple R-squared:  0.5822,    Adjusted R-squared:  0.5405 
F-statistic: 13.94 on 1 and 10 DF,  p-value: 0.003889

# Multiple regression of the Curiosity composite on age and the Openness
# composite together.
# Variables are named in the formula and looked up through `data =`, which
# keeps the coefficient labels short ("Age", "Openness") and lets predict()
# work with `newdata`. The fitted values are the same as with
# Format_data$... terms.
mod3 <- lm(Curiosity ~ Age + Openness, data = Format_data)
summary(mod3)


Call:
lm(formula = Format_data$Curiosity ~ Format_data$Age + Format_data$Openness)

Residuals:
     Min       1Q   Median       3Q      Max 
-0.71145 -0.35784  0.05696  0.25264  0.88855 

Coefficients:
                     Estimate Std. Error t value Pr(>|t|)   
(Intercept)           2.61271    0.57615   4.535  0.00142 **
Format_data$Age      -0.02198    0.01038  -2.118  0.06323 . 
Format_data$Openness  0.44718    0.12974   3.447  0.00731 **
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 0.5419 on 9 degrees of freedom
Multiple R-squared:  0.7212,    Adjusted R-squared:  0.6593 
F-statistic: 11.64 on 2 and 9 DF,  p-value: 0.003189

# Scatter plot of the Curiosity composite against age, overlaid with the
# fitted line from mod1.
plot(
  Curiosity ~ Age,
  data = Format_data,
  main = "Curiosity vs Age",
  xlab = "Age",
  ylab = "Curiosity",
  pch = 19,
  col = "blue"
)

abline(reg = mod1, col = "red", lwd = 2)

# Scatter plot of the Curiosity composite against the Openness composite,
# overlaid with the fitted line from mod2.
plot(
  Curiosity ~ Openness,
  data = Format_data,
  main = "Curiosity vs Openness",
  xlab = "Openness",
  ylab = "Curiosity",
  pch = 19,
  col = "green"
)

abline(reg = mod2, col = "red", lwd = 2)

# Reliability analysis (psych's alpha) of items Q1-Q5, the items averaged
# into the Curiosity composite.
# NOTE(review): the output below flags Q3 as negatively correlated with the
# first principal component -- decide whether Q3 should be reverse-scored.
library(psych)
alpha(Format_data[, c("Q1","Q2","Q3","Q4","Q5")])

Warning in alpha(Format_data[, c("Q1", "Q2", "Q3", "Q4", "Q5")]): Some items were negatively correlated with the first principal component and probably 
should be reversed.  
To do this, run the function again with the 'check.keys=TRUE' option

Some items ( Q3 ) were negatively correlated with the first principal component and 
probably should be reversed.  
To do this, run the function again with the 'check.keys=TRUE' option


Reliability analysis   
Call: alpha(x = Format_data[, c("Q1", "Q2", "Q3", "Q4", "Q5")])

  raw_alpha std.alpha G6(smc) average_r S/N  ase mean   sd median_r
      0.69       0.7    0.82      0.32 2.4 0.14  3.3 0.93     0.51

    95% confidence boundaries 
         lower alpha upper
Feldt     0.28  0.69  0.90
Duhachek  0.41  0.69  0.97

 Reliability if an item is dropped:
   raw_alpha std.alpha G6(smc) average_r  S/N alpha se  var.r med.r
Q1      0.59      0.62    0.75      0.29 1.61    0.201 0.1322  0.40
Q2      0.59      0.61    0.78      0.28 1.59    0.202 0.1926  0.41
Q3      0.85      0.86    0.84      0.60 5.91    0.068 0.0074  0.59
Q4      0.51      0.50    0.64      0.20 0.99    0.223 0.2156  0.18
Q5      0.53      0.57    0.72      0.25 1.32    0.227 0.1645  0.41

 Item statistics 
    n raw.r std.r r.cor r.drop mean  sd
Q1 12  0.77  0.74  0.70   0.55  3.2 1.6
Q2 12  0.75  0.75  0.65   0.57  4.2 1.3
Q3 12  0.17  0.19  0.04  -0.14  3.2 1.4
Q4 12  0.88  0.90  0.89   0.80  3.3 1.2
Q5 12  0.82  0.81  0.78   0.66  2.7 1.4

Non missing response frequency for each item
      1    2    3    4    5 miss
Q1 0.25 0.08 0.17 0.25 0.25    0
Q2 0.08 0.08 0.00 0.25 0.58    0
Q3 0.08 0.33 0.17 0.17 0.25    0
Q4 0.00 0.33 0.17 0.33 0.17    0
Q5 0.25 0.25 0.25 0.08 0.17    0

# Reliability analysis of the follow-up items Q1A-Q5A, the items averaged
# into the Openness composite.
alpha(Format_data[, c("Q1A","Q2A","Q3A","Q4A","Q5A")])


Reliability analysis   
Call: alpha(x = Format_data[, c("Q1A", "Q2A", "Q3A", "Q4A", "Q5A")])

  raw_alpha std.alpha G6(smc) average_r S/N  ase mean  sd median_r
      0.72      0.73    0.76      0.35 2.7 0.13    3 1.3     0.41

    95% confidence boundaries 
         lower alpha upper
Feldt     0.36  0.72  0.91
Duhachek  0.47  0.72  0.97

 Reliability if an item is dropped:
    raw_alpha std.alpha G6(smc) average_r S/N alpha se  var.r med.r
Q1A      0.70      0.70    0.72      0.37 2.3    0.144 0.0587  0.45
Q2A      0.63      0.64    0.68      0.31 1.8    0.175 0.0679  0.39
Q3A      0.80      0.80    0.77      0.50 4.0    0.098 0.0099  0.50
Q4A      0.58      0.58    0.59      0.26 1.4    0.198 0.0757  0.25
Q5A      0.62      0.63    0.66      0.30 1.7    0.182 0.0683  0.40

 Item statistics 
     n raw.r std.r r.cor r.drop mean  sd
Q1A 10  0.68  0.65  0.54   0.43  3.1 2.1
Q2A 10  0.72  0.76  0.68   0.59  4.1 1.7
Q3A 12  0.51  0.43  0.24   0.14  2.8 1.9
Q4A 10  0.82  0.85  0.83   0.70  3.3 1.9
Q5A 11  0.77  0.77  0.72   0.62  2.4 1.7

Non missing response frequency for each item
       1    2    4    5   6 miss
Q1A 0.40 0.10 0.10 0.30 0.1 0.17
Q2A 0.10 0.20 0.00 0.60 0.1 0.17
Q3A 0.42 0.17 0.08 0.33 0.0 0.00
Q4A 0.20 0.30 0.10 0.30 0.1 0.17
Q5A 0.55 0.09 0.18 0.18 0.0 0.08