#load library

library(readr)
library(tidyverse)

## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.0 ──

## ✓ ggplot2 3.3.2     ✓ dplyr   1.0.2
## ✓ tibble  3.0.4     ✓ stringr 1.4.0
## ✓ tidyr   1.1.2     ✓ forcats 0.5.0
## ✓ purrr   0.3.4

## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()

#print the current working directory for reference
getwd()

## [1] "/Users/krise/R Mac working folder/Controllable Meta-mindset data"

# Load the raw Qualtrics export.
# Every column is read as character on purpose: the first two data rows hold
# the question text and the ImportId metadata, so no column can be parsed as a
# number until those rows are removed. Declaring the column types explicitly
# keeps the import independent of readr's type guessing and silences the
# column-specification message.

d.study <- read_csv(
  "~/R Mac working folder/Controllable Meta-mindset data/raw data files - controllable meta-mindset/CMM_pilot_study_1_December 8, 2020_11.16_raw.csv",
  col_types = cols(.default = col_character())
)

## 
## ── Column specification ────────────────────────────────────────────────────────
## cols(
##   .default = col_character()
## )
## ℹ Use `spec()` for the full column specifications.

#inspect the raw import: rows 1 and 2 hold the question text and the Qualtrics
#ImportId metadata rather than responses (both rows are dropped further down)

glimpse(d.study)

## Rows: 51
## Columns: 45
## $ StartDate               <chr> "Start Date", "{\"ImportId\":\"startDate\",\"…
## $ EndDate                 <chr> "End Date", "{\"ImportId\":\"endDate\",\"time…
## $ Status                  <chr> "Response Type", "{\"ImportId\":\"status\"}",…
## $ IPAddress               <chr> "IP Address", "{\"ImportId\":\"ipAddress\"}",…
## $ Progress                <chr> "Progress", "{\"ImportId\":\"progress\"}", "1…
## $ `Duration (in seconds)` <chr> "Duration (in seconds)", "{\"ImportId\":\"dur…
## $ Finished                <chr> "Finished", "{\"ImportId\":\"finished\"}", "1…
## $ RecordedDate            <chr> "Recorded Date", "{\"ImportId\":\"recordedDat…
## $ ResponseId              <chr> "Response ID", "{\"ImportId\":\"_recordId\"}"…
## $ RecipientLastName       <chr> "Recipient Last Name", "{\"ImportId\":\"recip…
## $ RecipientFirstName      <chr> "Recipient First Name", "{\"ImportId\":\"reci…
## $ RecipientEmail          <chr> "Recipient Email", "{\"ImportId\":\"recipient…
## $ ExternalReference       <chr> "External Data Reference", "{\"ImportId\":\"e…
## $ LocationLatitude        <chr> "Location Latitude", "{\"ImportId\":\"locatio…
## $ LocationLongitude       <chr> "Location Longitude", "{\"ImportId\":\"locati…
## $ DistributionChannel     <chr> "Distribution Channel", "{\"ImportId\":\"dist…
## $ UserLanguage            <chr> "User Language", "{\"ImportId\":\"userLanguag…
## $ cons                    <chr> "DESCRIPTION: The purpose of this research st…
## $ cmm_L_1                 <chr> "The statements below describe your beliefs a…
## $ cmm_L_2                 <chr> "The statements below describe your beliefs a…
## $ cmm_L_3                 <chr> "The statements below describe your beliefs a…
## $ cmm_L_4                 <chr> "The statements below describe your beliefs a…
## $ cmm_s_1                 <chr> "The statements below describe your beliefs a…
## $ cmm_s_2                 <chr> "The statements below describe your beliefs a…
## $ cmm_s_3                 <chr> "The statements below describe your beliefs a…
## $ cmm_s_4                 <chr> "The statements below describe your beliefs a…
## $ qq_1                    <chr> "In your own words, what is a mindset?", "{\"…
## $ qq_2                    <chr> "Can you give an example of some of the minds…
## $ qq_3                    <chr> "Can you give an example of a time when a min…
## $ qq_4                    <chr> "Describe a time that you did or didn’t chang…
## $ qq_5                    <chr> "If you wanted to change your mindset, what m…
## $ qq_6                    <chr> "Please tell us some other mindsets you think…
## $ dem_age                 <chr> "What is your age? (number only)", "{\"Import…
## $ dem_race                <chr> "Which race/ethnicity best describes you? Sel…
## $ dem_race_6_TEXT         <chr> "Which race/ethnicity best describes you? Sel…
## $ dem_gender              <chr> "What is your gender? - Selected Choice", "{\…
## $ dem_gender_5_TEXT       <chr> "What is your gender? - Other (please specify…
## $ dem_inc                 <chr> "What is your approximate combined annual hou…
## $ dem_ed                  <chr> "What is the highest education level you have…
## $ dem_emp                 <chr> "Employment\nStatus: Are you currently…?", "{…
## $ debrief_else            <chr> "Is there anything else you want to tell us?"…
## $ Q_TotalDuration         <chr> "Q_TotalDuration", "{\"ImportId\":\"Q_TotalDu…
## $ workerId                <chr> "workerId", "{\"ImportId\":\"workerId\"}", NA…
## $ assignmentId            <chr> "assignmentId", "{\"ImportId\":\"assignmentId…
## $ hitId                   <chr> "hitId", "{\"ImportId\":\"hitId\"}", NA, NA, …

#preview the first rows; rows 1-2 are header metadata, responses start at row 3
head(d.study)

## # A tibble: 6 x 45
##   StartDate EndDate Status IPAddress Progress `Duration (in s… Finished
##   <chr>     <chr>   <chr>  <chr>     <chr>    <chr>            <chr>   
## 1 "Start D… "End D… "Resp… "IP Addr… "Progre… "Duration (in s… "Finish…
## 2 "{\"Impo… "{\"Im… "{\"I… "{\"Impo… "{\"Imp… "{\"ImportId\":… "{\"Imp…
## 3 "12/8/20… "12/8/… "0"    "165.225… "100"    "275"            "1"     
## 4 "12/8/20… "12/8/… "0"    "82.132.… "100"    "361"            "1"     
## 5 "12/8/20… "12/8/… "0"    "122.56.… "100"    "449"            "1"     
## 6 "12/8/20… "12/8/… "0"    "75.85.5… "100"    "566"            "1"     
## # … with 38 more variables: RecordedDate <chr>, ResponseId <chr>,
## #   RecipientLastName <chr>, RecipientFirstName <chr>, RecipientEmail <chr>,
## #   ExternalReference <chr>, LocationLatitude <chr>, LocationLongitude <chr>,
## #   DistributionChannel <chr>, UserLanguage <chr>, cons <chr>, cmm_L_1 <chr>,
## #   cmm_L_2 <chr>, cmm_L_3 <chr>, cmm_L_4 <chr>, cmm_s_1 <chr>, cmm_s_2 <chr>,
## #   cmm_s_3 <chr>, cmm_s_4 <chr>, qq_1 <chr>, qq_2 <chr>, qq_3 <chr>,
## #   qq_4 <chr>, qq_5 <chr>, qq_6 <chr>, dem_age <chr>, dem_race <chr>,
## #   dem_race_6_TEXT <chr>, dem_gender <chr>, dem_gender_5_TEXT <chr>,
## #   dem_inc <chr>, dem_ed <chr>, dem_emp <chr>, debrief_else <chr>,
## #   Q_TotalDuration <chr>, workerId <chr>, assignmentId <chr>, hitId <chr>

# Remove the two Qualtrics header rows (question text and ImportId metadata)
c.study <- d.study %>% slice(-(1:2))

# Keep completed responses only (nobody was excluded by this filter), then
# retain the scale items, open-ended answers, demographics and debrief text
cmm.study <- c.study %>%
  filter(Finished == "1") %>%
  select(
    starts_with("cmm_"),
    starts_with("qq_"),
    starts_with("dem_"),
    debrief_else
  )

#In this questionnaire you will be shown a number of statements regarding mindsets. Please indicate your agreement with each statement by selecting the number that best represents your answer on the scale presented below the statement.

1. To be honest, you can’t really change your mindsets. (R)

2. As much as I hate to admit it, you can’t teach an old dog new tricks. You can’t really change your mindsets about things in the world. (R)

3. Even in moments when it really matters, I can’t do much to change my mindset. (R)

4. How much I can control my mindset is something about me that I can’t change very much. (R)

Scale: Strongly disagree (1), Disagree (2), Somewhat disagree (3), Somewhat agree (4), Agree (5), Strongly Agree (6)

Scoring: Reverse-score the items, then compute the mean score of all the items. A higher score will indicate greater agreement that one’s mindset is controllable and changeable.

#add id number

cmm.study <- cmm.study %>% mutate(id = row_number())

#split into two data frames for long and short instructions
#a row belongs to a version when item 1 of that version was answered; the
#items of the other version are then dropped.
#is.na() is used for the missing-value check: comparing against the string
#"NA" only removed missing rows as a side effect of filter() discarding rows
#whose comparison result is NA


cmm.study.long <- cmm.study %>%
  filter(!is.na(cmm_L_1)) %>%
  select(-c(cmm_s_1, cmm_s_2, cmm_s_3, cmm_s_4))

#25 responses

cmm.study.short <- cmm.study %>%
  filter(!is.na(cmm_s_1)) %>%
  select(-c(cmm_L_1, cmm_L_2, cmm_L_3, cmm_L_4))

#24 responses

# The item responses were imported as character; convert them to numeric

cmm.study.long <- cmm.study.long %>%
  mutate(across(c(cmm_L_1, cmm_L_2, cmm_L_3, cmm_L_4), as.numeric))

cmm.study.short <- cmm.study.short %>%
  mutate(across(c(cmm_s_1, cmm_s_2, cmm_s_3, cmm_s_4), as.numeric))

# Scale score = mean of the four items, computed per instruction version.
# NOTE(review): the scoring instructions above say the items are reverse-scored
# before averaging, but no reversal is applied here - confirm whether the raw
# responses were already recoded before relying on the direction of the score.

cmm.long.df <- cmm.study.long %>%
  mutate(cmm_Long_score = rowMeans(across(cmm_L_1:cmm_L_4)))


cmm.short.df <- cmm.study.short %>%
  mutate(cmm_Short_score = rowMeans(across(cmm_s_1:cmm_s_4)))

# Mean scale score per version: long = 2.99, short = 2.60.
# The long-instruction mean is .39 higher, which may indicate higher responses
# under the long instructions; the group difference is tested further down.

mean(cmm.long.df[["cmm_Long_score"]])

## [1] 2.99

mean(cmm.short.df[["cmm_Short_score"]])

## [1] 2.604167

# Distribution summary of each scale score
summary(cmm.long.df[["cmm_Long_score"]])

##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    1.00    2.00    3.00    2.99    4.00    5.00

summary(cmm.short.df[["cmm_Short_score"]])

##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   1.500   2.000   2.500   2.604   3.000   4.000

# Standard deviations: the long-instruction scores are more spread out
sd(cmm.long.df[["cmm_Long_score"]])

## [1] 1.093256

sd(cmm.short.df[["cmm_Short_score"]])

## [1] 0.7294901

#in cmm pilot(n=324) mean = 3.16, sd=1.50

#view histograms of the response scores for the long and short data frames -
#the scores do not seem normally distributed in the observed sample
hist(cmm.long.df$cmm_Long_score)

hist(cmm.short.df$cmm_Short_score)

boxplot(cmm.long.df$cmm_Long_score)

boxplot(cmm.short.df$cmm_Short_score)

#no outliers; only 25 (long) and 24 (short) observations per variable

# Export the scale score and the open-ended responses for each version.
# Each export is built from the data frame of the same instruction version so
# that the object name, the score column and the file name all agree.
# (The long/short sources used to be swapped, so 'cmm.pilot3.long.csv' received
# the short-instruction scores and 'cmm.pilot3.short.csv' the long ones.)

xcl.long.df <- cmm.long.df %>%
  select(id, cmm_Long_score, qq_1, qq_2, qq_3, qq_4, qq_5, qq_6)
xcl.short.df <- cmm.short.df %>%
  select(id, cmm_Short_score, qq_1, qq_2, qq_3, qq_4, qq_5, qq_6)

write.csv(xcl.long.df, "cmm.pilot3.long.csv")
write.csv(xcl.short.df, "cmm.pilot3.short.csv")

#Analyses and tests

#do the populations have the same variances? -- p = .057 is just above .05, so
#the null of equal variances is not rejected at the 5% level, although the
#long-instruction scores are descriptively more variable (variance ratio 2.25)
cmm.ftest <- var.test(cmm.long.df$cmm_Long_score, cmm.short.df$cmm_Short_score)
cmm.ftest

## 
##  F test to compare two variances
## 
## data:  cmm.long.df$cmm_Long_score and cmm.short.df$cmm_Short_score
## F = 2.246, num df = 24, denom df = 23, p-value = 0.05657
## alternative hypothesis: true ratio of variances is not equal to 1
## 95 percent confidence interval:
##  0.9769749 5.1254711
## sample estimates:
## ratio of variances 
##           2.245974

# is data normally distributed? - the null of normality cannot be rejected, so the data are treated as normally distributed

shapiro.test(cmm.long.df$cmm_Long_score) #Shapiro-Wilk p = .067 > .05 - normality is not rejected, so the long version is treated as normally distributed

## 
##  Shapiro-Wilk normality test
## 
## data:  cmm.long.df$cmm_Long_score
## W = 0.925, p-value = 0.06668

shapiro.test(cmm.short.df$cmm_Short_score) #p = .305 > .05 - normality is not rejected for the short version either

## 
##  Shapiro-Wilk normality test
## 
## data:  cmm.short.df$cmm_Short_score
## W = 0.95241, p-value = 0.3053

#could try to run more samples at this response and do analyses?

#is there a sig diff between the mean scores?
#the two groups took different versions of the measure, so they are treated as
#independent groups (a paired t-test does not apply here)

#independent two-group comparison using the Welch two sample t-test

cmm.ttest <- t.test(cmm.long.df$cmm_Long_score, cmm.short.df$cmm_Short_score, var.equal = FALSE) #Welch's test: equal variances are not assumed

#p=.15 so we cannot reject the null, meaning there is not a sig diff in means between groups
#could create a new variable like inst.length = Long or Short as a categorical variable with two levels but can just use the scores of the two group means like above

cmm.ttest

## 
##  Welch Two Sample t-test
## 
## data:  cmm.long.df$cmm_Long_score and cmm.short.df$cmm_Short_score
## t = 1.4585, df = 41.998, p-value = 0.1521
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -0.1480311  0.9196978
## sample estimates:
## mean of x mean of y 
##  2.990000  2.604167

# Item-level data for the long-instruction scale (the four cmm_L_* items)
cmm.long.scale.df <- cmm.long.df[, paste0("cmm_L_", 1:4)]

# Item-level data for the short-instruction scale (the four cmm_s_* items)
cmm.short.scale.df <- cmm.short.df[, paste0("cmm_s_", 1:4)]

library(psych)

## 
## Attaching package: 'psych'

## The following objects are masked from 'package:ggplot2':
## 
##     %+%, alpha

#check alpha and reliability analysis
#psych is attached after ggplot2, so alpha() here resolves to psych's alpha
alpha(cmm.long.scale.df) #long scale has the higher alpha: .93 (long) > .82 (short)

## 
## Reliability analysis   
## Call: alpha(x = cmm.long.scale.df)
## 
##   raw_alpha std.alpha G6(smc) average_r S/N   ase mean  sd median_r
##       0.93      0.93    0.93      0.76  13 0.025    3 1.1     0.78
## 
##  lower alpha upper     95% confidence boundaries
## 0.88 0.93 0.98 
## 
##  Reliability if an item is dropped:
##         raw_alpha std.alpha G6(smc) average_r  S/N alpha se   var.r med.r
## cmm_L_1      0.88      0.88    0.87      0.71  7.4    0.042 0.02315  0.67
## cmm_L_2      0.96      0.96    0.95      0.90 27.5    0.013 0.00073  0.89
## cmm_L_3      0.88      0.88    0.87      0.72  7.6    0.042 0.02477  0.67
## cmm_L_4      0.88      0.88    0.87      0.70  7.1    0.047 0.03978  0.59
## 
##  Item statistics 
##          n raw.r std.r r.cor r.drop mean  sd
## cmm_L_1 25  0.94  0.94  0.95   0.90  3.0 1.2
## cmm_L_2 25  0.78  0.79  0.66   0.64  2.6 1.2
## cmm_L_3 25  0.94  0.94  0.94   0.89  3.2 1.2
## cmm_L_4 25  0.95  0.95  0.94   0.91  3.2 1.3
## 
## Non missing response frequency for each item
##            1    2    3    4    5    6 miss
## cmm_L_1 0.08 0.36 0.16 0.32 0.08 0.00    0
## cmm_L_2 0.20 0.28 0.24 0.24 0.04 0.00    0
## cmm_L_3 0.08 0.28 0.08 0.48 0.08 0.00    0
## cmm_L_4 0.04 0.40 0.08 0.36 0.08 0.04    0

#reliability analysis of the short-instruction items (raw alpha = .82)
alpha(cmm.short.scale.df)

## 
## Reliability analysis   
## Call: alpha(x = cmm.short.scale.df)
## 
##   raw_alpha std.alpha G6(smc) average_r S/N   ase mean   sd median_r
##       0.82      0.84    0.85      0.56 5.1 0.061  2.6 0.73     0.61
## 
##  lower alpha upper     95% confidence boundaries
## 0.7 0.82 0.94 
## 
##  Reliability if an item is dropped:
##         raw_alpha std.alpha G6(smc) average_r S/N alpha se var.r med.r
## cmm_s_1      0.67      0.69    0.65      0.43 2.3    0.122 0.038  0.37
## cmm_s_2      0.88      0.88    0.84      0.72 7.6    0.040 0.004  0.73
## cmm_s_3      0.76      0.77    0.78      0.53 3.4    0.089 0.053  0.58
## cmm_s_4      0.79      0.80    0.79      0.57 4.0    0.078 0.042  0.58
## 
##  Item statistics 
##          n raw.r std.r r.cor r.drop mean   sd
## cmm_s_1 24  0.94  0.94  0.95   0.87  2.3 0.91
## cmm_s_2 24  0.72  0.68  0.53   0.46  2.5 1.06
## cmm_s_3 24  0.83  0.85  0.80   0.70  2.7 0.81
## cmm_s_4 24  0.78  0.81  0.74   0.63  3.0 0.81
## 
## Non missing response frequency for each item
##            1    2    3    4 miss
## cmm_s_1 0.17 0.50 0.21 0.12    0
## cmm_s_2 0.21 0.33 0.25 0.21    0
## cmm_s_3 0.08 0.25 0.54 0.12    0
## cmm_s_4 0.04 0.21 0.50 0.25    0

#suggest more stable using long measure? higher response scores as well

#EFA long (only an exploratory factor analysis is fitted here)
#fa() is called without a number of factors; the loadings printed below show
#a single factor (MR1)


EFA.model.CMM.long <- fa(cmm.long.scale.df)


EFA.model.CMM.long$loadings

## 
## Loadings:
##         MR1  
## cmm_L_1 0.952
## cmm_L_2 0.647
## cmm_L_3 0.941
## cmm_L_4 0.955
## 
##                  MR1
## SS loadings    3.124
## Proportion Var 0.781

#diagram of the factor loadings
fa.diagram(EFA.model.CMM.long)

#individual participants' scores on the extracted factor
EFA.model.CMM.long$scores

##               MR1
##  [1,]  1.92995775
##  [2,] -0.91797359
##  [3,] -0.90054464
##  [4,]  1.19694616
##  [5,] -0.93540253
##  [6,]  0.08811625
##  [7,]  0.50690888
##  [8,]  0.74211976
##  [9,]  0.75954870
## [10,] -0.90054464
## [11,]  0.75954870
## [12,] -1.40765788
## [13,]  0.75954870
## [14,]  0.75954870
## [15,] -0.71578700
## [16,]  0.72469081
## [17,] -0.69835806
## [18,]  0.10554519
## [19,] -0.91797359
## [20,]  0.74211976
## [21,] -0.93540253
## [22,] -1.75673473
## [23,] -0.91797359
## [24,]  1.10862555
## [25,]  0.82112784

# Distribution summary of the long-version factor scores
summary(EFA.model.CMM.long[["scores"]])

##       MR1         
##  Min.   :-1.7567  
##  1st Qu.:-0.9180  
##  Median : 0.1055  
##  Mean   : 0.0000  
##  3rd Qu.: 0.7595  
##  Max.   : 1.9300

#summary of every column of the long data frame (items, open-ended answers, demographics, id and scale score)
summary(cmm.long.df)

##     cmm_L_1        cmm_L_2        cmm_L_3       cmm_L_4         qq_1          
##  Min.   :1.00   Min.   :1.00   Min.   :1.0   Min.   :1.00   Length:25         
##  1st Qu.:2.00   1st Qu.:2.00   1st Qu.:2.0   1st Qu.:2.00   Class :character  
##  Median :3.00   Median :3.00   Median :4.0   Median :3.00   Mode  :character  
##  Mean   :2.96   Mean   :2.64   Mean   :3.2   Mean   :3.16                     
##  3rd Qu.:4.00   3rd Qu.:4.00   3rd Qu.:4.0   3rd Qu.:4.00                     
##  Max.   :5.00   Max.   :5.00   Max.   :5.0   Max.   :6.00                     
##      qq_2               qq_3               qq_4               qq_5          
##  Length:25          Length:25          Length:25          Length:25         
##  Class :character   Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character   Mode  :character  
##                                                                             
##                                                                             
##                                                                             
##      qq_6             dem_age            dem_race         dem_race_6_TEXT   
##  Length:25          Length:25          Length:25          Length:25         
##  Class :character   Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character   Mode  :character  
##                                                                             
##                                                                             
##                                                                             
##   dem_gender        dem_gender_5_TEXT    dem_inc             dem_ed         
##  Length:25          Length:25          Length:25          Length:25         
##  Class :character   Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character   Mode  :character  
##                                                                             
##                                                                             
##                                                                             
##    dem_emp          debrief_else             id        cmm_Long_score
##  Length:25          Length:25          Min.   : 2.00   Min.   :1.00  
##  Class :character   Class :character   1st Qu.:13.00   1st Qu.:2.00  
##  Mode  :character   Mode  :character   Median :23.00   Median :3.00  
##                                        Mean   :23.56   Mean   :2.99  
##                                        3rd Qu.:35.00   3rd Qu.:4.00  
##                                        Max.   :46.00   Max.   :5.00

#density plot of the long-version factor scores
plot(density(EFA.model.CMM.long$scores))

#descriptive statistics of the long scale score
describe(cmm.long.df$cmm_Long_score)

##    vars  n mean   sd median trimmed  mad min max range  skew kurtosis   se
## X1    1 25 2.99 1.09      3    3.01 1.48   1   5     4 -0.11    -1.33 0.22

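#we see 1 factor; this also suggests the long scale explains more variance (proportion of variance .78 for the long model vs .61 for the short model fitted below) - still need to check the scale reliability parameters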
#also more distributed between scores but more evenly with the short

#EFA short (only an exploratory factor analysis is fitted here)
#NOTE: the warnings printed below report an ultra-Heywood case (cmm_s_1 loads
#at 1.003, i.e. above 1), so this solution and its factor scores should be
#interpreted with caution


EFA.model.CMM.short <- fa(cmm.short.scale.df)

## Warning in fa.stats(r = r, f = f, phi = phi, n.obs = n.obs, np.obs = np.obs, :
## The estimated weights for the factor scores are probably incorrect. Try a
## different factor score estimation method.

## Warning in fac(r = r, nfactors = nfactors, n.obs = n.obs, rotate = rotate, : An
## ultra-Heywood case was detected. Examine the results carefully

# Factor loadings of the short-version model
EFA.model.CMM.short[["loadings"]]

## 
## Loadings:
##         MR1  
## cmm_s_1 1.003
## cmm_s_2 0.491
## cmm_s_3 0.802
## cmm_s_4 0.734
## 
##                  MR1
## SS loadings    2.431
## Proportion Var 0.608

#diagram of the factor loadings
fa.diagram(EFA.model.CMM.short)

#individual participants' scores on the extracted factor
EFA.model.CMM.short$scores

##              MR1
##  [1,]  0.6040182
##  [2,] -1.5183861
##  [3,]  2.1031875
##  [4,] -0.1475880
##  [5,] -1.2811563
##  [6,]  1.8485461
##  [7,]  0.7873439
##  [8,]  0.7873439
##  [9,] -0.5295501
## [10,]  1.8485461
## [11,] -0.2749087
## [12,] -0.2738582
## [13,]  0.7873439
## [14,] -0.2749087
## [15,] -0.5284996
## [16,] -0.4022294
## [17,] -1.4470704
## [18,]  0.7313389
## [19,] -0.4571839
## [20,] -0.2749087
## [21,] -1.4074265
## [22,] -0.2025425
## [23,] -0.2035930
## [24,] -0.2738582

# Distribution summary of the short-version factor scores
summary(EFA.model.CMM.short[["scores"]])

##       MR1         
##  Min.   :-1.5184  
##  1st Qu.:-0.4750  
##  Median :-0.2739  
##  Mean   : 0.0000  
##  3rd Qu.: 0.7453  
##  Max.   : 2.1032

#summary of every column of the short data frame (items, open-ended answers, demographics, id and scale score)
summary(cmm.short.df)

##     cmm_s_1         cmm_s_2         cmm_s_3         cmm_s_4     
##  Min.   :1.000   Min.   :1.000   Min.   :1.000   Min.   :1.000  
##  1st Qu.:2.000   1st Qu.:2.000   1st Qu.:2.000   1st Qu.:2.750  
##  Median :2.000   Median :2.000   Median :3.000   Median :3.000  
##  Mean   :2.292   Mean   :2.458   Mean   :2.708   Mean   :2.958  
##  3rd Qu.:3.000   3rd Qu.:3.000   3rd Qu.:3.000   3rd Qu.:3.250  
##  Max.   :4.000   Max.   :4.000   Max.   :4.000   Max.   :4.000  
##      qq_1               qq_2               qq_3               qq_4          
##  Length:24          Length:24          Length:24          Length:24         
##  Class :character   Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character   Mode  :character  
##                                                                             
##                                                                             
##                                                                             
##      qq_5               qq_6             dem_age            dem_race        
##  Length:24          Length:24          Length:24          Length:24         
##  Class :character   Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character   Mode  :character  
##                                                                             
##                                                                             
##                                                                             
##  dem_race_6_TEXT     dem_gender        dem_gender_5_TEXT    dem_inc         
##  Length:24          Length:24          Length:24          Length:24         
##  Class :character   Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character   Mode  :character  
##                                                                             
##                                                                             
##                                                                             
##     dem_ed            dem_emp          debrief_else             id       
##  Length:24          Length:24          Length:24          Min.   : 1.00  
##  Class :character   Class :character   Class :character   1st Qu.:14.25  
##  Mode  :character   Mode  :character   Mode  :character   Median :26.50  
##                                                           Mean   :26.50  
##                                                           3rd Qu.:41.50  
##                                                           Max.   :49.00  
##  cmm_Short_score
##  Min.   :1.500  
##  1st Qu.:2.000  
##  Median :2.500  
##  Mean   :2.604  
##  3rd Qu.:3.000  
##  Max.   :4.000

#density plot of the short-version factor scores
plot(density(EFA.model.CMM.short$scores))

#descriptive statistics of the short scale score
describe(cmm.short.df$cmm_Short_score)

##    vars  n mean   sd median trimmed  mad min max range skew kurtosis   se
## X1    1 24  2.6 0.73    2.5    2.58 0.74 1.5   4   2.5 0.22    -0.86 0.15

#we see 1 factor

#check out the open ended responses, text mining bag of words?
#open the exported data frame in the data viewer, then print it to the console

view(xcl.long.df)
xcl.long.df

## # A tibble: 24 x 8
##       id cmm_Short_score qq_1      qq_2      qq_3     qq_4     qq_5     qq_6    
##    <int>           <dbl> <chr>     <chr>     <chr>    <chr>    <chr>    <chr>   
##  1     1            3.5  It is th… I though… "Going … "At wor… My mind… "Fixed,…
##  2     6            1.5  The way … Mindset … "When a… "I chan… I would… "I am n…
##  3     7            3.5  your tho… mind set… "mindse… "I can'… changin… "genera…
##  4     8            2.25 Mindset … mindsets… "workin… "I foug… I would… "Mindse…
##  5    11            1.75 A mindse… Abortion… "On the… "The to… Not hav… "Having…
##  6    12            4    It is ba… stance o… "I do n… "My min… There i… "honest…
##  7    15            3    The set … Depresse… "Having… "I was … I would… "Happy/…
##  8    16            3    Your tho… Circumst… "I have… "I had … Think r… "I can’…
##  9    19            3    Someones… Changing… "When I… "I was … Stubbor… "selfis…
## 10    20            4    Your att… Motivati… "I felt… "I need… Again i… "A posi…
## # … with 14 more rows

#open the second exported data frame in the data viewer
view(xcl.short.df)

cmm.pilot3.cleaning