See the Clean rows.

Load Packages

library(tidyverse) # for data munging

## ── Attaching packages ────────

## ✓ ggplot2 3.3.2     ✓ purrr   0.3.4
## ✓ tibble  3.0.3     ✓ dplyr   1.0.2
## ✓ tidyr   1.1.2     ✓ stringr 1.4.0
## ✓ readr   1.3.1     ✓ forcats 0.5.0

## ── Conflicts ─────────────────
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()

library(knitr) 
library(rstatix)

## 
## Attaching package: 'rstatix'

## The following object is masked from 'package:stats':
## 
##     filter

library(ggpubr)

Load data

# Print the working directory; the relative path given to read_csv() is
# resolved against it.
getwd()

## [1] "/Users/JosephOuta/Desktop/GitHub"

# Read the raw survey export (path is relative to the working directory).
# NOTE(review): every column is parsed as character (see the column
# specification printed after this call), presumably because the first two
# data rows hold question text and ImportId metadata rather than responses.
d <- read_csv("AgencyProj/agendata.csv")

## Warning: Duplicated column names deduplicated: 'Ethnic' => 'Ethnic_1' [96],
## 'Politics' => 'Politics_1' [99]

## Parsed with column specification:
## cols(
##   .default = col_character()
## )

## See spec(...) for full column specifications.

Explore data

# Preview the first rows of the raw data, including the non-response header
# rows that are removed later.
head(d)

## # A tibble: 6 x 106
##   StartDate EndDate Status IPAddress Progress `Duration (in s… Finished
##   <chr>     <chr>   <chr>  <chr>     <chr>    <chr>            <chr>   
## 1 "Start D… "End D… "Resp… "IP Addr… "Progre… "Duration (in s… "Finish…
## 2 "{\"Impo… "{\"Im… "{\"I… "{\"Impo… "{\"Imp… "{\"ImportId\":… "{\"Imp…
## 3 "9/13/20… "9/13/… "1"    "*******" "100"    "672"            "1"     
## 4 "9/15/20… "9/15/… "1"    "*******" "100"    "74"             "1"     
## 5 "9/15/20… "9/15/… "1"    "*******" "100"    "38"             "1"     
## 6 "9/15/20… "9/15/… "1"    "*******" "100"    "43"             "1"     
## # … with 99 more variables: RecordedDate <chr>, ResponseId <chr>,
## #   RecipientLastName <chr>, RecipientFirstName <chr>, RecipientEmail <chr>,
## #   ExternalReference <chr>, LocationLatitude <chr>, LocationLongitude <chr>,
## #   DistributionChannel <chr>, UserLanguage <chr>, Consent <chr>,
## #   `timing_intro_First Click` <chr>, `timing_intro_Last Click` <chr>,
## #   `timing_intro_Page Submit` <chr>, `timing_intro_Click Count` <chr>,
## #   Agency2 <chr>, Sab1_1 <chr>, Sab1_2 <chr>, Sab1_3 <chr>, Sab1_4 <chr>,
## #   Sab1_5 <chr>, Sab1b_1 <chr>, Sab1b_2 <chr>, Sab1b_3 <chr>, Sab1b_4 <chr>,
## #   Sab1b_5 <chr>, Sab2_1 <chr>, Sab2_2 <chr>, Sab2_3 <chr>, Sab2_4 <chr>,
## #   Sab2_5 <chr>, Sab2b_1 <chr>, Sab2b_2 <chr>, Sab2b_3 <chr>, Sab2b_4 <chr>,
## #   Sab2b_5 <chr>, Sab3_1 <chr>, Sab3_2 <chr>, Sab3_3 <chr>, Sab3_4 <chr>,
## #   Sab3_5 <chr>, Sab3b_1 <chr>, Sab3b_2 <chr>, Sab3b_3 <chr>, Sab3b_4 <chr>,
## #   Sab3b_5 <chr>, EC <chr>, NonA2 <chr>, Sab1NA_1 <chr>, Sab1NA_2 <chr>,
## #   Sab1NA_3 <chr>, Sab1NA_4 <chr>, Sab1NA_5 <chr>, Sab1NAb_1 <chr>,
## #   Sab1NAb_2 <chr>, Sab1NAb_3 <chr>, Sab1NAb_4 <chr>, Sab1NAb_5 <chr>,
## #   Sab2NA_1 <chr>, Sab2NA_2 <chr>, Sab2NA_3 <chr>, Sab2NA_4 <chr>,
## #   Sab2NA_5 <chr>, Sab2NAb_1 <chr>, Sab2NAb_2 <chr>, Sab2NAb_3 <chr>,
## #   Sab2NAb_4 <chr>, Sab2NAb_5 <chr>, Sab3NA_1 <chr>, Sab3NA_2 <chr>,
## #   Sab3NA_3 <chr>, Sab3NA_4 <chr>, Sab3NA_5 <chr>, Sab3NAb_1 <chr>,
## #   Sab3NAb_2 <chr>, Sab3NAb_3 <chr>, Sab3NAb_4 <chr>, Sab3NAb_5 <chr>,
## #   Q51 <chr>, `Timing5sec_First Click` <chr>, `Timing5sec_Last Click` <chr>,
## #   `Timing5sec_Page Submit` <chr>, `Timing5sec_Click Count` <chr>,
## #   `Debrief+attend` <chr>, Edu <chr>, Age_1 <chr>, Gender <chr>, Ethnic <chr>,
## #   Ethnic_1 <chr>, Income <chr>, Politics <chr>, Politics_1 <chr>,
## #   `Timing_First Click` <chr>, `Timing_Last Click` <chr>, `Timing_Page
## #   Submit` <chr>, `Timing_Click Count` <chr>, Q81 <chr>, Q82 <chr>,
## #   PROLIFIC_PID <chr>

# List every column name in the raw export.
colnames(d)

##   [1] "StartDate"                "EndDate"                 
##   [3] "Status"                   "IPAddress"               
##   [5] "Progress"                 "Duration (in seconds)"   
##   [7] "Finished"                 "RecordedDate"            
##   [9] "ResponseId"               "RecipientLastName"       
##  [11] "RecipientFirstName"       "RecipientEmail"          
##  [13] "ExternalReference"        "LocationLatitude"        
##  [15] "LocationLongitude"        "DistributionChannel"     
##  [17] "UserLanguage"             "Consent"                 
##  [19] "timing_intro_First Click" "timing_intro_Last Click" 
##  [21] "timing_intro_Page Submit" "timing_intro_Click Count"
##  [23] "Agency2"                  "Sab1_1"                  
##  [25] "Sab1_2"                   "Sab1_3"                  
##  [27] "Sab1_4"                   "Sab1_5"                  
##  [29] "Sab1b_1"                  "Sab1b_2"                 
##  [31] "Sab1b_3"                  "Sab1b_4"                 
##  [33] "Sab1b_5"                  "Sab2_1"                  
##  [35] "Sab2_2"                   "Sab2_3"                  
##  [37] "Sab2_4"                   "Sab2_5"                  
##  [39] "Sab2b_1"                  "Sab2b_2"                 
##  [41] "Sab2b_3"                  "Sab2b_4"                 
##  [43] "Sab2b_5"                  "Sab3_1"                  
##  [45] "Sab3_2"                   "Sab3_3"                  
##  [47] "Sab3_4"                   "Sab3_5"                  
##  [49] "Sab3b_1"                  "Sab3b_2"                 
##  [51] "Sab3b_3"                  "Sab3b_4"                 
##  [53] "Sab3b_5"                  "EC"                      
##  [55] "NonA2"                    "Sab1NA_1"                
##  [57] "Sab1NA_2"                 "Sab1NA_3"                
##  [59] "Sab1NA_4"                 "Sab1NA_5"                
##  [61] "Sab1NAb_1"                "Sab1NAb_2"               
##  [63] "Sab1NAb_3"                "Sab1NAb_4"               
##  [65] "Sab1NAb_5"                "Sab2NA_1"                
##  [67] "Sab2NA_2"                 "Sab2NA_3"                
##  [69] "Sab2NA_4"                 "Sab2NA_5"                
##  [71] "Sab2NAb_1"                "Sab2NAb_2"               
##  [73] "Sab2NAb_3"                "Sab2NAb_4"               
##  [75] "Sab2NAb_5"                "Sab3NA_1"                
##  [77] "Sab3NA_2"                 "Sab3NA_3"                
##  [79] "Sab3NA_4"                 "Sab3NA_5"                
##  [81] "Sab3NAb_1"                "Sab3NAb_2"               
##  [83] "Sab3NAb_3"                "Sab3NAb_4"               
##  [85] "Sab3NAb_5"                "Q51"                     
##  [87] "Timing5sec_First Click"   "Timing5sec_Last Click"   
##  [89] "Timing5sec_Page Submit"   "Timing5sec_Click Count"  
##  [91] "Debrief+attend"           "Edu"                     
##  [93] "Age_1"                    "Gender"                  
##  [95] "Ethnic"                   "Ethnic_1"                
##  [97] "Income"                   "Politics"                
##  [99] "Politics_1"               "Timing_First Click"      
## [101] "Timing_Last Click"        "Timing_Page Submit"      
## [103] "Timing_Click Count"       "Q81"                     
## [105] "Q82"                      "PROLIFIC_PID"

Clean rows

Filter out Redundant column titles and Preview participants

# Number of rows as read from the CSV, header rows included
# (251 at the time of writing).
rowlength <- nrow(d)

# Drop the first two rows, which hold column title info rather than responses.
# Filtering on the row number (instead of indexing with 3:rowlength) stays
# correct when the file has fewer than three rows, where 3:rowlength would
# count backwards and pick up the wrong rows.
d_notitles <- filter(d, row_number() > 2)

# Remove Survey Preview rows by keeping only Status 0. Status is a character
# column, so it is compared against the string "0" explicitly.
d_noprev <- filter(d_notitles, Status == "0")

# Arrange data by condition (sorted on the Agency2 response)
d_cond <- arrange(d_noprev, Agency2)

Clean columns

# Inspect Q51 in the raw data: it looks like the end checks for the
# non-agency condition.
d %>%
  select(Q51)

## # A tibble: 251 x 1
##    Q51                                                                          
##    <chr>                                                                        
##  1 "How much do you agree:\n\nEthical decisions at work comply with the company…
##  2 "{\"ImportId\":\"QID79\"}"                                                   
##  3  <NA>                                                                        
##  4  <NA>                                                                        
##  5  <NA>                                                                        
##  6  <NA>                                                                        
##  7  <NA>                                                                        
##  8 "6"                                                                          
##  9  <NA>                                                                        
## 10  <NA>                                                                        
## # … with 241 more rows

# List the column names of the cleaned data to locate the columns to rename
names(d_cond)

##   [1] "StartDate"                "EndDate"                 
##   [3] "Status"                   "IPAddress"               
##   [5] "Progress"                 "Duration (in seconds)"   
##   [7] "Finished"                 "RecordedDate"            
##   [9] "ResponseId"               "RecipientLastName"       
##  [11] "RecipientFirstName"       "RecipientEmail"          
##  [13] "ExternalReference"        "LocationLatitude"        
##  [15] "LocationLongitude"        "DistributionChannel"     
##  [17] "UserLanguage"             "Consent"                 
##  [19] "timing_intro_First Click" "timing_intro_Last Click" 
##  [21] "timing_intro_Page Submit" "timing_intro_Click Count"
##  [23] "Agency2"                  "Sab1_1"                  
##  [25] "Sab1_2"                   "Sab1_3"                  
##  [27] "Sab1_4"                   "Sab1_5"                  
##  [29] "Sab1b_1"                  "Sab1b_2"                 
##  [31] "Sab1b_3"                  "Sab1b_4"                 
##  [33] "Sab1b_5"                  "Sab2_1"                  
##  [35] "Sab2_2"                   "Sab2_3"                  
##  [37] "Sab2_4"                   "Sab2_5"                  
##  [39] "Sab2b_1"                  "Sab2b_2"                 
##  [41] "Sab2b_3"                  "Sab2b_4"                 
##  [43] "Sab2b_5"                  "Sab3_1"                  
##  [45] "Sab3_2"                   "Sab3_3"                  
##  [47] "Sab3_4"                   "Sab3_5"                  
##  [49] "Sab3b_1"                  "Sab3b_2"                 
##  [51] "Sab3b_3"                  "Sab3b_4"                 
##  [53] "Sab3b_5"                  "EC"                      
##  [55] "NonA2"                    "Sab1NA_1"                
##  [57] "Sab1NA_2"                 "Sab1NA_3"                
##  [59] "Sab1NA_4"                 "Sab1NA_5"                
##  [61] "Sab1NAb_1"                "Sab1NAb_2"               
##  [63] "Sab1NAb_3"                "Sab1NAb_4"               
##  [65] "Sab1NAb_5"                "Sab2NA_1"                
##  [67] "Sab2NA_2"                 "Sab2NA_3"                
##  [69] "Sab2NA_4"                 "Sab2NA_5"                
##  [71] "Sab2NAb_1"                "Sab2NAb_2"               
##  [73] "Sab2NAb_3"                "Sab2NAb_4"               
##  [75] "Sab2NAb_5"                "Sab3NA_1"                
##  [77] "Sab3NA_2"                 "Sab3NA_3"                
##  [79] "Sab3NA_4"                 "Sab3NA_5"                
##  [81] "Sab3NAb_1"                "Sab3NAb_2"               
##  [83] "Sab3NAb_3"                "Sab3NAb_4"               
##  [85] "Sab3NAb_5"                "Q51"                     
##  [87] "Timing5sec_First Click"   "Timing5sec_Last Click"   
##  [89] "Timing5sec_Page Submit"   "Timing5sec_Click Count"  
##  [91] "Debrief+attend"           "Edu"                     
##  [93] "Age_1"                    "Gender"                  
##  [95] "Ethnic"                   "Ethnic_1"                
##  [97] "Income"                   "Politics"                
##  [99] "Politics_1"               "Timing_First Click"      
## [101] "Timing_Last Click"        "Timing_Page Submit"      
## [103] "Timing_Click Count"       "Q81"                     
## [105] "Q82"                      "PROLIFIC_PID"

# Rename by column name rather than by position, so a change in the export
# layout cannot silently rename the wrong column:
#   Q51            -> ECNA      (column 86 in the listing above)
#   Debrief+attend -> AttnCheck (attention check, column 91)
d_cond <- rename(d_cond, ECNA = Q51, AttnCheck = `Debrief+attend`)

# Keep only the columns needed for analysis: the participant id, everything
# from Agency2 through ECNA (columns 23-86), and the attention check.
# Demographic and timing columns are left out.
d_tidy <- select(d_cond, ResponseId, Agency2:ECNA, AttnCheck)

Exclusions

Exclusion 1: Attention Check 2

Excluding based on wrong responses to attention check at the end (9 exclusions)

nrow(d_tidy) # participants before exclusions (241 in the output below)

## [1] 241

# View the attention check column
d_tidy %>%
  select(AttnCheck)

## # A tibble: 241 x 1
##    AttnCheck
##    <chr>    
##  1 5        
##  2 5        
##  3 5        
##  4 5        
##  5 5        
##  6 5        
##  7 5        
##  8 5        
##  9 5        
## 10 5        
## # … with 231 more rows

# Keep only participants who answered yes (option 5) on the final attention
# check. filter() also drops rows where AttnCheck is NA.
d_excl1 <- d_tidy %>%
  filter(AttnCheck == "5")

# Number of participants removed by this check (9 at the time of writing)
Attncheck2_excl_total <- nrow(d_tidy) - nrow(d_excl1)

Exclusion 2: Attention Check 1 - Agency Condition

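Excluding based on failed attention-check responses in the Agency condition: 22 exclusions from coder 1 (Joseph Outa), plus the additional exclusions flagged by coder 2 that are listed in the code below.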

# Agency2 responses judged to fail the attention check. Each entry is used as
# a grepl() regular expression, so it matches any response that CONTAINS it.
agency_fail_patterns <- c(
  "5",
  # CODER 1 - JOSEPH OUTA
  # page 1
  "Balancing the Company's Needs and Employee Satisfaction",
  "Based on the company values or mission statement",
  "By the value",
  "Company decisions should be based on values",
  "Decisions will be all over the place and inconsistent.",
  # page 2 of preview
  "Employees should make decisions based on values.",
  # page 3
  "Employees should keep the big picture in mind. Not ever customer will be the same and they must respect them and handle them in a friendly manner.",
  # page 4
  "Focus on customer care and well-being",
  "It treats the costumer with professionalism",
  # page 5
  "Quality and respect",
  "quickly and confidently",
  # page 7
  "should make them respectfully and fairly",
  "That they should do them with the best interests of the company in mind",
  # page 8
  "They respect there policy",
  "They should make decisions that the company would approve of and they themselves would approve of.",
  # page 9
  "They should think about your value as a pets",
  "They should try to be more friendly towards customers.",
  # page 10
  "to serve the customer best",
  "Treat customers in professional and friendly manner",
  "treat employees well",
  "Treat the customers well.",
  # page 11
  "yes",

  ## CODER 2 - YUEL LI ADDITIONAL EXCLUSIONS
  "Allows customers to make company decisions",
  "By working together",
  "Employees of SerVest should be mindful of the different value's their customers might have.",
  "Employees should make company decisions based on the values the company at large has, not their own.",
  "Employees should make company decisions taking into account customer values.",
  "Employees should make company decisions with the customer's best interest in mind. All decisions should benefit the customer first.",
  "employees should make the best decisions for the customers",
  "Employees should work together to make decisions.",
  "Following their guidelines",
  "How to best serve their interests.",
  "I believe that when the company encourages people who do not agree with their beliefs to find another place to be serviced, it gives the employees more power to say who can and cannot shop there.",
  "In a professional and caring manner",
  "Sarvest trusts its employees to make good decisions",
  "Servest relies on employees making good choices.",
  "SerVest understands that company values vary and they trust companies to make their own decisions that reflect those.",
  "That the company has certain values and also employees should also have those values and their own",
  "They respect there policy", # also listed under coder 1
  "They should accomidate the customer's values",
  "They should make decisions that the company would approve of and they themselves would approve of.", # also listed under coder 1
  "They should make decisions with the customers in mind",
  "They should put customer service first.",
  "They should reflect customer values",
  "they take in mind what customers say",
  "to reflect customer's beliefs",
  "trust employee to make decisions that will reflect positively on the company",
  "values reflect the work culture",
  "with consideration of the customer needs",
  "With morals & sincerity. SerVest trusts the employees",
  "with the customer in mind",
  "with the upmost integrity"
)

# TRUE for every row whose Agency2 response matches at least one pattern.
# grepl() returns FALSE for NA, so participants without an Agency2 response
# are never flagged at this step.
agency_failed <- Reduce(
  `|`,
  lapply(agency_fail_patterns, function(p) grepl(p, d_excl1$Agency2))
)

d_excl2 <- filter(d_excl1, !agency_failed)

nrow(d_excl2) # rows left after this exclusion (185 in the output below)

## [1] 185

# Number of participants removed by the Agency-condition attention check
Agency_excl_total <- nrow(d_excl1) - nrow(d_excl2)

Exclusion 3: Attention Check 1 - Non-Agency condition

Excluding based on failed responses in Non-Agency condition (30 exclusions)

# NonA2 responses judged to fail the attention check in the Non-Agency
# condition. Each entry is used as a grepl() regular expression, so it matches
# any response that CONTAINS it.
nonagency_fail_patterns <- c(
  # CODER 1
  # page 1
  "By keeping a good relationship with the customer- honesty and respect.",
  "Company decisions are basically based in how they treat their cliental.",
  "Decisions should be made in a way that will serve the customers better",
  "Do not understand the question",
  # page 3
  "Employees should share their knowledge with others",
  # page 4
  "employess should be involved with company decisions since everyone has different opinions",
  "Ethical decisions",
  "I cannot remember",
  "im not aware",
  "It denotes professionalism that isn't swayed by the person in charge's own views.",
  "It says that Servest understands there are different personal values and trusts that those values will be reflected by you in the company.",
  # page 5
  "iT SHOULD BE REAL",
  "make them as a good decision",
  "my values might clash with company values",
  # page 7
  "Of course, they should think carefully and make a good decision",
  "Of course, they should think carefully and make a good decision.",
  "ok",
  # page 8
  "SerVest does not discriminate.",
  "should keep customer in mind",
  "Should not use company's beliefs as part of personality",
  # page 9
  "They are respectful and pay close attention to detail",
  "they say you put the emotions of the customer into consideration",
  # page 10
  "They should think about the customer more than themselves.",
  # page 11
  "to the best",
  "Trust themselves to not let their values be swayed",
  "Value your privacy and your opinions.",
  "varies",
  # page 12
  "Vary depending on customer values",
  "with responsibility",
  "You should decide what to do",

  # CODER 2 - YUE LI ADDITIONAL EXCLUSIONS
  "Employees Should be able to make their own decisions",
  "information technogy company",
  "It's Information Technogy",
  "Like many companies, SerVest is committed to making sure customers are treated in a professional, friendly, and patient manner.",
  "my values might conflict with the company's"
)

# TRUE for every row whose NonA2 response matches at least one pattern.
# grepl() returns FALSE for NA, so participants without a NonA2 response are
# never flagged at this step.
#
# Fix: coder 2's additional patterns were previously matched against Agency2,
# the other condition's column, so they could never exclude a Non-Agency
# response. All patterns in this step are now matched against NonA2.
# NOTE(review): this can change the exclusion counts downstream - re-run and
# confirm the totals.
nonagency_failed <- Reduce(
  `|`,
  lapply(nonagency_fail_patterns, function(p) grepl(p, d_excl2$NonA2))
)

d_excl3 <- d_excl2 %>%
  filter(!nonagency_failed)

nrow(d_excl3) # rows left after the Non-Agency exclusions

## [1] 155

# Number of participants removed by the Non-Agency attention check
NonAgency_excl_total <- nrow(d_excl2) - nrow(d_excl3)

Exclusion 4: Blank Responses + Computing Total Exclusions

Excluding people who left attention check blank in both Agency2 and NonA2

# First check who left the attention check blank: rows that are NA in Agency2
# and also NA in NonA2
d_filtertest_3 <- filter(d_excl3, is.na(Agency2), is.na(NonA2))
d_filtertest_3 # print these participants for inspection

## # A tibble: 5 x 66
##   ResponseId Agency2 Sab1_1 Sab1_2 Sab1_3 Sab1_4 Sab1_5 Sab1b_1 Sab1b_2 Sab1b_3
##   <chr>      <chr>   <chr>  <chr>  <chr>  <chr>  <chr>  <chr>   <chr>   <chr>  
## 1 R_RP0Xu1l… <NA>    <NA>   <NA>   <NA>   <NA>   <NA>   <NA>    <NA>    <NA>   
## 2 R_2uyeCzC… <NA>    1      <NA>   1      1      <NA>   1       <NA>    <NA>   
## 3 R_UzNZ8Vg… <NA>    <NA>   <NA>   <NA>   <NA>   <NA>   <NA>    <NA>    <NA>   
## 4 R_3ixmIni… <NA>    <NA>   1      1      1      <NA>   <NA>    1       1      
## 5 R_24BFpyv… <NA>    <NA>   1      <NA>   <NA>   <NA>   <NA>    <NA>    <NA>   
## # … with 56 more variables: Sab1b_4 <chr>, Sab1b_5 <chr>, Sab2_1 <chr>,
## #   Sab2_2 <chr>, Sab2_3 <chr>, Sab2_4 <chr>, Sab2_5 <chr>, Sab2b_1 <chr>,
## #   Sab2b_2 <chr>, Sab2b_3 <chr>, Sab2b_4 <chr>, Sab2b_5 <chr>, Sab3_1 <chr>,
## #   Sab3_2 <chr>, Sab3_3 <chr>, Sab3_4 <chr>, Sab3_5 <chr>, Sab3b_1 <chr>,
## #   Sab3b_2 <chr>, Sab3b_3 <chr>, Sab3b_4 <chr>, Sab3b_5 <chr>, EC <chr>,
## #   NonA2 <chr>, Sab1NA_1 <chr>, Sab1NA_2 <chr>, Sab1NA_3 <chr>,
## #   Sab1NA_4 <chr>, Sab1NA_5 <chr>, Sab1NAb_1 <chr>, Sab1NAb_2 <chr>,
## #   Sab1NAb_3 <chr>, Sab1NAb_4 <chr>, Sab1NAb_5 <chr>, Sab2NA_1 <chr>,
## #   Sab2NA_2 <chr>, Sab2NA_3 <chr>, Sab2NA_4 <chr>, Sab2NA_5 <chr>,
## #   Sab2NAb_1 <chr>, Sab2NAb_2 <chr>, Sab2NAb_3 <chr>, Sab2NAb_4 <chr>,
## #   Sab2NAb_5 <chr>, Sab3NA_1 <chr>, Sab3NA_2 <chr>, Sab3NA_3 <chr>,
## #   Sab3NA_4 <chr>, Sab3NA_5 <chr>, Sab3NAb_1 <chr>, Sab3NAb_2 <chr>,
## #   Sab3NAb_3 <chr>, Sab3NAb_4 <chr>, Sab3NAb_5 <chr>, ECNA <chr>,
## #   AttnCheck <chr>

# Then exclude them: drop every participant whose attention check is blank in
# BOTH Agency2 and NonA2. Filtering on the rule itself, rather than on a
# hard-coded list of ResponseIds, keeps this step in sync with the check above
# if the data or the earlier exclusions change.
d_excl4 <- filter(d_excl3, !(is.na(Agency2) & is.na(NonA2)))
d_excl4 # print the remaining participants

## # A tibble: 150 x 66
##    ResponseId Agency2 Sab1_1 Sab1_2 Sab1_3 Sab1_4 Sab1_5 Sab1b_1 Sab1b_2 Sab1b_3
##    <chr>      <chr>   <chr>  <chr>  <chr>  <chr>  <chr>  <chr>   <chr>   <chr>  
##  1 R_3jZpCHw… Accord… 1      <NA>   <NA>   1      <NA>   1       <NA>    <NA>   
##  2 R_1pxkNNp… accord… <NA>   <NA>   <NA>   <NA>   <NA>   <NA>    <NA>    <NA>   
##  3 R_1LiGUJ1… Accord… <NA>   1      <NA>   1      <NA>   <NA>    <NA>    <NA>   
##  4 R_2Wxpb14… Based … <NA>   <NA>   <NA>   1      <NA>   1       <NA>    <NA>   
##  5 R_2UXxMq9… based … 1      <NA>   <NA>   <NA>   <NA>   1       <NA>    <NA>   
##  6 R_3kCeJHD… Based … <NA>   <NA>   <NA>   <NA>   <NA>   <NA>    <NA>    <NA>   
##  7 R_1DVkqRs… Based … 1      1      <NA>   <NA>   <NA>   1       <NA>    <NA>   
##  8 R_3htC3eV… based … <NA>   <NA>   <NA>   <NA>   <NA>   <NA>    1       <NA>   
##  9 R_2curmuS… Based … <NA>   <NA>   <NA>   <NA>   <NA>   <NA>    <NA>    <NA>   
## 10 R_swmW3MG… Based … 1      <NA>   <NA>   1      <NA>   1       <NA>    <NA>   
## # … with 140 more rows, and 56 more variables: Sab1b_4 <chr>, Sab1b_5 <chr>,
## #   Sab2_1 <chr>, Sab2_2 <chr>, Sab2_3 <chr>, Sab2_4 <chr>, Sab2_5 <chr>,
## #   Sab2b_1 <chr>, Sab2b_2 <chr>, Sab2b_3 <chr>, Sab2b_4 <chr>, Sab2b_5 <chr>,
## #   Sab3_1 <chr>, Sab3_2 <chr>, Sab3_3 <chr>, Sab3_4 <chr>, Sab3_5 <chr>,
## #   Sab3b_1 <chr>, Sab3b_2 <chr>, Sab3b_3 <chr>, Sab3b_4 <chr>, Sab3b_5 <chr>,
## #   EC <chr>, NonA2 <chr>, Sab1NA_1 <chr>, Sab1NA_2 <chr>, Sab1NA_3 <chr>,
## #   Sab1NA_4 <chr>, Sab1NA_5 <chr>, Sab1NAb_1 <chr>, Sab1NAb_2 <chr>,
## #   Sab1NAb_3 <chr>, Sab1NAb_4 <chr>, Sab1NAb_5 <chr>, Sab2NA_1 <chr>,
## #   Sab2NA_2 <chr>, Sab2NA_3 <chr>, Sab2NA_4 <chr>, Sab2NA_5 <chr>,
## #   Sab2NAb_1 <chr>, Sab2NAb_2 <chr>, Sab2NAb_3 <chr>, Sab2NAb_4 <chr>,
## #   Sab2NAb_5 <chr>, Sab3NA_1 <chr>, Sab3NA_2 <chr>, Sab3NA_3 <chr>,
## #   Sab3NA_4 <chr>, Sab3NA_5 <chr>, Sab3NAb_1 <chr>, Sab3NAb_2 <chr>,
## #   Sab3NAb_3 <chr>, Sab3NAb_4 <chr>, Sab3NAb_5 <chr>, ECNA <chr>,
## #   AttnCheck <chr>

# Number of participants removed for leaving both attention checks blank
Blanks_excl_total <- nrow(d_excl3) - nrow(d_excl4)

Total Exclusions and remaining participants (Start:d_tidy, End:d_excl4)

# Total attention check exclusions across the four exclusion steps
Total_attn_excls <- sum(
  Attncheck2_excl_total,
  Agency_excl_total,
  NonAgency_excl_total,
  Blanks_excl_total
)
Total_attn_excls # print the total

## [1] 91

# Participants in each condition
# Before the attention-check exclusions, previews already removed (241)
nrow(d_tidy)

## [1] 241

nrow(d_excl4) # participants remaining after all exclusions

## [1] 150

# Participants with an Agency2 response, i.e. the Agency condition
A <- d_excl4 %>%
  filter(!is.na(Agency2))
nrow(A) # number in the Agency condition

## [1] 63

# Participants with a NonA2 response, i.e. the Non-Agency condition
NonA <- d_excl4 %>%
  filter(!is.na(NonA2))
nrow(NonA) # number in the Non-Agency condition

## [1] 87

Convert unethical behaviors from character to numeric (Start: d_excl4, End: dat)

# List the columns to identify which ones to convert to numeric
names(d_excl4)

##  [1] "ResponseId" "Agency2"    "Sab1_1"     "Sab1_2"     "Sab1_3"    
##  [6] "Sab1_4"     "Sab1_5"     "Sab1b_1"    "Sab1b_2"    "Sab1b_3"   
## [11] "Sab1b_4"    "Sab1b_5"    "Sab2_1"     "Sab2_2"     "Sab2_3"    
## [16] "Sab2_4"     "Sab2_5"     "Sab2b_1"    "Sab2b_2"    "Sab2b_3"   
## [21] "Sab2b_4"    "Sab2b_5"    "Sab3_1"     "Sab3_2"     "Sab3_3"    
## [26] "Sab3_4"     "Sab3_5"     "Sab3b_1"    "Sab3b_2"    "Sab3b_3"   
## [31] "Sab3b_4"    "Sab3b_5"    "EC"         "NonA2"      "Sab1NA_1"  
## [36] "Sab1NA_2"   "Sab1NA_3"   "Sab1NA_4"   "Sab1NA_5"   "Sab1NAb_1" 
## [41] "Sab1NAb_2"  "Sab1NAb_3"  "Sab1NAb_4"  "Sab1NAb_5"  "Sab2NA_1"  
## [46] "Sab2NA_2"   "Sab2NA_3"   "Sab2NA_4"   "Sab2NA_5"   "Sab2NAb_1" 
## [51] "Sab2NAb_2"  "Sab2NAb_3"  "Sab2NAb_4"  "Sab2NAb_5"  "Sab3NA_1"  
## [56] "Sab3NA_2"   "Sab3NA_3"   "Sab3NA_4"   "Sab3NA_5"   "Sab3NAb_1" 
## [61] "Sab3NAb_2"  "Sab3NAb_3"  "Sab3NAb_4"  "Sab3NAb_5"  "ECNA"      
## [66] "AttnCheck"

# Inspect the column types before conversion.
str(d_excl4)

## tibble [150 × 66] (S3: tbl_df/tbl/data.frame)
##  $ ResponseId: chr [1:150] "R_3jZpCHwaUpQCxkJ" "R_1pxkNNpUOUEzA21" "R_1LiGUJ1d0K1zAYR" "R_2Wxpb14An8Ls4K1" ...
##  $ Agency2   : chr [1:150] "According to personal values" "according to their own values" "According to their values" "Based on individual employee values" ...
##  $ Sab1_1    : chr [1:150] "1" NA NA NA ...
##  $ Sab1_2    : chr [1:150] NA NA "1" NA ...
##  $ Sab1_3    : chr [1:150] NA NA NA NA ...
##  $ Sab1_4    : chr [1:150] "1" NA "1" "1" ...
##  $ Sab1_5    : chr [1:150] NA NA NA NA ...
##  $ Sab1b_1   : chr [1:150] "1" NA NA "1" ...
##  $ Sab1b_2   : chr [1:150] NA NA NA NA ...
##  $ Sab1b_3   : chr [1:150] NA NA NA NA ...
##  $ Sab1b_4   : chr [1:150] NA NA "1" NA ...
##  $ Sab1b_5   : chr [1:150] NA NA NA NA ...
##  $ Sab2_1    : chr [1:150] NA NA NA "1" ...
##  $ Sab2_2    : chr [1:150] NA NA NA "1" ...
##  $ Sab2_3    : chr [1:150] NA NA NA NA ...
##  $ Sab2_4    : chr [1:150] NA NA NA "1" ...
##  $ Sab2_5    : chr [1:150] NA NA NA NA ...
##  $ Sab2b_1   : chr [1:150] NA NA NA NA ...
##  $ Sab2b_2   : chr [1:150] NA NA NA NA ...
##  $ Sab2b_3   : chr [1:150] NA NA NA NA ...
##  $ Sab2b_4   : chr [1:150] NA NA NA NA ...
##  $ Sab2b_5   : chr [1:150] NA NA NA NA ...
##  $ Sab3_1    : chr [1:150] "1" NA NA "1" ...
##  $ Sab3_2    : chr [1:150] NA NA NA "1" ...
##  $ Sab3_3    : chr [1:150] NA NA NA NA ...
##  $ Sab3_4    : chr [1:150] "1" NA "1" NA ...
##  $ Sab3_5    : chr [1:150] NA NA "1" NA ...
##  $ Sab3b_1   : chr [1:150] "1" NA NA NA ...
##  $ Sab3b_2   : chr [1:150] NA NA NA "1" ...
##  $ Sab3b_3   : chr [1:150] NA NA NA NA ...
##  $ Sab3b_4   : chr [1:150] NA NA NA NA ...
##  $ Sab3b_5   : chr [1:150] NA NA "1" NA ...
##  $ EC        : chr [1:150] "6" "7" "6" "6" ...
##  $ NonA2     : chr [1:150] NA NA NA NA ...
##  $ Sab1NA_1  : chr [1:150] NA NA NA NA ...
##  $ Sab1NA_2  : chr [1:150] NA NA NA NA ...
##  $ Sab1NA_3  : chr [1:150] NA NA NA NA ...
##  $ Sab1NA_4  : chr [1:150] NA NA NA NA ...
##  $ Sab1NA_5  : chr [1:150] NA NA NA NA ...
##  $ Sab1NAb_1 : chr [1:150] NA NA NA NA ...
##  $ Sab1NAb_2 : chr [1:150] NA NA NA NA ...
##  $ Sab1NAb_3 : chr [1:150] NA NA NA NA ...
##  $ Sab1NAb_4 : chr [1:150] NA NA NA NA ...
##  $ Sab1NAb_5 : chr [1:150] NA NA NA NA ...
##  $ Sab2NA_1  : chr [1:150] NA NA NA NA ...
##  $ Sab2NA_2  : chr [1:150] NA NA NA NA ...
##  $ Sab2NA_3  : chr [1:150] NA NA NA NA ...
##  $ Sab2NA_4  : chr [1:150] NA NA NA NA ...
##  $ Sab2NA_5  : chr [1:150] NA NA NA NA ...
##  $ Sab2NAb_1 : chr [1:150] NA NA NA NA ...
##  $ Sab2NAb_2 : chr [1:150] NA NA NA NA ...
##  $ Sab2NAb_3 : chr [1:150] NA NA NA NA ...
##  $ Sab2NAb_4 : chr [1:150] NA NA NA NA ...
##  $ Sab2NAb_5 : chr [1:150] NA NA NA NA ...
##  $ Sab3NA_1  : chr [1:150] NA NA NA NA ...
##  $ Sab3NA_2  : chr [1:150] NA NA NA NA ...
##  $ Sab3NA_3  : chr [1:150] NA NA NA NA ...
##  $ Sab3NA_4  : chr [1:150] NA NA NA NA ...
##  $ Sab3NA_5  : chr [1:150] NA NA NA NA ...
##  $ Sab3NAb_1 : chr [1:150] NA NA NA NA ...
##  $ Sab3NAb_2 : chr [1:150] NA NA NA NA ...
##  $ Sab3NAb_3 : chr [1:150] NA NA NA NA ...
##  $ Sab3NAb_4 : chr [1:150] NA NA NA NA ...
##  $ Sab3NAb_5 : chr [1:150] NA NA NA NA ...
##  $ ECNA      : chr [1:150] NA NA NA NA ...
##  $ AttnCheck : chr [1:150] "5" "5" "5" "5" ...

# Convert the response columns from character to numeric, selecting them by
# name instead of by position:
#   condition 1: Sab1_1 through EC          (columns 3-33)
#   condition 2: Sab1NA_1 through AttnCheck (columns 35-66)
# ResponseId and the free-text attention checks (Agency2, NonA2) stay
# character. across() converts column by column, which avoids sapply()
# returning a differently shaped result when there are 0 or 1 rows.
dat <- d_excl4 %>%
  mutate(across(c(Sab1_1:EC, Sab1NA_1:AttnCheck), as.numeric))

# Confirm the converted column types
str(dat)

## tibble [150 × 66] (S3: tbl_df/tbl/data.frame)
##  $ ResponseId: chr [1:150] "R_3jZpCHwaUpQCxkJ" "R_1pxkNNpUOUEzA21" "R_1LiGUJ1d0K1zAYR" "R_2Wxpb14An8Ls4K1" ...
##  $ Agency2   : chr [1:150] "According to personal values" "according to their own values" "According to their values" "Based on individual employee values" ...
##  $ Sab1_1    : num [1:150] 1 NA NA NA 1 NA 1 NA NA 1 ...
##  $ Sab1_2    : num [1:150] NA NA 1 NA NA NA 1 NA NA NA ...
##  $ Sab1_3    : num [1:150] NA NA NA NA NA NA NA NA NA NA ...
##  $ Sab1_4    : num [1:150] 1 NA 1 1 NA NA NA NA NA 1 ...
##  $ Sab1_5    : num [1:150] NA NA NA NA NA NA NA NA NA NA ...
##  $ Sab1b_1   : num [1:150] 1 NA NA 1 1 NA 1 NA NA 1 ...
##  $ Sab1b_2   : num [1:150] NA NA NA NA NA NA NA 1 NA NA ...
##  $ Sab1b_3   : num [1:150] NA NA NA NA NA NA NA NA NA NA ...
##  $ Sab1b_4   : num [1:150] NA NA 1 NA NA NA NA NA NA 1 ...
##  $ Sab1b_5   : num [1:150] NA NA NA NA NA NA NA NA NA NA ...
##  $ Sab2_1    : num [1:150] NA NA NA 1 1 NA NA NA NA NA ...
##  $ Sab2_2    : num [1:150] NA NA NA 1 NA NA 1 1 NA 1 ...
##  $ Sab2_3    : num [1:150] NA NA NA NA NA NA NA NA NA NA ...
##  $ Sab2_4    : num [1:150] NA NA NA 1 NA NA NA NA NA NA ...
##  $ Sab2_5    : num [1:150] NA NA NA NA NA NA NA NA NA NA ...
##  $ Sab2b_1   : num [1:150] NA NA NA NA 1 NA NA NA NA NA ...
##  $ Sab2b_2   : num [1:150] NA NA NA NA NA NA NA 1 NA NA ...
##  $ Sab2b_3   : num [1:150] NA NA NA NA NA NA NA NA NA NA ...
##  $ Sab2b_4   : num [1:150] NA NA NA NA NA NA NA NA NA NA ...
##  $ Sab2b_5   : num [1:150] NA NA NA NA NA NA NA NA NA NA ...
##  $ Sab3_1    : num [1:150] 1 NA NA 1 NA NA NA NA NA NA ...
##  $ Sab3_2    : num [1:150] NA NA NA 1 NA NA 1 1 NA 1 ...
##  $ Sab3_3    : num [1:150] NA NA NA NA NA NA NA NA NA 1 ...
##  $ Sab3_4    : num [1:150] 1 NA 1 NA NA NA NA NA NA 1 ...
##  $ Sab3_5    : num [1:150] NA NA 1 NA NA NA NA NA NA NA ...
##  $ Sab3b_1   : num [1:150] 1 NA NA NA NA NA NA NA NA NA ...
##  $ Sab3b_2   : num [1:150] NA NA NA 1 NA NA NA 1 NA NA ...
##  $ Sab3b_3   : num [1:150] NA NA NA NA NA NA NA NA NA NA ...
##  $ Sab3b_4   : num [1:150] NA NA NA NA NA NA NA NA NA NA ...
##  $ Sab3b_5   : num [1:150] NA NA 1 NA NA NA NA NA NA NA ...
##  $ EC        : num [1:150] 6 7 6 6 6 7 5 5 6 7 ...
##  $ NonA2     : chr [1:150] NA NA NA NA ...
##  $ Sab1NA_1  : num [1:150] NA NA NA NA NA NA NA NA NA NA ...
##  $ Sab1NA_2  : num [1:150] NA NA NA NA NA NA NA NA NA NA ...
##  $ Sab1NA_3  : num [1:150] NA NA NA NA NA NA NA NA NA NA ...
##  $ Sab1NA_4  : num [1:150] NA NA NA NA NA NA NA NA NA NA ...
##  $ Sab1NA_5  : num [1:150] NA NA NA NA NA NA NA NA NA NA ...
##  $ Sab1NAb_1 : num [1:150] NA NA NA NA NA NA NA NA NA NA ...
##  $ Sab1NAb_2 : num [1:150] NA NA NA NA NA NA NA NA NA NA ...
##  $ Sab1NAb_3 : num [1:150] NA NA NA NA NA NA NA NA NA NA ...
##  $ Sab1NAb_4 : num [1:150] NA NA NA NA NA NA NA NA NA NA ...
##  $ Sab1NAb_5 : num [1:150] NA NA NA NA NA NA NA NA NA NA ...
##  $ Sab2NA_1  : num [1:150] NA NA NA NA NA NA NA NA NA NA ...
##  $ Sab2NA_2  : num [1:150] NA NA NA NA NA NA NA NA NA NA ...
##  $ Sab2NA_3  : num [1:150] NA NA NA NA NA NA NA NA NA NA ...
##  $ Sab2NA_4  : num [1:150] NA NA NA NA NA NA NA NA NA NA ...
##  $ Sab2NA_5  : num [1:150] NA NA NA NA NA NA NA NA NA NA ...
##  $ Sab2NAb_1 : num [1:150] NA NA NA NA NA NA NA NA NA NA ...
##  $ Sab2NAb_2 : num [1:150] NA NA NA NA NA NA NA NA NA NA ...
##  $ Sab2NAb_3 : num [1:150] NA NA NA NA NA NA NA NA NA NA ...
##  $ Sab2NAb_4 : num [1:150] NA NA NA NA NA NA NA NA NA NA ...
##  $ Sab2NAb_5 : num [1:150] NA NA NA NA NA NA NA NA NA NA ...
##  $ Sab3NA_1  : num [1:150] NA NA NA NA NA NA NA NA NA NA ...
##  $ Sab3NA_2  : num [1:150] NA NA NA NA NA NA NA NA NA NA ...
##  $ Sab3NA_3  : num [1:150] NA NA NA NA NA NA NA NA NA NA ...
##  $ Sab3NA_4  : num [1:150] NA NA NA NA NA NA NA NA NA NA ...
##  $ Sab3NA_5  : num [1:150] NA NA NA NA NA NA NA NA NA NA ...
##  $ Sab3NAb_1 : num [1:150] NA NA NA NA NA NA NA NA NA NA ...
##  $ Sab3NAb_2 : num [1:150] NA NA NA NA NA NA NA NA NA NA ...
##  $ Sab3NAb_3 : num [1:150] NA NA NA NA NA NA NA NA NA NA ...
##  $ Sab3NAb_4 : num [1:150] NA NA NA NA NA NA NA NA NA NA ...
##  $ Sab3NAb_5 : num [1:150] NA NA NA NA NA NA NA NA NA NA ...
##  $ ECNA      : num [1:150] NA NA NA NA NA NA NA NA NA NA ...
##  $ AttnCheck : num [1:150] 5 5 5 5 5 5 5 5 5 5 ...

Create Sum Columns (Start:dat, End:dat)

# AGENCY CONDITION - total deserve behaviors ## ==================================================== ##

# Sabotage1
# Row total of the five Sab1_* items (NA items are skipped, so a participant
# with no selections gets 0). rowSums() is vectorised and, unlike rowwise(),
# does not leave `dat` behind as a rowwise frame.
dat <- dat %>%
  ungroup() %>%
  mutate(deserve.sab1 = rowSums(across(num_range("Sab1_", 1:5)), na.rm = TRUE))

range(dat$deserve.sab1) #confirm response totals are within 0-5 range

## [1] 0 5

# Remaining per-participant totals, built in a single vectorised mutate().
# Each total is the row sum of five items with NA counted as 0.
# Columns are created in the same order as before so that the positional
# selections further down still pick the same columns.
# ungroup() clears the rowwise state left by the Sabotage1 step above.
dat <- dat %>%
  ungroup() %>%
  mutate(
    # AGENCY CONDITION - total deserve behaviors
    deserve.sab2 = rowSums(across(num_range("Sab2_", 1:5)), na.rm = TRUE),
    deserve.sab3 = rowSums(across(num_range("Sab3_", 1:5)), na.rm = TRUE),

    # AGENCY CONDITION - total actual behaviors
    actual.sab1 = rowSums(across(num_range("Sab1b_", 1:5)), na.rm = TRUE),
    actual.sab2 = rowSums(across(num_range("Sab2b_", 1:5)), na.rm = TRUE),
    actual.sab3 = rowSums(across(num_range("Sab3b_", 1:5)), na.rm = TRUE),

    # NON-AGENCY CONDITION - total deserve behaviors
    deserve.sab1.NA = rowSums(across(num_range("Sab1NA_", 1:5)), na.rm = TRUE),
    deserve.sab2.NA = rowSums(across(num_range("Sab2NA_", 1:5)), na.rm = TRUE),
    # was misspelled `deserve.sab3.nA`; now matches the other `.NA` columns
    deserve.sab3.NA = rowSums(across(num_range("Sab3NA_", 1:5)), na.rm = TRUE),

    # NON-AGENCY CONDITION - total actual behaviors
    actual.sab1.NA = rowSums(across(num_range("Sab1NAb_", 1:5)), na.rm = TRUE),
    actual.sab2.NA = rowSums(across(num_range("Sab2NAb_", 1:5)), na.rm = TRUE),
    actual.sab3.NA = rowSums(across(num_range("Sab3NAb_", 1:5)), na.rm = TRUE)
  )


colnames(dat) #12 total new columns

##  [1] "ResponseId"      "Agency2"         "Sab1_1"          "Sab1_2"         
##  [5] "Sab1_3"          "Sab1_4"          "Sab1_5"          "Sab1b_1"        
##  [9] "Sab1b_2"         "Sab1b_3"         "Sab1b_4"         "Sab1b_5"        
## [13] "Sab2_1"          "Sab2_2"          "Sab2_3"          "Sab2_4"         
## [17] "Sab2_5"          "Sab2b_1"         "Sab2b_2"         "Sab2b_3"        
## [21] "Sab2b_4"         "Sab2b_5"         "Sab3_1"          "Sab3_2"         
## [25] "Sab3_3"          "Sab3_4"          "Sab3_5"          "Sab3b_1"        
## [29] "Sab3b_2"         "Sab3b_3"         "Sab3b_4"         "Sab3b_5"        
## [33] "EC"              "NonA2"           "Sab1NA_1"        "Sab1NA_2"       
## [37] "Sab1NA_3"        "Sab1NA_4"        "Sab1NA_5"        "Sab1NAb_1"      
## [41] "Sab1NAb_2"       "Sab1NAb_3"       "Sab1NAb_4"       "Sab1NAb_5"      
## [45] "Sab2NA_1"        "Sab2NA_2"        "Sab2NA_3"        "Sab2NA_4"       
## [49] "Sab2NA_5"        "Sab2NAb_1"       "Sab2NAb_2"       "Sab2NAb_3"      
## [53] "Sab2NAb_4"       "Sab2NAb_5"       "Sab3NA_1"        "Sab3NA_2"       
## [57] "Sab3NA_3"        "Sab3NA_4"        "Sab3NA_5"        "Sab3NAb_1"      
## [61] "Sab3NAb_2"       "Sab3NAb_3"       "Sab3NAb_4"       "Sab3NAb_5"      
## [65] "ECNA"            "AttnCheck"       "deserve.sab1"    "deserve.sab2"   
## [69] "deserve.sab3"    "actual.sab1"     "actual.sab2"     "actual.sab3"    
## [73] "deserve.sab1.NA" "deserve.sab2.NA" "deserve.sab3.nA" "actual.sab1.NA" 
## [77] "actual.sab2.NA"  "actual.sab3.NA"

Add condition column, truncate leaving behind sum columns (Start:dat, End:dat_2)

# Tag every participant with their experimental condition and keep the totals.
# A participant with a missing Agency2 answer is coded as condition 2;
# everyone else is condition 1 (Agency2 is condition 1, NonA2 is condition 2).
dat <- dat %>%
  mutate(condition = if_else(is.na(Agency2), 2, 1))

# List the columns before dropping Agency2/NonA2 and keeping only the totals
names(dat)

##  [1] "ResponseId"      "Agency2"         "Sab1_1"          "Sab1_2"         
##  [5] "Sab1_3"          "Sab1_4"          "Sab1_5"          "Sab1b_1"        
##  [9] "Sab1b_2"         "Sab1b_3"         "Sab1b_4"         "Sab1b_5"        
## [13] "Sab2_1"          "Sab2_2"          "Sab2_3"          "Sab2_4"         
## [17] "Sab2_5"          "Sab2b_1"         "Sab2b_2"         "Sab2b_3"        
## [21] "Sab2b_4"         "Sab2b_5"         "Sab3_1"          "Sab3_2"         
## [25] "Sab3_3"          "Sab3_4"          "Sab3_5"          "Sab3b_1"        
## [29] "Sab3b_2"         "Sab3b_3"         "Sab3b_4"         "Sab3b_5"        
## [33] "EC"              "NonA2"           "Sab1NA_1"        "Sab1NA_2"       
## [37] "Sab1NA_3"        "Sab1NA_4"        "Sab1NA_5"        "Sab1NAb_1"      
## [41] "Sab1NAb_2"       "Sab1NAb_3"       "Sab1NAb_4"       "Sab1NAb_5"      
## [45] "Sab2NA_1"        "Sab2NA_2"        "Sab2NA_3"        "Sab2NA_4"       
## [49] "Sab2NA_5"        "Sab2NAb_1"       "Sab2NAb_2"       "Sab2NAb_3"      
## [53] "Sab2NAb_4"       "Sab2NAb_5"       "Sab3NA_1"        "Sab3NA_2"       
## [57] "Sab3NA_3"        "Sab3NA_4"        "Sab3NA_5"        "Sab3NAb_1"      
## [61] "Sab3NAb_2"       "Sab3NAb_3"       "Sab3NAb_4"       "Sab3NAb_5"      
## [65] "ECNA"            "AttnCheck"       "deserve.sab1"    "deserve.sab2"   
## [69] "deserve.sab3"    "actual.sab1"     "actual.sab2"     "actual.sab3"    
## [73] "deserve.sab1.NA" "deserve.sab2.NA" "deserve.sab3.nA" "actual.sab1.NA" 
## [77] "actual.sab2.NA"  "actual.sab3.NA"  "condition"

# Keep the participant id, the twelve total columns and the condition flag.
# Columns are selected by name (a name range from the first total through
# `condition`) rather than by the positions 1 and 67:79, so the selection
# survives upstream column additions.
dat_1 <- select(dat, ResponseId, deserve.sab1:condition)
# Move the condition flag next to the id for readability
dat_2 <- relocate(dat_1, condition, .after = ResponseId)
colnames(dat_2)

##  [1] "ResponseId"      "condition"       "deserve.sab1"    "deserve.sab2"   
##  [5] "deserve.sab3"    "actual.sab1"     "actual.sab2"     "actual.sab3"    
##  [9] "deserve.sab1.NA" "deserve.sab2.NA" "deserve.sab3.nA" "actual.sab1.NA" 
## [13] "actual.sab2.NA"  "actual.sab3.NA"

head(dat_2)

## # A tibble: 6 x 14
## # Rowwise: 
##   ResponseId condition deserve.sab1 deserve.sab2 deserve.sab3 actual.sab1
##   <chr>          <dbl>        <dbl>        <dbl>        <dbl>       <dbl>
## 1 R_3jZpCHw…         1            2            0            2           1
## 2 R_1pxkNNp…         1            0            0            0           0
## 3 R_1LiGUJ1…         1            2            0            2           1
## 4 R_2Wxpb14…         1            1            3            2           1
## 5 R_2UXxMq9…         1            1            1            0           1
## 6 R_3kCeJHD…         1            0            0            0           0
## # … with 8 more variables: actual.sab2 <dbl>, actual.sab3 <dbl>,
## #   deserve.sab1.NA <dbl>, deserve.sab2.NA <dbl>, deserve.sab3.nA <dbl>,
## #   actual.sab1.NA <dbl>, actual.sab2.NA <dbl>, actual.sab3.NA <dbl>

Group, Gather and Separate. (Starts:dat_2, Ends:agendata_comb_grouped)

# Split then recombine Data Frame into one long ## ================= ##  

# Condition 1 participants with their agency-condition totals
cond1 <- dat_2 %>% #this is it chief
  filter(condition == 1) %>%
  select(ResponseId, condition, deserve.sab1:actual.sab3)

# Condition 2 participants with their non-agency (".NA") totals
cond2 <- dat_2 %>%
  filter(condition == 2) %>%
  select(ResponseId, condition, deserve.sab1.NA:actual.sab3.NA)

# Strip the ".NA" suffix so both halves share column names and can be stacked.
# Case-insensitive so a mis-cased suffix such as ".nA" is handled as well.
names(cond2) <- sub("\\.NA$", "", names(cond2), ignore.case = TRUE)

colnames(cond1)

## [1] "ResponseId"   "condition"    "deserve.sab1" "deserve.sab2" "deserve.sab3"
## [6] "actual.sab1"  "actual.sab2"  "actual.sab3"

colnames(cond2)

## [1] "ResponseId"   "condition"    "deserve.sab1" "deserve.sab2" "deserve.sab3"
## [6] "actual.sab1"  "actual.sab2"  "actual.sab3"

# Stack the two condition subsets row-wise; cond1 and cond2 carry identical
# column names at this point (see the colnames() output above), so each total
# column now holds that participant's score for their own condition.
data_combined <- rbind(cond1, cond2) #the frame we are pivoting long

# Separate and Gather to make agendata_comb_grouped ## ================= ##  

# Confirm the eight columns (id, condition, six totals) before reshaping
colnames(data_combined)

## [1] "ResponseId"   "condition"    "deserve.sab1" "deserve.sab2" "deserve.sab3"
## [6] "actual.sab1"  "actual.sab2"  "actual.sab3"

# Reshape to long format: one row per participant x total column, with the
# column name in Key and the score in Behaviors. pivot_longer() replaces the
# superseded gather(); the value columns are chosen by name (everything except
# the id and condition) instead of by position 3:8. The arrange() restores
# gather()'s ordering (all rows of the first total column, then the next, ...)
# so downstream row order is unchanged.
data_comb_long <- data_combined %>%
  ungroup() %>%
  pivot_longer(
    cols = -c(ResponseId, condition),
    names_to = "Key",
    values_to = "Behaviors"
  ) %>%
  arrange(match(Key, names(data_combined)))

# Split Key ("deserve.sab1", "actual.sab3", ...) at the dot into Kind and
# Sabotage. The separator is given explicitly rather than relying on
# separate()'s default "any non-alphanumeric character" rule.
dat_comb_sep <- separate(data_comb_long, Key,
                         into = c("Kind", "Sabotage"), sep = "\\.")

# Standard error of the mean: sd(x) / sqrt(n).
#
# x      Numeric vector.
# na.rm  If TRUE, missing values are dropped before both the standard
#        deviation and n are computed. Defaults to FALSE, which keeps the
#        original behaviour (any NA in x yields NA).
# Returns a single numeric value.
std <- function(x, na.rm = FALSE) {
  if (na.rm) {
    x <- x[!is.na(x)]
  }
  sd(x) / sqrt(length(x))
}

# Mean and standard error of Behaviors for every condition x Kind x Sabotage
# cell. `.groups = "drop_last"` states explicitly the grouping that
# summarize() would otherwise choose implicitly (the result stays grouped by
# condition and Kind), which also silences the regrouping message.
agendata_comb_grouped <- dat_comb_sep %>% # Grouped by mean. More useful. 
  group_by(condition, Kind, Sabotage) %>%
  summarize(Mean = mean(Behaviors),
            SE = std(Behaviors),
            .groups = "drop_last")

## `summarise()` regrouping output by 'condition', 'Kind' (override with `.groups` argument)

# Link used to find gather function: https://uc-r.github.io/tidyr

Visualize distribution of data-points for deserve in sabotage 1

# Plot 1
# One box per condition showing the spread of deserve.sab1 totals.
# The previous `group = ResponseId` aesthetic split the data into one group
# per participant, so every "box" was built from a single observation.
ggplot(data = data_combined,
       mapping = aes(x = factor(condition),
                     y = deserve.sab1)) +
  geom_boxplot()

# Histogram of a sample column
hist(data_combined$deserve.sab1)

Barplot of mean unethical behaviors across condition, sabotage and kind

# Turn the three categorical columns (condition, Kind, Sabotage) into factors
# with explicit level order, so the bars are drawn in a fixed sequence.
agendata_comb_grouped[["condition"]] <- factor(
  agendata_comb_grouped[["condition"]],
  levels = c("1", "2")
)
agendata_comb_grouped[["Kind"]] <- factor(
  agendata_comb_grouped[["Kind"]],
  levels = c("actual", "deserve")
)
agendata_comb_grouped[["Sabotage"]] <- factor(
  agendata_comb_grouped[["Sabotage"]],
  levels = c("sab1", "sab2", "sab3")
)

# Plot G1 (Mean unethical behaviors) ## ================================ ##

# Shared dodge so the error bars line up with the dodged bars
dodge <- position_dodge(width = 0.9)

# Bars: mean behaviors per Kind x condition cell, filled by Sabotage, with
# +/- 1 SE error bars. The default x axis text is hidden and replaced by two
# rows of manual labels: Actual/Deserve under each bar cluster and the
# condition name under each pair of clusters.
g1 <- ggplot(
  data = agendata_comb_grouped,
  mapping = aes(x = interaction(Kind, condition), y = Mean, fill = Sabotage)
) +
  geom_bar(stat = "identity", position = position_dodge()) +
  geom_errorbar(
    mapping = aes(ymin = Mean - SE, ymax = Mean + SE),
    position = dodge,
    width = 0.2
  ) +
  annotate(
    "text",
    x = 1:4,
    y = -0.05,
    label = rep(c("Actual", "Deserve"), 2)
  ) +
  annotate(
    "text",
    x = c(1.5, 3.5),
    y = -0.2,
    label = c("Agency", "Compliance")
  ) +
  theme(
    text = element_text(size = 12, family = "sans"),
    axis.text.x = element_blank(),
    axis.title.x = element_blank(),
    plot.margin = unit(c(1, 1, 4, 1), "lines")
  )
g1

Barplot of Main Result - mean actual unethical behaviors across condition

agendata_comb_grouped

## # A tibble: 12 x 5
## # Groups:   condition, Kind [4]
##    condition Kind    Sabotage  Mean     SE
##    <fct>     <fct>   <fct>    <dbl>  <dbl>
##  1 1         actual  sab1     1.05  0.121 
##  2 1         actual  sab2     0.683 0.0900
##  3 1         actual  sab3     0.810 0.111 
##  4 1         deserve sab1     1.24  0.146 
##  5 1         deserve sab2     1.10  0.153 
##  6 1         deserve sab3     1.37  0.157 
##  7 2         actual  sab1     0.655 0.0690
##  8 2         actual  sab2     0.586 0.0704
##  9 2         actual  sab3     0.678 0.0740
## 10 2         deserve sab1     1.08  0.120 
## 11 2         deserve sab2     1.05  0.120 
## 12 2         deserve sab3     1.28  0.136

# Keep only the "actual" rows of the summary table (both conditions)
agendata_actuals <- agendata_comb_grouped %>%
  filter(Kind == "actual")

# Dodged barplot of mean actual unethical behavior per condition, one bar per
# Sabotage level, with +/- 1 SE error bars (uses the `dodge` position object
# created for plot g1)
ggplot(
  data = agendata_actuals,
  mapping = aes(x = condition, y = Mean, fill = Sabotage)
) +
  geom_bar(stat = "identity", position = position_dodge()) +
  geom_errorbar(
    mapping = aes(ymin = Mean - SE, ymax = Mean + SE),
    position = dodge,
    width = 0.2
  )

## Scatterplots

All Datapoints and Within-Group Mean Trend-Line

agendata_actuals # This data is already summarized, Seeing all data requires pre-summarized data to run.

## # A tibble: 6 x 5
## # Groups:   condition, Kind [2]
##   condition Kind   Sabotage  Mean     SE
##   <fct>     <fct>  <fct>    <dbl>  <dbl>
## 1 1         actual sab1     1.05  0.121 
## 2 1         actual sab2     0.683 0.0900
## 3 1         actual sab3     0.810 0.111 
## 4 2         actual sab1     0.655 0.0690
## 5 2         actual sab2     0.586 0.0704
## 6 2         actual sab3     0.678 0.0740

dat_comb_sep #Good. This is the pre-group-and-summarized data.

## # A tibble: 900 x 5
##    ResponseId        condition Kind    Sabotage Behaviors
##    <chr>                 <dbl> <chr>   <chr>        <dbl>
##  1 R_3jZpCHwaUpQCxkJ         1 deserve sab1             2
##  2 R_1pxkNNpUOUEzA21         1 deserve sab1             0
##  3 R_1LiGUJ1d0K1zAYR         1 deserve sab1             2
##  4 R_2Wxpb14An8Ls4K1         1 deserve sab1             1
##  5 R_2UXxMq9S5IHUU16         1 deserve sab1             1
##  6 R_3kCeJHDJmHzPg6p         1 deserve sab1             0
##  7 R_1DVkqRsxT2e2NS3         1 deserve sab1             2
##  8 R_3htC3eVF2DZMYJ2         1 deserve sab1             0
##  9 R_2curmuSXFfcTSQx         1 deserve sab1             0
## 10 R_swmW3MGvEs7ZYoF         1 deserve sab1             2
## # … with 890 more rows

# Drop the "deserve" rows so only actual behaviors remain
dat_filt <- dat_comb_sep %>%
  filter(Kind == "actual")

# Every individual score as a point, plus a line through the per-condition
# means of each Sabotage level
sp_1 <- ggplot(
  data = dat_filt,
  mapping = aes(x = condition, y = Behaviors,
                group = Sabotage, colour = Sabotage)
) +
  geom_point() +
  stat_summary(fun = mean, geom = "line")
# The DV only takes a handful of integer values, so points overplot and the
# spread of datapoints around any single value is not visible
sp_1

So Lets Visualize Summary Stats Instead

# Scatter of the summarized cell means: one point per condition x Sabotage
sp_2 <- ggplot(
  data = agendata_actuals,
  mapping = aes(x = condition, y = Mean, colour = Sabotage)
) +
  geom_point()
sp_2

With Some Trendlines

### BETTER PLOT OF SUMMARY STATISTICS
# Cell means joined by one line per Sabotage level across the two conditions.
# (An earlier draft used stat_summary(geom = "line") instead of geom_line().)
sp_3 <- ggplot(
  data = agendata_actuals,
  mapping = aes(x = condition, y = Mean,
                group = Sabotage, colour = Sabotage)
) +
  geom_point() +
  geom_line()
sp_3

Same Plot but Grouped by Within-Participant Variable

# Same cell means, but with Sabotage on the x axis and one line per condition.
# (An earlier draft used stat_summary(geom = "line") instead of geom_line().)
sp_4 <- ggplot(
  data = agendata_actuals,
  mapping = aes(x = Sabotage, y = Mean,
                group = condition, colour = condition)
) +
  geom_point() +
  geom_line()
sp_4

Interaction Plot

# Base-R interaction plot on the unsummarized actual-behavior data:
# condition on the x axis, one trace per Sabotage level, Behaviors as the
# response. with() lets the columns be referred to by bare name.
with(dat_filt, interaction.plot(x.factor = condition, trace.factor = Sabotage, 
                                response = Behaviors))

TWO WAY MIXED EFFECTS ANOVA

Boxplot of unethical behaviors across conditions by each sabotage level

The value of this is it helps visualize the distribution of datapoints, unlike a scatterplot

## convert to factors
str(dat_filt)

## tibble [450 × 5] (S3: tbl_df/tbl/data.frame)
##  $ ResponseId: chr [1:450] "R_3jZpCHwaUpQCxkJ" "R_1pxkNNpUOUEzA21" "R_1LiGUJ1d0K1zAYR" "R_2Wxpb14An8Ls4K1" ...
##  $ condition : num [1:450] 1 1 1 1 1 1 1 1 1 1 ...
##  $ Kind      : chr [1:450] "actual" "actual" "actual" "actual" ...
##  $ Sabotage  : chr [1:450] "sab1" "sab1" "sab1" "sab1" ...
##  $ Behaviors : num [1:450] 1 0 1 1 1 0 1 1 0 2 ...

# The grouping variables must be factors for the ANOVA and the boxplots below
dat_filt <- dat_filt %>%
  mutate(
    condition = as.factor(condition),
    Sabotage = as.factor(Sabotage)
  )
str(dat_filt)

## tibble [450 × 5] (S3: tbl_df/tbl/data.frame)
##  $ ResponseId: chr [1:450] "R_3jZpCHwaUpQCxkJ" "R_1pxkNNpUOUEzA21" "R_1LiGUJ1d0K1zAYR" "R_2Wxpb14An8Ls4K1" ...
##  $ condition : Factor w/ 2 levels "1","2": 1 1 1 1 1 1 1 1 1 1 ...
##  $ Kind      : chr [1:450] "actual" "actual" "actual" "actual" ...
##  $ Sabotage  : Factor w/ 3 levels "sab1","sab2",..: 1 1 1 1 1 1 1 1 1 1 ...
##  $ Behaviors : num [1:450] 1 0 1 1 1 0 1 1 0 2 ...

# then plot
# Boxplot of Behaviors per condition, one box per Sabotage level.
# The distribution looks right-skewed (most totals sit low, with a few high
# values), which makes sense since the DV is Likert-type, i.e. ordinal
bxp <- ggplot(
  data = dat_filt,
  mapping = aes(x = condition, y = Behaviors, colour = Sabotage)
) +
  geom_boxplot()
bxp

Boxplot with alternate mapping - binned by Sabotage

# Alternate mapping: Sabotage on the x axis, one box per condition.
# The boxes look weird because the data isn't normally distributed
bxp2 <- ggplot(
  data = dat_filt,
  mapping = aes(x = Sabotage, y = Behaviors, colour = condition)
) +
  geom_boxplot()
bxp2

# boxplots are just not that insightful for this kind of data

Tests if data meets conditions for two-way ANOVA

Outlier check

# Flag outliers separately within every condition x Sabotage cell.
# Result: 9 outliers, two of which are extreme
identify_outliers(
  group_by(dat_filt, condition, Sabotage),
  Behaviors
)

## # A tibble: 9 x 7
##   condition Sabotage ResponseId        Kind   Behaviors is.outlier is.extreme
##   <fct>     <fct>    <chr>             <chr>      <dbl> <lgl>      <lgl>     
## 1 1         sab1     R_37wEjP1S43lUXuh actual         5 TRUE       TRUE      
## 2 1         sab1     R_PSUGTrux930fiCd actual         3 TRUE       FALSE     
## 3 1         sab1     R_1cY4g8keLgtfKoz actual         3 TRUE       FALSE     
## 4 1         sab1     R_2ZIBGmC6fnzveHi actual         3 TRUE       FALSE     
## 5 1         sab1     R_3DpVNND6Pfjxol3 actual         3 TRUE       FALSE     
## 6 1         sab2     R_2CZAKFPGVkO1ZjA actual         3 TRUE       FALSE     
## 7 1         sab3     R_2CZAKFPGVkO1ZjA actual         3 TRUE       FALSE     
## 8 1         sab3     R_3DpVNND6Pfjxol3 actual         5 TRUE       TRUE      
## 9 2         sab3     R_1hG3vlq65rS5MJu actual         3 TRUE       FALSE

Normality Check 1: Shapiro-Wilk test

# Shapiro-Wilk test per condition x Sabotage cell; under normality the
# p-values would be greater than 0.05.
# All p-values are very low, meaning the data is strongly non-normal. That is
# consistent with the boxplots and with a DV that only takes a few ordinal
# values.
shapiro_test(
  group_by(dat_filt, condition, Sabotage),
  Behaviors
)

## # A tibble: 6 x 5
##   condition Sabotage variable  statistic        p
##   <fct>     <fct>    <chr>         <dbl>    <dbl>
## 1 1         sab1     Behaviors     0.792 4.93e- 8
## 2 1         sab2     Behaviors     0.778 2.33e- 8
## 3 1         sab3     Behaviors     0.729 1.80e- 9
## 4 2         sab1     Behaviors     0.762 1.51e-10
## 5 2         sab2     Behaviors     0.746 5.71e-11
## 6 2         sab3     Behaviors     0.776 3.45e-10

Normality Check 2: QQ-Plots (since Shapiro is for small sample sizes)

# QQ-Plot: Normality Check 2 for larger sample size
# One panel per condition (rows) x Sabotage level (columns). Points should
# fall on a straight line under normality; here they are highly non-normal,
# which again makes sense since the DV is ordinal
ggqqplot(data = dat_filt, x = "Behaviors", ggtheme = theme_bw()) +
  facet_grid(rows = vars(condition), cols = vars(Sabotage))

## Check for Homogeneity of Variance Assumption of between-subject factor (condition), at each within-subject level of (Sabotage) variable - Levene’s test

# Levene's test of equal variances between the two conditions, run separately
# at each Sabotage level. All p-values are > 0.05, so the homogeneity of
# variance assumption holds: the groups have similar variance
levene_test(
  group_by(dat_filt, Sabotage),
  Behaviors ~ condition
)

## # A tibble: 3 x 5
##   Sabotage   df1   df2 statistic     p
##   <fct>    <int> <int>     <dbl> <dbl>
## 1 sab1         1   148    0.326  0.569
## 2 sab2         1   148    0.0217 0.883
## 3 sab3         1   148    0.0397 0.842

Check for Sphericity and Homogeneity of Covariances

## Ok, at this point the data is obviously unusual because these tests are meant for continuous data. It makes sense because our DV is Likert-type.

## So I'll skimp on testing for the rest of the requirements i.e. Sphericity and Homogeneity of covariances.

## Google search shows there doesn't exist any non-parametric alternatives to 2-way ANOVA. 

# Opinion is mixed but most seem to agree its still okay to do two-way ANOVA on Likert-type data like mine. See: https://journal.equinoxpub.com/JRDS/article/view/9482

## So we will do ANOVA and then see if there are other alternatives. Looks like we could do linear mixed effects model but not sure if that requires normal data as well.

Running the ANOVA

# Two-way mixed ANOVA: condition is the between-subjects factor, Sabotage the
# within-subjects factor, and ResponseId identifies the participant.
res.aov <- dat_filt %>%
  anova_test(
    dv = Behaviors,
    wid = ResponseId,
    between = condition,
    within = Sabotage
  )
# The table shows a statistically significant two-way interaction between
# condition and Sabotage on Behaviors, F(1.9, 281.19) = 4.52, p = 0.013.
# The effect of condition was not significant, F(1, 148) = 3.70, p = 0.056,
# but the effect of Sabotage was, F(1.9, 281.19) = 8.14, p = 0.00047
get_anova_table(res.aov)

## ANOVA Table (type III tests)
## 
##               Effect DFn    DFd     F        p p<.05   ges
## 1          condition 1.0 148.00 3.700 0.056000       0.018
## 2           Sabotage 1.9 281.19 8.137 0.000471     * 0.014
## 3 condition:Sabotage 1.9 281.19 4.523 0.013000     * 0.008

# effect sizes (ges) are super low though: 0.018 (condition), 0.014 (Sabotage) and 0.008 (interaction) are all categorized as small, not noticeable even by experts.


# Another source has indicated I can actually do an ANOVA despite my data being Likert-type because 1) I have a valid zero-point in my DV, and 2) I have a valid constant separation between categories, so my variance is not meaningless. Source: look up "ANOVA for Likert-type data". Apparently this issue is the subject of ongoing debate, so there is no single right answer. So maybe do both: do the ANOVA first, then do some sort of ordinal regression.

Post-Hoc Tests

Simple Main Effect of Condition at each Sabotage level

Effect of condition at each Sabotage

# Simple main effect of condition: a one-way between-subjects ANOVA of
# Behaviors on condition, run separately for each Sabotage level, with the
# resulting p-values Bonferroni-adjusted (stored in p.adj).
one.way <- dat_filt %>%
  group_by(Sabotage) %>%
  anova_test(dv = Behaviors, wid = ResponseId, between = condition) %>%
  get_anova_table() %>%
  adjust_pvalue(method = "bonferroni")

## Coefficient covariances computed by hccm()
## Coefficient covariances computed by hccm()
## Coefficient covariances computed by hccm()

one.way # Considering the Bonferroni adjusted p-value (p.adj), the simple main effect of condition was significant at sab1 (p.adj = 0.009) but not at sab2 (p.adj = 1) or sab3 (p.adj = 0.921).

## # A tibble: 3 x 9
##   Sabotage Effect      DFn   DFd     F     p `p<.05`   ges p.adj
##   <fct>    <chr>     <dbl> <dbl> <dbl> <dbl> <chr>   <dbl> <dbl>
## 1 sab1     condition     1   148  9.00 0.003 "*"     0.057 0.009
## 2 sab2     condition     1   148  0.73 0.394 ""      0.005 1    
## 3 sab3     condition     1   148  1.05 0.307 ""      0.007 0.921

Pairwise comparisons between conditions

# Pairwise t-tests between the two conditions, run separately at each
# Sabotage level.
# NOTE(review): each Sabotage group contains a single comparison, so the
# Bonferroni adjustment inside pairwise_t_test() leaves p.adj equal to p
# (see the printed table); nothing here corrects across the three Sabotage
# levels - confirm that is intended.
pwc <- dat_filt %>%
  group_by(Sabotage) %>%
  pairwise_t_test(Behaviors ~ condition, p.adjust.method = "bonferroni")
pwc # Pairwise comparisons show that mean Behaviors differed significantly between condition 1 and condition 2 at Sabotage 1 (p = 0.00316), but not at Sabotage 2 (p = 0.394) or Sabotage 3 (p = 0.307)

## # A tibble: 3 x 10
##   Sabotage .y.   group1 group2    n1    n2       p p.signif   p.adj p.adj.signif
## * <fct>    <chr> <chr>  <chr>  <int> <int>   <dbl> <chr>      <dbl> <chr>       
## 1 sab1     Beha… 1      2         63    87 0.00316 **       0.00316 **          
## 2 sab2     Beha… 1      2         63    87 0.394   ns       0.394   ns          
## 3 sab3     Beha… 1      2         63    87 0.307   ns       0.307   ns

Simple Main effects of Sabotage variable

Effect of Sabotage at each level of condition

# Simple main effect of Sabotage: a one-way repeated-measures ANOVA of
# Behaviors on Sabotage (within-subjects, participant = ResponseId), run
# separately for each condition, with Bonferroni-adjusted p-values in p.adj.
one.way2 <- dat_filt %>%
  group_by(condition) %>%
  anova_test(dv = Behaviors, wid = ResponseId, within = Sabotage) %>%
  get_anova_table() %>%
  adjust_pvalue(method = "bonferroni")
one.way2 # p = 0.004 (Bonferroni-adjusted p = 0.008) for condition 1; not significant for condition 2 (adjusted p = 0.312)

## # A tibble: 2 x 9
##   condition Effect     DFn   DFd     F     p `p<.05`   ges p.adj
##   <fct>     <chr>    <dbl> <dbl> <dbl> <dbl> <chr>   <dbl> <dbl>
## 1 1         Sabotage  1.83  113.  6.05 0.004 "*"     0.031 0.008
## 2 2         Sabotage  2     172   1.88 0.156 ""      0.003 0.312

# There was a statistically significant effect of Sabotage on mean Behaviors for condition 1 (Bonferroni-adjusted p = 0.008), but not for condition 2 (adjusted p = 0.312).

Pairwise comparisons between Sabotage levels within each condition - Paired t-test is used because we have repeated measures by Sabotage

# Paired pairwise t-tests between the three Sabotage levels, run separately
# within each condition, with Bonferroni-adjusted p-values.
# NOTE(review): paired = TRUE presumably matches observations by row order
# within each group, so dat_filt must list participants in the same order
# for every Sabotage level - confirm.
pwc2 <- dat_filt %>%
  group_by(condition) %>%
  pairwise_t_test(
    Behaviors ~ Sabotage, paired = TRUE, 
    p.adjust.method = "bonferroni"
    ) %>%
  select(-df, -statistic, -p) # Remove details
pwc2 # Using pairwise paired t-test comparisons, for condition 1 the mean Behaviors differed significantly only between Sabotage 1 and 2 (p.adj = 0.005); Sabotage 1 vs 3 (p.adj = 0.138) and Sabotage 2 vs 3 (p.adj = 0.477) were not significant. For condition 2 no comparison was significant: Sabotage 1 vs 2 (p.adj = 0.609), Sabotage 1 vs 3 (p.adj = 1), and Sabotage 2 vs 3 (p.adj = 0.177).

## # A tibble: 6 x 8
##   condition .y.       group1 group2    n1    n2 p.adj p.adj.signif
##   <fct>     <chr>     <chr>  <chr>  <int> <int> <dbl> <chr>       
## 1 1         Behaviors sab1   sab2      63    63 0.005 **          
## 2 1         Behaviors sab1   sab3      63    63 0.138 ns          
## 3 1         Behaviors sab2   sab3      63    63 0.477 ns          
## 4 2         Behaviors sab1   sab2      87    87 0.609 ns          
## 5 2         Behaviors sab1   sab3      87    87 1     ns          
## 6 2         Behaviors sab2   sab3      87    87 0.177 ns

Report Summary

## REPORT SUMMARY ##

# *summarize here*

Visualizations of the statistical tests

# Visualization: boxplots with p-values
# Add bracket coordinates to the between-condition comparisons, laid out for a
# plot whose x axis is Sabotage (this overwrites pwc with the extended table).
pwc <- pwc %>% add_xy_position(x = "Sabotage")
# Keep only the sab1 comparison (drops the sab2 and sab3 rows)
pwc.filtered <- pwc %>% filter(Sabotage != "sab2", Sabotage != "sab3")
# NOTE(review): bxp maps condition (not Sabotage) to the x axis, while the
# bracket positions above were computed for x = "Sabotage"; this mismatch is
# a likely reason the figure does not look right - confirm.
bxp_report <- bxp + 
  stat_pvalue_manual(pwc.filtered, tip.length = 0, hide.ns = TRUE) +
  labs(
    subtitle = get_test_label(res.aov, detailed = TRUE),
    caption = get_pwc_label(pwc)
  )
bxp_report  # Not looking right - redo to fix interpretation

#reshaping
# Rebuild the boxplot with Sabotage on the x axis and one box per condition.
# The boxes look weird because the data isn't normally distributed
bxp2 <- ggplot(
  data = dat_filt,
  mapping = aes(x = Sabotage, y = Behaviors, colour = condition)
) +
  geom_boxplot()

# Overlay the significant between-condition bracket and annotate the plot
# with the ANOVA result (subtitle) and the pairwise-test description (caption)
bxp_report_2 <- bxp2 +
  stat_pvalue_manual(data = pwc.filtered, tip.length = 0, hide.ns = TRUE) +
  labs(
    caption = get_pwc_label(pwc),
    subtitle = get_test_label(res.aov, detailed = TRUE)
  )
bxp_report_2

## There's a better way to visualize this

Non-parametric alternatives — INCOMPLETE

# examine Skewness of data
# Histogram of all actual-behavior totals, pooled over condition and Sabotage
hist(dat_filt$Behaviors)

# moments provides skewness()
library(moments)
skewness(dat_filt$Behaviors, na.rm = TRUE) # skewness coefficient of 1.32. Skewness of >1 means data is highly (right-)skewed

## [1] 1.324781

# What kind of transformation do I need
    # Well, one that treats dependent variable as one of 5 "ranks". 
    # Kruskal-Wallis and Friedman handle only 1 factor of N levels, so cant be used to examine interaction effects. 
    # Also my dv is ordinal; cant be transformed w/log transform: https://depts.washington.edu/acelab/proj/art/index.html
# looks like I need to do either an (ordinal?) linear mixed effects regression or an ordinal logistic regression or an ART ANOVA(Aligned Rank Transformed ANOVA)

# Not much info on ART ANOVA so I'll start with LMER. 

## Actually I can't do linear mixed effects cause my DV is categorical so obviously that's ordinal logistic regression.

LINEAR MIXED EFFECTS REGRESSION

First just run linear model no mixed effects

1

dat_filt

## # A tibble: 450 x 5
##    ResponseId        condition Kind   Sabotage Behaviors
##    <chr>             <fct>     <chr>  <fct>        <dbl>
##  1 R_3jZpCHwaUpQCxkJ 1         actual sab1             1
##  2 R_1pxkNNpUOUEzA21 1         actual sab1             0
##  3 R_1LiGUJ1d0K1zAYR 1         actual sab1             1
##  4 R_2Wxpb14An8Ls4K1 1         actual sab1             1
##  5 R_2UXxMq9S5IHUU16 1         actual sab1             1
##  6 R_3kCeJHDJmHzPg6p 1         actual sab1             0
##  7 R_1DVkqRsxT2e2NS3 1         actual sab1             1
##  8 R_3htC3eVF2DZMYJ2 1         actual sab1             1
##  9 R_2curmuSXFfcTSQx 1         actual sab1             0
## 10 R_swmW3MGvEs7ZYoF 1         actual sab1             2
## # … with 440 more rows

# Baseline: ordinary linear model of Behaviors on Sabotage only. It has no
# random effects and ignores both condition and the repeated measures.
lm_test <- lm(formula = Behaviors ~ Sabotage, data = dat_filt)
summary(object = lm_test)

## 
## Call:
## lm(formula = Behaviors ~ Sabotage, data = dat_filt)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -0.8200 -0.7333  0.1800  0.3733  4.2667 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   0.82000    0.06186  13.256   <2e-16 ***
## Sabotagesab2 -0.19333    0.08748  -2.210   0.0276 *  
## Sabotagesab3 -0.08667    0.08748  -0.991   0.3224    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.7576 on 447 degrees of freedom
## Multiple R-squared:  0.01085,    Adjusted R-squared:  0.006421 
## F-statistic: 2.451 on 2 and 447 DF,  p-value: 0.08739

# ANOVA table (overall F-test of the Sabotage factor) for the baseline
# linear model. Reuses the `lm_test` fit created earlier instead of
# refitting the identical model.
anova(lm_test)

## Analysis of Variance Table
## 
## Response: Behaviors
##            Df  Sum Sq Mean Sq F value  Pr(>F)  
## Sabotage    2   2.813 1.40667  2.4507 0.08739 .
## Residuals 447 256.567 0.57397                  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

#The regression


# tutorial used: https://stat.ethz.ch/~meier/teaching/anova/random-and-mixed-effects-models.html#mixed-effects-models

library(lme4)

## Loading required package: Matrix

## 
## Attaching package: 'Matrix'

## The following objects are masked from 'package:tidyr':
## 
##     expand, pack, unpack

## Registered S3 methods overwritten by 'lme4':
##   method                          from
##   cooks.distance.influence.merMod car 
##   influence.merMod                car 
##   dfbeta.influence.merMod         car 
##   dfbetas.influence.merMod        car

# Linear mixed model: Sabotage is a fixed effect and `(1 | condition)` adds a
# random intercept for each level of condition.
# NOTE(review): condition has only 2 levels, so its variance component is
# estimated from two groups; consider treating it as a fixed effect.
# NOTE(review): dat_filt appears to hold several Sabotage rows per ResponseId;
# if so, a random intercept per ResponseId may be needed -- confirm.
mixed <- lmer(
  formula = Behaviors ~ Sabotage + (1 | condition),
  data = dat_filt
)
summary(mixed)

## Linear mixed model fit by REML ['lmerMod']
## Formula: Behaviors ~ Sabotage + (1 | condition)
##    Data: dat_filt
## 
## REML criterion at convergence: 1030.3
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -1.2314 -0.8742  0.2145  0.4564  5.5372 
## 
## Random effects:
##  Groups    Name        Variance Std.Dev.
##  condition (Intercept) 0.01879  0.1371  
##  Residual              0.56476  0.7515  
## Number of obs: 450, groups:  condition, 2
## 
## Fixed effects:
##              Estimate Std. Error t value
## (Intercept)   0.83454    0.11484   7.267
## Sabotagesab2 -0.19333    0.08678  -2.228
## Sabotagesab3 -0.08667    0.08678  -0.999
## 
## Correlation of Fixed Effects:
##             (Intr) Sbtgs2
## Sabotagesb2 -0.378       
## Sabotagesb3 -0.378  0.500

## BIG ISSUE: I DON'T KNOW HOW TO TELL WHICH ARE FIXED EFFECTS AND WHICH ARE RANDOM EFFECTS
## (In an lmer formula, terms written as `(... | group)` are random effects;
## every other term on the right-hand side is a fixed effect.)

# we want a model that has a random effect per condition, and a random effect per combination of condition and Sabotage

# Treatment contrasts for unordered factors and polynomial contrasts for
# ordered factors. These are R's default settings, set explicitly here.
options(contrasts = c("contr.treatment", "contr.poly"))
# lmerTest masks lme4::lmer; fits made after this point report
# Satterthwaite degrees of freedom and p-values.
library(lmerTest)

## 
## Attaching package: 'lmerTest'

## The following object is masked from 'package:lme4':
## 
##     lmer

## The following object is masked from 'package:stats':
## 
##     step

# Mixed model with a random intercept per condition and a random intercept per
# condition-by-Sabotage combination; Sabotage itself stays a fixed effect.
fit <- lmer(Behaviors ~ Sabotage + (1 | condition) + (1 | condition:Sabotage), data = dat_filt)
# The ANOVA table shows that the fixed effect of Sabotage is not significant.
# "Fixed effect" here means the average Sabotage effect, where the average is
# taken over the two conditions. Every condition has its own random deviation
# from this effect. Hence, the relevant question is whether the conditions
# just fluctuate around a constant Sabotage effect, or whether the Sabotage
# effect is substantially larger than this condition-specific Sabotage
# variation. This boils down to comparing the variation between the Sabotage
# levels (3 - 1 = 2 degrees of freedom) with the variation due to the
# Sabotage-by-condition interaction ((3 - 1) * (2 - 1) = 2 degrees of freedom).
anova(fit)

## Type III Analysis of Variance Table with Satterthwaite's method
##          Sum Sq Mean Sq NumDF  DenDF F value Pr(>F)
## Sabotage  1.792   0.896     2 1.9303  1.5915 0.3905

#### Question: How to know whether my random effects (condition) are significant?
# NOTE(review): lmerTest::ranova(fit) reports likelihood-ratio tests for each
# random-effect term and may answer this -- confirm it suits this design.

# Full model summary: variance components, fixed-effect estimates, and
# Satterthwaite t-tests.
summary(fit)

## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: Behaviors ~ Sabotage + (1 | condition) + (1 | condition:Sabotage)
##    Data: dat_filt
## 
## REML criterion at convergence: 1029.9
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -1.2925 -0.8929  0.2388  0.4398  5.5698 
## 
## Random effects:
##  Groups             Name        Variance Std.Dev.
##  condition:Sabotage (Intercept) 0.005386 0.07339 
##  condition          (Intercept) 0.017002 0.13039 
##  Residual                       0.562994 0.75033 
## Number of obs: 450, groups:  condition:Sabotage, 6; condition, 2
## 
## Fixed effects:
##              Estimate Std. Error       df t value Pr(>|t|)  
## (Intercept)   0.84066    0.12249  1.81684   6.863   0.0263 *
## Sabotagesab2 -0.20308    0.11390  1.93031  -1.783   0.2211  
## Sabotagesab3 -0.09526    0.11390  1.93031  -0.836   0.4937  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr) Sbtgs2
## Sabotagesb2 -0.465       
## Sabotagesb3 -0.465  0.500

# Confidence intervals for the variance components and the fixed effects.
# `method = "profile"` is the default for lmer fits and is spelled out here;
# `oldNames = FALSE` gives labels such as `sd_(Intercept)|condition`.
confint(fit, method = "profile", oldNames = FALSE)

## Computing profile confidence intervals ...

##                                        2.5 %        97.5 %
## sd_(Intercept)|condition:Sabotage  0.0000000  0.1763207491
## sd_(Intercept)|condition           0.0000000  0.4295446885
## sigma                              0.7032863  0.8017390982
## (Intercept)                        0.5779372  1.0938926659
## Sabotagesab2                      -0.4119203 -0.0006721759
## Sabotagesab3                      -0.3036444  0.1074452947

## Residual analysis: Tukey-Anscombe plot
plot(fit)

## QQ-plots of both sets of random effects and of the residuals, side by side
# par() returns the previous settings; keep them and restore afterwards so
# that later base-graphics plots are not left in the 1 x 3 layout.
old_par <- par(mfrow = c(1, 3))
qqnorm(ranef(fit)[["condition"]][, 1], main = "Random effects of condition")
qqnorm(ranef(fit)[["condition:Sabotage"]][, 1], main = "Random interaction")
qqnorm(resid(fit), main = "Residuals")
par(old_par)

# Comparison model: Sabotage and condition both entered as fixed factors,
# together with their interaction, in a two-way ANOVA.
fit.fixed <- aov(formula = Behaviors ~ Sabotage * condition, data = dat_filt)
summary(fit.fixed)

##                     Df Sum Sq Mean Sq F value  Pr(>F)   
## Sabotage             2   2.81   1.407   2.499 0.08336 . 
## condition            1   4.68   4.684   8.320 0.00411 **
## Sabotage:condition   2   1.91   0.957   1.699 0.18403   
## Residuals          444 249.97   0.563                   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

# effect of condition is much more significant


##MORE NOTES


# We assume there is a condition-level effect, but each Sabotage is allowed to have its own random variation (?); hence Sabotage would be a random effect and condition a fixed effect --- but I'm not sure

# Fit the linear mixed-effects model, then plot the residuals and look at the kurtosis. It will violate normality, but that doesn't matter

### NOOO, actually just do ordinal logistic regression because of my DV

RE-analyze after doing Yue exclusions

END Notes

# some helpful links
# https://stackoverflow.com/questions/20060949/ggplot2-multiple-sub-groups-of-a-bar-chart

Agency_analysis_final

Joseph

11/4/2020