Start Date: 23 Feb 2025
Report Date: 28 February 2025
Source: Psi Chi R
knitr::opts_chunk$set(echo = T,message = F,warning = F)
#setwd("C:/Users/alanh/Documents/R/Psi_Chi_R")
rm(list=ls())
setwd("~/R/Psi_Chi_R")
library(tidyverse)
#total for bottom row
sum_rows = function(x) {
x = as.data.frame(x)
sums = sapply(x,function(col) if (is.numeric(col)) sum(col, na.rm = T) else NA)
sums = as.data.frame(t(sums))
names(sums) = names(x)
rbind(x, sums)
}
## right column for total
sum_cols = function(x) {
x$Total = rowSums(x[sapply(x, is.numeric)], na.rm = T)
x
}
#dollar format function
dollars = function(x) {
paste0("$",format(x,big.mark= ",",scientific=F))
}
desc_stats = function(x){
c(min = min(x,na.rm=T),
mode = mode(x,na.rm=T),
median = median(x,na.rm=T),
max = max(x,na.rm=T),
mean = mean(x,na.rm=T),
sd = sd(x,na.rm=T))
}
#remove upper outliers
upper_fence = function(x){
quantile(x,.75) + (1.5*IQR(x))
}
#remove lower outliers
lower_fence = function(x){
quantile(x,.25) - (1.5*IQR(x))
}
Clean and EDA
data=read_csv('data.csv')
names(data) = make.names(colnames(data))
SmartEDA::ExpData(data,type=2) %>%
arrange(desc(Per_of_Missing))
Your tasks for the PsiChiR contest for Feb. 2025 include:
Level 1:
Remove participants outside of the United States AND participants who
are not identified as “white” (white participants are coded as “1” for
the variable “Race”).
data1 = data %>%
filter(VGeoCountry == 'United States',
Race == '1') %>%
tibble()
Level 2:
For the remaining participants, calculate the percentage of
participants who fell into each income category (1 through 5).
data1_table = data1 %>%
mutate(Income = as.character(Income)) %>%
group_by(Income) %>%
summarise(headcount = n()) %>%
mutate(percent = headcount/sum(headcount,na.rm = T) * 100) %>%
arrange(desc(percent)) %>%
sum_rows() %>%
mutate(percent = paste(round(percent,2),"%"))
print(data1_table)
## Income headcount percent
## 1 2 179 27.93 %
## 2 3 163 25.43 %
## 3 1 119 18.56 %
## 4 4 111 17.32 %
## 5 5 69 10.76 %
## 6 <NA> 641 100 %
Level 3:
Create a graph (such as a bar graph) that displays the M and SD for
“General Support for Affirmative Action” (variable = “AA”) across income
groups.
#M for median
data1.5 = data1 %>%
group_by(Income) %>%
summarise(Mean = mean(AA,na.rm = T),
Median = median(AA,na.rm=T),
SD = sd(AA,na.rm=T))
data1.5 %>%
ggplot(aes(x=Income,y=Median))+
geom_col(fill='darkblue')+
theme_bw()+
labs(title='Median of AA by Income')+
theme(plot.title = element_text(hjust = .5))

data1.5 %>%
ggplot(aes(x=Income,y=SD))+
geom_col(fill='darkred')+
theme_bw()+
labs(title='Standard Deviation of AA by Income')+
theme(plot.title = element_text(hjust = .5))

Level 4:
Test whether “General Support for Affirmative Action” (variable =
“AA”) differs across income group.
data1 %>%
ggplot(aes(x=Income,y=AA))+
geom_col(fill='darkgreen')+
theme_bw()+
labs(x='Income Group',y='Affirmative Action Support',title="Affirmative Action Across the Income-verse")+
theme(plot.title = element_text(hjust = .5))

#non-normality
shapiro.test(data1$Income)
##
## Shapiro-Wilk normality test
##
## data: data1$Income
## W = 0.905, p-value < 2.2e-16
shapiro.test(data1$AA)
##
## Shapiro-Wilk normality test
##
## data: data1$AA
## W = 0.96283, p-value = 1.156e-11
hist(data1$Income)

hist(data1$AA)

#yes, support for AA differs among income group
wilcox.test(data1$Income,data1$AA)
##
## Wilcoxon rank sum test with continuity correction
##
## data: data1$Income and data1$AA
## W = 89259, p-value < 2.2e-16
## alternative hypothesis: true location shift is not equal to 0
LS0tDQp0aXRsZTogIlBzaSBDaGkgUiAtIEZlYiAyMDI1Ig0KI2RhdGU6ICJgciBTeXMuRGF0ZSgpYCINCm91dHB1dDoNCiAgaHRtbF9kb2N1bWVudDoNCiAgICB0aGVtZTogcmVhZGFibGUNCiAgICBhbHdheXNfYWxsb3dfaHRtbDogeWVzDQogICAgZGZfcHJpbnQ6IHBhZ2VkDQogICAgdG9jOiB5ZXMNCiAgICB0b2NfZmxvYXQ6IHllcw0KICAgIG51bWJlcl9zZWN0aW9uczogbm8NCiAgICBhbmNob3Jfc2VjdGlvbnM6IFRSVUUNCiAgICBjb2RlX2ZvbGRpbmc6IGhpZGUNCiAgICBjb2RlX2Rvd25sb2FkOiB0cnVlDQotLS0NClN0YXJ0IERhdGU6IDIzIEZlYiAyMDI1DQoNClJlcG9ydCBEYXRlOiBgciBmb3JtYXQoU3lzLkRhdGUoKSwgJyVkICVCICVZJylgDQoNCltTb3VyY2U6IFBzaSBDaGkgUl0oaHR0cHM6Ly9vc2YuaW8vYnk0aHIvd2lraS9ob21lLykNCg0KYGBge3Igd2FybmluZz1GLG1lc3NhZ2U9Rn0NCmtuaXRyOjpvcHRzX2NodW5rJHNldChlY2hvID0gVCxtZXNzYWdlID0gRix3YXJuaW5nID0gRikNCg0KI3NldHdkKCJDOi9Vc2Vycy9hbGFuaC9Eb2N1bWVudHMvUi9Qc2lfQ2hpX1IiKQ0KDQpybShsaXN0PWxzKCkpDQoNCnNldHdkKCJ+L1IvUHNpX0NoaV9SIikNCg0KbGlicmFyeSh0aWR5dmVyc2UpDQoNCiN0b3RhbCBmb3IgYm90dG9tIHJvdw0Kc3VtX3Jvd3MgPSBmdW5jdGlvbih4KSB7DQogIHggPSBhcy5kYXRhLmZyYW1lKHgpDQogIHN1bXMgPSBzYXBwbHkoeCxmdW5jdGlvbihjb2wpIGlmIChpcy5udW1lcmljKGNvbCkpIHN1bShjb2wsIG5hLnJtID0gVCkgZWxzZSBOQSkNCiAgc3VtcyA9IGFzLmRhdGEuZnJhbWUodChzdW1zKSkNCiAgbmFtZXMoc3VtcykgPSBuYW1lcyh4KQ0KICByYmluZCh4LCBzdW1zKQ0KfQ0KDQojIyByaWdodCBjb2x1bW4gZm9yIHRvdGFsDQpzdW1fY29scyA9IGZ1bmN0aW9uKHgpIHsNCiAgeCRUb3RhbCA9IHJvd1N1bXMoeFtzYXBwbHkoeCwgaXMubnVtZXJpYyldLCBuYS5ybSA9IFQpDQogIHgNCn0NCg0KI2RvbGxhciBmb3JtYXQgZnVuY3Rpb24NCmRvbGxhcnMgPSBmdW5jdGlvbih4KSB7DQogIHBhc3RlMCgiJCIsZm9ybWF0KHgsYmlnLm1hcms9ICIsIixzY2llbnRpZmljPUYpKQ0KfQ0KDQpkZXNjX3N0YXRzID0gZnVuY3Rpb24oeCl7DQogIGMobWluID0gbWluKHgsbmEucm09VCksDQogICAgbW9kZSA9IG1vZGUoeCxuYS5ybT1UKSwNCiAgICBtZWRpYW4gPSBtZWRpYW4oeCxuYS5ybT1UKSwNCiAgICBtYXggPSBtYXgoeCxuYS5ybT1UKSwNCiAgICBtZWFuID0gbWVhbih4LG5hLnJtPVQpLA0KICAgIHNkID0gc2QoeCxuYS5ybT1UKSkNCn0NCg0KI3JlbW92ZSB1cHBlciBvdXRsaWVycw0KdXBwZXJfZmVuY2UgPSBmdW5jdGlvbih4KXsNCiAgcXVhbnRpbGUoeCwuNzUpICsgKDEuNSpJUVIoeCkpDQp9DQoNCiNyZW1vdmUgbG93ZXIgb3V0bGllcnMNCmxvd2VyX2ZlbmNlID0gZnVuY3Rpb24oeCl7DQogIHF1YW50aWxlKHgsLjI1KSAtICgxLjUqSVFSKHgpKQ0KfQ0KYGBgDQoNCg0KIyMgQ2xlYW4gYW5kIEVEQQ0KYGBge3J9DQpkYXRhPXJlYWRfY3N2KCdkYXRhLmNzdicpDQoNCm5hbWVzKGRhdGEpID0gbWFrZS5uYW1lcyhjb2xuYW1lcyhkYXRhKSkNCg0KU21hcnRFREE6OkV4cERhdGEoZGF0YSx0eXBlPTIpICU+JSANCiAgYXJyYW5nZShkZXNjKFBlcl9vZl9NaXNzaW5nKSkNCmBgYA0KWW91ciB0YXNrcyBmb3IgdGhlIFBzaUNoaVIgY29udGVzdCBmb3IgRmViLiAyMDI1IGluY2x1ZGU6DQoNCiMjIExldmVsIDE6DQpSZW1vdmUgcGFydGljaXBhbnRzIG91dHNpZGUgb2YgdGhlIFVuaXRlZCBTdGF0ZXMgQU5EIHBhcnRpY2lwYW50cyB3aG8gYXJlIG5vdCBpZGVudGlmaWVkIGFzICJ3aGl0ZSIgKHdoaXRlIHBhcnRpY2lwYW50cyBhcmUgY29kZWQgYXMgIjEiIGZvciB0aGUgdmFyaWFibGUgIlJhY2UiKS4NCg0KYGBge3J9DQpkYXRhMSA9IGRhdGEgJT4lIA0KICBmaWx0ZXIoVkdlb0NvdW50cnkgPT0gJ1VuaXRlZCBTdGF0ZXMnLA0KICAgICAgICAgUmFjZSA9PSAnMScpICU+JSANCiAgdGliYmxlKCkNCg0KYGBgDQoNCg0KIyMgTGV2ZWwgMjoNCkZvciB0aGUgcmVtYWluaW5nIHBhcnRpY2lwYW50cywgY2FsY3VsYXRlIHRoZSBwZXJjZW50YWdlIG9mIHBhcnRpY2lwYW50cyB3aG8gZmVsbCBpbnRvIGVhY2ggaW5jb21lIGNhdGVnb3J5ICgxIHRocm91Z2ggNSkuDQoNCmBgYHtyfQ0KZGF0YTFfdGFibGUgPSBkYXRhMSAlPiUgDQogIG11dGF0ZShJbmNvbWUgPSBhcy5jaGFyYWN0ZXIoSW5jb21lKSkgJT4lIA0KICBncm91cF9ieShJbmNvbWUpICU+JSANCiAgc3VtbWFyaXNlKGhlYWRjb3VudCA9IG4oKSkgJT4lIA0KICBtdXRhdGUocGVyY2VudCA9IGhlYWRjb3VudC9zdW0oaGVhZGNvdW50LG5hLnJtID0gVCkgKiAxMDApICU+JQ0KICBhcnJhbmdlKGRlc2MocGVyY2VudCkpICU+JSANCiAgc3VtX3Jvd3MoKSAlPiUgDQogIG11dGF0ZShwZXJjZW50ID0gcGFzdGUocm91bmQocGVyY2VudCwyKSwiJSIpKQ0KDQpwcmludChkYXRhMV90YWJsZSkNCmBgYA0KDQojIyBMZXZlbCAzOg0KQ3JlYXRlIGEgZ3JhcGggKHN1Y2ggYXMgYSBiYXIgZ3JhcGgpIHRoYXQgZGlzcGxheXMgdGhlIE0gYW5kIFNEIGZvciAiR2VuZXJhbCBTdXBwb3J0IGZvciBBZmZpcm1hdGl2ZSBBY3Rpb24iICh2YXJpYWJsZSA9ICJBQSIpIGFjcm9zcyBpbmNvbWUgZ3JvdXBzLg0KDQpgYGB7cn0NCiNNIGZvciBtZWRpYW4NCmRhdGExLjUgPSBkYXRhMSAlPiUgDQogIGdyb3VwX2J5KEluY29tZSkgJT4lIA0KICBzdW1tYXJpc2UoTWVhbiA9IG1lYW4oQUEsbmEucm0gPSBUKSwNCiAgICAgICAgICAgIE1lZGlhbiA9IG1lZGlhbihBQSxuYS5ybT1UKSwNCiAgICAgICAgICAgIFNEID0gc2QoQUEsbmEucm09VCkpDQoNCmRhdGExLjUgJT4lIA0KICBnZ3Bsb3QoYWVzKHg9SW5jb21lLHk9TWVkaWFuKSkrDQogIGdlb21fY29sKGZpbGw9J2RhcmtibHVlJykrDQogIHRoZW1lX2J3KCkrDQogIGxhYnModGl0bGU9J01lZGlhbiBvZiBBQSBieSBJbmNvbWUnKSsNCiAgdGhlbWUocGxvdC50aXRsZSA9IGVsZW1lbnRfdGV4dChoanVzdCA9IC41KSkNCg0KZGF0YTEuNSAlPiUgDQogIGdncGxvdChhZXMoeD1JbmNvbWUseT1TRCkpKw0KICBnZW9tX2NvbChmaWxsPSdkYXJrcmVkJykrDQogIHRoZW1lX2J3KCkrDQogIGxhYnModGl0bGU9J1N0YW5kYXJkIERldmlhdGlvbiBvZiBBQSBieSBJbmNvbWUnKSsNCiAgdGhlbWUocGxvdC50aXRsZSA9IGVsZW1lbnRfdGV4dChoanVzdCA9IC41KSkNCg0KYGBgDQoNCg0KIyMgTGV2ZWwgNDoNClRlc3Qgd2hldGhlciAiR2VuZXJhbCBTdXBwb3J0IGZvciBBZmZpcm1hdGl2ZSBBY3Rpb24iICh2YXJpYWJsZSA9ICJBQSIpIGRpZmZlcnMgYWNyb3NzIGluY29tZSBncm91cC4NCg0KYGBge3J9DQpkYXRhMSAlPiUgDQogIGdncGxvdChhZXMoeD1JbmNvbWUseT1BQSkpKw0KICBnZW9tX2NvbChmaWxsPSdkYXJrZ3JlZW4nKSsNCiAgdGhlbWVfYncoKSsNCiAgbGFicyh4PSdJbmNvbWUgR3JvdXAnLHk9J0FmZmlybWF0aXZlIEFjdGlvbiBTdXBwb3J0Jyx0aXRsZT0iQWZmaXJtYXRpdmUgQWN0aW9uIEFjcm9zcyB0aGUgSW5jb21lLXZlcnNlIikrDQogIHRoZW1lKHBsb3QudGl0bGUgPSBlbGVtZW50X3RleHQoaGp1c3QgPSAuNSkpDQpgYGANCg0KYGBge3J9DQojbm9uLW5vcm1hbGl0eQ0Kc2hhcGlyby50ZXN0KGRhdGExJEluY29tZSkNCnNoYXBpcm8udGVzdChkYXRhMSRBQSkNCg0KaGlzdChkYXRhMSRJbmNvbWUpDQpoaXN0KGRhdGExJEFBKQ0KYGBgDQoNCmBgYHtyfQ0KI3llcywgc3VwcG9ydCBmb3IgQUEgZGlmZmVycyBhbW9uZyBpbmNvbWUgZ3JvdXANCndpbGNveC50ZXN0KGRhdGExJEluY29tZSxkYXRhMSRBQSkNCmBgYA0KDQoNCmBgYHtyIGluY2x1ZGU9Rn0NCiNiZWVwIHdoZW4gZG9uZQ0KaWYgKHJlcXVpcmUoImJlZXByIikpDQogIGJlZXByOjpiZWVwKDIpDQpgYGANCg==