suppressPackageStartupMessages(library("dplyr"))
suppressPackageStartupMessages(library("ggplot2"))

# Load the three CSV exports (paths are relative to the working directory) and
# keep only the rows whose `eid` equals 37411. Each table is read and filtered
# in a single pipeline so the unfiltered data never lingers in the workspace.
donations <- read.csv("../data/donations.csv", header = TRUE) %>%
  filter(eid == 37411)
people <- read.csv("../data/people.csv", header = TRUE) %>%
  filter(eid == 37411)
teams <- read.csv("../data/teams.csv", header = TRUE) %>%
  filter(eid == 37411)

# Drop the outlier BEFORE anything is aggregated, so that the per-person totals
# merged into `people` and the donation-level tables are built from the same
# set of donations. Previously this removal ran only after `raised`,
# `num_donations` and `raised_family` had been computed and merged, so those
# columns could still include the excluded donation if it carried an `fcid`.
# Rows with a missing `fdid` are dropped as well, exactly as the earlier
# `which()`-based subset did.
donations <- filter(donations, fdid != 2844635) # remove outlier $19919.34 donation from ZBT General

# Totals per `fcid`: amount raised and number of donations across all
# donations, plus the amount raised from donations flagged `family_donation`.
# Donations with a missing `fcid` form their own NA group here.
donations_grouped_by_person <- donations %>%
  group_by(fcid) %>%
  summarise(raised = sum(amount), num_donations = n())
family_donations_grouped_by_person <- donations %>%
  filter(family_donation == TRUE) %>%
  group_by(fcid) %>%
  summarise(raised_family = sum(amount))

# Left joins (`all.x = TRUE`): every row of the left table is kept, and the
# added columns are NA where no match exists. `people` gains `raised`,
# `num_donations`, `raised_family` and the team's `greek` flag; `donations`
# gains the `greek` flag of the team identified by `ftid`.
people <- merge(people, donations_grouped_by_person, by = "fcid", all.x = TRUE)
people <- merge(people, family_donations_grouped_by_person, by = "fcid", all.x = TRUE)
people <- merge(people, teams[, c("ftid", "greek")], by = "ftid", all.x = TRUE)
donations <- merge(donations, teams[, c("ftid", "greek")], by = "ftid", all.x = TRUE)

# The aggregates now live in `people`; remove the temporaries.
rm(donations_grouped_by_person)
rm(family_donations_grouped_by_person)

# Analysis subsets:
#   people_who_raised_money - people with a non-missing `raised` total
#   donations_to_individual - donations that carry an `fcid`
#   donations_to_team       - donations whose `fcid` is missing
people_who_raised_money <- people[!is.na(people$raised), ]
donations_to_individual <- donations %>% filter(!is.na(fcid))
donations_to_team <- donations %>% filter(is.na(fcid))

Donations

# Row count of `donations`, then the five-number summary and mean of `amount`.
donations %>% nrow()
## [1] 9768
donations %>% pull(amount) %>% summary()
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    0.00   25.00   50.00   74.38  100.00 5000.00

Donations to Individuals

# Row count of `donations_to_individual`, then the summary of its `amount`.
donations_to_individual %>% nrow()
## [1] 9631
donations_to_individual %>% pull(amount) %>% summary()
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##     0.0    25.0    50.0    73.8   100.0  5000.0

Donations to Teams

# Row count of `donations_to_team`, then the summary of its `amount`.
donations_to_team %>% nrow()
## [1] 137
donations_to_team %>% pull(amount) %>% summary()
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    1.00   26.12   51.94  115.50  103.60 1391.00

Differences between Greek and Non-Greek Donations

# Partition the individual donations by the `greek` flag. `greek` is NA when
# the team merge found no flag for the donation's `ftid`; those rows go to
# `unknown_donations` and are left out of the test below.
greek_donations <- donations_to_individual %>% filter(greek == TRUE)
nongreek_donations <- donations_to_individual %>% filter(greek == FALSE)
unknown_donations <- donations_to_individual %>% filter(is.na(greek))
nrow(greek_donations)
nrow(nongreek_donations)
nrow(unknown_donations)
## [1] 5832
## [1] 2123
## [1] 1676
# Two-sample t-test on donation amounts (t.test's default, unequal variances).
# The argument expressions are kept as-is because they are echoed in the
# printed "data:" line.
t.test(greek_donations$amount, nongreek_donations$amount)
## 
##  Welch Two Sample t-test
## 
## data:  greek_donations$amount and nongreek_donations$amount
## t = 4.7696, df = 4637.13, p-value = 1.902e-06
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##   6.17256 14.78806
## sample estimates:
## mean of x mean of y 
##  74.94323  64.46292

Differences between Family and Non-Family Donations

# Partition the individual donations by the `family_donation` flag.
family_donations <- donations_to_individual %>% filter(family_donation == TRUE)
nonfamily_donations <- donations_to_individual %>% filter(family_donation == FALSE)
nrow(family_donations)
nrow(nonfamily_donations)
## [1] 2030
## [1] 7601
# Two-sample t-test on donation amounts (t.test's default, unequal variances).
# The argument expressions are kept as-is because they are echoed in the
# printed "data:" line.
t.test(family_donations$amount, nonfamily_donations$amount)
## 
##  Welch Two Sample t-test
## 
## data:  family_donations$amount and nonfamily_donations$amount
## t = 13.0281, df = 2308.158, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  44.12920 59.76779
## sample estimates:
## mean of x mean of y 
## 114.79761  62.84911

What fraction of the people who raised money got a single donation of >= 100 dollars?

# Distinct `fcid`s with at least one individual donation of >= 100, as a
# fraction of the rows in `people_who_raised_money`.
(donations_to_individual %>%
  filter(amount >= 100) %>%
  pull(fcid) %>%
  n_distinct()) / nrow(people_who_raised_money)
## [1] 0.8647541

What fraction of the people who raised money got a single donation from their family of >= 100 dollars?

# Distinct `fcid`s with at least one family donation of >= 100, as a fraction
# of the rows in `people_who_raised_money`.
(donations_to_individual %>%
  filter(amount >= 100, family_donation == TRUE) %>%
  pull(fcid) %>%
  n_distinct()) / nrow(people_who_raised_money)
## [1] 0.554918

What fraction of the people who raised money raised >= 100 dollars from their family’s donations?

# Fraction of `people_who_raised_money` whose family total is >= 100. People
# with a missing `raised_family` count towards the denominator only.
sum(people_who_raised_money$raised_family >= 100, na.rm = TRUE) /
  nrow(people_who_raised_money)
## [1] 0.6229508
# Share of people who raised money that are on a greek team, on a non-greek
# team, or have no team flag (`greek` is NA).
with(people_who_raised_money, sum(greek == TRUE, na.rm = TRUE)) /
  nrow(people_who_raised_money)
## [1] 0.5934426
with(people_who_raised_money, sum(greek == FALSE, na.rm = TRUE)) /
  nrow(people_who_raised_money)
## [1] 0.2418033
with(people_who_raised_money, sum(is.na(greek))) /
  nrow(people_who_raised_money)
## [1] 0.1647541
## [1] 0.1647541
# Among the people who raised >= 100 from family donations: the share that is
# greek, non-greek, or has no team flag (`greek` is NA).
with(
  filter(people_who_raised_money, raised_family >= 100),
  sum(greek == TRUE, na.rm = TRUE) / length(greek)
)
## [1] 0.6565789
with(
  filter(people_who_raised_money, raised_family >= 100),
  sum(greek == FALSE, na.rm = TRUE) / length(greek)
)
## [1] 0.1960526
with(
  filter(people_who_raised_money, raised_family >= 100),
  sum(is.na(greek)) / length(greek)
)
## [1] 0.1473684
## [1] 0.1473684
# Linear model of the total raised per person on the `greek` flag. Rows with
# missing values in either variable are dropped by lm().
model1 <- lm(
  formula = raised ~ greek,
  data = people_who_raised_money
)
summary(model1)
## 
## Call:
## lm(formula = raised ~ greek, data = people_who_raised_money)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
##  -620.3  -218.5  -103.3    67.6 10525.6 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   416.47      32.55  12.793  < 2e-16 ***
## greekTRUE     204.79      38.62   5.303  1.4e-07 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 559.1 on 1017 degrees of freedom
##   (201 observations deleted due to missingness)
## Multiple R-squared:  0.0269, Adjusted R-squared:  0.02595 
## F-statistic: 28.12 on 1 and 1017 DF,  p-value: 1.4e-07
# Linear model of the family total raised per person on the `greek` flag. Rows
# with missing values in either variable are dropped by lm().
model2 <- lm(
  formula = raised_family ~ greek,
  data = people_who_raised_money
)
summary(model2)
## 
## Call:
## lm(formula = raised_family ~ greek, data = people_who_raised_money)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -253.09 -156.21  -55.84   59.50 2289.91 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   211.75      17.09  12.392   <2e-16 ***
## greekTRUE      48.34      19.69   2.455   0.0143 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 236.8 on 776 degrees of freedom
##   (442 observations deleted due to missingness)
## Multiple R-squared:  0.00771,    Adjusted R-squared:  0.006431 
## F-statistic: 6.029 on 1 and 776 DF,  p-value: 0.01429