suppressPackageStartupMessages(library("dplyr"))
suppressPackageStartupMessages(library("ggplot2"))

# ---- Load raw data ---------------------------------------------------------
# Paths are relative to the directory this script is run from.
donations <- read.csv("../data/donations.csv", header = TRUE)
people <- read.csv("../data/people.csv", header = TRUE)
teams <- read.csv("../data/teams.csv", header = TRUE)

# ---- Restrict every table to the single event under study ------------------
# NOTE(review): `eid` is presumably the event id -- confirm against the schema.
donations <- filter(donations, eid == 24690)
people <- filter(people, eid == 24690)
teams <- filter(teams, eid == 24690)

# Remove the anomalous 1.3 million dollar donation BEFORE aggregating, so the
# per-person totals below are built from the same donations as the
# donation-level analysis. Previously this filter ran after the totals had
# already been computed and merged into `people`.
# which() also drops rows whose fdid is NA, as the original filter did.
# NOTE(review): recorded outputs further down were produced with the old
# ordering; re-run to confirm they are unchanged.
donations <- donations[which(donations$fdid != 1987417), ]

# ---- Per-person aggregates -------------------------------------------------
# Total raised and number of donations for each fcid.
donations_grouped_by_person <- donations %>%
  group_by(fcid) %>%
  summarise(raised = sum(amount), num_donations = n())

# Total raised from donations flagged as family donations, for each fcid.
family_donations_grouped_by_person <- donations %>%
  filter(family_donation == TRUE) %>%
  group_by(fcid) %>%
  summarise(raised_family = sum(amount))

# ---- Attach aggregates and the team-level `greek` flag ---------------------
# all.x = TRUE keeps every row of the left table; unmatched rows get NA.
people <- merge(people, donations_grouped_by_person, by = "fcid", all.x = TRUE)
people <- merge(
  people, family_donations_grouped_by_person,
  by = "fcid", all.x = TRUE
)
people <- merge(people, teams[, c("ftid", "greek")], by = "ftid", all.x = TRUE)
donations <- merge(
  donations, teams[, c("ftid", "greek")],
  by = "ftid", all.x = TRUE
)

# The intermediate aggregates are no longer needed once merged.
rm(donations_grouped_by_person, family_donations_grouped_by_person)

# ---- Analysis subsets ------------------------------------------------------
# People with at least one matched donation (non-NA `raised`).
people_who_raised_money <- people[!is.na(people$raised), ]
# Donations with / without an fcid.
donations_to_individual <- filter(donations, !is.na(fcid))
donations_to_team <- filter(donations, is.na(fcid))

Donations

# Row count and amount distribution over all retained donations.
nrow(donations)
## [1] 10069
summary(donations[["amount"]])
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    0.65   25.00   50.00   77.23  100.00 3000.00

Donations to Individuals

# Row count and amount distribution for donations that carry an fcid.
nrow(donations_to_individual)
## [1] 10062
summary(donations_to_individual[["amount"]])
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    0.65   25.00   50.00   77.20  100.00 3000.00

Donations to Teams

# Row count and amount distribution for donations without an fcid.
nrow(donations_to_team)
## [1] 7
summary(donations_to_team[["amount"]])
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    20.0    48.5    79.0   123.7   160.0   350.0

Differences between Greek Donations vs. Non-Greek Donations

# Partition individual donations by the `greek` flag; rows where the flag is
# NA are kept in their own group and excluded from the test.
greek_donations <- donations_to_individual %>% filter(greek == TRUE)
nongreek_donations <- donations_to_individual %>% filter(greek == FALSE)
unknown_donations <- donations_to_individual %>% filter(is.na(greek))
nrow(greek_donations)
## [1] 5754
nrow(nongreek_donations)
## [1] 2939
nrow(unknown_donations)
## [1] 1369
# Two-sample comparison of donation amounts. The call is left verbatim because
# t.test() echoes its argument expressions in the "data:" line of the output.
t.test(greek_donations$amount, nongreek_donations$amount)
## 
##  Welch Two Sample t-test
## 
## data:  greek_donations$amount and nongreek_donations$amount
## t = 7.0128, df = 8091.048, p-value = 2.524e-12
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##   9.998532 17.756902
## sample estimates:
## mean of x mean of y 
##  78.74800  64.87028

Differences between Family Donations vs. Non-Family Donations

# Partition individual donations by the `family_donation` flag.
family_donations <- donations_to_individual %>%
  filter(family_donation == TRUE)
nonfamily_donations <- donations_to_individual %>%
  filter(family_donation == FALSE)
nrow(family_donations)
## [1] 2183
nrow(nonfamily_donations)
## [1] 7879
# Two-sample comparison of donation amounts. The call is left verbatim because
# t.test() echoes its argument expressions in the "data:" line of the output.
t.test(family_donations$amount, nonfamily_donations$amount)
## 
##  Welch Two Sample t-test
## 
## data:  family_donations$amount and nonfamily_donations$amount
## t = 11.739, df = 2903.086, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  31.32445 43.88711
## sample estimates:
## mean of x mean of y 
## 106.64899  69.04321

What fraction of people who raised money got at least one single donation of >= 100 dollars?

# Distinct fcids with at least one donation of 100+ dollars, as a share of
# everyone who raised money.
length(unique(
  (donations_to_individual %>% filter(amount >= 100))$fcid
)) / nrow(people_who_raised_money)
## [1] 0.8449235

What fraction of people who raised money got at least one single donation of >= 100 dollars from their family?

# Distinct fcids with at least one family donation of 100+ dollars, as a share
# of everyone who raised money.
length(unique(
  (donations_to_individual %>%
    filter(amount >= 100 & family_donation == TRUE))$fcid
)) / nrow(people_who_raised_money)
## [1] 0.511822

What fraction of people who raised money raised >= 100 dollars in total from their family’s donations?

# Share of fundraisers whose summed family donations reach 100+ dollars.
# Rows with NA `raised_family` are dropped by filter() and so count as "no".
(people_who_raised_money %>% filter(raised_family >= 100) %>% nrow()) /
  nrow(people_who_raised_money)
## [1] 0.5674548
# Share of fundraisers whose `greek` flag is TRUE, FALSE, or NA
# (denominator: everyone who raised money).
(people_who_raised_money %>% filter(greek == TRUE) %>% nrow()) /
  nrow(people_who_raised_money)
## [1] 0.5445063
(people_who_raised_money %>% filter(greek == FALSE) %>% nrow()) /
  nrow(people_who_raised_money)
## [1] 0.3344924
(people_who_raised_money %>% filter(is.na(greek)) %>% nrow()) /
  nrow(people_who_raised_money)
## [1] 0.1210014
# Same three-way split, restricted to people who raised >= 100 dollars from
# their family (denominator: that restricted group).
(people_who_raised_money %>%
  filter(raised_family >= 100 & greek == TRUE) %>%
  nrow()) /
  (people_who_raised_money %>% filter(raised_family >= 100) %>% nrow())
## [1] 0.5894608
(people_who_raised_money %>%
  filter(raised_family >= 100 & greek == FALSE) %>%
  nrow()) /
  (people_who_raised_money %>% filter(raised_family >= 100) %>% nrow())
## [1] 0.2830882
(people_who_raised_money %>%
  filter(raised_family >= 100 & is.na(greek)) %>%
  nrow()) /
  (people_who_raised_money %>% filter(raised_family >= 100) %>% nrow())
## [1] 0.127451
# Linear model of total amount raised per person on the `greek` flag.
# Rows with NA in either variable are dropped by lm() (see the
# "deleted due to missingness" line in the output).
model1 <- lm(formula = raised ~ greek, data = people_who_raised_money)
summary(model1)
## 
## Call:
## lm(formula = raised ~ greek, data = people_who_raised_money)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -574.8 -175.8  -75.8   59.7 8904.2 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   390.32      23.40  16.683  < 2e-16 ***
## greekTRUE     185.43      29.73   6.238 6.05e-10 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 513.1 on 1262 degrees of freedom
##   (174 observations deleted due to missingness)
## Multiple R-squared:  0.02991,    Adjusted R-squared:  0.02914 
## F-statistic: 38.91 on 1 and 1262 DF,  p-value: 6.049e-10
# Linear model of amount raised from family donations per person on the
# `greek` flag. Rows with NA in either variable are dropped by lm() (see the
# "deleted due to missingness" line in the output).
model2 <- lm(formula = raised_family ~ greek, data = people_who_raised_money)
summary(model2)
## 
## Call:
## lm(formula = raised_family ~ greek, data = people_who_raised_money)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -228.66 -129.66  -35.02   70.34 1270.34 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   185.02      10.46  17.692  < 2e-16 ***
## greekTRUE      44.64      12.80   3.487 0.000512 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 179.6 on 885 degrees of freedom
##   (551 observations deleted due to missingness)
## Multiple R-squared:  0.01356,    Adjusted R-squared:  0.01244 
## F-statistic: 12.16 on 1 and 885 DF,  p-value: 0.0005119