suppressPackageStartupMessages(library("dplyr"))
suppressPackageStartupMessages(library("ggplot2"))
# Setup: load the raw tables, restrict them to a single event, and derive the
# per-person and per-donation data frames used by the rest of the analysis.

# Event analysed throughout this script (matched against the `eid` column).
event_id <- 37411
# Outlier $19919.34 donation from ZBT General, excluded from all statistics.
outlier_fdid <- 2844635

donations <- read.csv("../data/donations.csv", header = TRUE)
people <- read.csv("../data/people.csv", header = TRUE)
teams <- read.csv("../data/teams.csv", header = TRUE)

donations <- filter(donations, eid == event_id)
people <- filter(people, eid == event_id)
teams <- filter(teams, eid == event_id)

# Remove the outlier BEFORE aggregating, so the exclusion applies to the
# per-person totals as well as to the donation-level tables.
donations <- donations[which(donations$fdid != outlier_fdid), ]

# Per-person totals: amount raised and number of donations received.
donations_grouped_by_person <- donations %>%
  group_by(fcid) %>%
  summarise(raised = sum(amount), num_donations = n())

# Per-person totals restricted to donations flagged as family donations.
family_donations_grouped_by_person <- donations %>%
  filter(family_donation == TRUE) %>%
  group_by(fcid) %>%
  summarise(raised_family = sum(amount))

# Left-merge the totals onto people; people without donations keep NA totals.
people <- merge(people, donations_grouped_by_person,
                by = "fcid", all.x = TRUE)
people <- merge(people, family_donations_grouped_by_person,
                by = "fcid", all.x = TRUE)

# Attach each team's `greek` flag to both people and donations (NA when the
# `ftid` has no match in `teams`).
people <- merge(people, teams[, c("ftid", "greek")],
                by = "ftid", all.x = TRUE)
donations <- merge(donations, teams[, c("ftid", "greek")],
                   by = "ftid", all.x = TRUE)

rm(donations_grouped_by_person, family_donations_grouped_by_person)

# People with at least one donation row (non-missing `raised`).
people_who_raised_money <- people[!is.na(people$raised), ]
# Donations with / without a recipient person id (`fcid`).
donations_to_individual <- filter(donations, !is.na(fcid))
donations_to_team <- filter(donations, is.na(fcid))
Donations
# Number of donations for the event and the distribution of their amounts.
dim(donations)[1]
## [1] 9768
summary(donations[["amount"]])
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.00 25.00 50.00 74.38 100.00 5000.00
Donations to Individuals
# Count and amount distribution of donations that name a recipient person.
donations_to_individual %>% nrow()
## [1] 9631
donations_to_individual %>% pull(amount) %>% summary()
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.0 25.0 50.0 73.8 100.0 5000.0
Donations to Teams
# Count and amount distribution of donations with no recipient person (`fcid`).
NROW(donations_to_team)
## [1] 137
with(donations_to_team, summary(amount))
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 1.00 26.12 51.94 115.50 103.60 1391.00
Differences between Greek and Non-Greek Donations
# Split individual donations by the `greek` flag merged in from `teams`;
# donations whose flag is missing are kept in a separate "unknown" group.
greek_donations <- donations_to_individual %>% filter(greek == TRUE)
nongreek_donations <- donations_to_individual %>% filter(greek == FALSE)
unknown_donations <- donations_to_individual %>% filter(is.na(greek))

# Group sizes: Greek, non-Greek, unknown.
nrow(greek_donations)
## [1] 5832
nrow(nongreek_donations)
## [1] 2123
nrow(unknown_donations)
## [1] 1676

# Two-sample t-test comparing mean donation amount, Greek vs. non-Greek.
t.test(x = greek_donations$amount, y = nongreek_donations$amount)
##
## Welch Two Sample t-test
##
## data: greek_donations$amount and nongreek_donations$amount
## t = 4.7696, df = 4637.13, p-value = 1.902e-06
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## 6.17256 14.78806
## sample estimates:
## mean of x mean of y
## 74.94323 64.46292
Differences between Family and Non-Family Donations
# Split individual donations by the `family_donation` flag.
family_donations <- donations_to_individual %>%
  filter(family_donation == TRUE)
nonfamily_donations <- donations_to_individual %>%
  filter(family_donation == FALSE)

# Group sizes: family, non-family.
nrow(family_donations)
## [1] 2030
nrow(nonfamily_donations)
## [1] 7601

# Two-sample t-test comparing mean donation amount, family vs. non-family.
t.test(x = family_donations$amount, y = nonfamily_donations$amount)
##
## Welch Two Sample t-test
##
## data: family_donations$amount and nonfamily_donations$amount
## t = 13.0281, df = 2308.158, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## 44.12920 59.76779
## sample estimates:
## mean of x mean of y
## 114.79761 62.84911
What fraction of people who raised money got at least one single donation of >= 100 dollars?
# Distinct recipients of a donation of at least $100, as a fraction of
# everyone who raised money.
(donations_to_individual %>%
  filter(amount >= 100) %>%
  distinct(fcid) %>%
  nrow()) / nrow(people_who_raised_money)
## [1] 0.8647541
What fraction of people who raised money got at least one single donation of >= 100 dollars from their family?
# Distinct recipients of a family donation of at least $100, as a fraction of
# everyone who raised money.
(donations_to_individual %>%
  filter(amount >= 100 & family_donation == TRUE) %>%
  distinct(fcid) %>%
  nrow()) / nrow(people_who_raised_money)
## [1] 0.554918
What fraction of people who raised money raised >= 100 dollars in total from their family’s donations?
# People whose family total is at least $100, over everyone who raised money.
# A missing `raised_family` does not count towards the numerator.
sum(people_who_raised_money$raised_family >= 100, na.rm = TRUE) /
  nrow(people_who_raised_money)
## [1] 0.6229508
# Share of people who raised money whose team is Greek, non-Greek, or has a
# missing Greek flag (described in the original note as "no team").
n_fundraisers <- nrow(people_who_raised_money)
sum(people_who_raised_money$greek == TRUE, na.rm = TRUE) / n_fundraisers
## [1] 0.5934426
sum(people_who_raised_money$greek == FALSE, na.rm = TRUE) / n_fundraisers
## [1] 0.2418033
sum(is.na(people_who_raised_money$greek)) / n_fundraisers
## [1] 0.1647541
# Among people who raised at least $100 from family donations: the share that
# is Greek, non-Greek, and of unknown Greek status.
family_100_raisers <- filter(people_who_raised_money, raised_family >= 100)
nrow(filter(family_100_raisers, greek == TRUE)) / nrow(family_100_raisers)
## [1] 0.6565789
nrow(filter(family_100_raisers, greek == FALSE)) / nrow(family_100_raisers)
## [1] 0.1960526
nrow(filter(family_100_raisers, is.na(greek))) / nrow(family_100_raisers)
## [1] 0.1473684
# Linear model: total amount raised per person explained by the Greek flag.
# Rows with a missing value in either variable are dropped by lm() (201 rows
# per the recorded output below).
model1 <- lm(formula = raised ~ greek, data = people_who_raised_money)
summary(model1)
##
## Call:
## lm(formula = raised ~ greek, data = people_who_raised_money)
##
## Residuals:
## Min 1Q Median 3Q Max
## -620.3 -218.5 -103.3 67.6 10525.6
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 416.47 32.55 12.793 < 2e-16 ***
## greekTRUE 204.79 38.62 5.303 1.4e-07 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 559.1 on 1017 degrees of freedom
## (201 observations deleted due to missingness)
## Multiple R-squared: 0.0269, Adjusted R-squared: 0.02595
## F-statistic: 28.12 on 1 and 1017 DF, p-value: 1.4e-07
# Linear model: amount raised from family donations explained by the Greek
# flag. People with no family donations have a missing `raised_family` and are
# dropped by lm() together with rows missing `greek` (442 rows per the
# recorded output below).
model2 <- lm(formula = raised_family ~ greek, data = people_who_raised_money)
summary(model2)
##
## Call:
## lm(formula = raised_family ~ greek, data = people_who_raised_money)
##
## Residuals:
## Min 1Q Median 3Q Max
## -253.09 -156.21 -55.84 59.50 2289.91
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 211.75 17.09 12.392 <2e-16 ***
## greekTRUE 48.34 19.69 2.455 0.0143 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 236.8 on 776 degrees of freedom
## (442 observations deleted due to missingness)
## Multiple R-squared: 0.00771, Adjusted R-squared: 0.006431
## F-statistic: 6.029 on 1 and 776 DF, p-value: 0.01429