suppressPackageStartupMessages(library("dplyr"))
suppressPackageStartupMessages(library("ggplot2"))
# ---- Load and prepare the data ----
# Identifiers hard-coded by this analysis.
event_id <- 24690          # `eid` value every table is restricted to
anomalous_fdid <- 1987417  # `fdid` of the anomalous 1.3 million dollar donation

donations <- read.csv("../data/donations.csv", header = TRUE)
people <- read.csv("../data/people.csv", header = TRUE)
teams <- read.csv("../data/teams.csv", header = TRUE)

# Keep only rows for the chosen event. The anomalous donation is removed here,
# BEFORE any aggregation, so the per-person totals merged into `people` can
# never include it and stay consistent with the donation-level tables.
# Note: filter() also drops rows whose condition evaluates to NA.
donations <- filter(donations, eid == event_id, fdid != anomalous_fdid)
people <- filter(people, eid == event_id)
teams <- filter(teams, eid == event_id)

# Per-person totals: amount raised and number of donations, keyed by `fcid`.
donations_grouped_by_person <- donations %>%
  group_by(fcid) %>%
  summarise(raised = sum(amount), num_donations = n())

# Per-person total coming from donations flagged as family donations.
family_donations_grouped_by_person <- donations %>%
  filter(family_donation == TRUE) %>%
  group_by(fcid) %>%
  summarise(raised_family = sum(amount))

# Left-merge the totals onto `people`; people without a matching donation row
# get NA in the merged columns.
people <- merge(people, donations_grouped_by_person, by = c("fcid"), all.x = TRUE)
people <- merge(people, family_donations_grouped_by_person, by = c("fcid"), all.x = TRUE)

# Attach each team's `greek` flag to people and to donations via `ftid`.
people <- merge(people, teams[, c("ftid", "greek")], by = c("ftid"), all.x = TRUE)
donations <- merge(donations, teams[, c("ftid", "greek")], by = c("ftid"), all.x = TRUE)

rm(donations_grouped_by_person, family_donations_grouped_by_person)

# Analysis subsets: people with a non-missing total, and donations split by
# whether they carry a person id (`fcid`) or not.
people_who_raised_money <- people[!is.na(people$raised), ]
donations_to_individual <- filter(donations, !is.na(fcid))
donations_to_team <- filter(donations, is.na(fcid))
Donations
# Number of donations kept for analysis, then the distribution of amounts.
dim(donations)[1]
## [1] 10069
summary(donations[["amount"]])
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.65 25.00 50.00 77.23 100.00 3000.00
Donations to Individuals
# Count and amount distribution of donations that carry a person id (fcid).
dim(donations_to_individual)[1]
## [1] 10062
summary(donations_to_individual[["amount"]])
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.65 25.00 50.00 77.20 100.00 3000.00
Donations to Teams
# Count and amount distribution of donations with no person id (fcid is NA).
dim(donations_to_team)[1]
## [1] 7
summary(donations_to_team[["amount"]])
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 20.0 48.5 79.0 123.7 160.0 350.0
Differences between Greek and Non-Greek Donations
# Split individual donations by the team's `greek` flag; donations whose flag
# is missing go into their own group and are left out of the test.
greek_donations <- donations_to_individual %>% filter(greek == TRUE)
nongreek_donations <- donations_to_individual %>% filter(greek == FALSE)
unknown_donations <- donations_to_individual %>% filter(is.na(greek))
nrow(greek_donations)
nrow(nongreek_donations)
nrow(unknown_donations)
## [1] 5754
## [1] 2939
## [1] 1369
# Welch two-sample t-test on donation amounts (t.test's default).
t.test(greek_donations$amount, nongreek_donations$amount)
##
## Welch Two Sample t-test
##
## data: greek_donations$amount and nongreek_donations$amount
## t = 7.0128, df = 8091.048, p-value = 2.524e-12
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## 9.998532 17.756902
## sample estimates:
## mean of x mean of y
## 78.74800 64.87028
Differences between Family and Non-Family Donations
# Split individual donations by the `family_donation` flag.
family_donations <- donations_to_individual %>% filter(family_donation == TRUE)
nonfamily_donations <- donations_to_individual %>% filter(family_donation == FALSE)
nrow(family_donations)
nrow(nonfamily_donations)
## [1] 2183
## [1] 7879
# Welch two-sample t-test on donation amounts (t.test's default).
t.test(family_donations$amount, nonfamily_donations$amount)
##
## Welch Two Sample t-test
##
## data: family_donations$amount and nonfamily_donations$amount
## t = 11.739, df = 2903.086, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## 31.32445 43.88711
## sample estimates:
## mean of x mean of y
## 106.64899 69.04321
What fraction of people who raised money received at least one donation of >= 100 dollars?
# Distinct recipients (fcid) of at least one donation of 100 or more, as a
# fraction of everyone who raised money.
(donations_to_individual %>%
  filter(amount >= 100) %>%
  distinct(fcid) %>%
  nrow()) / nrow(people_who_raised_money)
## [1] 0.8449235
What fraction of people who raised money received at least one family donation of >= 100 dollars?
# Distinct recipients (fcid) of at least one family donation of 100 or more,
# as a fraction of everyone who raised money.
(donations_to_individual %>%
  filter(amount >= 100, family_donation == TRUE) %>%
  distinct(fcid) %>%
  nrow()) / nrow(people_who_raised_money)
## [1] 0.511822
What fraction of people who raised money raised >= 100 dollars in total from family donations?
# Fraction of fundraisers whose family total is 100 or more; people with no
# family total (NA) count toward the denominator only.
sum(people_who_raised_money$raised_family >= 100, na.rm = TRUE) /
  nrow(people_who_raised_money)
## [1] 0.5674548
# Share of fundraisers on a greek team, on a non-greek team, and with no
# greek flag available (NA).
sum(people_who_raised_money$greek == TRUE, na.rm = TRUE) /
  nrow(people_who_raised_money)
## [1] 0.5445063
sum(people_who_raised_money$greek == FALSE, na.rm = TRUE) /
  nrow(people_who_raised_money)
## [1] 0.3344924
sum(is.na(people_who_raised_money$greek)) /
  nrow(people_who_raised_money)
## [1] 0.1210014
# Among fundraisers with a family total of 100 or more: the same three shares.
with(
  people_who_raised_money,
  sum(raised_family >= 100 & greek == TRUE, na.rm = TRUE) /
    sum(raised_family >= 100, na.rm = TRUE)
)
## [1] 0.5894608
with(
  people_who_raised_money,
  sum(raised_family >= 100 & greek == FALSE, na.rm = TRUE) /
    sum(raised_family >= 100, na.rm = TRUE)
)
## [1] 0.2830882
with(
  people_who_raised_money,
  sum(raised_family >= 100 & is.na(greek), na.rm = TRUE) /
    sum(raised_family >= 100, na.rm = TRUE)
)
## [1] 0.127451
# Linear model: total raised per person explained by the team's greek flag.
# Rows with a missing `greek` are dropped by lm().
model1 <- lm(
  formula = raised ~ greek,
  data = people_who_raised_money
)
summary(model1)
##
## Call:
## lm(formula = raised ~ greek, data = people_who_raised_money)
##
## Residuals:
## Min 1Q Median 3Q Max
## -574.8 -175.8 -75.8 59.7 8904.2
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 390.32 23.40 16.683 < 2e-16 ***
## greekTRUE 185.43 29.73 6.238 6.05e-10 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 513.1 on 1262 degrees of freedom
## (174 observations deleted due to missingness)
## Multiple R-squared: 0.02991, Adjusted R-squared: 0.02914
## F-statistic: 38.91 on 1 and 1262 DF, p-value: 6.049e-10
# Linear model: family total per person explained by the team's greek flag.
# Rows with a missing `raised_family` or `greek` are dropped by lm().
model2 <- lm(
  formula = raised_family ~ greek,
  data = people_who_raised_money
)
summary(model2)
##
## Call:
## lm(formula = raised_family ~ greek, data = people_who_raised_money)
##
## Residuals:
## Min 1Q Median 3Q Max
## -228.66 -129.66 -35.02 70.34 1270.34
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 185.02 10.46 17.692 < 2e-16 ***
## greekTRUE 44.64 12.80 3.487 0.000512 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 179.6 on 885 degrees of freedom
## (551 observations deleted due to missingness)
## Multiple R-squared: 0.01356, Adjusted R-squared: 0.01244
## F-statistic: 12.16 on 1 and 885 DF, p-value: 0.0005119