Install packages from CRAN; use any USA mirror
library(lattice)
Read the CSV file ‘contribution.csv’ and view the first 6 rows
# Load the contribution data into the data frame `don`; the first line of the
# file supplies the column names.
# NOTE(review): the path is absolute and machine-specific, so the script only
# runs where this exact file exists.
don <- read.csv("C:/Users/dailo/Downloads/contribution.csv", header = TRUE)
# Print the first six rows as a quick sanity check of the import.
head(don)
## Gender Class.Year Marital.Status Major Next.Degree FY04Giving
## 1 M 1957 M History LLB 2500
## 2 M 1957 M Physics MS 5000
## 3 F 1957 M Music NONE 5000
## 4 M 1957 M History NONE 0
## 5 M 1957 M Biology MD 1000
## 6 F 1957 M Mathematics NONE 0
## FY03Giving FY02Giving FY01Giving FY00Giving AttendenceEvent
## 1 2500 1400 12060 12000 1
## 2 5000 5000 5000 10000 1
## 3 5000 5000 5000 10000 1
## 4 5100 200 200 0 1
## 5 1000 1000 1005 1000 1
## 6 0 0 0 0 0
Count the number of records for each value of the variable Class.Year
# Frequency table: number of records for each distinct Class.Year value.
table(don[["Class.Year"]])
##
## 1957 1967 1977 1987 1997
## 127 222 243 277 361
Plot the frequency of each class year
# Vertical lattice bar chart of the number of records per class year.
barchart(
  table(don[["Class.Year"]]),
  horizontal = FALSE,
  xlab = "Class Year",
  col = "orange"
)

Calculate the total amount of donations over the 5 fiscal years (2000-2004)
# Total giving per record across fiscal years 2000-2004: the element-wise sum
# of the five yearly giving columns, stored as the new column TGiving.
# Every column name is spelled out in full. `$` on a data frame partially
# matches names, so a truncated name only works by accident and breaks (or
# silently returns NULL) as soon as another column shares the prefix.
don$TGiving <- don$FY00Giving + don$FY01Giving + don$FY02Giving +
  don$FY03Giving + don$FY04Giving
# Mean of the five-year totals.
mean(don$TGiving)
## [1] 980.0436
# Sample standard deviation of the five-year totals.
sd(don[["TGiving"]])
## [1] 6670.773
The quantile() function is used to calculate sample quantiles of a dataset
# Quantiles of total giving at every 5% step from the minimum (0%) to the
# maximum (100%).
quantile(don[["TGiving"]], probs = seq(from = 0, to = 1, by = 0.05))
## 0% 5% 10% 15% 20% 25% 30% 35%
## 0.0 0.0 0.0 0.0 0.0 0.0 0.0 10.0
## 40% 45% 50% 55% 60% 65% 70% 75%
## 25.0 50.0 75.0 100.0 150.8 200.0 275.0 400.0
## 80% 85% 90% 95% 100%
## 554.2 781.0 1050.0 2277.5 171870.1
# Zoom in on the upper tail: quantiles of total giving from 95% to 100% in
# 1% steps.
quantile(don[["TGiving"]], probs = seq(from = 0.95, to = 1, by = 0.01))
## 95% 96% 97% 98% 99% 100%
## 2277.50 3133.56 5000.00 7000.00 16442.14 171870.06
# Histogram of the five-year totals over all records.
# The default plot title and x-axis label are built from the text of the
# first argument, so the call is kept exactly as written to leave them intact.
# col = '3' gives the bar colour as a digit string -- presumably entry 3 of
# the active palette; TODO confirm.
hist(don$TGiving, col = '3')

# Histogram of the five-year totals restricted to non-zero values of at most
# 1000, so the bulk of the distribution is not flattened by the few very
# large totals.
# A single logical mask selects the same values as filtering twice in a row.
# The title and axis label are set explicitly because the defaults would be
# the full text of the subsetting expression.
hist(
  don$TGiving[don$TGiving != 0 & don$TGiving <= 1000],
  main = "Histogram of non-zero total giving up to 1000",
  xlab = "Total giving, FY00-FY04",
  col = "7"
)

Data grouping: compare total giving across groups with box plots
# One box plot of total giving per grouping column, drawn in this order:
# class year, gender, marital status, event attendance.
# outline = FALSE hides the outlier points so the boxes stay readable.
for (group_var in c("Class.Year", "Gender", "Marital.Status", "AttendenceEvent")) {
  # Builds the formula TGiving ~ <group_var> for the current grouping column.
  boxplot(reformulate(group_var, response = "TGiving"), data = don, outline = FALSE)
}
