Install packages from CRAN (use any USA mirror), then load them

library(lattice)

Read the CSV file ‘contribution.csv’ and view the first 6 rows

# Load the contribution data; header = TRUE takes the column names from the
# first line of the file. TRUE is spelled out because T is an ordinary
# variable that can be reassigned.
don <- read.csv("C:/Users/dailo/Downloads/contribution.csv", header = TRUE)
# Show the first rows as a quick check of the import.
head(don)
##   Gender Class.Year Marital.Status       Major Next.Degree FY04Giving
## 1      M       1957              M     History         LLB       2500
## 2      M       1957              M     Physics          MS       5000
## 3      F       1957              M       Music        NONE       5000
## 4      M       1957              M     History        NONE          0
## 5      M       1957              M     Biology          MD       1000
## 6      F       1957              M Mathematics        NONE          0
##   FY03Giving FY02Giving FY01Giving FY00Giving AttendenceEvent
## 1       2500       1400      12060      12000               1
## 2       5000       5000       5000      10000               1
## 3       5000       5000       5000      10000               1
## 4       5100        200        200          0               1
## 5       1000       1000       1005       1000               1
## 6          0          0          0          0               0

Count the number of observations for each value of the variable Class.Year

# Tabulate how many rows fall under each Class.Year value.
table(don[["Class.Year"]])
## 
## 1957 1967 1977 1987 1997 
##  127  222  243  277  361

Plot the frequency of each class year as a bar chart

# Vertical lattice bar chart of the Class.Year counts.
barchart(
  table(don$Class.Year),
  horizontal = FALSE,
  xlab = "Class Year",
  col = "orange"
)

Calculate the total amount of donations over the 5 fiscal years (2000-2004)

# Row-wise total of the five fiscal-year giving columns (FY00-FY04).
# Every column is referenced by its exact name so the sum does not rely on
# `$` partial matching of a truncated name.
don$TGiving <- don$FY00Giving + don$FY01Giving + don$FY02Giving +
  don$FY03Giving + don$FY04Giving
# Mean and standard deviation of the totals.
mean(don$TGiving)
## [1] 980.0436
sd(don$TGiving)
## [1] 6670.773

The quantile() function is used to calculate sample quantiles of a dataset

# Quantiles of total giving from 0% to 100% in 5% steps.
quantile(don$TGiving, probs = seq(0, 1, by = 0.05))
##       0%       5%      10%      15%      20%      25%      30%      35% 
##      0.0      0.0      0.0      0.0      0.0      0.0      0.0     10.0 
##      40%      45%      50%      55%      60%      65%      70%      75% 
##     25.0     50.0     75.0    100.0    150.8    200.0    275.0    400.0 
##      80%      85%      90%      95%     100% 
##    554.2    781.0   1050.0   2277.5 171870.1
# Zoom in on the upper tail: 95% to 100% in 1% steps.
quantile(don$TGiving, probs = seq(0.95, 1, by = 0.01))
##       95%       96%       97%       98%       99%      100% 
##   2277.50   3133.56   5000.00   7000.00  16442.14 171870.06
# Histogram of total giving over all rows; col picks the bar fill colour.
hist(don$TGiving, col = '3')

# Histogram restricted to non-zero totals of at most 1000: the first subset
# drops the zeros, the second keeps values <= 1000 among what remains.
hist(don$TGiving[don$TGiving!=0][don$TGiving[don$TGiving!=0]<=1000], col = '7')

Data grouping: compare total giving across groups using boxplots

# Boxplots of TGiving split by one grouping variable at a time.
# outline = FALSE leaves the outlier points undrawn.

# By class year.
boxplot(TGiving ~ Class.Year, data = don, outline = FALSE)

# By gender.
boxplot(TGiving ~ Gender, data = don, outline = FALSE)

# By marital status.
boxplot(TGiving ~ Marital.Status, data = don, outline = FALSE)

# By the AttendenceEvent indicator.
boxplot(TGiving ~ AttendenceEvent, data = don, outline = FALSE)