Amrin Shaikh(S3654729)
Last updated: 23 August, 2017
# Read the semicolon-delimited student maths dataset and report how many
# rows were loaded.
Student_d1 <- read_delim(
  file = "D:/Intro to Statistics/Assignment 4/student-mat.csv",
  delim = ";",
  escape_double = FALSE,
  trim_ws = TRUE
)
print(nrow(Student_d1))
## [1] 395
## # A tibble: 2 × 10
## sex Min Q1 Median Q3 Max Mean SD n Missing
## <chr> <int> <dbl> <dbl> <dbl> <int> <dbl> <dbl> <int> <int>
## 1 F 0 8 10 13 19 9.966346 4.622338 208 0
## 2 M 0 9 11 14 20 10.914439 4.495297 187 0
## # A tibble: 2 × 10
## sex Min Q1 Median Q3 Max Mean SD n Missing
## <chr> <int> <dbl> <dbl> <dbl> <int> <dbl> <dbl> <int> <int>
## 1 F 4 8 10 13 19 10.62019 3.232530 208 0
## 2 M 3 9 11 14 19 11.22995 3.392839 187 0
# Box plot and summary statistics for sex vs G3 score
boxplot(G3 ~ sex, data = Student_d1, ylab = "Grade3 Score", xlab = "Sex")

# Band the final grade (G3) into four result categories.
# The column is created up front so the indexed assignments below never write
# into a column that does not exist yet; rows whose G3 is missing or above 20
# keep NA.
Student_d1$Result <- NA_character_
Student_d1$Result[Student_d1$G3 < 5] <- "Very Poor"
Student_d1$Result[Student_d1$G3 >= 5 & Student_d1$G3 <= 10] <- "Poor"
Student_d1$Result[Student_d1$G3 > 10 & Student_d1$G3 <= 15] <- "Good"
Student_d1$Result[Student_d1$G3 > 15 & Student_d1$G3 <= 20] <- "Very Good"

# Cross-tabulate result category against sex
table(Student_d1$Result, Student_d1$sex)
##
## F M
## Good 87 82
## Poor 81 66
## Very Good 16 24
## Very Poor 24 15
# Put the result categories in best-to-worst order instead of alphabetical
# order, then rebuild the cross-tabulation.
Student_d1$Result <- fct_relevel(
  Student_d1$Result,
  "Very Good", "Good", "Poor", "Very Poor"
)
table(Student_d1$Result, Student_d1$sex)
##
## F M
## Very Good 16 24
## Good 87 82
## Poor 81 66
## Very Poor 24 15
# Result-by-sex counts with row and column totals appended
tb <- addmargins(table(Student_d1$Result, Student_d1$sex))
# Label the two table dimensions
names(dimnames(tb)) <- c("Result", "sex")
kable(tb,padding=0,format="html")

| | F | M | Sum |
|---|---|---|---|
| Very Good | 16 | 24 | 40 |
| Good | 87 | 82 | 169 |
| Poor | 81 | 66 | 147 |
| Very Poor | 24 | 15 | 39 |
| Sum | 208 | 187 | 395 |
# Row proportions: share of each sex within every result category,
# rounded to two decimals
result_by_sex <- table(Student_d1$Result, Student_d1$sex)
tb2 <- round(prop.table(result_by_sex, margin = 1), 2)
# Label the two table dimensions
names(dimnames(tb2)) <- c("Result", "sex")
kable(tb2,padding=0,format = "html")

| | F | M |
|---|---|---|
| Very Good | 0.40 | 0.60 |
| Good | 0.51 | 0.49 |
| Poor | 0.55 | 0.45 |
| Very Poor | 0.62 | 0.38 |
# Grouped bar chart of the row proportions in tb2: one cluster of bars per
# sex (the columns of tb2), one bar per result category (the rows of tb2).
barplot(
  tb2,
  ylab = "Proportion Within Result",
  ylim = c(0, 1),
  legend.text = rownames(tb2),
  beside = TRUE,
  # args.legend must be a list; c() would coerce horiz = TRUE to the
  # string "TRUE".
  args.legend = list(x = "top", horiz = TRUE, title = "Result category"),
  xlab = "Sex",
  col = c("Red", "Yellow", "orange", "Green")
)

# Chi-square test of association between Result and sex
chi2 <- chisq.test(table(Student_d1$Result, Student_d1$sex))
chi2
##
## Pearson's Chi-squared test
##
## data: table(Student_d1$Result, Student_d1$sex)
## X-squared = 4.251, df = 3, p-value = 0.2356
# Upper-tail p-value for the chi-square statistic. The statistic and degrees
# of freedom are taken from the fitted test object rather than typed in by
# hand, so the result always agrees with the p-value reported by chisq.test().
# Rounded to four decimals to match the precision of the printed test.
round(
  pchisq(
    q = unname(chi2$statistic),
    df = unname(chi2$parameter),
    lower.tail = FALSE
  ),
  4
)
## [1] 0.2356
# Critical value of the chi-square distribution at the 5% significance level
qchisq(p = 0.95, df = unname(chi2$parameter))
## [1] 7.814728
# Observed counts used by the test, with row and column totals
addmargins(chi2$observed)
##
## F M Sum
## Very Good 16 24 40
## Good 87 82 169
## Poor 81 66 147
## Very Poor 24 15 39
## Sum 208 187 395
# Expected counts under independence, with totals, rounded to two decimals
round(addmargins(chi2$expected), 2)
##
## F M Sum
## Very Good 21.06 18.94 40
## Good 88.99 80.01 169
## Poor 77.41 69.59 147
## Very Poor 20.54 18.46 39
## Sum 208.00 187.00 395
# Raw residuals: observed minus expected counts, rounded to two decimals.
# The parentheses are required: %>% binds tighter than `-`, so without them
# only chi2$expected would be rounded before the subtraction.
(chi2$observed - chi2$expected) %>% round(2)
##
## F M
## Very Good -5.06 5.06
## Good -1.99 1.99
## Poor 3.59 -3.59
## Very Poor 3.46 -3.46
# Standardised residuals from the chi-square test, one per Result x sex cell
chi2$stdres##
## F M
## Very Good -1.6913438 1.6913438
## Good -0.4058105 0.4058105
## Poor 0.7489347 -0.7489347
## Very Poor 1.1699704 -1.1699704
# Scatter plot of G3 score against G1 score
plot(
  G3 ~ G1,
  data = Student_d1,
  xlab = "G1 Score",
  ylab = "G3 Score"
)
# Pearson correlation between G1 and G3, using complete pairs only
with(Student_d1, cor(G1, G3, use = "complete.obs"))
## [1] 0.8014679
# Full correlation analysis
# Build a matrix holding only the two variables to be correlated
bivariate <- Student_d1 %>%
  dplyr::select(G1, G3) %>%
  as.matrix()
rcorr(bivariate, type = "pearson")
## G1 G3
## G1 1.0 0.8
## G3 0.8 1.0
##
## n= 395
##
##
## P
## G1 G3
## G1 0
## G3 0
# 95% confidence interval for the Pearson correlation between G1 and G3.
# The interval printed below does not contain 0.
# The correlation and the sample size are both computed from the complete
# G1/G3 pairs, so they stay consistent with each other if the data change.
r <- cor(Student_d1$G1, Student_d1$G3, use = "complete.obs")
CIr(
  r = r,
  n = sum(complete.cases(Student_d1$G1, Student_d1$G3)),
  level = 0.95
)
## [1] 0.7631479 0.8341713