Source

Model

Response variable \(Y = \textrm{Chance.of.Admit}\).

Data Analysis

The dataset contains 400 observations of 9 variables. We take Chance.of.Admit as the response variable and use the remaining columns as predictors.

# Read the comma-separated admissions file; the first row holds column names.
grad <- read.table(file = "Admission_Predict.csv", sep = ",", header = TRUE)
# List the column names of the loaded data frame.
names(grad)
## [1] "Serial.No."        "GRE.Score"         "TOEFL.Score"      
## [4] "University.Rating" "SOP"               "LOR"              
## [7] "CGPA"              "Research"          "Chance.of.Admit"
# Number of observations (rows) in the data frame.
nrow(grad)
## [1] 400
# Per-column summary statistics for every column of `grad`.
summary(grad)
##    Serial.No.      GRE.Score      TOEFL.Score    University.Rating
##  Min.   :  1.0   Min.   :290.0   Min.   : 92.0   Min.   :1.000    
##  1st Qu.:100.8   1st Qu.:308.0   1st Qu.:103.0   1st Qu.:2.000    
##  Median :200.5   Median :317.0   Median :107.0   Median :3.000    
##  Mean   :200.5   Mean   :316.8   Mean   :107.4   Mean   :3.087    
##  3rd Qu.:300.2   3rd Qu.:325.0   3rd Qu.:112.0   3rd Qu.:4.000    
##  Max.   :400.0   Max.   :340.0   Max.   :120.0   Max.   :5.000    
##       SOP           LOR             CGPA          Research     
##  Min.   :1.0   Min.   :1.000   Min.   :6.800   Min.   :0.0000  
##  1st Qu.:2.5   1st Qu.:3.000   1st Qu.:8.170   1st Qu.:0.0000  
##  Median :3.5   Median :3.500   Median :8.610   Median :1.0000  
##  Mean   :3.4   Mean   :3.453   Mean   :8.599   Mean   :0.5475  
##  3rd Qu.:4.0   3rd Qu.:4.000   3rd Qu.:9.062   3rd Qu.:1.0000  
##  Max.   :5.0   Max.   :5.000   Max.   :9.920   Max.   :1.0000  
##  Chance.of.Admit 
##  Min.   :0.3400  
##  1st Qu.:0.6400  
##  Median :0.7300  
##  Mean   :0.7244  
##  3rd Qu.:0.8300  
##  Max.   :0.9700
# Least-squares linear model of Chance.of.Admit on every other column of
# `grad`. Terms are written as grad$<column>, so the coefficient labels in the
# printed summary carry the "grad$" prefix.
# NOTE(review): Serial.No. is included as a predictor; it looks like a row
# identifier rather than an applicant attribute -- confirm it belongs here.
fit<-lm(grad$Chance.of.Admit~grad$Serial.No.+grad$GRE.Score+grad$TOEFL.Score+grad$University.Rating+grad$SOP+grad$LOR+grad$CGPA+grad$Research)
# Print residual quantiles, the coefficient table, R-squared and the F-test.
summary(fit)
## 
## Call:
## lm(formula = grad$Chance.of.Admit ~ grad$Serial.No. + grad$GRE.Score + 
##     grad$TOEFL.Score + grad$University.Rating + grad$SOP + grad$LOR + 
##     grad$CGPA + grad$Research)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.233576 -0.026637  0.006226  0.038273  0.140252 
## 
## Coefficients:
##                          Estimate Std. Error t value Pr(>|t|)    
## (Intercept)            -1.294e+00  1.201e-01 -10.775  < 2e-16 ***
## grad$Serial.No.         1.593e-04  2.769e-05   5.753 1.77e-08 ***
## grad$GRE.Score          1.799e-03  5.749e-04   3.129 0.001885 ** 
## grad$TOEFL.Score        3.682e-03  1.056e-03   3.487 0.000543 ***
## grad$University.Rating  8.785e-03  4.617e-03   1.903 0.057821 .  
## grad$SOP                9.937e-05  5.380e-03   0.018 0.985272    
## grad$LOR                2.154e-02  5.330e-03   4.041 6.41e-05 ***
## grad$CGPA               1.053e-01  1.198e-02   8.786  < 2e-16 ***
## grad$Research           2.438e-02  7.653e-03   3.185 0.001561 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.06132 on 391 degrees of freedom
## Multiple R-squared:  0.8188, Adjusted R-squared:  0.8151 
## F-statistic: 220.9 on 8 and 391 DF,  p-value: < 2.2e-16
# Scatterplot matrix of every pair of columns in `grad`.
pairs(grad)

# GRE score (vertical axis) against TOEFL score (horizontal axis).
plot(x = grad$TOEFL.Score, y = grad$GRE.Score)

# Residual diagnostic for `fit`: residuals on the vertical axis against the
# fitted values on the horizontal axis. A dashed reference line at zero makes
# systematic departures (curvature, changing spread) easy to see.
fit.resid <- resid(fit)
plot(
  fitted(fit), fit.resid,
  xlab = "Fitted Chance.of.Admit",
  ylab = "Residuals"
)
# Reference line at residual = 0; the original drew a line at 0.5 on the
# response axis, which has no diagnostic interpretation.
abline(h = 0, lty = 2)