Reading the data from a CSV file:

# Load the MBA data set from the working directory.
# stringsAsFactors = TRUE keeps text columns (Gender, Board_SSC, Placement, ...)
# as factors, which is what the level counts in the summary output rely on;
# since R 4.0 read.csv() would otherwise leave them as character vectors.
mbaa.df <- read.csv("Deans Dilemma.csv", stringsAsFactors = TRUE)

Getting summary stats:

# Per-column summary of the loaded data frame: quartiles and mean for numeric
# columns, level counts for factor columns.
summary(mbaa.df)
##       SlNo       Gender     Gender.B       Percent_SSC     Board_SSC  
##  Min.   :  1.0   F:127   Min.   :0.0000   Min.   :37.00   CBSE  :113  
##  1st Qu.: 98.5   M:264   1st Qu.:0.0000   1st Qu.:56.00   ICSE  : 77  
##  Median :196.0           Median :0.0000   Median :64.50   Others:201  
##  Mean   :196.0           Mean   :0.3248   Mean   :64.65               
##  3rd Qu.:293.5           3rd Qu.:1.0000   3rd Qu.:74.00               
##  Max.   :391.0           Max.   :1.0000   Max.   :87.20               
##                                                                       
##    Board_CBSE      Board_ICSE      Percent_HSC    Board_HSC  
##  Min.   :0.000   Min.   :0.0000   Min.   :40.0   CBSE  : 96  
##  1st Qu.:0.000   1st Qu.:0.0000   1st Qu.:54.0   ISC   : 48  
##  Median :0.000   Median :0.0000   Median :63.0   Others:247  
##  Mean   :0.289   Mean   :0.1969   Mean   :63.8               
##  3rd Qu.:1.000   3rd Qu.:0.0000   3rd Qu.:72.0               
##  Max.   :1.000   Max.   :1.0000   Max.   :94.7               
##                                                              
##     Stream_HSC  Percent_Degree                Course_Degree
##  Arts    : 18   Min.   :35.00   Arts                 : 13  
##  Commerce:222   1st Qu.:57.52   Commerce             :117  
##  Science :151   Median :63.00   Computer Applications: 32  
##                 Mean   :62.98   Engineering          : 37  
##                 3rd Qu.:69.00   Management           :163  
##                 Max.   :89.00   Others               :  5  
##                                 Science              : 24  
##   Degree_Engg      Experience_Yrs   Entrance_Test     S.TEST      
##  Min.   :0.00000   Min.   :0.0000   MAT    :265   Min.   :0.0000  
##  1st Qu.:0.00000   1st Qu.:0.0000   None   : 67   1st Qu.:1.0000  
##  Median :0.00000   Median :0.0000   K-MAT  : 24   Median :1.0000  
##  Mean   :0.09463   Mean   :0.4783   CAT    : 22   Mean   :0.8286  
##  3rd Qu.:0.00000   3rd Qu.:1.0000   PGCET  :  8   3rd Qu.:1.0000  
##  Max.   :1.00000   Max.   :3.0000   GCET   :  2   Max.   :1.0000  
##                                     (Other):  3                   
##  Percentile_ET    S.TEST.SCORE    Percent_MBA   
##  Min.   : 0.00   Min.   : 0.00   Min.   :50.83  
##  1st Qu.:41.19   1st Qu.:41.19   1st Qu.:57.20  
##  Median :62.00   Median :62.00   Median :61.01  
##  Mean   :54.93   Mean   :54.93   Mean   :61.67  
##  3rd Qu.:78.00   3rd Qu.:78.00   3rd Qu.:66.02  
##  Max.   :98.69   Max.   :98.69   Max.   :77.89  
##                                                 
##            Specialization_MBA Marks_Communication Marks_Projectwork
##  Marketing & Finance:222      Min.   :50.00       Min.   :50.00    
##  Marketing & HR     :156      1st Qu.:53.00       1st Qu.:64.00    
##  Marketing & IB     : 13      Median :58.00       Median :69.00    
##                               Mean   :60.54       Mean   :68.36    
##                               3rd Qu.:67.00       3rd Qu.:74.00    
##                               Max.   :88.00       Max.   :87.00    
##                                                                    
##    Marks_BOCA         Placement    Placement_B        Salary      
##  Min.   :50.00   Not Placed: 79   Min.   :0.000   Min.   :     0  
##  1st Qu.:57.00   Placed    :312   1st Qu.:1.000   1st Qu.:172800  
##  Median :63.00                    Median :1.000   Median :240000  
##  Mean   :64.38                    Mean   :0.798   Mean   :219078  
##  3rd Qu.:72.50                    3rd Qu.:1.000   3rd Qu.:300000  
##  Max.   :96.00                    Max.   :1.000   Max.   :940000  
## 

Plotting each important variable:

# One frequency bar chart per categorical column. Each entry holds, in order:
# the column to tabulate, the plot title, and the x-axis label.
freq_charts <- list(
  list("Gender", "Frequency distribution of gender", "Gender"),
  list("Board_SSC", "Frequency distribution of Different Boards", "Boards"),
  list("Entrance_Test", "Frequency distribution of Different Entrance test", "Test"),
  list("Stream_HSC", "Frequency distribution of Different stream", "Stream"),
  list("Course_Degree", "Frequency distribution of Different Course degree", "Course"),
  list("Specialization_MBA", "Frequency distribution of Different Specialization of MBA", "Specialization"),
  list("Placement", "Frequency distribution of status of placement", "Status of placement")
)

for (chart in freq_charts) {
  level_counts <- table(mbaa.df[[chart[[1]]]])
  barplot(level_counts, main = chart[[2]], xlab = chart[[3]], ylab = "Frequency")
}

# Histograms of the three marks columns; the vector maps each plot title to
# the column it draws.
marks_columns <- c(
  "Distribution of marks of Communication" = "Marks_Communication",
  "Distribution of marks of Project" = "Marks_Projectwork",
  "Distribution of marks of BOCA" = "Marks_BOCA"
)

for (plot_title in names(marks_columns)) {
  hist(mbaa.df[[marks_columns[[plot_title]]]],
       main = plot_title, xlab = "Marks", col = "grey")
}

# The title promises salaries of *placed* students, so restrict the data to
# rows whose Placement is "Placed". Plotting the whole column also includes
# the Salary values of 0 seen in the summary (presumably the students who
# were not placed -- verify against the data), which adds a spurious bar at 0.
placed_salary <- mbaa.df$Salary[mbaa.df$Placement == "Placed"]
hist(placed_salary, main = "Distribution of Salary of placed students",
     xlab = "Salary", col = "grey")

Plotting pairs of related variables:

# Bar charts of the *sum* of percentage marks within each group.
# Titles now say which exam each plot shows (the SSC and HSC plots previously
# shared one title) and the x-axis labels name the grouping variable.
# NOTE(review): a sum grows with group size, so these bars largely track how
# many students fall in each group; consider `mean` if the aim is to compare
# typical marks between groups.

# SSC (10th) marks summed per SSC board.
barplot(by(mbaa.df$Percent_SSC, mbaa.df$Board_SSC, sum),
        main = "Sum of SSC marks of each board",
        xlab = "SSC board", ylab = "Total")

# HSC (12th) marks summed per HSC board.
barplot(by(mbaa.df$Percent_HSC, mbaa.df$Board_HSC, sum),
        main = "Sum of HSC marks of each board",
        xlab = "HSC board", ylab = "Total")

# HSC marks summed per degree course the students went on to take.
barplot(by(mbaa.df$Percent_HSC, mbaa.df$Course_Degree, sum),
        main = "Distribution of selection of course degree on the basis of their HSC marks",
        xlab = "Course degree", ylab = "Total")

# Degree marks summed per degree course.
barplot(by(mbaa.df$Percent_Degree, mbaa.df$Course_Degree, sum),
        main = "Sum of degree marks of each course degree",
        xlab = "Course degree", ylab = "Total")

# Stacked bars: one bar per HSC board, segments are the HSC streams.
# legend.text = TRUE labels the segments with the row names of the table
# (the streams); without it the stacked colours cannot be told apart.
# The legend is placed top-left to keep it away from the bars on the right.
stream_by_board <- table(mbaa.df$Stream_HSC, mbaa.df$Board_HSC)
barplot(stream_by_board,
        main = "Distribution of boards and corresponding stream",
        xlab = "Board", ylab = "Frequency",
        legend.text = TRUE, args.legend = list(x = "topleft"))

Plotting Corrgram

library(corrgram)
# Correlogram of the data frame: shaded cells below the diagonal, pie glyphs
# above it, variable names on the diagonal; order = TRUE asks corrgram to
# reorder the variables.
corrgram(
  mbaa.df,
  order = TRUE,
  lower.panel = panel.shade,
  upper.panel = panel.pie,
  text.panel = panel.txt,
  main = "Corrgram of MBA variables"
)

Applying OLS regression:

# OLS regression of Salary on the three marks columns, the MBA percentage,
# the MBA specialization and gender.
# Columns are named in the formula and resolved through `data =` rather than
# written as `mbaa.df$col`: the fitted model is the same, but the term labels
# become plain column names (e.g. `GenderM` instead of `mbaa.df$GenderM`) and
# the fit can be used with predict(fit, newdata = ...).
# NOTE(review): Salary contains values of 0 (see its Min. in the summary),
# presumably for students who were not placed; confirm whether those rows
# should be part of a salary model.
fit <- lm(
  Salary ~ Marks_Communication + Marks_Projectwork + Marks_BOCA +
    Percent_MBA + Specialization_MBA + Gender,
  data = mbaa.df
)
summary(fit)
## 
## Call:
## lm(formula = mbaa.df$Salary ~ mbaa.df$Marks_Communication + mbaa.df$Marks_Projectwork + 
##     mbaa.df$Marks_BOCA + mbaa.df$Percent_MBA + mbaa.df$Specialization_MBA + 
##     mbaa.df$Gender)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -313284  -54850   23349   70037  672757 
## 
## Coefficients:
##                                           Estimate Std. Error t value
## (Intercept)                              -255474.9    92562.2  -2.760
## mbaa.df$Marks_Communication                  671.3     1108.9   0.605
## mbaa.df$Marks_Projectwork                   1905.0     1072.4   1.776
## mbaa.df$Marks_BOCA                          1241.9      825.6   1.504
## mbaa.df$Percent_MBA                         3013.2     1931.6   1.560
## mbaa.df$Specialization_MBAMarketing & HR  -12163.0    14241.8  -0.854
## mbaa.df$Specialization_MBAMarketing & IB  -23476.3    38498.8  -0.610
## mbaa.df$GenderM                            64496.3    15437.0   4.178
##                                          Pr(>|t|)    
## (Intercept)                               0.00606 ** 
## mbaa.df$Marks_Communication               0.54530    
## mbaa.df$Marks_Projectwork                 0.07646 .  
## mbaa.df$Marks_BOCA                        0.13334    
## mbaa.df$Percent_MBA                       0.11959    
## mbaa.df$Specialization_MBAMarketing & HR  0.39362    
## mbaa.df$Specialization_MBAMarketing & IB  0.54236    
## mbaa.df$GenderM                          3.65e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 133300 on 383 degrees of freedom
## Multiple R-squared:  0.08781,    Adjusted R-squared:  0.07114 
## F-statistic: 5.267 on 7 and 383 DF,  p-value: 9.296e-06
# 95% confidence intervals for every coefficient of the fitted model.
confint(fit, level = 0.95)
##                                                 2.5 %     97.5 %
## (Intercept)                              -437468.6303 -73481.239
## mbaa.df$Marks_Communication                -1509.0354   2851.632
## mbaa.df$Marks_Projectwork                   -203.4990   4013.447
## mbaa.df$Marks_BOCA                          -381.3755   2865.235
## mbaa.df$Percent_MBA                         -784.6002   6810.941
## mbaa.df$Specialization_MBAMarketing & HR  -40164.8794  15838.959
## mbaa.df$Specialization_MBAMarketing & IB  -99171.7321  52219.189
## mbaa.df$GenderM                            34144.3318  94848.216
# Point estimates of the model coefficients as a named numeric vector.
coef(fit)
##                              (Intercept) 
##                             -255474.9345 
##              mbaa.df$Marks_Communication 
##                                 671.2983 
##                mbaa.df$Marks_Projectwork 
##                                1904.9742 
##                       mbaa.df$Marks_BOCA 
##                                1241.9296 
##                      mbaa.df$Percent_MBA 
##                                3013.1706 
## mbaa.df$Specialization_MBAMarketing & HR 
##                              -12162.9602 
## mbaa.df$Specialization_MBAMarketing & IB 
##                              -23476.2714 
##                          mbaa.df$GenderM 
##                               64496.2738

Summary (where categories are compared, values decrease from left to right):

  1. Gender: Male (264) >> Female (127).
  2. SSC board: Others (201) >> CBSE (113) >> ICSE (77).
  3. Entrance test: MAT (265) >> None (67) >> K-MAT (24) >> CAT (22) >> PGCET (8) >> GCET (2) >> (G-MAT, G-SAT, XAT).
  4. HSC stream: Commerce (222) >> Science (151) >> Arts (18).
  5. Degree course: Management (163) >> Commerce (117) >> Engineering (37) >> Computer Applications (32) >> Science (24) >> Arts (13) >> Others (5).
  6. MBA specialization: Marketing & Finance (222) >> Marketing & HR (156) >> Marketing & IB (13).
  7. Placement: Placed (312) >> Not Placed (79).
  8. Marks in Communication are mostly in the range 50 to 55.
  9. Project marks are mostly in the range 65 to 75.
  10. BOCA marks are mostly in the range 50 to 65.
  11. Salaries are mostly in the range of 2 to 3 lakh.
  12. Sum of board marks for 10th: Others >> CBSE >> ICSE.
  13. Sum of board marks for 12th: Others >> CBSE >> ISC.
  14. Sum of marks by degree course: Management (>10000) >> Commerce (>8000) >> Engineering (<4000) >> Computer Applications (<2000) >> Science >> Arts >> Others.
  15. Marks in each stream: Commerce (1,47,676) >> Science (9,024) >> Arts (1,152).