Reading the data from a CSV file:

# Load the data set from the working directory. The file name is a plain
# literal, so no paste() call is needed. stringsAsFactors = TRUE keeps text
# columns as factors (the default before R 4.0), so summary() reports level
# counts for them.
mbaa.df <- read.csv("Data - Deans Dilemma.csv", stringsAsFactors = TRUE)

Getting summary stats:

# Per-column overview of the data frame: quartiles and mean for numeric
# columns, level counts for factor columns.
summary(mbaa.df)
##       SlNo       Gender     Gender.B       Percent_SSC     Board_SSC  
##  Min.   :  1.0   F:127   Min.   :0.0000   Min.   :37.00   CBSE  :113  
##  1st Qu.: 98.5   M:264   1st Qu.:0.0000   1st Qu.:56.00   ICSE  : 77  
##  Median :196.0           Median :0.0000   Median :64.50   Others:201  
##  Mean   :196.0           Mean   :0.3248   Mean   :64.65               
##  3rd Qu.:293.5           3rd Qu.:1.0000   3rd Qu.:74.00               
##  Max.   :391.0           Max.   :1.0000   Max.   :87.20               
##                                                                       
##    Board_CBSE      Board_ICSE      Percent_HSC    Board_HSC  
##  Min.   :0.000   Min.   :0.0000   Min.   :40.0   CBSE  : 96  
##  1st Qu.:0.000   1st Qu.:0.0000   1st Qu.:54.0   ISC   : 48  
##  Median :0.000   Median :0.0000   Median :63.0   Others:247  
##  Mean   :0.289   Mean   :0.1969   Mean   :63.8               
##  3rd Qu.:1.000   3rd Qu.:0.0000   3rd Qu.:72.0               
##  Max.   :1.000   Max.   :1.0000   Max.   :94.7               
##                                                              
##     Stream_HSC  Percent_Degree                Course_Degree
##  Arts    : 18   Min.   :35.00   Arts                 : 13  
##  Commerce:222   1st Qu.:57.52   Commerce             :117  
##  Science :151   Median :63.00   Computer Applications: 32  
##                 Mean   :62.98   Engineering          : 37  
##                 3rd Qu.:69.00   Management           :163  
##                 Max.   :89.00   Others               :  5  
##                                 Science              : 24  
##   Degree_Engg      Experience_Yrs   Entrance_Test     S.TEST      
##  Min.   :0.00000   Min.   :0.0000   MAT    :265   Min.   :0.0000  
##  1st Qu.:0.00000   1st Qu.:0.0000   None   : 67   1st Qu.:1.0000  
##  Median :0.00000   Median :0.0000   K-MAT  : 24   Median :1.0000  
##  Mean   :0.09463   Mean   :0.4783   CAT    : 22   Mean   :0.8286  
##  3rd Qu.:0.00000   3rd Qu.:1.0000   PGCET  :  8   3rd Qu.:1.0000  
##  Max.   :1.00000   Max.   :3.0000   GCET   :  2   Max.   :1.0000  
##                                     (Other):  3                   
##  Percentile_ET    S.TEST.SCORE    Percent_MBA   
##  Min.   : 0.00   Min.   : 0.00   Min.   :50.83  
##  1st Qu.:41.19   1st Qu.:41.19   1st Qu.:57.20  
##  Median :62.00   Median :62.00   Median :61.01  
##  Mean   :54.93   Mean   :54.93   Mean   :61.67  
##  3rd Qu.:78.00   3rd Qu.:78.00   3rd Qu.:66.02  
##  Max.   :98.69   Max.   :98.69   Max.   :77.89  
##                                                 
##            Specialization_MBA Marks_Communication Marks_Projectwork
##  Marketing & Finance:222      Min.   :50.00       Min.   :50.00    
##  Marketing & HR     :156      1st Qu.:53.00       1st Qu.:64.00    
##  Marketing & IB     : 13      Median :58.00       Median :69.00    
##                               Mean   :60.54       Mean   :68.36    
##                               3rd Qu.:67.00       3rd Qu.:74.00    
##                               Max.   :88.00       Max.   :87.00    
##                                                                    
##    Marks_BOCA         Placement    Placement_B        Salary      
##  Min.   :50.00   Not Placed: 79   Min.   :0.000   Min.   :     0  
##  1st Qu.:57.00   Placed    :312   1st Qu.:1.000   1st Qu.:172800  
##  Median :63.00                    Median :1.000   Median :240000  
##  Mean   :64.38                    Mean   :0.798   Mean   :219078  
##  3rd Qu.:72.50                    3rd Qu.:1.000   3rd Qu.:300000  
##  Max.   :96.00                    Max.   :1.000   Max.   :940000  
## 

Plotting each important variable:

# Frequency bar charts, one per categorical column. Each chart plots the
# level counts produced by table().

# Gender
barplot(
  table(mbaa.df[["Gender"]]),
  main = "Frequency distribution of gender",
  xlab = "Gender",
  ylab = "Frequency"
)

# SSC board
barplot(
  table(mbaa.df[["Board_SSC"]]),
  main = "Frequency distribution of Different Boards",
  xlab = "Boards",
  ylab = "Frequency"
)

# Entrance test taken
barplot(
  table(mbaa.df[["Entrance_Test"]]),
  main = "Frequency distribution of Different Entrance test",
  xlab = "Test",
  ylab = "Frequency"
)

# HSC stream
barplot(
  table(mbaa.df[["Stream_HSC"]]),
  main = "Frequency distribution of Different stream",
  xlab = "Stream",
  ylab = "Frequency"
)

# Undergraduate course
barplot(
  table(mbaa.df[["Course_Degree"]]),
  main = "Frequency distribution of Different Course degree",
  xlab = "Course",
  ylab = "Frequency"
)

# MBA specialization
barplot(
  table(mbaa.df[["Specialization_MBA"]]),
  main = "Frequency distribution of Different Specialization of MBA",
  xlab = "Specialization",
  ylab = "Frequency"
)

# Placement status
barplot(
  table(mbaa.df[["Placement"]]),
  main = "Frequency distribution of status of placement",
  xlab = "Status of placement",
  ylab = "Frequency"
)

# Histograms of the three MBA marks columns.

# Communication marks
hist(
  mbaa.df[["Marks_Communication"]],
  main = "Distribution of marks of Communication",
  xlab = "Marks",
  col = "grey"
)

# Project work marks
hist(
  mbaa.df[["Marks_Projectwork"]],
  main = "Distribution of marks of Project",
  xlab = "Marks",
  col = "grey"
)

# BOCA marks
hist(
  mbaa.df[["Marks_BOCA"]],
  main = "Distribution of marks of BOCA",
  xlab = "Marks",
  col = "grey"
)

# Salary histogram restricted to placed students, as the title states.
# The Salary column has a minimum of 0 (presumably the unplaced students --
# confirm against the data), which would otherwise add a spike at zero.
hist(
  mbaa.df$Salary[mbaa.df$Placement == "Placed"],
  main = "Distribution of Salary of placed students",
  xlab = "Salary",
  col = "grey"
)

Plotting pairs of related variables:

# Bar charts of a percentage column summed within each level of a grouping
# column. The x axis shows the groups and the y axis the summed percentages.
# NOTE(review): a sum grows with the number of students in a group, so these
# bars mostly reflect group size; consider `mean` to compare performance.

# SSC percentage summed per SSC board
barplot(
  by(mbaa.df$Percent_SSC, mbaa.df$Board_SSC, sum),
  main = "Sum of SSC marks by SSC board",
  xlab = "SSC board",
  ylab = "Sum of SSC percentages"
)

# HSC percentage summed per HSC board
barplot(
  by(mbaa.df$Percent_HSC, mbaa.df$Board_HSC, sum),
  main = "Sum of HSC marks by HSC board",
  xlab = "HSC board",
  ylab = "Sum of HSC percentages"
)

# HSC percentage summed per undergraduate course
barplot(
  by(mbaa.df$Percent_HSC, mbaa.df$Course_Degree, sum),
  main = "Sum of HSC marks by course degree",
  xlab = "Course degree",
  ylab = "Sum of HSC percentages"
)

# Degree percentage summed per undergraduate course
barplot(
  by(mbaa.df$Percent_Degree, mbaa.df$Course_Degree, sum),
  main = "Sum of degree marks by course degree",
  xlab = "Course degree",
  ylab = "Sum of degree percentages"
)

# Stacked bar chart: one bar per HSC board, split by HSC stream. The legend
# (taken from the row names of the table) identifies which segment is which
# stream.
barplot(
  table(mbaa.df$Stream_HSC, mbaa.df$Board_HSC),
  main = "Distribution of boards and corresponding stream",
  xlab = "HSC board",
  ylab = "Frequency",
  legend.text = TRUE
)

Plotting the corrgram:

# Correlation overview of the data frame, drawn with the corrgram package:
# shaded cells below the diagonal, pie glyphs above it, and variable names
# on the diagonal.
library(corrgram)
corrgram(
  mbaa.df,
  order = TRUE,
  lower.panel = panel.shade,
  upper.panel = panel.pie,
  text.panel = panel.txt,
  main = "Corrgram of MBA variables"
)

Applying OLS regression:

# Ordinary least squares fit of salary on the MBA marks, specialization and
# gender. Columns are named in the formula and resolved through `data`
# rather than written as mbaa.df$col: this keeps the coefficient labels
# short and lets predict() be used with a new data frame.
fit <- lm(
  Salary ~ Marks_Communication + Marks_Projectwork + Marks_BOCA +
    Percent_MBA + Specialization_MBA + Gender,
  data = mbaa.df
)

# Coefficient table with standard errors, t tests and overall fit statistics.
summary(fit)
## 
## Call:
## lm(formula = Salary ~ Marks_Communication + Marks_Projectwork + 
##     Marks_BOCA + Percent_MBA + Specialization_MBA + Gender, data = mbaa.df)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -313284  -54850   23349   70037  672757 
## 
## Coefficients:
##                                   Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                      -255474.9    92562.2  -2.760  0.00606 ** 
## Marks_Communication                  671.3     1108.9   0.605  0.54530    
## Marks_Projectwork                   1905.0     1072.4   1.776  0.07646 .  
## Marks_BOCA                          1241.9      825.6   1.504  0.13334    
## Percent_MBA                         3013.2     1931.6   1.560  0.11959    
## Specialization_MBAMarketing & HR  -12163.0    14241.8  -0.854  0.39362    
## Specialization_MBAMarketing & IB  -23476.3    38498.8  -0.610  0.54236    
## GenderM                            64496.3    15437.0   4.178 3.65e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 133300 on 383 degrees of freedom
## Multiple R-squared:  0.08781,    Adjusted R-squared:  0.07114 
## F-statistic: 5.267 on 7 and 383 DF,  p-value: 9.296e-06

# 95% confidence intervals for the coefficients.
confint(fit)
##                                         2.5 %     97.5 %
## (Intercept)                      -437468.6303 -73481.239
## Marks_Communication                -1509.0354   2851.632
## Marks_Projectwork                   -203.4990   4013.447
## Marks_BOCA                          -381.3755   2865.235
## Percent_MBA                         -784.6002   6810.941
## Specialization_MBAMarketing & HR  -40164.8794  15838.959
## Specialization_MBAMarketing & IB  -99171.7321  52219.189
## GenderM                            34144.3318  94848.216

# Point estimates only, as a named numeric vector.
coef(fit)
##                      (Intercept)              Marks_Communication 
##                     -255474.9345                         671.2983 
##                Marks_Projectwork                       Marks_BOCA 
##                        1904.9742                        1241.9296 
##                      Percent_MBA Specialization_MBAMarketing & HR 
##                        3013.1706                      -12162.9602 
## Specialization_MBAMarketing & IB                          GenderM 
##                      -23476.2714                       64496.2738

Summary:

Gender: Male (264) > Female (127)

SSC board: Others (201) > CBSE (113) > ICSE (77)

Entrance test: MAT (265) > None (67) > K-MAT (24) > CAT (22) > PGCET (8) > GCET (2) > others (G-MAT, G-SAT, XAT; 3 in total)

Placement: Placed (312) > Not Placed (79)