Reading the data from the CSV file:
# Load the Dean's Dilemma data set from the working directory.
# The file name is passed directly: wrapping a single literal in
# paste(..., sep = "") returned the same string and added nothing.
# stringsAsFactors = TRUE keeps text columns as factors (the default before
# R 4.0), which the per-level counts in the summary output below rely on.
mbaa.df <- read.csv("Data - Deans Dilemma.csv", stringsAsFactors = TRUE)
Getting summary stats:
# Print a per-column summary of the data set: min/quartiles/mean/max for
# numeric columns and level counts for factor columns (see output below).
summary(mbaa.df)
## SlNo Gender Gender.B Percent_SSC Board_SSC
## Min. : 1.0 F:127 Min. :0.0000 Min. :37.00 CBSE :113
## 1st Qu.: 98.5 M:264 1st Qu.:0.0000 1st Qu.:56.00 ICSE : 77
## Median :196.0 Median :0.0000 Median :64.50 Others:201
## Mean :196.0 Mean :0.3248 Mean :64.65
## 3rd Qu.:293.5 3rd Qu.:1.0000 3rd Qu.:74.00
## Max. :391.0 Max. :1.0000 Max. :87.20
##
## Board_CBSE Board_ICSE Percent_HSC Board_HSC
## Min. :0.000 Min. :0.0000 Min. :40.0 CBSE : 96
## 1st Qu.:0.000 1st Qu.:0.0000 1st Qu.:54.0 ISC : 48
## Median :0.000 Median :0.0000 Median :63.0 Others:247
## Mean :0.289 Mean :0.1969 Mean :63.8
## 3rd Qu.:1.000 3rd Qu.:0.0000 3rd Qu.:72.0
## Max. :1.000 Max. :1.0000 Max. :94.7
##
## Stream_HSC Percent_Degree Course_Degree
## Arts : 18 Min. :35.00 Arts : 13
## Commerce:222 1st Qu.:57.52 Commerce :117
## Science :151 Median :63.00 Computer Applications: 32
## Mean :62.98 Engineering : 37
## 3rd Qu.:69.00 Management :163
## Max. :89.00 Others : 5
## Science : 24
## Degree_Engg Experience_Yrs Entrance_Test S.TEST
## Min. :0.00000 Min. :0.0000 MAT :265 Min. :0.0000
## 1st Qu.:0.00000 1st Qu.:0.0000 None : 67 1st Qu.:1.0000
## Median :0.00000 Median :0.0000 K-MAT : 24 Median :1.0000
## Mean :0.09463 Mean :0.4783 CAT : 22 Mean :0.8286
## 3rd Qu.:0.00000 3rd Qu.:1.0000 PGCET : 8 3rd Qu.:1.0000
## Max. :1.00000 Max. :3.0000 GCET : 2 Max. :1.0000
## (Other): 3
## Percentile_ET S.TEST.SCORE Percent_MBA
## Min. : 0.00 Min. : 0.00 Min. :50.83
## 1st Qu.:41.19 1st Qu.:41.19 1st Qu.:57.20
## Median :62.00 Median :62.00 Median :61.01
## Mean :54.93 Mean :54.93 Mean :61.67
## 3rd Qu.:78.00 3rd Qu.:78.00 3rd Qu.:66.02
## Max. :98.69 Max. :98.69 Max. :77.89
##
## Specialization_MBA Marks_Communication Marks_Projectwork
## Marketing & Finance:222 Min. :50.00 Min. :50.00
## Marketing & HR :156 1st Qu.:53.00 1st Qu.:64.00
## Marketing & IB : 13 Median :58.00 Median :69.00
## Mean :60.54 Mean :68.36
## 3rd Qu.:67.00 3rd Qu.:74.00
## Max. :88.00 Max. :87.00
##
## Marks_BOCA Placement Placement_B Salary
## Min. :50.00 Not Placed: 79 Min. :0.000 Min. : 0
## 1st Qu.:57.00 Placed :312 1st Qu.:1.000 1st Qu.:172800
## Median :63.00 Median :1.000 Median :240000
## Mean :64.38 Mean :0.798 Mean :219078
## 3rd Qu.:72.50 3rd Qu.:1.000 3rd Qu.:300000
## Max. :96.00 Max. :1.000 Max. :940000
##
Plotting each important variable:
# Frequency bar charts for the categorical variables.
barplot(table(mbaa.df$Gender),
        main = "Frequency distribution of gender",
        xlab = "Gender", ylab = "Frequency")
barplot(table(mbaa.df$Board_SSC),
        main = "Frequency distribution of Different Boards",
        xlab = "Boards", ylab = "Frequency")
barplot(table(mbaa.df$Entrance_Test),
        main = "Frequency distribution of Different Entrance test",
        xlab = "Test", ylab = "Frequency")
barplot(table(mbaa.df$Stream_HSC),
        main = "Frequency distribution of Different stream",
        xlab = "Stream", ylab = "Frequency")
barplot(table(mbaa.df$Course_Degree),
        main = "Frequency distribution of Different Course degree",
        xlab = "Course", ylab = "Frequency")
barplot(table(mbaa.df$Specialization_MBA),
        main = "Frequency distribution of Different Specialization of MBA",
        xlab = "Specialization", ylab = "Frequency")
barplot(table(mbaa.df$Placement),
        main = "Frequency distribution of status of placement",
        xlab = "Status of placement", ylab = "Frequency")

# Histograms of the MBA marks.
hist(mbaa.df$Marks_Communication,
     main = "Distribution of marks of Communication",
     xlab = "Marks", col = "grey")
hist(mbaa.df$Marks_Projectwork,
     main = "Distribution of marks of Project",
     xlab = "Marks", col = "grey")
hist(mbaa.df$Marks_BOCA,
     main = "Distribution of marks of BOCA",
     xlab = "Marks", col = "grey")

# The title promises placed students only, but Salary is 0 for students who
# were not placed (summary: Min. 0, 79 "Not Placed"). Restrict to rows with
# Placement_B == 1 so those zeros do not distort the histogram.
placed_salary <- mbaa.df$Salary[mbaa.df$Placement_B == 1]
hist(placed_salary,
     main = "Distribution of Salary of placed students",
     xlab = "Salary", col = "grey")
Plotting pairs of related variables:
# Bar charts of a percentage column summed within each level of a factor.
# tapply() returns the same per-group sums as by(), as a plain named array.
# Titles now say which percentage and which grouping each chart shows (two
# charts previously shared one title), and the x-axis labels name the
# grouping variable instead of the plotted quantity.
# NOTE(review): a sum of percentages grows with the number of students in a
# group, so these bars largely track group size; confirm whether a per-group
# mean was intended.
barplot(tapply(mbaa.df$Percent_SSC, mbaa.df$Board_SSC, sum),
        main = "Sum of SSC percentages by SSC board",
        xlab = "SSC board", ylab = "Total")
barplot(tapply(mbaa.df$Percent_HSC, mbaa.df$Board_HSC, sum),
        main = "Sum of HSC percentages by HSC board",
        xlab = "HSC board", ylab = "Total")
barplot(tapply(mbaa.df$Percent_HSC, mbaa.df$Course_Degree, sum),
        main = "Sum of HSC percentages by degree course",
        xlab = "Course degree", ylab = "Total")
barplot(tapply(mbaa.df$Percent_Degree, mbaa.df$Course_Degree, sum),
        main = "Sum of degree percentages by degree course",
        xlab = "Course degree", ylab = "Total")

# Stacked counts of HSC stream within each HSC board. The legend is needed
# to tell the stacked stream segments apart.
barplot(table(mbaa.df$Stream_HSC, mbaa.df$Board_HSC),
        main = "Distribution of boards and corresponding stream",
        xlab = "Board", ylab = "Frequency",
        legend.text = TRUE)
Plotting Corrgram
library(corrgram)
# Draw a correlogram of the data set: shaded cells in the lower triangle,
# pie glyphs in the upper triangle, and panel.txt for the text panel.
# order = TRUE asks corrgram to reorder the variables.
corrgram(
  mbaa.df,
  main = "Corrgram of MBA variables",
  order = TRUE,
  lower.panel = panel.shade,
  upper.panel = panel.pie,
  text.panel = panel.txt
)
Applying OLS regression:
# Fit an OLS regression of Salary on the three MBA marks, the overall MBA
# percentage, the MBA specialization and gender.
# Columns are named in the formula and looked up through `data =` rather
# than written as mbaa.df$column: the estimates are identical, but the term
# names stay clean and predict(fit, newdata = ...) can work on new data.
fit <- lm(
  Salary ~ Marks_Communication + Marks_Projectwork + Marks_BOCA +
    Percent_MBA + Specialization_MBA + Gender,
  data = mbaa.df
)
# Coefficient table, residual summary and overall fit statistics.
summary(fit)
##
## Call:
## lm(formula = Salary ~ Marks_Communication + Marks_Projectwork +
## Marks_BOCA + Percent_MBA + Specialization_MBA + Gender, data = mbaa.df)
##
## Residuals:
## Min 1Q Median 3Q Max
## -313284 -54850 23349 70037 672757
##
## Coefficients:
## Estimate Std. Error t value
## (Intercept) -255474.9 92562.2 -2.760
## Marks_Communication 671.3 1108.9 0.605
## Marks_Projectwork 1905.0 1072.4 1.776
## Marks_BOCA 1241.9 825.6 1.504
## Percent_MBA 3013.2 1931.6 1.560
## Specialization_MBAMarketing & HR -12163.0 14241.8 -0.854
## Specialization_MBAMarketing & IB -23476.3 38498.8 -0.610
## GenderM 64496.3 15437.0 4.178
## Pr(>|t|)
## (Intercept) 0.00606 **
## Marks_Communication 0.54530
## Marks_Projectwork 0.07646 .
## Marks_BOCA 0.13334
## Percent_MBA 0.11959
## Specialization_MBAMarketing & HR 0.39362
## Specialization_MBAMarketing & IB 0.54236
## GenderM 3.65e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 133300 on 383 degrees of freedom
## Multiple R-squared: 0.08781, Adjusted R-squared: 0.07114
## F-statistic: 5.267 on 7 and 383 DF, p-value: 9.296e-06
# 95% confidence intervals for the coefficients.
confint(fit)
## 2.5 % 97.5 %
## (Intercept) -437468.6303 -73481.239
## Marks_Communication -1509.0354 2851.632
## Marks_Projectwork -203.4990 4013.447
## Marks_BOCA -381.3755 2865.235
## Percent_MBA -784.6002 6810.941
## Specialization_MBAMarketing & HR -40164.8794 15838.959
## Specialization_MBAMarketing & IB -99171.7321 52219.189
## GenderM 34144.3318 94848.216
# Point estimates only.
coefficients(fit)
## (Intercept)
## -255474.9345
## Marks_Communication
## 671.2983
## Marks_Projectwork
## 1904.9742
## Marks_BOCA
## 1241.9296
## Percent_MBA
## 3013.1706
## Specialization_MBAMarketing & HR
## -12162.9602
## Specialization_MBAMarketing & IB
## -23476.2714
## GenderM
## 64496.2738
Summary:
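Gender: Male (264) > Female (127)
SSC board: Others (201) > CBSE (113) > ICSE (77)
Entrance test: MAT (265) >> None (67) >> K-MAT (24) > CAT (22) > PGCET (8) > GCET (2); the remaining tests (G-MAT, G-SAT, XAT) account for 3 students in total
Placement: Placed (312) >> Not Placed (79)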