Untitled

# Load the MBA survey data from the current working directory.
sal.df <- read.csv("mba.csv")

# View() opens a GUI data viewer; only call it in an interactive session so
# that non-interactive runs (Rscript, knitting) do not depend on a viewer.
if (interactive()) {
  View(sal.df)
}

# Drop rows whose salary is one of the codes 998 / 999 (presumably
# "no salary reported" placeholders -- TODO confirm against the codebook).
# Rows with salary == 0 are kept on purpose: later code in this script
# derives an `employed` flag from the zero salaries in abc.df.
# The is.na() guard removes missing salaries outright; a bare logical index
# containing NA would instead produce rows filled entirely with NA.
has_salary_value <- !is.na(sal.df$salary) & !(sal.df$salary %in% c(998, 999))
abc.df <- sal.df[has_salary_value, ]

# Mean salary for each value of `sex` (labelled "Gender" in the result).
aggregate(abc.df$salary, by = list(Gender = abc.df$sex), mean)

##   Gender        x
## 1      1 54373.45
## 2      2 56560.30

# Split the filtered data by sex code (1 is plotted as "Male", 2 as "Female")
# and draw the two salary distributions side by side.
male <- subset(abc.df, sex == "1")
female <- subset(abc.df, sex == "2")
boxplot(
  male$salary, female$salary,
  names = c("Male", "Female"),
  main = "Starting Salary of male vs female",
  col = c("green", "purple")
)

# Number of respondents per sex code, using the full (unfiltered) data.
barplot(
  table(sal.df[["sex"]]),
  col = c("green", "purple"),
  xlab = "1=Male 2=Female",
  main = "Distribution of data(on gender)"
)

# Count of each satisfaction rating in the full data; the 998 code is not
# filtered out here, so it is tabulated alongside the real ratings.
barplot(
  table(sal.df[["satis"]]),
  col = "blue",
  xlab = "998=not attended survey",
  main = "Satisfaction of students(ratings by them)"
)

# Histogram of total GMAT scores (full data).
hist(
  sal.df[["gmat_tot"]],
  breaks = 40,
  col = "green",
  main = "Gmat Score distribution",
  xlab = "score out of 800"
)

# Histogram of salaries after the 998 / 999 codes were removed.
hist(
  abc.df[["salary"]],
  breaks = 10,
  col = "pink",
  main = "Salary  distribution",
  xlab = "starting salary"
)

library(car)
library(psych)

## 
## Attaching package: 'psych'

## The following object is masked from 'package:car':
## 
##     logit

# Salary plotted against each candidate predictor in turn, all on abc.df.
# The first two calls additionally pass spread = FALSE and a dashed line
# type (lty = 2) through smoother.args.

# Salary vs age.
scatterplot(
  salary ~ age,
  data = abc.df,
  main = "Scatter plot of salary vs age",
  xlab = "Age",
  ylab = "Salary",
  spread = FALSE,
  smoother.args = list(lty = 2)
)

# Salary vs sex code.
scatterplot(
  salary ~ sex,
  data = abc.df,
  main = "Scatter plot of salary vs sex",
  xlab = "Sex",
  ylab = "Salary",
  spread = FALSE,
  smoother.args = list(lty = 2)
)

# Salary vs total GMAT score.
scatterplot(
  salary ~ gmat_tot,
  data = abc.df,
  main = "Scatter plot of salary vs Gmat total",
  xlab = "GMAT score",
  ylab = "Salary"
)

# Salary vs years of work experience.
scatterplot(
  salary ~ work_yrs,
  data = abc.df,
  main = "Scatter plot of salary vs Work exp.",
  xlab = "Work experience in years",
  ylab = "Salary"
)

# Salary vs satisfaction rating.
scatterplot(
  salary ~ satis,
  data = abc.df,
  main = "Scatter plot of salary vs satisfaction",
  xlab = "Degree of satisfaction",
  ylab = "Salary"
)

# NOTE(review): this hands the entire data frame to chisq.test(), which then
# treats it as one large two-dimensional contingency table (one row per
# respondent, one column per variable), with values such as age, GMAT scores
# and salary acting as cell counts. The result is therefore not a test of
# association between any particular pair of survey variables -- confirm
# which variables were meant to be cross-tabulated here.
chisq.test(sal.df)

## Warning in chisq.test(sal.df): Chi-squared approximation may be incorrect

## 
##  Pearson's Chi-squared test
## 
## data:  sal.df
## X-squared = 8451300, df = 3276, p-value < 2.2e-16

# Tabulate how many respondents fall under each sex code (full data).
x <- xtabs(~ sal.df$sex)
print(x)

## sal.df$sex
##   1   2 
## 206  68

# Rows used for modelling: drop the 998 / 999 salary codes and the
# zero-salary rows, keeping every column.
mba <- subset(sal.df, salary != 998 & salary != 999 & salary != 0)

# Linear regression of salary on age, the GMAT measures, satisfaction and
# first language, followed by the usual model summary.
m2 <- lm(
  salary ~ age + gmat_tot + gmat_qpc + gmat_vpc + gmat_tpc + satis + frstlang,
  data = mba
)
summary(m2)

## 
## Call:
## lm(formula = salary ~ age + gmat_tot + gmat_qpc + gmat_vpc + 
##     gmat_tpc + satis + frstlang, data = mba)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -27442  -9074    -26   5449  65805 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 51877.83   47122.04   1.101   0.2737    
## age          2720.36     507.19   5.364 5.73e-07 ***
## gmat_tot      -27.96     162.79  -0.172   0.8640    
## gmat_qpc      841.99     471.63   1.785   0.0774 .  
## gmat_vpc      567.02     477.05   1.189   0.2376    
## gmat_tpc    -1309.36     699.44  -1.872   0.0643 .  
## satis       -1688.22    2036.28  -0.829   0.4091    
## frstlang     4176.03    6703.97   0.623   0.5348    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 15270 on 95 degrees of freedom
## Multiple R-squared:  0.3198, Adjusted R-squared:  0.2697 
## F-statistic:  6.38 on 7 and 95 DF,  p-value: 3.716e-06

# Show the fitted coefficients of m2 as a named numeric vector.
coef(m2)

## (Intercept)         age    gmat_tot    gmat_qpc    gmat_vpc    gmat_tpc 
## 51877.82882  2720.35989   -27.95661   841.99273   567.02250 -1309.35553 
##       satis    frstlang 
## -1688.21827  4176.02630

# Derive an employment flag from salary: 0 where the recorded salary is 0,
# 1 otherwise. It is built in a single vectorised step so that only the new
# column is written; a blanket `abc.df[is.na(abc.df)] <- 1` would also
# overwrite any missing value in every other column with 1.
abc.df$employed <- ifelse(abc.df$salary == 0, 0, 1)

# Cross-tabulate the flag against s_avg and run Pearson's chi-squared test.
# Many cells of this table are sparse, so R may warn that the chi-squared
# approximation is unreliable.
tb <- xtabs(~ employed + s_avg, data = abc.df)
chisq.test(tb)

## Warning in chisq.test(tb): Chi-squared approximation may be incorrect

## 
##  Pearson's Chi-squared test
## 
## data:  tb
## X-squared = 33.09, df = 30, p-value = 0.3187

# Chi-squared test on the employed-by-f_avg contingency table.
tb1 <- xtabs(~ employed + f_avg, data = abc.df)
chisq.test(tb1)

## Warning in chisq.test(tb1): Chi-squared approximation may be incorrect

## 
##  Pearson's Chi-squared test
## 
## data:  tb1
## X-squared = 14.576, df = 18, p-value = 0.6908

# Chi-squared test on the employed-by-salary contingency table.
# NOTE(review): `employed` is computed from `salary` earlier in this script
# (it is 0 exactly where salary is 0), so a strong association here is
# guaranteed by construction and says nothing new about the data -- confirm
# whether this test is actually wanted.
tb2 <- xtabs(~employed+salary,data = abc.df)
chisq.test(tb2)

## Warning in chisq.test(tb2): Chi-squared approximation may be incorrect

## 
##  Pearson's Chi-squared test
## 
## data:  tb2
## X-squared = 193, df = 42, p-value < 2.2e-16

# Chi-squared test on the employed-by-quarter contingency table.
tb3 <- xtabs(~ employed + quarter, data = abc.df)
chisq.test(tb3)

## 
##  Pearson's Chi-squared test
## 
## data:  tb3
## X-squared = 4.9172, df = 3, p-value = 0.178

R Markdown

library(psych)
# Per-column descriptive statistics for the raw, unfiltered data. The
# 998 / 999 codes are still present in sal.df at this point, so they feed
# into the salary and satis figures.
psych::describe(sal.df)

##          vars   n     mean       sd median  trimmed     mad min    max
## age         1 274    27.36     3.71     27    26.76    2.97  22     48
## sex         2 274     1.25     0.43      1     1.19    0.00   1      2
## gmat_tot    3 274   619.45    57.54    620   618.86   59.30 450    790
## gmat_qpc    4 274    80.64    14.87     83    82.31   14.83  28     99
## gmat_vpc    5 274    78.32    16.86     81    80.33   14.83  16     99
## gmat_tpc    6 274    84.20    14.02     87    86.12   11.86   0     99
## s_avg       7 274     3.03     0.38      3     3.03    0.44   2      4
## f_avg       8 274     3.06     0.53      3     3.09    0.37   0      4
## quarter     9 274     2.48     1.11      2     2.47    1.48   1      4
## work_yrs   10 274     3.87     3.23      3     3.29    1.48   0     22
## frstlang   11 274     1.12     0.32      1     1.02    0.00   1      2
## salary     12 274 39025.69 50951.56    999 33607.86 1481.12   0 220000
## satis      13 274   172.18   371.61      6    91.50    1.48   1    998
##           range  skew kurtosis      se
## age          26  2.16     6.45    0.22
## sex           1  1.16    -0.66    0.03
## gmat_tot    340 -0.01     0.06    3.48
## gmat_qpc     71 -0.92     0.30    0.90
## gmat_vpc     83 -1.04     0.74    1.02
## gmat_tpc     99 -2.28     9.02    0.85
## s_avg         2 -0.06    -0.38    0.02
## f_avg         4 -2.08    10.85    0.03
## quarter       3  0.02    -1.35    0.07
## work_yrs     22  2.78     9.80    0.20
## frstlang      1  2.37     3.65    0.02
## salary   220000  0.70    -1.05 3078.10
## satis       997  1.77     1.13   22.45

This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown see http://rmarkdown.rstudio.com.

When you click the Knit button a document will be generated that includes both content as well as the output of any embedded R code chunks within the document. You can embed an R code chunk like this:

# Template example: summarise R's built-in `cars` data set.
summary(object = cars)

##      speed           dist       
##  Min.   : 4.0   Min.   :  2.00  
##  1st Qu.:12.0   1st Qu.: 26.00  
##  Median :15.0   Median : 36.00  
##  Mean   :15.4   Mean   : 42.98  
##  3rd Qu.:19.0   3rd Qu.: 56.00  
##  Max.   :25.0   Max.   :120.00

Including Plots

You can also embed plots, for example:

Note that the echo = FALSE parameter was added to the code chunk to prevent printing of the R code that generated the plot.