Exploratory Data Analysis

Using an employees data set, a linear model is estimated and applied. It also shows how to use the model error (the residual standard error, i.e. the square root of the MSE) to explore the data and fit more appropriate models.

# Load the employee records and preview the first rows.
data <- read.csv(file = "employees.csv")
head(data)
##   X received requested negotiated gender year
## 1 1     12.1       9.5       TRUE      M 2005
## 2 2      8.9       9.9       TRUE      F 2006
## 3 3      8.8      18.1       TRUE      M 2007
## 4 4      7.1      11.8       TRUE      F 2008
## 5 5     10.2      12.5       TRUE      M 2009
## 6 6      7.0      10.2       TRUE      F 2005
# Put the columns on the search path; kept because code further down may
# refer to columns by their bare names.
attach(data)
# Scatter plot: `received` on the x axis, `requested` on the y axis.
plot(data$received, data$requested)

Including Plots

You can also embed plots, for example:

# Scatter plot of the data with the least-squares line overlaid.
plot(data$received, data$requested)
# Regress `requested` on `received`. The data frame is passed explicitly so
# the fit does not depend on the columns being attach()ed to the search path.
model <- lm(requested ~ received, data = data)
abline(model, col = "red")

# Scatter plot with the fitted line and a band of +/- one residual standard
# error around it.
plot(data$received, data$requested)
# Regress `requested` on `received`. The data frame is passed explicitly so
# the fit does not depend on the columns being attach()ed to the search path.
model <- lm(requested ~ received, data = data)
abline(model, col = "red")
# Residual standard error of the fit.
e <- summary(model)$sigma
e
## [1] 2.109446
# Slope: the second coefficient of the fit.
a <- model$coefficients[2]
a
##  received 
## 0.6110991
# Intercept: the first coefficient of the fit.
b <- model$coefficients[1]
b
## (Intercept) 
##     3.02972
# abline(intercept, slope): the fitted line shifted up and down by `e`.
abline(b + e, a, col = "red", lty = 2)
abline(b - e, a, col = "red", lty = 2)

## Creating 2 Models

# Scatter plot with `requested` on the x axis, because the two models that
# follow are split on `requested` and use it as the predictor of `received`.
plot(data$requested, data$received)
# Threshold requested = 10 that separates the two sub-models.
abline(v = 10, col = "green")

# Model A: regress `received` on `requested` using only the observations
# with requested <= 10.
modelA <- lm(
  received[requested <= 10] ~ requested[requested <= 10],
  data = data
)
summary(modelA)
## 
## Call:
## lm(formula = received[requested <= 10] ~ requested[requested <= 
##     10], data = data)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -7.6700 -0.4451 -0.0764  0.2960 16.4357 
## 
## Coefficients:
##                            Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                 0.79335    0.22472    3.53 0.000438 ***
## requested[requested <= 10]  0.94249    0.03152   29.90  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.375 on 830 degrees of freedom
## Multiple R-squared:  0.5186, Adjusted R-squared:  0.518 
## F-statistic: 894.2 on 1 and 830 DF,  p-value: < 2.2e-16
      # Model B: regress `received` on `requested` using only the observations
      # with requested > 10.
      modelB <- lm(
        received[requested > 10] ~ requested[requested > 10],
        data = data
      )
      summary(modelB)
## 
## Call:
## lm(formula = received[requested > 10] ~ requested[requested > 
##     10], data = data)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -13.1237  -2.8552  -0.3816   3.1323  11.4395 
## 
## Coefficients:
##                           Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                 7.8134     1.8760   4.165    5e-05 ***
## requested[requested > 10]   0.3026     0.1420   2.131   0.0346 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 4.544 on 166 degrees of freedom
## Multiple R-squared:  0.02662,    Adjusted R-squared:  0.02076 
## F-statistic:  4.54 on 1 and 166 DF,  p-value: 0.03458
# Coefficients and residual standard errors of both models

# Residual standard error of each fit.
eA <- summary(modelA)$sigma
eA
## [1] 1.374526
eB <- summary(modelB)$sigma
eB
## [1] 4.544424
# Slope (second coefficient) and intercept (first coefficient) of each fit.
aA <- coef(modelA)[2]
bA <- coef(modelA)[1]
aB <- coef(modelB)[2]
bB <- coef(modelB)[1]
# Plot the two piecewise fits (model A on [0, 10], model B on [10, 20]) with
# a band of +/- one residual standard error around each of them.
#
# Both models predict `received` from `requested`, so `requested` goes on the
# x axis and `received` on the y axis; the segments below are computed as
# y = intercept + slope * x with x measured in units of `requested`.
plot(data$requested, data$received)
# Threshold requested = 10 that separates the two sub-models.
abline(v = 10, col = "green")

# Draw the straight line y = intercept + slope * x between x_from and x_to.
# Extra arguments (col, lty, ...) are forwarded to segments().
draw_fit_segment <- function(x_from, x_to, slope, intercept, ...) {
  segments(
    x_from, intercept + slope * x_from,
    x_to, intercept + slope * x_to,
    ...
  )
}

# Fitted lines of model A and model B.
draw_fit_segment(0, 10, aA, bA, col = "blue")
draw_fit_segment(10, 20, aB, bB, col = "darkgreen")

# Model A band: fitted line shifted by +/- its residual standard error eA.
draw_fit_segment(0, 10, aA, bA + eA, col = "blue", lty = 2)
draw_fit_segment(0, 10, aA, bA - eA, col = "blue", lty = 2)

# Model B band: fitted line shifted by +/- its own residual standard error
# eB (both the upper and the lower bound use eB, not model A's eA).
draw_fit_segment(10, 20, aB, bB + eB, col = "green", lty = 2)
draw_fit_segment(10, 20, aB, bB - eB, col = "green", lty = 2)