rm(list = ls())
#install.packages('tinytex')
#tinytex::install_tinytex()
library(readxl)
## Warning: package 'readxl' was built under R version 4.3.2
dados <- read_excel("ceosal2.xlsx")
## New names:
## • `` -> `...1`
## • `` -> `...2`
## • `` -> `...3`
## • `` -> `...4`
## • `` -> `...5`
## • `` -> `...6`
## • `` -> `...9`
## • `` -> `...10`
## • `` -> `...11`
## • `` -> `...12`
## • `` -> `...13`
## • `` -> `...14`
## • `` -> `...15`
## • `` -> `...16`
## • `` -> `...17`
head(dados)
## # A tibble: 6 × 17
## ...1 ...2 ...3 ...4 ...5 ...6 Variable Description ...9 ...10 ...11
## <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr>
## 1 Data on … <NA> <NA> <NA> <NA> <NA> salary 1990 compe… <NA> <NA> <NA>
## 2 Goal: Ex… <NA> <NA> <NA> <NA> <NA> age age in yea… <NA> <NA> <NA>
## 3 Source: … <NA> <NA> <NA> <NA> <NA> college 1 if atten… <NA> <NA> <NA>
## 4 <NA> <NA> <NA> <NA> <NA> <NA> grad 1 if atten… <NA> <NA> <NA>
## 5 <NA> <NA> <NA> <NA> <NA> <NA> comten years with… <NA> <NA> <NA>
## 6 <NA> <NA> <NA> <NA> <NA> <NA> ceoten years as c… <NA> <NA> <NA>
## # ℹ 6 more variables: ...12 <chr>, ...13 <chr>, ...14 <chr>, ...15 <chr>,
## # ...16 <chr>, ...17 <lgl>
# Keep the data dictionary: the first 15 rows of columns 1, 7 and 8
# (free-text notes, variable name, variable description).
Descricao <- dados[seq_len(15), c(1, 7, 8)]
head(Descricao, n = 15)
## # A tibble: 15 × 3
## ...1 Variable Description
## <chr> <chr> <chr>
## 1 Data on 177 chief executive officers salary 1990 compe…
## 2 Goal: Examine the effects of firm performance on CEO sa… age age in yea…
## 3 Source: Wooldridge (2012) page 111 college 1 if atten…
## 4 <NA> grad 1 if atten…
## 5 <NA> comten years with…
## 6 <NA> ceoten years as c…
## 7 <NA> sales 1990 firm …
## 8 <NA> profits 1990 profi…
## 9 <NA> mktval market val…
## 10 <NA> lsalary log salary
## 11 <NA> lsales log sales
## 12 <NA> lmktval log mktval
## 13 <NA> comtensq square com…
## 14 <NA> ceotensq square ceo…
## 15 <NA> profmarg profits as…
# Keep the data table itself: row 18 onwards, first 16 columns.
# At this point its first row still holds the variable names.
dados <- dados[seq(18, nrow(dados)), seq_len(16)]
head(dados)
## # A tibble: 6 × 16
## ...1 ...2 ...3 ...4 ...5 ...6 Variable Description ...9 ...10 ...11
## <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr>
## 1 Obs salary age college grad comten ceoten sales prof… mktv… lsal…
## 2 1 1091 33 1 0 9 9 181 36 1300 6.99…
## 3 2 310 40 1 0 18 1 2400 60 1300 5.73…
## 4 3 474 40 1 0 18 1 2700 117 2000 6.16…
## 5 4 989 40 1 0 18 5 439 30 582 6.89…
## 6 5 2792 40 1 0 11 11 534 35 888 7.93…
## # ℹ 5 more variables: ...12 <chr>, ...13 <chr>, ...14 <chr>, ...15 <chr>,
## # ...16 <chr>
# Promote the first row (the real header) to column names, then drop it.
# unlist() hands `colnames<-` a plain character vector instead of a one-row
# tibble, so the assignment no longer depends on implicit list coercion.
colnames(dados) <- as.character(unlist(dados[1, ], use.names = FALSE))
dados <- dados[-1, ]
head(dados)
## # A tibble: 6 × 16
## Obs salary age college grad comten ceoten sales profits mktval lsalary
## <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr>
## 1 1 1091 33 1 0 9 9 181 36 1300 6.9948499…
## 2 2 310 40 1 0 18 1 2400 60 1300 5.7365719…
## 3 3 474 40 1 0 18 1 2700 117 2000 6.1612070…
## 4 4 989 40 1 0 18 5 439 30 582 6.8966940…
## 5 5 2792 40 1 0 11 11 534 35 888 7.9345140…
## 6 6 693 42 1 0 17 12 1400 206 3000 6.5410300…
## # ℹ 5 more variables: lsales <chr>, lmktval <chr>, comtensq <chr>,
## # ceotensq <chr>, profmarg <chr>
# attach() is deliberately not used: called here it would put copies of the
# columns on the search path *before* the numeric conversion below, so bare
# names such as `salary` would keep pointing at the stale character versions.
vapply(dados, class, character(1))
## Obs salary age college grad comten
## "character" "character" "character" "character" "character" "character"
## ceoten sales profits mktval lsalary lsales
## "character" "character" "character" "character" "character" "character"
## lmktval comtensq ceotensq profmarg
## "character" "character" "character" "character"
# Every column was read as text; convert all of them to numeric.
dados <- as.data.frame(lapply(dados, as.numeric))
vapply(dados, class, character(1))
## Obs salary age college grad comten ceoten sales
## "numeric" "numeric" "numeric" "numeric" "numeric" "numeric" "numeric" "numeric"
## profits mktval lsalary lsales lmktval comtensq ceotensq profmarg
## "numeric" "numeric" "numeric" "numeric" "numeric" "numeric" "numeric" "numeric"
#dados$college=factor(dados$college)
# Drop the pre-computed log / squared columns (positions 11 to 15) by name.
derived_cols <- c("lsalary", "lsales", "lmktval", "comtensq", "ceotensq")
dados <- dados[, setdiff(names(dados), derived_cols)]
library(psych)
## Warning: package 'psych' was built under R version 4.3.1
describe(dados[, -1])
## vars n mean sd median trimmed mad min max
## salary 1 177 865.86 587.59 707.00 787.95 440.33 100.00 5299.00
## age 2 177 56.43 8.42 57.00 56.49 7.41 33.00 86.00
## college 3 177 0.97 0.17 1.00 1.00 0.00 0.00 1.00
## grad 4 177 0.53 0.50 1.00 0.54 0.00 0.00 1.00
## comten 5 177 22.50 12.29 23.00 22.45 16.31 2.00 58.00
## ceoten 6 177 7.95 7.15 6.00 6.82 5.93 0.00 37.00
## sales 7 177 3529.46 6088.65 1400.00 2179.81 1577.49 29.00 51300.00
## profits 8 177 207.83 404.45 63.00 118.82 68.20 -463.00 2700.00
## mktval 9 177 3600.32 6442.28 1200.00 2029.79 1040.79 387.00 45400.00
## profmarg 10 177 6.42 17.86 6.83 7.48 5.19 -203.08 47.46
## range skew kurtosis se
## salary 5199.00 2.97 17.17 44.17
## age 53.00 0.05 0.55 0.63
## college 1.00 -5.65 30.05 0.01
## grad 1.00 -0.12 -2.00 0.04
## comten 56.00 0.02 -1.02 0.92
## ceoten 37.00 1.63 3.02 0.54
## sales 51271.00 4.14 23.27 457.65
## profits 3163.00 3.14 11.71 30.40
## mktval 45013.00 3.85 17.84 484.23
## profmarg 250.53 -9.21 104.74 1.34
# Plot the converted (numeric) columns straight from the data frame.
plot(dados$salary, dados$sales)
# dados[, 2] drops to a bare vector, which has no colnames (the original call
# returned NULL); index the name vector instead.
colnames(dados)[2]
## [1] "salary"
# Histogram with an overlaid kernel density estimate for every column except
# the first one (the observation index).
varnames <- colnames(dados)
for (i in seq_along(varnames)[-1]) {
  # freq = FALSE puts the bars on the density scale so the curve is comparable.
  hist(dados[, i], freq = FALSE, col = "lightblue",
       breaks = 150, main = varnames[i])
  lines(density(dados[, i]), col = "red", lwd = 3)
}
# Adjusted boxplots (robustbase::adjbox), one per column 2 to 11,
# each titled with its variable name.
library(robustbase)
## Warning: package 'robustbase' was built under R version 4.3.2
for (col_idx in seq(2, 11)) {
  adjbox(dados[, col_idx], main = varnames[col_idx])
}
## The default of 'doScale' is FALSE now for stability;
## set options(mc_doScale_quiet=TRUE) to suppress this (once per session) message
round(cor(dados[,-1],method = "pearson"),3)
## salary age college grad comten ceoten sales profits mktval
## salary 1.000 0.115 -0.067 -0.003 0.038 0.143 0.380 0.394 0.406
## age 0.115 1.000 -0.178 -0.123 0.479 0.339 0.127 0.115 0.107
## college -0.067 -0.178 1.000 0.181 -0.157 -0.106 -0.021 -0.046 -0.028
## grad -0.003 -0.123 0.181 1.000 -0.228 -0.103 0.076 0.098 0.123
## comten 0.038 0.479 -0.157 -0.228 1.000 0.315 0.104 0.144 0.136
## ceoten 0.143 0.339 -0.106 -0.103 0.315 1.000 -0.068 -0.022 0.007
## sales 0.380 0.127 -0.021 0.076 0.104 -0.068 1.000 0.798 0.755
## profits 0.394 0.115 -0.046 0.098 0.144 -0.022 0.798 1.000 0.918
## mktval 0.406 0.107 -0.028 0.123 0.136 0.007 0.755 0.918 1.000
## profmarg -0.029 0.015 -0.018 -0.015 0.047 0.049 -0.017 0.125 0.067
## profmarg
## salary -0.029
## age 0.015
## college -0.018
## grad -0.015
## comten 0.047
## ceoten 0.049
## sales -0.017
## profits 0.125
## mktval 0.067
## profmarg 1.000
corrplot::corrplot(cor(dados[,-1],method = "pearson"))
library(ggcorrplot)
## Warning: package 'ggcorrplot' was built under R version 4.3.2
## Carregando pacotes exigidos: ggplot2
## Warning: package 'ggplot2' was built under R version 4.3.2
##
## Attaching package: 'ggplot2'
## The following objects are masked from 'package:psych':
##
## %+%, alpha
w=corrplot::corrplot(cor(dados[,-1],method = "pearson"))
w$corr[,1]
## salary age college grad comten ceoten
## 1.000000000 0.115383944 -0.067025223 -0.002999832 0.037698187 0.142947678
## sales profits mktval profmarg
## 0.380223875 0.393927574 0.406307097 -0.028935405
# Correlation matrix of every column but the first, rounded to two decimals,
# drawn with ggcorrplot and the coefficient printed in each cell.
p <- dados[, -1] |>
  cor() |>
  round(digits = 2) |>
  as.data.frame()
ggcorrplot(corr = p, lab = TRUE)
Indícios de correlação
# Scatter plot of salary against each of columns 3 to 11, with the simple
# least-squares line of salary on that column drawn on top.
varnames <- colnames(dados)
#par(mfrow=c(7,1))
for (i in seq(3, 11)) {
  plot(dados[, i], dados$salary, col = "blue", main = varnames[i])
  fit_i <- lm(dados$salary ~ dados[, i])
  abline(fit_i, col = "red", lwd = 3)
}
#install.packages("stargazer")
#stargazer(mod1, mod2, type="text")
#Propondo modelo com todas as variaveis explicativas
colnames(dados)
## [1] "Obs" "salary" "age" "college" "grad" "comten"
## [7] "ceoten" "sales" "profits" "mktval" "profmarg"
# Full model: salary in levels on every explanatory variable.
# All variables are referenced by bare name so that `data` supplies them; a
# `dados$salary` response ignores `data`, so refitting on another data frame
# would keep the old response (or fail on a length mismatch).
fit1 <- lm(
  salary ~ age + factor(college) + factor(grad) + comten + ceoten +
    sales + profits + mktval + profmarg,
  data = dados
)
#Resumo
summary(fit1)
##
## Call:
## lm(formula = dados$salary ~ age + factor(college) + factor(grad) +
## comten + ceoten + sales + profits + mktval + profmarg, data = dados[,
## -1])
##
## Residuals:
## Min 1Q Median 3Q Max
## -1108.9 -272.7 -104.8 212.1 4485.7
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 711.48819 401.30075 1.773 0.0781 .
## age 3.17751 5.67010 0.560 0.5760
## factor(college)1 -132.53517 250.59622 -0.529 0.5976
## factor(grad)1 -56.05080 84.82088 -0.661 0.5096
## comten -5.27596 3.91034 -1.349 0.1791
## ceoten 13.75820 6.17652 2.228 0.0273 *
## sales 0.01606 0.01133 1.417 0.1582
## profits 0.10527 0.28315 0.372 0.7105
## mktval 0.02115 0.01606 1.316 0.1898
## profmarg -1.83252 2.33473 -0.785 0.4336
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 534.4 on 167 degrees of freedom
## Multiple R-squared: 0.2153, Adjusted R-squared: 0.173
## F-statistic: 5.091 on 9 and 167 DF, p-value: 4.383e-06
shapiro.test(fit1$residuals)
##
## Shapiro-Wilk normality test
##
## data: fit1$residuals
## W = 0.72149, p-value < 2.2e-16
# library() stops with an error when a package is missing, whereas require()
# only returns FALSE and lets the failure surface later at vif().
library(MASS)
## Warning: package 'MASS' was built under R version 4.3.2
library(car)
## Warning: package 'car' was built under R version 4.3.1
## Carregando pacotes exigidos: carData
## Warning: package 'carData' was built under R version 4.3.1
##
## Attaching package: 'car'
## The following object is masked from 'package:psych':
##
## logit
# Variance inflation factors of the full model.
vif(fit1)
## age factor(college) factor(grad) comten ceoten
## 1.405599 1.068617 1.110672 1.424715 1.202431
## sales profits mktval profmarg
## 2.931698 8.084125 6.600523 1.071851
library(lmtest)
## Warning: package 'lmtest' was built under R version 4.3.1
## Carregando pacotes exigidos: zoo
## Warning: package 'zoo' was built under R version 4.3.1
##
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
bptest(fit1)
##
## studentized Breusch-Pagan test
##
## data: fit1
## BP = 3.1801, df = 9, p-value = 0.9567
# lm diagnostic plots 1 to 4 for fit1, two per page on a grey background.
par(mfrow = c(1, 2), bg = "grey")
plot(fit1, 1:4)
# Stepwise selection by AIC starting from the full model. library() is used
# instead of require() so a missing MASS fails here rather than at stepAIC().
library(MASS)
stepAIC(fit1)
## Start: AIC=2233.2
## dados$salary ~ age + factor(college) + factor(grad) + comten +
## ceoten + sales + profits + mktval + profmarg
##
## Df Sum of Sq RSS AIC
## - profits 1 39469 47723036 2231.3
## - factor(college) 1 79867 47763433 2231.5
## - age 1 89669 47773236 2231.5
## - factor(grad) 1 124684 47808250 2231.7
## - profmarg 1 175903 47859470 2231.8
## - mktval 1 494803 48178369 2233.0
## - comten 1 519787 48203353 2233.1
## <none> 47683567 2233.2
## - sales 1 573686 48257253 2233.3
## - ceoten 1 1416733 49100300 2236.4
##
## Step: AIC=2231.35
## dados$salary ~ age + factor(college) + factor(grad) + comten +
## ceoten + sales + mktval + profmarg
##
## Df Sum of Sq RSS AIC
## - factor(college) 1 85783 47808819 2229.7
## - age 1 89221 47812257 2229.7
## - factor(grad) 1 125951 47848987 2229.8
## - profmarg 1 147994 47871030 2229.9
## - comten 1 507756 48230791 2231.2
## <none> 47723036 2231.3
## - sales 1 861049 48584085 2232.5
## - ceoten 1 1395275 49118310 2234.4
## - mktval 1 2017511 49740547 2236.7
##
## Step: AIC=2229.66
## dados$salary ~ age + factor(grad) + comten + ceoten + sales +
## mktval + profmarg
##
## Df Sum of Sq RSS AIC
## - age 1 110041 47918861 2228.1
## - profmarg 1 146404 47955223 2228.2
## - factor(grad) 1 163314 47972134 2228.3
## - comten 1 491045 48299864 2229.5
## <none> 47808819 2229.7
## - sales 1 857073 48665892 2230.8
## - ceoten 1 1417647 49226466 2232.8
## - mktval 1 2037717 49846536 2235.1
##
## Step: AIC=2228.07
## dados$salary ~ factor(grad) + comten + ceoten + sales + mktval +
## profmarg
##
## Df Sum of Sq RSS AIC
## - profmarg 1 149159 48068020 2226.6
## - factor(grad) 1 167505 48086365 2226.7
## - comten 1 384568 48303429 2227.5
## <none> 47918861 2228.1
## - sales 1 935063 48853923 2229.5
## - ceoten 1 1710449 49629309 2232.3
## - mktval 1 2003113 49921973 2233.3
##
## Step: AIC=2226.62
## dados$salary ~ factor(grad) + comten + ceoten + sales + mktval
##
## Df Sum of Sq RSS AIC
## - factor(grad) 1 162312 48230332 2225.2
## - comten 1 396375 48464394 2226.1
## <none> 48068020 2226.6
## - sales 1 1022114 49090133 2228.3
## - ceoten 1 1685579 49753599 2230.7
## - mktval 1 1902849 49970868 2231.5
##
## Step: AIC=2225.22
## dados$salary ~ comten + ceoten + sales + mktval
##
## Df Sum of Sq RSS AIC
## - comten 1 305000 48535332 2224.3
## <none> 48230332 2225.2
## - sales 1 1046688 49277019 2227.0
## - ceoten 1 1719615 49949946 2229.4
## - mktval 1 1793451 50023782 2229.7
##
## Step: AIC=2224.33
## dados$salary ~ ceoten + sales + mktval
##
## Df Sum of Sq RSS AIC
## <none> 48535332 2224.3
## - sales 1 1003525 49538857 2225.9
## - ceoten 1 1434425 49969758 2227.5
## - mktval 1 1708453 50243786 2228.5
##
## Call:
## lm(formula = dados$salary ~ ceoten + sales + mktval, data = dados[,
## -1])
##
## Coefficients:
## (Intercept) ceoten sales mktval
## 613.43610 12.70337 0.01902 0.02340
# Model retained by the AIC search on fit1: ceoten, sales and mktval.
# Bare names are used so that `data` supplies every variable, response included.
fit2 <- lm(salary ~ ceoten + sales + mktval, data = dados)
summary(fit2)
##
## Call:
## lm(formula = dados$salary ~ ceoten + sales + mktval, data = dados[,
## -1])
##
## Residuals:
## Min 1Q Median 3Q Max
## -1131.4 -292.9 -117.9 251.4 4439.7
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 6.134e+02 6.524e+01 9.403 <2e-16 ***
## ceoten 1.270e+01 5.618e+00 2.261 0.0250 *
## sales 1.902e-02 1.006e-02 1.891 0.0603 .
## mktval 2.340e-02 9.483e-03 2.468 0.0146 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 529.7 on 173 degrees of freedom
## Multiple R-squared: 0.2013, Adjusted R-squared: 0.1874
## F-statistic: 14.53 on 3 and 173 DF, p-value: 1.744e-08
library(nortest)
## Warning: package 'nortest' was built under R version 4.3.1
library(stargazer)
## Warning: package 'stargazer' was built under R version 4.3.1
##
## Please cite as:
## Hlavac, Marek (2022). stargazer: Well-Formatted Regression and Summary Statistics Tables.
## R package version 5.2.3. https://CRAN.R-project.org/package=stargazer
shapiro.test(rstudent(fit2))
##
## Shapiro-Wilk normality test
##
## data: rstudent(fit2)
## W = 0.63465, p-value < 2.2e-16
library(lmtest)
bptest(fit2)
##
## studentized Breusch-Pagan test
##
## data: fit2
## BP = 1.4258, df = 3, p-value = 0.6995
vif(fit2)
## ceoten sales mktval
## 1.012476 2.351835 2.341153
# NOTE(review): fit2 is passed twice, so both columns of the table are
# identical; presumably fit1 and fit2 were meant to be compared — confirm.
stargazer(fit2, fit2, type="text")
##
## ===========================================================
## Dependent variable:
## ----------------------------
## salary
## (1) (2)
## -----------------------------------------------------------
## ceoten 12.703** 12.703**
## (5.618) (5.618)
##
## sales 0.019* 0.019*
## (0.010) (0.010)
##
## mktval 0.023** 0.023**
## (0.009) (0.009)
##
## Constant 613.436*** 613.436***
## (65.237) (65.237)
##
## -----------------------------------------------------------
## Observations 177 177
## R2 0.201 0.201
## Adjusted R2 0.187 0.187
## Residual Std. Error (df = 173) 529.671 529.671
## F Statistic (df = 3; 173) 14.532*** 14.532***
## ===========================================================
## Note: *p<0.1; **p<0.05; ***p<0.01
# Log-salary model on all remaining columns. `.` expands to every column of
# `data` other than the response, so the index column (Obs) is dropped first.
fit3 <- lm(log(salary) ~ ., data = dados[, -1])
summary(fit3)
##
## Call:
## lm(formula = log(dados$salary) ~ ., data = dados[, -1])
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.27002 -0.30914 0.02067 0.35067 2.08874
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 6.525e+00 4.106e-01 15.891 <2e-16 ***
## age 2.511e-03 5.802e-03 0.433 0.6657
## college -1.641e-01 2.564e-01 -0.640 0.5231
## grad -4.715e-02 8.679e-02 -0.543 0.5877
## comten -6.959e-03 4.001e-03 -1.739 0.0839 .
## ceoten 1.347e-02 6.320e-03 2.131 0.0345 *
## sales 2.356e-05 1.159e-05 2.033 0.0437 *
## profits 1.117e-04 2.897e-04 0.386 0.7004
## mktval 1.687e-05 1.644e-05 1.027 0.3061
## profmarg -2.699e-03 2.389e-03 -1.130 0.2602
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.5468 on 167 degrees of freedom
## Multiple R-squared: 0.2277, Adjusted R-squared: 0.1861
## F-statistic: 5.471 on 9 and 167 DF, p-value: 1.392e-06
shapiro.test(fit3$residuals)
##
## Shapiro-Wilk normality test
##
## data: fit3$residuals
## W = 0.96723, p-value = 0.0003546
library(lmtest)
bptest(fit3)
##
## studentized Breusch-Pagan test
##
## data: fit3
## BP = 6.4754, df = 9, p-value = 0.6916
stepAIC(fit3)
## Start: AIC=-204.01
## log(dados$salary) ~ age + college + grad + comten + ceoten +
## sales + profits + mktval + profmarg
##
## Df Sum of Sq RSS AIC
## - profits 1 0.04443 49.970 -205.86
## - age 1 0.05600 49.982 -205.81
## - grad 1 0.08822 50.014 -205.70
## - college 1 0.12243 50.048 -205.58
## - mktval 1 0.31506 50.241 -204.90
## - profmarg 1 0.38154 50.307 -204.67
## <none> 49.926 -204.01
## - comten 1 0.90421 50.830 -202.84
## - sales 1 1.23508 51.161 -201.69
## - ceoten 1 1.35817 51.284 -201.26
##
## Step: AIC=-205.86
## log(dados$salary) ~ age + college + grad + comten + ceoten +
## sales + mktval + profmarg
##
## Df Sum of Sq RSS AIC
## - age 1 0.05562 50.026 -207.66
## - grad 1 0.08935 50.059 -207.54
## - college 1 0.13022 50.100 -207.40
## - profmarg 1 0.34265 50.313 -206.65
## <none> 49.970 -205.86
## - comten 1 0.88772 50.858 -204.74
## - ceoten 1 1.33555 51.306 -203.19
## - mktval 1 1.44462 51.415 -202.81
## - sales 1 1.75284 51.723 -201.75
##
## Step: AIC=-207.66
## log(dados$salary) ~ college + grad + comten + ceoten + sales +
## mktval + profmarg
##
## Df Sum of Sq RSS AIC
## - grad 1 0.08920 50.115 -209.34
## - college 1 0.15065 50.176 -209.13
## - profmarg 1 0.34586 50.372 -208.44
## <none> 50.026 -207.66
## - comten 1 0.85191 50.878 -206.67
## - mktval 1 1.42327 51.449 -204.69
## - ceoten 1 1.55153 51.577 -204.25
## - sales 1 1.84194 51.868 -203.26
##
## Step: AIC=-209.34
## log(dados$salary) ~ college + comten + ceoten + sales + mktval +
## profmarg
##
## Df Sum of Sq RSS AIC
## - college 1 0.19266 50.308 -210.66
## - profmarg 1 0.34048 50.455 -210.15
## <none> 50.115 -209.34
## - comten 1 0.77422 50.889 -208.63
## - mktval 1 1.35537 51.470 -206.62
## - ceoten 1 1.56808 51.683 -205.89
## - sales 1 1.86753 51.982 -204.87
##
## Step: AIC=-210.66
## log(dados$salary) ~ comten + ceoten + sales + mktval + profmarg
##
## Df Sum of Sq RSS AIC
## - profmarg 1 0.33621 50.644 -211.49
## <none> 50.308 -210.66
## - comten 1 0.68991 50.997 -210.25
## - mktval 1 1.35417 51.662 -207.96
## - ceoten 1 1.64214 51.950 -206.98
## - sales 1 1.87810 52.186 -206.18
##
## Step: AIC=-211.49
## log(dados$salary) ~ comten + ceoten + sales + mktval
##
## Df Sum of Sq RSS AIC
## <none> 50.644 -211.49
## - comten 1 0.71826 51.362 -210.99
## - mktval 1 1.21798 51.862 -209.28
## - ceoten 1 1.60432 52.248 -207.97
## - sales 1 2.06144 52.705 -206.42
##
## Call:
## lm(formula = log(dados$salary) ~ comten + ceoten + sales + mktval,
## data = dados[, -1])
##
## Coefficients:
## (Intercept) comten ceoten sales mktval
## 6.427e+00 -5.535e-03 1.418e-02 2.728e-05 1.980e-05
# Log-salary model with the predictors kept by the AIC search on fit3.
# Bare names are used so that `data` supplies every variable, response included.
mf <- lm(log(salary) ~ comten + ceoten + sales + mktval, data = dados)
summary(mf)
##
## Call:
## lm(formula = log(dados$salary) ~ comten + ceoten + sales + mktval,
## data = dados[, -1])
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.32020 -0.32179 0.00244 0.37996 2.10120
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 6.427e+00 9.025e-02 71.214 <2e-16 ***
## comten -5.535e-03 3.544e-03 -1.562 0.1202
## ceoten 1.418e-02 6.074e-03 2.334 0.0207 *
## sales 2.728e-05 1.031e-05 2.646 0.0089 **
## mktval 1.980e-05 9.734e-06 2.034 0.0435 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.5426 on 172 degrees of freedom
## Multiple R-squared: 0.2166, Adjusted R-squared: 0.1984
## F-statistic: 11.89 on 4 and 172 DF, p-value: 1.498e-08
shapiro.test(mf$residuals)
##
## Shapiro-Wilk normality test
##
## data: mf$residuals
## W = 0.96669, p-value = 0.0003091
#install.packages("lmtest")
# Carregar o pacote
library(lmtest)
bptest(mf)
##
## studentized Breusch-Pagan test
##
## data: mf
## BP = 5.4822, df = 4, p-value = 0.2413
# library() errors immediately if a package is missing; require() would only
# return FALSE and let vif() fail afterwards.
library(MASS)
library(car)
# Variance inflation factors of the selected log-salary model.
vif(mf)
## comten ceoten sales mktval
## 1.134659 1.127529 2.355614 2.350433
par(mfrow=c(1, 2)); par(bg='grey'); plot(mf, 1:4)
summary(mf)
##
## Call:
## lm(formula = log(dados$salary) ~ comten + ceoten + sales + mktval,
## data = dados[, -1])
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.32020 -0.32179 0.00244 0.37996 2.10120
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 6.427e+00 9.025e-02 71.214 <2e-16 ***
## comten -5.535e-03 3.544e-03 -1.562 0.1202
## ceoten 1.418e-02 6.074e-03 2.334 0.0207 *
## sales 2.728e-05 1.031e-05 2.646 0.0089 **
## mktval 1.980e-05 9.734e-06 2.034 0.0435 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.5426 on 172 degrees of freedom
## Multiple R-squared: 0.2166, Adjusted R-squared: 0.1984
## F-statistic: 11.89 on 4 and 172 DF, p-value: 1.498e-08
# Refit the log-salary model without observations 37, 69 and 153
# (and without the index column).
dados2 <- dados[-c(37, 69, 153), -1]
# Bare column names let `data = dados2` supply the variables. With `dados2$...`
# terms the `data` argument was ignored, and predict(newdata = ...) could not
# substitute new values.
mf2 <- lm(log(salary) ~ comten + ceoten + sales + mktval, data = dados2)
shapiro.test(mf2$residuals)
##
## Shapiro-Wilk normality test
##
## data: mf2$residuals
## W = 0.99059, p-value = 0.3087
#install.packages("lmtest")
# Carregar o pacote
library(lmtest)
bptest(mf2)
##
## studentized Breusch-Pagan test
##
## data: mf2
## BP = 3.5621, df = 4, p-value = 0.4685
# library() errors immediately if a package is missing; require() would only
# return FALSE and let vif() fail afterwards.
library(MASS)
library(car)
# Variance inflation factors of the model refitted on dados2.
vif(mf2)
## dados2$comten dados2$ceoten dados2$sales dados2$mktval
## 1.129297 1.120444 1.858221 1.863486
par(mfrow=c(1, 2)); par(bg='grey'); plot(mf2, 1:4)
summary(mf)
##
## Call:
## lm(formula = log(dados$salary) ~ comten + ceoten + sales + mktval,
## data = dados[, -1])
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.32020 -0.32179 0.00244 0.37996 2.10120
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 6.427e+00 9.025e-02 71.214 <2e-16 ***
## comten -5.535e-03 3.544e-03 -1.562 0.1202
## ceoten 1.418e-02 6.074e-03 2.334 0.0207 *
## sales 2.728e-05 1.031e-05 2.646 0.0089 **
## mktval 1.980e-05 9.734e-06 2.034 0.0435 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.5426 on 172 degrees of freedom
## Multiple R-squared: 0.2166, Adjusted R-squared: 0.1984
## F-statistic: 11.89 on 4 and 172 DF, p-value: 1.498e-08
summary(mf2)
##
## Call:
## lm(formula = log(dados2$salary) ~ dados2$comten + dados2$ceoten +
## dados2$sales + dados2$mktval, data = dados2)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.60284 -0.27949 -0.01643 0.35236 1.36758
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 6.406e+00 8.125e-02 78.849 < 2e-16 ***
## dados2$comten -7.760e-03 3.169e-03 -2.449 0.015351 *
## dados2$ceoten 1.860e-02 5.489e-03 3.388 0.000877 ***
## dados2$sales 3.321e-05 1.009e-05 3.290 0.001219 **
## dados2$mktval 2.661e-05 8.704e-06 3.057 0.002603 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.4815 on 169 degrees of freedom
## Multiple R-squared: 0.2966, Adjusted R-squared: 0.2799
## F-statistic: 17.81 on 4 and 169 DF, p-value: 3.201e-12
Com base nos testes realizados, os pressupostos de normalidade e de homocedasticidade dos resíduos parecem razoavelmente atendidos, e os valores de VIF não indicam problema de multicolinearidade.
Melhor modelo: lm(log(dados2$salary) ~ dados2$comten + dados2$ceoten + dados2$sales + dados2$mktval, data = dados2)
# Exponentiated coefficients of the log-salary model mf2.
exp(coef(mf2))
## (Intercept) dados2$comten dados2$ceoten dados2$sales dados2$mktval
## 605.7028863 0.9922699 1.0187690 1.0000332 1.0000266
# The same coefficients on the original (log) scale.
coef(mf2)
## (Intercept) dados2$comten dados2$ceoten dados2$sales dados2$mktval
## 6.406390e+00 -7.760157e-03 1.859504e-02 3.320838e-05 2.660456e-05
par(mfrow=c(1, 2)); par(bg='grey'); plot(mf2, 1:4)