library(tidyverse)
# Per-column descriptive statistics for FeeMW_NM (quartiles, mean, NA counts).
# NOTE(review): FeeMW_NM is not created in the visible script; it must
# already be loaded in the session before this line runs.
summary(FeeMW_NM)
CIK AFEE TA DA ROA
Min. : 2135 Min. : 10000 Min. :1.000e+06 Min. :0.020 Min. :-1.000
1st Qu.: 831451 1st Qu.: 190000 1st Qu.:2.844e+07 1st Qu.:0.310 1st Qu.:-0.240
Median :1036848 Median : 730000 Median :1.799e+08 Median :0.530 Median :-0.020
Mean : 978860 Mean : 1655960 Mean :1.729e+09 Mean :0.611 Mean :-0.169
3rd Qu.:1318807 3rd Qu.: 1780000 3rd Qu.:7.491e+08 3rd Qu.:0.765 3rd Qu.: 0.030
Max. :1652923 Max. :85800000 Max. :6.972e+11 Max. :2.000 Max. : 0.400
NA's :2
LOSS SURP GC ACCF AUDCHG
Min. :0.0000 Min. :0.0000 Min. :0.000 Min. :0.0000 Min. :0.0000
1st Qu.:0.0000 1st Qu.:1.0000 1st Qu.:0.000 1st Qu.:0.0000 1st Qu.:0.0000
Median :1.0000 Median :1.0000 Median :0.000 Median :1.0000 Median :0.0000
Mean :0.5609 Mean :0.7802 Mean :0.186 Mean :0.5889 Mean :0.1796
3rd Qu.:1.0000 3rd Qu.:1.0000 3rd Qu.:0.000 3rd Qu.:1.0000 3rd Qu.:0.0000
Max. :1.0000 Max. :1.0000 Max. :1.000 Max. :1.0000 Max. :1.0000
NTK RESTT NEWCEO NEWCFO SIC
Min. :0.0000 Min. :0.0000 Min. :0.0000 Min. :0.000 Min. : 100
1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.000 1st Qu.:2860
Median :0.0000 Median :0.0000 Median :0.0000 Median :0.000 Median :3829
Mean :0.1405 Mean :0.1878 Mean :0.1971 Mean :0.316 Mean :4534
3rd Qu.:0.0000 3rd Qu.:0.0000 3rd Qu.:0.0000 3rd Qu.:1.000 3rd Qu.:5990
Max. :1.0000 Max. :1.0000 Max. :1.0000 Max. :1.000 Max. :8900
YEAR FYEND AUDITOR BIG4 AUDSIZE
Min. :2004 Min. : 101 Min. :1.000 Min. :0.0000 Min. :1.000
1st Qu.:2006 1st Qu.: 930 1st Qu.:2.000 1st Qu.:0.0000 1st Qu.:1.000
Median :2011 Median :1231 Median :4.000 Median :1.0000 Median :3.000
Mean :2010 Mean :1039 Mean :3.953 Mean :0.5499 Mean :2.182
3rd Qu.:2013 3rd Qu.:1231 3rd Qu.:6.000 3rd Qu.:1.0000 3rd Qu.:3.000
Max. :2016 Max. :1231 Max. :6.000 Max. :1.0000 Max. :3.000
NA's :5
MWNUM
Min. :1.000
1st Qu.:1.000
Median :1.000
Mean :1.759
3rd Qu.:2.000
Max. :5.000
# Working vectors pulled out of FeeMW_NM for the regressions below.

# Fees (AFEE) and total assets (TA) enter the models in natural logs.
LF <- log(FeeMW_NM$AFEE)
LA <- log(FeeMW_NM$TA)

# Numeric columns used as-is.
D <- FeeMW_NM$DA
C <- FeeMW_NM$CIK
# SIC gets its own name. It used to be stored in `S`, which the SURP factor
# below then overwrote, so the industry code was silently lost.
SC <- FeeMW_NM$SIC
RA <- FeeMW_NM$ROA
YR <- FeeMW_NM$YEAR
YE <- FeeMW_NM$FYEND
MW <- FeeMW_NM$MWNUM

# 0/1 indicator columns become factors so lm() reports them as level
# contrasts (e.g. B41, LS1) rather than numeric slopes.
LS <- factor(FeeMW_NM$LOSS)
B4 <- factor(FeeMW_NM$BIG4)
S <- factor(FeeMW_NM$SURP)
G <- factor(FeeMW_NM$GC)
AF <- factor(FeeMW_NM$ACCF)
AC <- factor(FeeMW_NM$AUDCHG)
NT <- factor(FeeMW_NM$NTK)
RE <- factor(FeeMW_NM$RESTT)
CEO <- factor(FeeMW_NM$NEWCEO)
CFO <- factor(FeeMW_NM$NEWCFO)

# Multi-level coded columns (AUDSIZE ranges 1-3, AUDITOR 1-6 in the summary).
AS <- factor(FeeMW_NM$AUDSIZE)
AD <- factor(FeeMW_NM$AUDITOR)

# Quick look at the first rows of the raw data.
head(FeeMW_NM)
# Baseline fee model: log fees on log total assets, the Big 4 indicator and
# the loss indicator. Rows with missing values are dropped by lm().
result1 <- lm(LF ~ LA + B4 + LS)
summary(result1)
Call:
lm(formula = LF ~ LA + B4 + LS)
Residuals:
Min 1Q Median 3Q Max
-3.8637 -0.4348 0.0125 0.4712 2.6643
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 3.62320 0.18814 19.258 <2e-16 ***
LA 0.48866 0.01040 47.007 <2e-16 ***
B41 0.75264 0.04716 15.961 <2e-16 ***
LS1 0.09668 0.03800 2.544 0.011 *
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 0.7272 on 1709 degrees of freedom
(2 observations deleted due to missingness)
Multiple R-squared: 0.7878, Adjusted R-squared: 0.7874
F-statistic: 2115 on 3 and 1709 DF, p-value: < 2.2e-16
# Coefficients copied by hand from the summary(result1) table.
cINT1 <- 3.62320  # (Intercept)
cLA1 <- 0.48866   # LA
cB41 <- 0.75264   # B41
cLS1 <- 0.09668   # LS1

# Log of total assets for the example company ($600 million).
nLA <- log(600000000)

# Fitted fee for a Big 4 client (B4 = 1) without a loss (LS = 0);
# exp() converts the fitted log fee back to dollars.
B4yes1 <- exp(cINT1 + cLA1 * nLA + cB41 * 1 + cLS1 * 0)
print(B4yes1)
[1] 1548583
# Fitted fee at the same asset level for a non-Big 4 client (B4 = 0) without
# a loss (LS = 0). Uses nLA, the log of $600 million assigned for B4yes1; the
# earlier reference to nLA1 pointed at a variable this script never assigns.
B4no1 <- exp(cINT1 + cLA1 * nLA + cB41 * 0 + cLS1 * 0)
print(B4no1)
[1] 729570.3
Big 4 Premium Dollar Amount
# Dollar difference between the Big 4 and non-Big 4 fitted fees.
Q1d <- B4yes1 - B4no1
print(Q1d)
[1] 819012.9
Big 4 Premium as a ratio (Big 4 fee divided by non-Big 4 fee, displayed as a decimal; the percentage premium is this ratio minus 1)
# Ratio of the Big 4 fitted fee to the non-Big 4 fitted fee.
Q1p <- B4yes1 / B4no1
print(Q1p)
[1] 2.122596
# Restrict the sample to SIC codes 2000-3999.
MFG <- subset(FeeMW_NM, SIC > 1999 & SIC < 4000)

# Model inputs rebuilt from the subsample: logged fees and assets, plus the
# BIG4, LOSS and SURP indicators as factors.
MLF <- log(MFG$AFEE)
MLA <- log(MFG$TA)
MB4 <- factor(MFG$BIG4)
MFS <- factor(MFG$LOSS)
MSP <- factor(MFG$SURP)

# Fee model on the subsample.
resultMFG <- lm(MLF ~ MLA + MB4 + MFS + MSP)
summary(resultMFG)
Call:
lm(formula = MLF ~ MLA + MB4 + MFS + MSP)
Residuals:
Min 1Q Median 3Q Max
-4.0367 -0.3801 0.0048 0.4169 2.3339
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 3.34039 0.27435 12.176 < 2e-16 ***
MLA 0.51486 0.01473 34.965 < 2e-16 ***
MB41 0.66589 0.06470 10.292 < 2e-16 ***
MFS1 0.17898 0.05433 3.294 0.00103 **
MSP1 -0.12803 0.05959 -2.149 0.03197 *
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 0.6909 on 789 degrees of freedom
(1 observation deleted due to missingness)
Multiple R-squared: 0.8042, Adjusted R-squared: 0.8032
F-statistic: 810.1 on 4 and 789 DF, p-value: < 2.2e-16
# Coefficients copied by hand from the summary(resultMFG) table.
cINT2 <- 3.34039   # (Intercept)
cMLA2 <- 0.51486   # MLA
cMB42 <- 0.66589   # MB41
cMLS2 <- 0.17898   # MFS1 (loss indicator)
cMSP2 <- -0.12803  # MSP1 (surprise indicator)

# Fitted fee at log assets nLA with Big 4 = 1, loss = 1, surprise = 1.
Q2Ayes <- exp(cINT2 + cMLA2 * nLA + cMB42 * 1 + cMLS2 * 1 + cMSP2 * 1)
print(Q2Ayes)
[1] 1912280
# Same company as Q2Ayes but with the surprise indicator switched off.
Q2Ano <- exp(cINT2 + cMLA2 * nLA + cMB42 * 1 + cMLS2 * 1 + cMSP2 * 0)
print(Q2Ano)
[1] 2173473
2A) Without a surprise material weakness, the expected audit fee is higher by:
# Fee difference: no-surprise case minus surprise case.
Q2A <- Q2Ano - Q2Ayes
print(Q2A)
[1] 261192.8
2B) What are the expected audit fees for a company with $750 million in assets that is audited by a Big 4 firm, reports a loss for the year, and has a surprise material weakness in internal controls?
# Log of total assets for the $750 million company.
Q2BLA <- log(750000000)

# Fitted fee with Big 4 = 1, loss = 1, surprise = 1 at that asset level.
Q2B <- exp(cINT2 + cMLA2 * Q2BLA + cMB42 * 1 + cMLS2 * 1 + cMSP2 * 1)
print(Q2B)
[1] 2145096
2C)
# Fitted fee at the same asset level with Big 4 = 1, loss = 0, surprise = 0.
Q2C2 <- exp(cINT2 + cMLA2 * Q2BLA + cMB42 * 1 + cMLS2 * 0 + cMSP2 * 0)
print(Q2C2)
[1] 2038540
The company in B pays more than the company in C by:
# Fee difference between the 2B company and the 2C company.
Q2C <- Q2B - Q2C2
print(Q2C)
[1] 106555.1
# Candidate model 3.1: log fees on log assets, the Big 4 indicator,
# MW (MWNUM) and RA (ROA).
result3.1 <- lm(LF ~ LA + B4 + MW + RA)
summary(result3.1)
Call:
lm(formula = LF ~ LA + B4 + MW + RA)
Residuals:
Min 1Q Median 3Q Max
-4.1033 -0.4060 0.0243 0.4542 2.4660
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 2.89302 0.19887 14.547 < 2e-16 ***
LA 0.51684 0.01100 46.971 < 2e-16 ***
B41 0.75033 0.04584 16.368 < 2e-16 ***
MW 0.10365 0.01485 6.981 4.17e-12 ***
RA -0.43841 0.05967 -7.347 3.12e-13 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 0.7067 on 1708 degrees of freedom
(2 observations deleted due to missingness)
Multiple R-squared: 0.7997, Adjusted R-squared: 0.7993
F-statistic: 1705 on 4 and 1708 DF, p-value: < 2.2e-16
# Candidate model 3.2: drops the Big 4 indicator and adds the G (GC) and
# NT (NTK) factors alongside log assets, MW and RA.
result3.2 <- lm(LF ~ LA + MW + G + NT + RA)
summary(result3.2)
Call:
lm(formula = LF ~ LA + MW + G + NT + RA)
Residuals:
Min 1Q Median 3Q Max
-4.5862 -0.4480 0.0241 0.4990 2.4802
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 1.673041 0.194374 8.607 < 2e-16 ***
LA 0.606096 0.009837 61.616 < 2e-16 ***
MW 0.101664 0.015839 6.419 1.78e-10 ***
G1 -0.371382 0.061006 -6.088 1.41e-09 ***
NT1 -0.054596 0.054706 -0.998 0.318
RA -0.650325 0.069839 -9.312 < 2e-16 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 0.7513 on 1707 degrees of freedom
(2 observations deleted due to missingness)
Multiple R-squared: 0.7737, Adjusted R-squared: 0.7731
F-statistic: 1167 on 5 and 1707 DF, p-value: < 2.2e-16
# Candidate model 3.3: log assets, the CFO (NEWCFO) factor and RA.
result3.3 <- lm(LF ~ LA + CFO + RA)
summary(result3.3)
Call:
lm(formula = LF ~ LA + CFO + RA)
Residuals:
Min 1Q Median 3Q Max
-4.3235 -0.4422 0.0173 0.4893 2.9098
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 1.379364 0.185822 7.423 1.80e-13 ***
LA 0.626077 0.009506 65.864 < 2e-16 ***
CFO1 0.160322 0.039801 4.028 5.87e-05 ***
RA -0.471367 0.064510 -7.307 4.18e-13 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 0.764 on 1709 degrees of freedom
(2 observations deleted due to missingness)
Multiple R-squared: 0.7658, Adjusted R-squared: 0.7653
F-statistic: 1862 on 3 and 1709 DF, p-value: < 2.2e-16
# Candidate model 3.4: log assets, the AC (AUDCHG) factor and RA.
result3.4 <- lm(LF ~ LA + AC + RA)
summary(result3.4)
Call:
lm(formula = LF ~ LA + AC + RA)
Residuals:
Min 1Q Median 3Q Max
-4.2084 -0.4522 0.0145 0.5043 2.8651
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 1.394893 0.188632 7.395 2.21e-13 ***
LA 0.627614 0.009599 65.380 < 2e-16 ***
AC1 0.019669 0.048672 0.404 0.686
RA -0.486763 0.064700 -7.523 8.58e-14 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 0.7676 on 1709 degrees of freedom
(2 observations deleted due to missingness)
Multiple R-squared: 0.7636, Adjusted R-squared: 0.7631
F-statistic: 1840 on 3 and 1709 DF, p-value: < 2.2e-16
Result 3.1 had the best result, with a residual standard error of 0.7067. Those are the variables I believed would have the biggest impact on the fees. I tried to see whether other aspects would impact fees as much, but they did not.
# Re-print the chosen model (result3.1) as the final answer.
summary(result3.1)
Call:
lm(formula = LF ~ LA + B4 + MW + RA)
Residuals:
Min 1Q Median 3Q Max
-4.1033 -0.4060 0.0243 0.4542 2.4660
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 2.89302 0.19887 14.547 < 2e-16 ***
LA 0.51684 0.01100 46.971 < 2e-16 ***
B41 0.75033 0.04584 16.368 < 2e-16 ***
MW 0.10365 0.01485 6.981 4.17e-12 ***
RA -0.43841 0.05967 -7.347 3.12e-13 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 0.7067 on 1708 degrees of freedom
(2 observations deleted due to missingness)
Multiple R-squared: 0.7997, Adjusted R-squared: 0.7993
F-statistic: 1705 on 4 and 1708 DF, p-value: < 2.2e-16