library(tidyverse)
# Print per-column summary statistics (quartiles, mean, NA counts) for FeeMW_NM.
summary(FeeMW_NM)
      CIK               AFEE                TA                  DA             ROA        
 Min.   :   2135   Min.   :   10000   Min.   :1.000e+06   Min.   :0.020   Min.   :-1.000  
 1st Qu.: 831451   1st Qu.:  190000   1st Qu.:2.844e+07   1st Qu.:0.310   1st Qu.:-0.240  
 Median :1036848   Median :  730000   Median :1.799e+08   Median :0.530   Median :-0.020  
 Mean   : 978860   Mean   : 1655960   Mean   :1.729e+09   Mean   :0.611   Mean   :-0.169  
 3rd Qu.:1318807   3rd Qu.: 1780000   3rd Qu.:7.491e+08   3rd Qu.:0.765   3rd Qu.: 0.030  
 Max.   :1652923   Max.   :85800000   Max.   :6.972e+11   Max.   :2.000   Max.   : 0.400  
                   NA's   :2                                                              
      LOSS             SURP              GC             ACCF            AUDCHG      
 Min.   :0.0000   Min.   :0.0000   Min.   :0.000   Min.   :0.0000   Min.   :0.0000  
 1st Qu.:0.0000   1st Qu.:1.0000   1st Qu.:0.000   1st Qu.:0.0000   1st Qu.:0.0000  
 Median :1.0000   Median :1.0000   Median :0.000   Median :1.0000   Median :0.0000  
 Mean   :0.5609   Mean   :0.7802   Mean   :0.186   Mean   :0.5889   Mean   :0.1796  
 3rd Qu.:1.0000   3rd Qu.:1.0000   3rd Qu.:0.000   3rd Qu.:1.0000   3rd Qu.:0.0000  
 Max.   :1.0000   Max.   :1.0000   Max.   :1.000   Max.   :1.0000   Max.   :1.0000  
                                                                                    
      NTK             RESTT            NEWCEO           NEWCFO           SIC      
 Min.   :0.0000   Min.   :0.0000   Min.   :0.0000   Min.   :0.000   Min.   : 100  
 1st Qu.:0.0000   1st Qu.:0.0000   1st Qu.:0.0000   1st Qu.:0.000   1st Qu.:2860  
 Median :0.0000   Median :0.0000   Median :0.0000   Median :0.000   Median :3829  
 Mean   :0.1405   Mean   :0.1878   Mean   :0.1971   Mean   :0.316   Mean   :4534  
 3rd Qu.:0.0000   3rd Qu.:0.0000   3rd Qu.:0.0000   3rd Qu.:1.000   3rd Qu.:5990  
 Max.   :1.0000   Max.   :1.0000   Max.   :1.0000   Max.   :1.000   Max.   :8900  
                                                                                  
      YEAR          FYEND         AUDITOR           BIG4           AUDSIZE     
 Min.   :2004   Min.   : 101   Min.   :1.000   Min.   :0.0000   Min.   :1.000  
 1st Qu.:2006   1st Qu.: 930   1st Qu.:2.000   1st Qu.:0.0000   1st Qu.:1.000  
 Median :2011   Median :1231   Median :4.000   Median :1.0000   Median :3.000  
 Mean   :2010   Mean   :1039   Mean   :3.953   Mean   :0.5499   Mean   :2.182  
 3rd Qu.:2013   3rd Qu.:1231   3rd Qu.:6.000   3rd Qu.:1.0000   3rd Qu.:3.000  
 Max.   :2016   Max.   :1231   Max.   :6.000   Max.   :1.0000   Max.   :3.000  
                NA's   :5                                                      
     MWNUM      
 Min.   :1.000  
 1st Qu.:1.000  
 Median :1.000  
 Mean   :1.759  
 3rd Qu.:2.000  
 Max.   :5.000  
                
# Pull the modelling vectors out of FeeMW_NM so the regressions below can
# refer to them by short names.

# Natural logs of AFEE and TA.
LF <- log(FeeMW_NM$AFEE)
LA <- log(FeeMW_NM$TA)

# Numeric columns used as-is.
D <- FeeMW_NM$DA
C <- FeeMW_NM$CIK
# SIC is kept under its own name `SC` so it is not clobbered by `S`,
# which holds the SURP factor further down.
SC <- FeeMW_NM$SIC
RA <- FeeMW_NM$ROA
YR <- FeeMW_NM$YEAR
YE <- FeeMW_NM$FYEND
MW <- FeeMW_NM$MWNUM

# 0/1 indicator columns, converted to factors so lm() treats them as
# categorical (coefficients appear with a "1" suffix, e.g. B41, LS1).
LS <- factor(FeeMW_NM$LOSS)
B4 <- factor(FeeMW_NM$BIG4)
S <- factor(FeeMW_NM$SURP)
G <- factor(FeeMW_NM$GC)
AF <- factor(FeeMW_NM$ACCF)
AC <- factor(FeeMW_NM$AUDCHG)
NT <- factor(FeeMW_NM$NTK)
RE <- factor(FeeMW_NM$RESTT)
CEO <- factor(FeeMW_NM$NEWCEO)
CFO <- factor(FeeMW_NM$NEWCFO)

# Multi-level coded columns, also treated as categorical.
AS <- factor(FeeMW_NM$AUDSIZE)
AD <- factor(FeeMW_NM$AUDITOR)

# Show the first rows of the raw data.
head(FeeMW_NM)

Question 1

# Question 1 model: log fee on log assets plus the Big 4 and loss indicators.
result1 <- lm(formula = LF ~ LA + B4 + LS)
summary(result1)

Call:
lm(formula = LF ~ LA + B4 + LS)

Residuals:
    Min      1Q  Median      3Q     Max 
-3.8637 -0.4348  0.0125  0.4712  2.6643 

Coefficients:
            Estimate Std. Error t value Pr(>|t|)    
(Intercept)  3.62320    0.18814  19.258   <2e-16 ***
LA           0.48866    0.01040  47.007   <2e-16 ***
B41          0.75264    0.04716  15.961   <2e-16 ***
LS1          0.09668    0.03800   2.544    0.011 *  
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 0.7272 on 1709 degrees of freedom
  (2 observations deleted due to missingness)
Multiple R-squared:  0.7878,    Adjusted R-squared:  0.7874 
F-statistic:  2115 on 3 and 1709 DF,  p-value: < 2.2e-16
# Coefficients copied from the summary(result1) coefficient table.
cINT1 <- 3.62320  # (Intercept)
cLA1 <- 0.48866   # LA
cB41 <- 0.75264   # B41
cLS1 <- 0.09668   # LS1

# Log of the asset level used for the predictions (600 million).
nLA <- log(6e8)

# Predicted fee with B4 = 1 and LS = 0, back-transformed from the log scale.
B4yes1 <- exp(cINT1 + cLA1 * nLA + cB41 * 1 + cLS1 * 0)
print(B4yes1)
[1] 1548583
# Predicted fee at the same asset level with B4 = 0 and LS = 0.
# The log-asset term is nLA (set alongside the Question 1 coefficients);
# nLA1 is not assigned in this script and would raise "object not found".
B4no1 <- exp(cINT1 + cLA1 * nLA + cB41 * 0 + cLS1 * 0)
print(B4no1)
[1] 729570.3

Big 4 Premium Dollar Amount

# Dollar difference between the Big 4 and non-Big 4 predicted fees.
Q1d <- B4yes1 - B4no1
print(Q1d)
[1] 819012.9

Big 4 Premium Percentage (displayed as the decimal ratio of the Big 4 fee to the non-Big 4 fee; subtract 1 to get the premium itself)

# Ratio of the Big 4 predicted fee to the non-Big 4 predicted fee.
Q1p <- B4yes1 / B4no1
print(Q1p)
[1] 2.122596

Question 2

# Question 2 sample: rows whose SIC code lies strictly between 1999 and 4000
# (presumably the manufacturing range, going by the MFG name).
MFG <- subset(FeeMW_NM, subset = SIC > 1999 & SIC < 4000)

# Rebuild the regression inputs from the subset only.
MLF <- log(MFG$AFEE)
MLA <- log(MFG$TA)
MB4 <- factor(MFG$BIG4)
MFS <- factor(MFG$LOSS)
MSP <- factor(MFG$SURP)

# Log fee on log assets plus the Big 4, loss and surprise indicators.
resultMFG <- lm(formula = MLF ~ MLA + MB4 + MFS + MSP)
summary(resultMFG)

Call:
lm(formula = MLF ~ MLA + MB4 + MFS + MSP)

Residuals:
    Min      1Q  Median      3Q     Max 
-4.0367 -0.3801  0.0048  0.4169  2.3339 

Coefficients:
            Estimate Std. Error t value Pr(>|t|)    
(Intercept)  3.34039    0.27435  12.176  < 2e-16 ***
MLA          0.51486    0.01473  34.965  < 2e-16 ***
MB41         0.66589    0.06470  10.292  < 2e-16 ***
MFS1         0.17898    0.05433   3.294  0.00103 ** 
MSP1        -0.12803    0.05959  -2.149  0.03197 *  
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 0.6909 on 789 degrees of freedom
  (1 observation deleted due to missingness)
Multiple R-squared:  0.8042,    Adjusted R-squared:  0.8032 
F-statistic: 810.1 on 4 and 789 DF,  p-value: < 2.2e-16
# Coefficients copied from the summary(resultMFG) coefficient table.
cINT2 <- 3.34039   # (Intercept)
cMLA2 <- 0.51486   # MLA
cMB42 <- 0.66589   # MB41
cMLS2 <- 0.17898   # MFS1 (loss indicator)
cMSP2 <- -0.12803  # MSP1

# Predicted fee at log-assets nLA with Big 4 = 1, loss = 1, surprise = 1.
Q2Ayes <- exp(cINT2 + cMLA2 * nLA + cMB42 * 1 + cMLS2 * 1 + cMSP2 * 1)
print(Q2Ayes)
[1] 1912280
# Same company as Q2Ayes but with the surprise indicator set to 0.
Q2Ano <- exp(cINT2 + cMLA2 * nLA + cMB42 * 1 + cMLS2 * 1 + cMSP2 * 0)
print(Q2Ano)
[1] 2173473

2A) Without a surprise material weakness, the predicted fee is higher by:

# Amount by which the no-surprise prediction exceeds the surprise prediction.
Q2A <- Q2Ano - Q2Ayes
print(Q2A)
[1] 261192.8

2B) What are the expected audit fees for a company with $750 million in assets that is audited by a Big 4 firm, reports a loss for the year, and has a surprise material weakness in internal controls?

# Log of the 750 million asset level used in parts 2B and 2C.
Q2BLA <- log(7.5e8)

# Predicted fee with Big 4 = 1, loss = 1, surprise = 1.
Q2B <- exp(cINT2 + cMLA2 * Q2BLA + cMB42 * 1 + cMLS2 * 1 + cMSP2 * 1)
print(Q2B)
[1] 2145096

2C)

# Predicted fee at the same asset level with Big 4 = 1, loss = 0, surprise = 0.
Q2C2 <- exp(cINT2 + cMLA2 * Q2BLA + cMB42 * 1 + cMLS2 * 0 + cMSP2 * 0)
print(Q2C2)
[1] 2038540

The company in B pays more than the company in C by:

# Difference between the 2B and 2C predicted fees.
Q2C <- Q2B - Q2C2
print(Q2C)
[1] 106555.1

Question 3

# Candidate model 1: log fee on log assets, Big 4 indicator, MWNUM and ROA.
result3.1 <- lm(formula = LF ~ LA + B4 + MW + RA)
summary(result3.1)

Call:
lm(formula = LF ~ LA + B4 + MW + RA)

Residuals:
    Min      1Q  Median      3Q     Max 
-4.1033 -0.4060  0.0243  0.4542  2.4660 

Coefficients:
            Estimate Std. Error t value Pr(>|t|)    
(Intercept)  2.89302    0.19887  14.547  < 2e-16 ***
LA           0.51684    0.01100  46.971  < 2e-16 ***
B41          0.75033    0.04584  16.368  < 2e-16 ***
MW           0.10365    0.01485   6.981 4.17e-12 ***
RA          -0.43841    0.05967  -7.347 3.12e-13 ***
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 0.7067 on 1708 degrees of freedom
  (2 observations deleted due to missingness)
Multiple R-squared:  0.7997,    Adjusted R-squared:  0.7993 
F-statistic:  1705 on 4 and 1708 DF,  p-value: < 2.2e-16
# Candidate model 2: log fee on log assets, MWNUM, the GC and NTK indicators,
# and ROA.
result3.2 <- lm(formula = LF ~ LA + MW + G + NT + RA)
summary(result3.2)

Call:
lm(formula = LF ~ LA + MW + G + NT + RA)

Residuals:
    Min      1Q  Median      3Q     Max 
-4.5862 -0.4480  0.0241  0.4990  2.4802 

Coefficients:
             Estimate Std. Error t value Pr(>|t|)    
(Intercept)  1.673041   0.194374   8.607  < 2e-16 ***
LA           0.606096   0.009837  61.616  < 2e-16 ***
MW           0.101664   0.015839   6.419 1.78e-10 ***
G1          -0.371382   0.061006  -6.088 1.41e-09 ***
NT1         -0.054596   0.054706  -0.998    0.318    
RA          -0.650325   0.069839  -9.312  < 2e-16 ***
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 0.7513 on 1707 degrees of freedom
  (2 observations deleted due to missingness)
Multiple R-squared:  0.7737,    Adjusted R-squared:  0.7731 
F-statistic:  1167 on 5 and 1707 DF,  p-value: < 2.2e-16
# Candidate model 3: log fee on log assets, the NEWCFO indicator and ROA.
result3.3 <- lm(formula = LF ~ LA + CFO + RA)
summary(result3.3)

Call:
lm(formula = LF ~ LA + CFO + RA)

Residuals:
    Min      1Q  Median      3Q     Max 
-4.3235 -0.4422  0.0173  0.4893  2.9098 

Coefficients:
             Estimate Std. Error t value Pr(>|t|)    
(Intercept)  1.379364   0.185822   7.423 1.80e-13 ***
LA           0.626077   0.009506  65.864  < 2e-16 ***
CFO1         0.160322   0.039801   4.028 5.87e-05 ***
RA          -0.471367   0.064510  -7.307 4.18e-13 ***
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 0.764 on 1709 degrees of freedom
  (2 observations deleted due to missingness)
Multiple R-squared:  0.7658,    Adjusted R-squared:  0.7653 
F-statistic:  1862 on 3 and 1709 DF,  p-value: < 2.2e-16
# Candidate model 4: log fee on log assets, the AUDCHG indicator and ROA.
result3.4 <- lm(formula = LF ~ LA + AC + RA)
summary(result3.4)

Call:
lm(formula = LF ~ LA + AC + RA)

Residuals:
    Min      1Q  Median      3Q     Max 
-4.2084 -0.4522  0.0145  0.5043  2.8651 

Coefficients:
             Estimate Std. Error t value Pr(>|t|)    
(Intercept)  1.394893   0.188632   7.395 2.21e-13 ***
LA           0.627614   0.009599  65.380  < 2e-16 ***
AC1          0.019669   0.048672   0.404    0.686    
RA          -0.486763   0.064700  -7.523 8.58e-14 ***
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 0.7676 on 1709 degrees of freedom
  (2 observations deleted due to missingness)
Multiple R-squared:  0.7636,    Adjusted R-squared:  0.7631 
F-statistic:  1840 on 3 and 1709 DF,  p-value: < 2.2e-16

Result 3.1 had the best fit, with a residual standard error of 0.7067 (the lowest of the four models). Those are the variables I believed would have the biggest impact on the fees. I tried to see whether other aspects would impact fees as much, but they did not.

# Re-print the summary of the chosen model (result3.1).
summary(result3.1)

Call:
lm(formula = LF ~ LA + B4 + MW + RA)

Residuals:
    Min      1Q  Median      3Q     Max 
-4.1033 -0.4060  0.0243  0.4542  2.4660 

Coefficients:
            Estimate Std. Error t value Pr(>|t|)    
(Intercept)  2.89302    0.19887  14.547  < 2e-16 ***
LA           0.51684    0.01100  46.971  < 2e-16 ***
B41          0.75033    0.04584  16.368  < 2e-16 ***
MW           0.10365    0.01485   6.981 4.17e-12 ***
RA          -0.43841    0.05967  -7.347 3.12e-13 ***
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 0.7067 on 1708 degrees of freedom
  (2 observations deleted due to missingness)
Multiple R-squared:  0.7997,    Adjusted R-squared:  0.7993 
F-statistic:  1705 on 4 and 1708 DF,  p-value: < 2.2e-16
LS0tCnRpdGxlOiAiQUNHIDY0NTUgLSBQcm9qZWN0IDIiCm91dHB1dDogaHRtbF9ub3RlYm9vawotLS0KCmBgYHtyfQpsaWJyYXJ5KHRpZHl2ZXJzZSkKc3VtbWFyeShGZWVNV19OTSkKYGBgCgpgYGB7cn0KTEY8LWxvZyhGZWVNV19OTSRBRkVFKQpMQTwtbG9nKEZlZU1XX05NJFRBKQpEPC1GZWVNV19OTSREQQpDPC1GZWVNV19OTSRDSUsKUzwtRmVlTVdfTk0kU0lDClJBPC1GZWVNV19OTSRST0EKWVI8LUZlZU1XX05NJFlFQVIKWUU8LUZlZU1XX05NJEZZRU5ECkxTPC1mYWN0b3IoRmVlTVdfTk0kTE9TUykKQjQ8LWZhY3RvcihGZWVNV19OTSRCSUc0KQpTPC1mYWN0b3IoRmVlTVdfTk0kU1VSUCkKRzwtZmFjdG9yKEZlZU1XX05NJEdDKQpBRjwtZmFjdG9yKEZlZU1XX05NJEFDQ0YpCkFDPC1mYWN0b3IoRmVlTVdfTk0kQVVEQ0hHKQpOVDwtZmFjdG9yKEZlZU1XX05NJE5USykKUkU8LWZhY3RvcihGZWVNV19OTSRSRVNUVCkKQ0VPPC1mYWN0b3IoRmVlTVdfTk0kTkVXQ0VPKQpDRk88LWZhY3RvcihGZWVNV19OTSRORVdDRk8pCk1XPC1GZWVNV19OTSRNV05VTQpBUzwtZmFjdG9yKEZlZU1XX05NJEFVRFNJWkUpCkFEPC1mYWN0b3IoRmVlTVdfTk0kQVVESVRPUikKYGBgCgpgYGB7cn0KaGVhZChGZWVNV19OTSkKYGBgCiMgUXVlc3Rpb24gMQpgYGB7cn0KcmVzdWx0MTwtbG0oTEZ+TEErQjQrTFMpCnN1bW1hcnkocmVzdWx0MSkKYGBgCgpgYGB7cn0KY0lOVDE9IDMuNjIzMjAKY0xBMT0gMC40ODg2NgpjQjQxPSAwLjc1MjY0CmNMUzE9IDAuMDk2NjgKCm5MQT1sb2coNjAwMDAwMDAwKQpgYGAKCmBgYHtyfQpCNHllczE9ZXhwKGNJTlQxK2NMQTEqbkxBK2NCNDEqMStjTFMxKjApCnByaW50KEI0eWVzMSkKYGBgCgpgYGB7cn0KQjRubzE9ZXhwKGNJTlQxK2NMQTEqbkxBMStjQjQxKjArY0xTMSowKQpwcmludChCNG5vMSkKYGBgCkJpZyA0IFByZW1pdW0gRG9sbGFyIEFtb3VudApgYGB7cn0KUTFkPUI0eWVzMS1CNG5vMQpwcmludChRMWQpCmBgYApCaWcgNCBQcmVtaXVtIFBlcmNlbnRhZ2UoZGlzcGxheWVkIGFzIGEgZGVjaW1hbCkKYGBge3J9ClExcD1CNHllczEvQjRubzEKcHJpbnQoUTFwKQpgYGAKCiMgUXVlc3Rpb24gMgpgYGB7cn0KTUZHPC1zdWJzZXQoRmVlTVdfTk0sIFNJQz4xOTk5ICYgU0lDPDQwMDApCk1CNDwtZmFjdG9yKE1GRyRCSUc0KQpNRlM8LWZhY3RvcihNRkckTE9TUykKTUxGPC1sb2coTUZHJEFGRUUpCk1MQTwtbG9nKE1GRyRUQSkKTVNQPC0gZmFjdG9yKE1GRyRTVVJQKQpgYGAKCmBgYHtyfQpyZXN1bHRNRkc8LWxtKE1MRn5NTEErTUI0K01GUytNU1ApCnN1bW1hcnkocmVzdWx0TUZHKQpgYGAKCmBgYHtyfQpjSU5UMj0gMy4zNDAzOQpjTUxBMj0gMC41MTQ4NiAKY01CNDI9MC42NjU4OQpjTUxTMj0wLjE3ODk4CmNNU1AyPS0wLjEyODAzCmBgYAoKYGBge3J9ClEyQXllcz1leHAoY0lOVDIrY01MQTIqbkxBK2NNQjQyKjErY01MUzIqMStjTVNQMioxKQpwcmludChRMkF5ZXMpCmBgYAoKYGBge3J9ClEyQW5vPWV4cChjSU5U
MitjTUxBMipuTEErY01CNDIqMStjTUxTMioxK2NNU1AyKjApCnByaW50KFEyQW5vKQpgYGAKMkEpIElmIG5vIHN1cnByaXNlIHByaWNlIHdpbGwgYmUgaGlnaGVyIGJ5OgpgYGB7cn0KUTJBPVEyQW5vLVEyQXllcwpwcmludChRMkEpCmBgYAoyQilFeHBlY3RlZCBhdWRpdCBmZWVzIGZvciBhIGNvbXBhbnkgd2l0aCAkNzUwIG1pbGxpb24gaW4gYXNzZXRzIHRoYXQgaXMgYXVkaXRlZCBieSBhIEJpZyA0IGZpcm0sIHJlcG9ydHMgYSBsb3NzIGZvciB0aGUgeWVhciwgYW5kIGhhcyBhIHN1cnByaXNlIG1hdGVyaWFsIHdlYWtuZXNzIGluIGludGVybmFsIGNvbnRyb2xzPwpgYGB7cn0KUTJCTEE8LWxvZyg3NTAwMDAwMDApClEyQj1leHAoY0lOVDIrY01MQTIqUTJCTEErY01CNDIqMStjTUxTMioxK2NNU1AyKjEpCnByaW50KFEyQikKYGBgCjJDKQpgYGB7cn0KUTJDMj1leHAoY0lOVDIrY01MQTIqUTJCTEErY01CNDIqMStjTUxTMiowK2NNU1AyKjApCnByaW50KFEyQzIpCmBgYApUaGUgY29tcGFueSBpbiBCIHBheXMgbW9yZSB0aGF0IEMgYnkKYGBge3J9ClEyQz1RMkItUTJDMgpwcmludChRMkMpCmBgYAojIFF1ZXN0aW9uIDMKYGBge3J9CnJlc3VsdDMuMTwtbG0oTEZ+TEErQjQrTVcrUkEpCnN1bW1hcnkocmVzdWx0My4xKQpgYGAKCmBgYHtyfQpyZXN1bHQzLjI8LWxtKExGfkxBK01XK0crTlQrUkEpCnN1bW1hcnkocmVzdWx0My4yKQpgYGAKCmBgYHtyfQpyZXN1bHQzLjM8LWxtKExGfkxBK0NGTytSQSkKc3VtbWFyeShyZXN1bHQzLjMpCmBgYAoKYGBge3J9CnJlc3VsdDMuNDwtbG0oTEZ+TEErQUMrUkEpCnN1bW1hcnkocmVzdWx0My40KQpgYGAKClJlc3VsdCAzLjEgaGFkIHRoZSBiZXN0IHJlc3VsdCB3aXRoIGEgc3RhbmRhciBlcnJvciBvZiAwLjcwNjcuIFRob3NlIGFyZSB0aGUgdmFyaWFibGVzIEkgYmVsaXZlZCB3b3VsZCBoYXZlIHRoZSBiaWdnZXN0IGltcGFjdCBvbiB0aGUgZmVlcy4gSSB0cmllZCB0byBzZWUgaWYgb3RoZXIgYXNzcGVjdHMgd291bGQgaW1wYWN0IGZlZXMgYXMgbXVjaCBidXQgdGhleSBkaWQgbm90LgpgYGB7cn0Kc3VtbWFyeShyZXN1bHQzLjEpCmBgYAo=