加载常用的 R 包
# Attach pacman first so that p_load() is available, then hand it every
# package this analysis relies on in a single call.
# The names are listed one per line in their original order: attach order
# decides which package masks which, so it is deliberately left unchanged.
library(pacman)
p_load(
  char = c(
    "tidyverse",
    "stringr",
    "DT",
    "skimr",
    "DataExplorer",
    "grf",
    "glmnet",
    "caret",
    "tidytext",
    "explore",
    "patchwork",
    "ggrepel",
    "ggcorrplot",
    "gghighlight",
    "ggthemes",
    "fpp2",
    "forecast",
    "magrittr",
    "readxl",
    "writexl",
    "listviewer",
    "car",
    "tseries",
    "vtable"
  )
)
## mpg cyl disp hp drat wt qsec vs am gear carb
## Mazda RX4 21.0 6 160.0 110 3.90 2.620 16.46 0 1 4 4
## Mazda RX4 Wag 21.0 6 160.0 110 3.90 2.875 17.02 0 1 4 4
## Datsun 710 22.8 4 108.0 93 3.85 2.320 18.61 1 1 4 1
## Hornet 4 Drive 21.4 6 258.0 110 3.08 3.215 19.44 1 0 3 1
## Hornet Sportabout 18.7 8 360.0 175 3.15 3.440 17.02 0 0 3 2
## Valiant 18.1 6 225.0 105 2.76 3.460 20.22 1 0 3 1
## Duster 360 14.3 8 360.0 245 3.21 3.570 15.84 0 0 3 4
## Merc 240D 24.4 4 146.7 62 3.69 3.190 20.00 1 0 4 2
## Merc 230 22.8 4 140.8 95 3.92 3.150 22.90 1 0 4 2
## Merc 280 19.2 6 167.6 123 3.92 3.440 18.30 1 0 4 4
## Merc 280C 17.8 6 167.6 123 3.92 3.440 18.90 1 0 4 4
## Merc 450SE 16.4 8 275.8 180 3.07 4.070 17.40 0 0 3 3
## Merc 450SL 17.3 8 275.8 180 3.07 3.730 17.60 0 0 3 3
## Merc 450SLC 15.2 8 275.8 180 3.07 3.780 18.00 0 0 3 3
## Cadillac Fleetwood 10.4 8 472.0 205 2.93 5.250 17.98 0 0 3 4
## Lincoln Continental 10.4 8 460.0 215 3.00 5.424 17.82 0 0 3 4
## Chrysler Imperial 14.7 8 440.0 230 3.23 5.345 17.42 0 0 3 4
## Fiat 128 32.4 4 78.7 66 4.08 2.200 19.47 1 1 4 1
## Honda Civic 30.4 4 75.7 52 4.93 1.615 18.52 1 1 4 2
## Toyota Corolla 33.9 4 71.1 65 4.22 1.835 19.90 1 1 4 1
## Toyota Corona 21.5 4 120.1 97 3.70 2.465 20.01 1 0 3 1
## Dodge Challenger 15.5 8 318.0 150 2.76 3.520 16.87 0 0 3 2
## AMC Javelin 15.2 8 304.0 150 3.15 3.435 17.30 0 0 3 2
## Camaro Z28 13.3 8 350.0 245 3.73 3.840 15.41 0 0 3 4
## Pontiac Firebird 19.2 8 400.0 175 3.08 3.845 17.05 0 0 3 2
## Fiat X1-9 27.3 4 79.0 66 4.08 1.935 18.90 1 1 4 1
## Porsche 914-2 26.0 4 120.3 91 4.43 2.140 16.70 0 1 5 2
## Lotus Europa 30.4 4 95.1 113 3.77 1.513 16.90 1 1 5 2
## Ford Pantera L 15.8 8 351.0 264 4.22 3.170 14.50 0 1 5 4
## Ferrari Dino 19.7 6 145.0 175 3.62 2.770 15.50 0 1 5 6
## Maserati Bora 15.0 8 301.0 335 3.54 3.570 14.60 0 1 5 8
## Volvo 142E 21.4 4 121.0 109 4.11 2.780 18.60 1 1 4 2
## [1] "wage" "edu" "wage0" "gender" "minority" "job"
##
## Call:
## lm(formula = log(wage) ~ edu)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.66260 -0.19303 -0.03559 0.16538 0.95223
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 9.062102 0.062738 144.4 <2e-16 ***
## edu 0.095963 0.004548 21.1 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.2853 on 472 degrees of freedom
## Multiple R-squared: 0.4854, Adjusted R-squared: 0.4844
## F-statistic: 445.3 on 1 and 472 DF, p-value: < 2.2e-16

## (Intercept) edu
## 9.06210165 0.09596304
## [,1]
## [1,] 160.2177
## [2,] 172.7014
## Analysis of Variance Table
##
## Response: log(wage)
## Df Sum Sq Mean Sq F value Pr(>F)
## edu 1 36.251 36.251 445.3 < 2.2e-16 ***
## Residuals 472 38.424 0.081
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

## Loading required package: timeDate
## Loading required package: timeSeries
##
## Attaching package: 'fBasics'
## The following object is masked from 'package:car':
##
## densityPlot
## resid.lm_we
## nobs 474.000000
## NAs 0.000000
## Minimum -0.662598
## Maximum 0.952229
## 1. Quartile -0.193032
## 3. Quartile 0.165382
## Mean 0.000000
## Median -0.035591
## Sum 0.000000
## SE Mean 0.013091
## LCL Mean -0.025724
## UCL Mean 0.025724
## Variance 0.081235
## Stdev 0.285017
## Skewness 0.516700
## Kurtosis 0.242913
##
## Title:
## Jarque - Bera Normalality Test
##
## Test Results:
## STATISTIC:
## X-squared: 22.5262
## P VALUE:
## Asymptotic p Value: 1.284e-05
##
## Description:
## Sun May 17 09:14:03 2020 by user: Lenovo
##
## Title:
## Jarque - Bera Normalality Test
##
## Test Results:
## STATISTIC:
## X-squared: 22.5262
## P VALUE:
## Asymptotic p Value: 1.284e-05
##
## Description:
## Sun May 17 09:14:03 2020 by user: Lenovo
##
## Shapiro-Wilk normality test
##
## data: resid(lm_we)
## W = 0.98184, p-value = 1.199e-05
## [1] 0.2429132
## attr(,"method")
## [1] "excess"
## [1] 3.242913
## attr(,"method")
## [1] "moment"
## [1] 18 343
## [1] 18 343


## Non-constant Variance Score Test
## Variance formula: ~ fitted.values
## Chisquare = 8.791185, Df = 1, p = 0.0030269
## Loading required package: zoo
##
## Attaching package: 'zoo'
## The following object is masked from 'package:timeSeries':
##
## time<-
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
##
## studentized Breusch-Pagan test
##
## data: lm_we
## BP = 7.7914, df = 1, p-value = 0.00525
##
## Breusch-Pagan test
##
## data: lm_we
## BP = 8.7912, df = 1, p-value = 0.003027
## lag Autocorrelation D-W Statistic p-value
## 1 0.1149518 1.764697 0.006
## Alternative hypothesis: rho != 0
## Potentially influential observations of
## lm(formula = log(wage) ~ edu) :
##
## dfb.1_ dfb.edu dffit cov.r cook.d hat
## 4 0.06 -0.05 0.06 1.01_* 0.00 0.01
## 18 -0.10 0.14 0.21_* 0.96_* 0.02 0.00
## 27 -0.03 0.04 0.04 1.01_* 0.00 0.01
## 29 -0.25 0.29 0.33_* 0.97_* 0.05 0.01
## 32 -0.20 0.23 0.26_* 0.99_* 0.03 0.01
## 40 0.03 -0.05 -0.12 0.99_* 0.01 0.00
## 65 0.06 -0.05 0.06 1.01_* 0.00 0.01
## 90 -0.05 0.04 -0.05 1.01_* 0.00 0.01
## 103 -0.16 0.19 0.21_* 1.00 0.02 0.01
## 121 0.03 -0.06 -0.12 0.98_* 0.01 0.00
## 130 0.00 0.00 0.00 1.02_* 0.00 0.01_*
## 137 0.00 0.00 0.00 1.02_* 0.00 0.02_*
## 139 0.03 -0.02 0.03 1.01_* 0.00 0.01
## 144 -0.04 0.03 -0.04 1.01_* 0.00 0.01
## 173 -0.05 0.06 0.07 1.02_* 0.00 0.01_*
## 209 0.02 -0.02 0.02 1.01_* 0.00 0.01
## 218 -0.04 0.07 0.14 0.97_* 0.01 0.00
## 232 -0.01 0.01 0.01 1.01_* 0.00 0.01
## 241 -0.02 0.02 -0.02 1.01_* 0.00 0.01
## 253 -0.03 0.03 -0.03 1.01_* 0.00 0.01
## 256 0.01 -0.01 -0.01 1.01_* 0.00 0.01
## 257 -0.04 0.05 0.05 1.01_* 0.00 0.01
## 258 0.05 -0.04 0.05 1.01_* 0.00 0.01
## 274 -0.08 0.10 0.16 0.98_* 0.01 0.00
## 278 0.04 -0.04 0.04 1.01_* 0.00 0.01
## 281 0.21 -0.19 0.22_* 0.99 0.02 0.01
## 325 -0.03 0.03 -0.04 1.01_* 0.00 0.01
## 338 -0.05 0.05 -0.05 1.01_* 0.00 0.01
## 340 0.05 -0.05 0.06 1.01_* 0.00 0.01
## 341 0.09 -0.07 0.14 0.98_* 0.01 0.00
## 343 -0.10 0.14 0.21_* 0.96_* 0.02 0.00
## 352 0.04 -0.04 0.05 1.01_* 0.00 0.01
## 357 -0.02 0.02 -0.02 1.01_* 0.00 0.01
## 362 -0.03 0.03 -0.03 1.01_* 0.00 0.01
## 365 0.05 -0.04 0.05 1.01_* 0.00 0.01
## 379 0.02 -0.02 0.02 1.01_* 0.00 0.01
## 408 -0.04 0.04 0.05 1.01_* 0.00 0.01
## 443 0.05 -0.05 0.05 1.01_* 0.00 0.01
## 446 -0.10 0.13 0.20_* 0.96_* 0.02 0.00
## 450 -0.01 0.01 0.01 1.01_* 0.00 0.01
## 458 -0.04 0.05 0.05 1.01_* 0.00 0.01
## 461 0.05 -0.05 0.05 1.01_* 0.00 0.01
## 464 0.03 -0.04 -0.04 1.01_* 0.00 0.01
##
## Call:
## lm(formula = log(wage) ~ edu + I(edu^2))
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.68571 -0.15861 -0.02415 0.16519 0.96679
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 11.132436 0.189092 58.873 < 2e-16 ***
## edu -0.230009 0.028742 -8.002 9.58e-15 ***
## I(edu^2) 0.012229 0.001068 11.454 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.2526 on 471 degrees of freedom
## Multiple R-squared: 0.5975, Adjusted R-squared: 0.5958
## F-statistic: 349.7 on 2 and 471 DF, p-value: < 2.2e-16
##
## Correlation of Coefficients:
## (Intercept) edu
## edu -0.99
## I(edu^2) 0.96 -0.99

##
## Call:
## lm(formula = log(wage) ~ poly(edu, 2))
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.68571 -0.15861 -0.02415 0.16519 0.96679
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 10.3568 0.0116 892.65 <2e-16 ***
## poly(edu, 2)1 6.0208 0.2526 23.84 <2e-16 ***
## poly(edu, 2)2 2.8933 0.2526 11.45 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.2526 on 471 degrees of freedom
## Multiple R-squared: 0.5975, Adjusted R-squared: 0.5958
## F-statistic: 349.7 on 2 and 471 DF, p-value: < 2.2e-16
##
## Correlation of Coefficients:
## (Intercept) poly(edu, 2)1
## poly(edu, 2)1 0.00
## poly(edu, 2)2 0.00 0.00

##
## Title:
## Jarque - Bera Normalality Test
##
## Test Results:
## STATISTIC:
## X-squared: 41.315
## P VALUE:
## Asymptotic p Value: 1.068e-09
##
## Description:
## Sun May 17 09:14:04 2020 by user: Lenovo
## resid.polm_we
## nobs 474.000000
## NAs 0.000000
## Minimum -0.685714
## Maximum 0.966785
## 1. Quartile -0.158605
## 3. Quartile 0.165189
## Mean 0.000000
## Median -0.024152
## Sum 0.000000
## SE Mean 0.011578
## LCL Mean -0.022750
## UCL Mean 0.022750
## Variance 0.063537
## Stdev 0.252065
## Skewness 0.489820
## Kurtosis 1.044062
## Non-constant Variance Score Test
## Variance formula: ~ fitted.values
## Chisquare = 28.29595, Df = 1, p = 1.0411e-07
##
## Breusch-Pagan test
##
## data: polm_we
## BP = 28.573, df = 2, p-value = 6.244e-07
## lag Autocorrelation D-W Statistic p-value
## 1 0.105941 1.778422 0.01
## Alternative hypothesis: rho != 0
##
## Call:
## lm(formula = log(wage) ~ edu + log(wage0))
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.45035 -0.11750 -0.01215 0.11453 0.90229
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.646916 0.274598 5.998 3.99e-09 ***
## edu 0.023122 0.003894 5.938 5.59e-09 ***
## log(wage0) 0.868505 0.031835 27.282 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.1778 on 471 degrees of freedom
## Multiple R-squared: 0.8006, Adjusted R-squared: 0.7997
## F-statistic: 945.4 on 2 and 471 DF, p-value: < 2.2e-16
## Analysis of Variance Table
##
## Model 1: log(wage) ~ edu
## Model 2: log(wage) ~ edu + log(wage0)
## Res.Df RSS Df Sum of Sq F Pr(>F)
## 1 472 38.424
## 2 471 14.892 1 23.532 744.29 < 2.2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Non-constant Variance Score Test
## Variance formula: ~ fitted.values
## Chisquare = 13.41802, Df = 1, p = 0.00024922
##
## Breusch-Pagan test
##
## data: lm2_wew
## BP = 16.069, df = 2, p-value = 0.000324
## [1] 536.4418
##
## Call:
## lm(formula = log(wage) ~ edu + log(wage0) + gender)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.45101 -0.11130 -0.01224 0.10796 0.88370
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.932281 0.307832 6.277 7.85e-10 ***
## edu 0.023378 0.003883 6.021 3.50e-09 ***
## log(wage0) 0.836406 0.035468 23.582 < 2e-16 ***
## genderMale 0.039600 0.019551 2.025 0.0434 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.1772 on 470 degrees of freedom
## Multiple R-squared: 0.8023, Adjusted R-squared: 0.801
## F-statistic: 635.8 on 3 and 470 DF, p-value: < 2.2e-16
## Wald test
##
## Model 1: log(wage) ~ edu + log(wage0) + gender
## Model 2: log(wage) ~ edu + gender
## Res.Df Df F Pr(>F)
## 1 470
## 2 471 -1 556.12 < 2.2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Call:
## lm(formula = log(wage) ~ edu + log(wage0) + gender + gender *
## edu)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.45056 -0.11127 -0.00943 0.10693 0.88486
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.885367 0.332427 5.672 2.48e-08 ***
## edu 0.024805 0.005433 4.565 6.38e-06 ***
## log(wage0) 0.839500 0.036441 23.037 < 2e-16 ***
## genderMale 0.071119 0.086097 0.826 0.409
## edu:genderMale -0.002471 0.006573 -0.376 0.707
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.1774 on 469 degrees of freedom
## Multiple R-squared: 0.8024, Adjusted R-squared: 0.8007
## F-statistic: 476 on 4 and 469 DF, p-value: < 2.2e-16
##
## Call:
## lm(formula = log(wage) ~ edu + log(wage0) + (gender + edu)^2)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.45056 -0.11127 -0.00943 0.10693 0.88486
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.885367 0.332427 5.672 2.48e-08 ***
## edu 0.024805 0.005433 4.565 6.38e-06 ***
## log(wage0) 0.839500 0.036441 23.037 < 2e-16 ***
## genderMale 0.071119 0.086097 0.826 0.409
## edu:genderMale -0.002471 0.006573 -0.376 0.707
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.1774 on 469 degrees of freedom
## Multiple R-squared: 0.8024, Adjusted R-squared: 0.8007
## F-statistic: 476 on 4 and 469 DF, p-value: < 2.2e-16
##
## Call:
## lm(formula = log(wage) ~ edu + log(wage0) + (edu + gender + minority)^2)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.45697 -0.11790 -0.00293 0.10435 0.86635
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2.173917 0.339770 6.398 3.83e-10 ***
## edu 0.029143 0.005551 5.250 2.32e-07 ***
## log(wage0) 0.804249 0.037496 21.449 < 2e-16 ***
## genderMale 0.044760 0.090223 0.496 0.62006
## minorityYes 0.222798 0.102574 2.172 0.03035 *
## edu:genderMale -0.000342 0.006709 -0.051 0.95937
## edu:minorityYes -0.021623 0.007903 -2.736 0.00645 **
## genderMale:minorityYes 0.022999 0.041332 0.556 0.57817
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.1757 on 466 degrees of freedom
## Multiple R-squared: 0.8074, Adjusted R-squared: 0.8045
## F-statistic: 279 on 7 and 466 DF, p-value: < 2.2e-16
##
## Call:
## lm(formula = log(wage) ~ edu + log(wage0) + (edu + gender + minority)^3)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.45714 -0.11783 -0.00318 0.10515 0.86618
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2.1838785 0.3430826 6.365 4.67e-10 ***
## edu 0.0287972 0.0057725 4.989 8.60e-07 ***
## log(wage0) 0.8036479 0.0376322 21.355 < 2e-16 ***
## genderMale 0.0363215 0.0980328 0.371 0.711
## minorityYes 0.1850335 0.1991397 0.929 0.353
## edu:genderMale 0.0003006 0.0073162 0.041 0.967
## edu:minorityYes -0.0186024 0.0157751 -1.179 0.239
## genderMale:minorityYes 0.0740376 0.2342868 0.316 0.752
## edu:genderMale:minorityYes -0.0040124 0.0181287 -0.221 0.825
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.1759 on 465 degrees of freedom
## Multiple R-squared: 0.8074, Adjusted R-squared: 0.8041
## F-statistic: 243.6 on 8 and 465 DF, p-value: < 2.2e-16