#package library
library(corrgram)
## Warning: 程辑包'corrgram'是用R版本4.1.3 来建造的
library(PerformanceAnalytics)
## Warning: 程辑包'PerformanceAnalytics'是用R版本4.1.3 来建造的
## 载入需要的程辑包:xts
## Warning: 程辑包'xts'是用R版本4.1.3 来建造的
## 载入需要的程辑包:zoo
## Warning: 程辑包'zoo'是用R版本4.1.3 来建造的
## 
## 载入程辑包:'zoo'
## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric
## 
## 载入程辑包:'PerformanceAnalytics'
## The following object is masked from 'package:graphics':
## 
##     legend
library(stargazer)
## 
## Please cite as:
##  Hlavac, Marek (2022). stargazer: Well-Formatted Regression and Summary Statistics Tables.
##  R package version 5.2.3. https://CRAN.R-project.org/package=stargazer
library(car)
## Warning: 程辑包'car'是用R版本4.1.3 来建造的
## 载入需要的程辑包:carData
## Warning: 程辑包'carData'是用R版本4.1.3 来建造的

#导入数据

#导入数据
data <- read.delim("C:/Users/1/Desktop/Xing's Paper/data.txt", encoding="UTF-8", header=FALSE)
data <- data[-1,-5]
colnames(data) <- c("year","inc_tot","tour_dom","tour_ove","inc_urban","inc_rural","popul","highway","railway","prod_regi","inve_fix","highway_lev","hot_star")
#数据类型变换
for(i in 2:13){
  data[,i] <- as.numeric(data[,i])
}
data1 <- data
colnames(data1) <- c("年份","旅游总收入","国内游客人数","海外游客人数","城镇居民人均可支配收入","农村居民人均可支配收入","人口数量","公路里程","铁路里程","地区生产总值","固定资产投资","等级公路","星级宾馆数")
data4 <- data1[,-11]
data4 <- data4[,-11]
data4 <- data4[,-1]

由于本文所选取的变量较多,而且变量之间的单位不统一,量纲不同,无法直接进行数据分析,因此需要进行标准化处理,转化成无量纲数据。数据标准化处理公式见下式(1): \[Z X_{i}=\frac{\left(X_{i}-\bar{X}\right)}{\sqrt{\operatorname{Var}\left(X_{i}\right)}}\] 式中,\(ZX_{i}\)是变量\(X_{i}\)标准化后的形式,\(\bar{X}\)代表变量\(X_{i}\)的数据均值。\(\operatorname{Var}\)代表变量\(X_{i}\)的方差。本文将所有变量的数据按照公式(1)进行标准化处理,消除量纲影响,同时变量的数 据经过标准化处理后符合正态分布规律,可以进行后面的回归分析。

for(i in 1:10){
  data4[,i] <- (data4[,i]-mean(data4[,i]))/sd(data4[,i])
} 
head(data4)
##   旅游总收入 国内游客人数 海外游客人数 城镇居民人均可支配收入
## 2 -0.8129251   -0.9302084   -0.7358325             -1.1529057
## 3 -0.7915752   -0.9169570   -0.6932889             -1.1013615
## 4 -0.7792470   -0.8886678   -0.6414966             -1.0850332
## 5 -0.7625761   -0.8786077   -0.9881516             -1.0193899
## 6 -0.7357085   -0.8597711   -0.6522587             -0.9328592
## 7 -0.7072951   -0.8329567   -0.5123523             -0.8497043
##   农村居民人均可支配收入   人口数量  公路里程   铁路里程 地区生产总值
## 2             -1.0956789 -1.4823721 -1.665858 -1.4682613   -1.1455024
## 3             -1.0685705 -1.3603901 -1.584103 -0.9323995   -1.1225131
## 4             -1.0421572 -1.2425584 -1.324892 -0.9323995   -1.0963074
## 5             -1.0064760 -1.1187719 -1.318844 -0.7180548   -1.0546222
## 6             -0.9293212 -0.9928200 -1.281247 -0.9323995   -0.9741171
## 7             -0.8574954 -0.8686726 -1.276622 -0.8252272   -0.8958904
##   星级宾馆数
## 2 -2.0684169
## 3 -1.5754308
## 4 -1.1419429
## 5 -0.8189520
## 6 -0.3259658
## 7 -0.5724589

#查看数据

stargazer(data4,type = "text")
## 
## ===========================================
## Statistic   N   Mean  St. Dev.  Min    Max 
## -------------------------------------------
## 旅游总收入       20 0.000   1.000   -0.813 2.519
## 国内游客人数      20 0.000   1.000   -0.930 2.156
## 海外游客人数      20 0.000   1.000   -0.997 2.808
## 城镇居民人均可支配收入 20 0.000   1.000   -1.153 1.931
## 农村居民人均可支配收入 20 0.000   1.000   -1.096 1.954
## 人口数量        20 -0.000  1.000   -1.482 1.277
## 公路里程        20 -0.000  1.000   -1.666 0.892
## 铁路里程        20 0.000   1.000   -1.468 2.068
## 地区生产总值      20 -0.000  1.000   -1.146 1.834
## 星级宾馆数       20 -0.000  1.000   -2.068 1.119
## -------------------------------------------

“年份”,“旅游总收入”,“国内游客人数”,“海外游客人数”,“城镇居民人均可支配收入”,“农村居民人均可支配收入”,“人口数量”,“公路里程”,“铁路里程”,“地区生产总值”,“固定资产投资”,“等级公路”,“星级宾馆数”

#分别做散点图

data5 <- data4
colnames(data5) <- c("inc_tot","tour_dom","tour_ove","inc_urban","inc_rural","popul","highway","railway","prod_regi","hot_star")
# head(data5)

plot(data5[,1],data5[,2],xlab = "X2",ylab = "Y")

plot(data5[,1],data5[,3],xlab = "X3",ylab = "Y")

plot(data5[,1],data5[,4],xlab = "X4",ylab = "Y")

plot(data5[,1],data5[,5],xlab = "X5",ylab = "Y")

plot(data5[,1],data5[,6],xlab = "X6",ylab = "Y")

plot(data5[,1],data5[,7],xlab = "X7",ylab = "Y")

plot(data5[,1],data5[,8],xlab = "X8",ylab = "Y")

plot(data5[,1],data5[,9],xlab = "X9",ylab = "Y")

plot(data5[,1],data5[,10],xlab = "X10",ylab = "Y")

#多元线性回归及统计检验

myline1 <- lm(inc_tot~tour_dom+tour_ove+inc_urban+inc_rural+popul+highway+railway+prod_regi+hot_star,data5)
stargazer(myline1,type = "text")
## 
## ===============================================
##                         Dependent variable:    
##                     ---------------------------
##                               inc_tot          
## -----------------------------------------------
## tour_dom                     1.576***          
##                               (0.289)          
##                                                
## tour_ove                     0.211***          
##                               (0.031)          
##                                                
## inc_urban                     -1.288           
##                               (0.969)          
##                                                
## inc_rural                      0.280           
##                               (1.385)          
##                                                
## popul                        -0.293**          
##                               (0.107)          
##                                                
## highway                       -0.162*          
##                               (0.074)          
##                                                
## railway                        0.047           
##                               (0.055)          
##                                                
## prod_regi                      0.579           
##                               (0.556)          
##                                                
## hot_star                      0.167**          
##                               (0.062)          
##                                                
## Constant                      -0.000           
##                               (0.011)          
##                                                
## -----------------------------------------------
## Observations                    20             
## R2                             0.999           
## Adjusted R2                    0.998           
## Residual Std. Error       0.048 (df = 10)      
## F Statistic           907.251*** (df = 9; 10)  
## ===============================================
## Note:               *p<0.1; **p<0.05; ***p<0.01
# star(myline1)

#多重共线性检验 ##Pearson相关系数矩阵:

cor <- cor(data5)
stargazer(cor,type = "text")
## 
## ================================================================================================
##           inc_tot tour_dom tour_ove inc_urban inc_rural popul highway railway prod_regi hot_star
## ------------------------------------------------------------------------------------------------
## inc_tot      1     0.981    0.885     0.945     0.948   0.818  0.663   0.921    0.925    -0.091 
## tour_dom   0.981     1      0.807     0.987     0.990   0.900  0.755   0.964    0.978    0.024  
## tour_ove   0.885   0.807      1       0.755     0.754   0.627  0.542   0.726    0.725    -0.113 
## inc_urban  0.945   0.987    0.755       1       1.000   0.950  0.838   0.977    0.997    0.170  
## inc_rural  0.948   0.990    0.754     1.000       1     0.947  0.828   0.978    0.997    0.148  
## popul      0.818   0.900    0.627     0.950     0.947     1    0.939   0.936    0.967    0.402  
## highway    0.663   0.755    0.542     0.838     0.828   0.939    1     0.817    0.860    0.639  
## railway    0.921   0.964    0.726     0.977     0.978   0.936  0.817     1      0.976    0.162  
## prod_regi  0.925   0.978    0.725     0.997     0.997   0.967  0.860   0.976      1      0.206  
## hot_star  -0.091   0.024    -0.113    0.170     0.148   0.402  0.639   0.162    0.206      1    
## ------------------------------------------------------------------------------------------------

由上图可知,有较多解释变量两两之间的相关系数较大,甚至超过了0.95,因此怀疑存在多重共线性。 PS. ##VIF(方差膨胀因子):

vif(myline1)
##     tour_dom     tour_ove    inc_urban    inc_rural        popul      highway 
##   680.619611     8.082585  7677.587603 15680.167189    93.929534    44.578795 
##      railway    prod_regi     hot_star 
##    24.466363  2531.735828    31.722930

做逐步回归,以克服多重共线性问题

myline2 <- step(myline1)
## Start:  AIC=-115.15
## inc_tot ~ tour_dom + tour_ove + inc_urban + inc_rural + popul + 
##     highway + railway + prod_regi + hot_star
## 
##             Df Sum of Sq      RSS      AIC
## - inc_rural  1  0.000095 0.023336 -117.070
## - railway    1  0.001723 0.024964 -115.721
## <none>                   0.023241 -115.152
## - prod_regi  1  0.002519 0.025760 -115.093
## - inc_urban  1  0.004108 0.027348 -113.897
## - highway    1  0.011143 0.034384 -109.318
## - hot_star   1  0.016694 0.039935 -106.325
## - popul      1  0.017408 0.040648 -105.971
## - tour_dom   1  0.069372 0.092613  -89.501
## - tour_ove   1  0.104745 0.127986  -83.031
## 
## Step:  AIC=-117.07
## inc_tot ~ tour_dom + tour_ove + inc_urban + popul + highway + 
##     railway + prod_regi + hot_star
## 
##             Df Sum of Sq      RSS      AIC
## - railway    1  0.001870 0.025205 -117.529
## <none>                   0.023336 -117.070
## - prod_regi  1  0.007069 0.030404 -113.778
## - highway    1  0.011130 0.034466 -111.270
## - inc_urban  1  0.014439 0.037775 -109.437
## - popul      1  0.017325 0.040660 -107.965
## - hot_star   1  0.021184 0.044520 -106.151
## - tour_dom   1  0.086551 0.109887  -88.081
## - tour_ove   1  0.113205 0.136540  -83.737
## 
## Step:  AIC=-117.53
## inc_tot ~ tour_dom + tour_ove + inc_urban + popul + highway + 
##     prod_regi + hot_star
## 
##             Df Sum of Sq      RSS      AIC
## <none>                   0.025205 -117.529
## - prod_regi  1  0.006406 0.031612 -114.999
## - highway    1  0.011690 0.036895 -111.908
## - inc_urban  1  0.012972 0.038177 -111.225
## - popul      1  0.015653 0.040858 -109.868
## - hot_star   1  0.020227 0.045432 -107.745
## - tour_dom   1  0.085440 0.110645  -89.943
## - tour_ove   1  0.112755 0.137960  -85.530
myline2
## 
## Call:
## lm(formula = inc_tot ~ tour_dom + tour_ove + inc_urban + popul + 
##     highway + prod_regi + hot_star, data = data5)
## 
## Coefficients:
## (Intercept)     tour_dom     tour_ove    inc_urban        popul      highway  
##  -6.760e-17    1.588e+00    2.087e-01   -1.036e+00   -2.708e-01   -1.653e-01  
##   prod_regi     hot_star  
##   6.265e-01    1.561e-01

逐步回归后的方程: \[旅游总收入=国内游客人数+海外游客人数+城镇居民人均可支配收入+人口总数+公路里程+地区生产总值+星级宾馆数\]

vif(myline2)
##    tour_dom    tour_ove   inc_urban       popul     highway   prod_regi 
##  560.852304    7.341054 1572.012719   88.997894   44.434095 1164.113020 
##    hot_star 
##   22.880793

#异方差检验 ##BP检验

#step后的模型
myline3 <- lm(inc_tot ~ tour_dom + tour_ove + inc_urban + popul + highway + prod_regi + hot_star,data5)
#BP检验
# bptest(myline3)
e2 <- (myline3$residuals)^2
myline4 <- lm(e2~tour_dom + tour_ove + inc_urban + popul + highway + prod_regi + hot_star,data5)
stargazer(myline4,type="text")
## 
## ===============================================
##                         Dependent variable:    
##                     ---------------------------
##                                 e2             
## -----------------------------------------------
## tour_dom                       0.002           
##                               (0.007)          
##                                                
## tour_ove                     -0.002**          
##                               (0.001)          
##                                                
## inc_urban                      0.008           
##                               (0.011)          
##                                                
## popul                         -0.001           
##                               (0.003)          
##                                                
## highway                        0.001           
##                               (0.002)          
##                                                
## prod_regi                     -0.009           
##                               (0.009)          
##                                                
## hot_star                      -0.0001          
##                               (0.001)          
##                                                
## Constant                     0.001***          
##                              (0.0003)          
##                                                
## -----------------------------------------------
## Observations                    20             
## R2                             0.549           
## Adjusted R2                    0.286           
## Residual Std. Error       0.001 (df = 12)      
## F Statistic             2.086 (df = 7; 12)     
## ===============================================
## Note:               *p<0.1; **p<0.05; ***p<0.01
ys1 <- c("tour_dom=0","tour_ove=0","inc_urban=0","popul =0","highway=0","prod_regi=0","hot_star=0")
myline5 <- linearHypothesis(myline4,ys1)
myline5
## Linear hypothesis test
## 
## Hypothesis:
## tour_dom = 0
## tour_ove = 0
## inc_urban = 0
## popul = 0
## highway = 0
## prod_regi = 0
## hot_star = 0
## 
## Model 1: restricted model
## Model 2: e2 ~ tour_dom + tour_ove + inc_urban + popul + highway + prod_regi + 
##     hot_star
## 
##   Res.Df        RSS Df  Sum of Sq      F Pr(>F)
## 1     19 3.9115e-05                            
## 2     12 1.7643e-05  7 2.1472e-05 2.0863 0.1257