# NOTE(review): hard-coded, machine-specific working directory. The relative
# CSV path used when loading the data below depends on it.
setwd("C:/Users/ASUS/Desktop/快手")

1.读取并处理数据

# Load the analysis dataset from the current working directory.
data <- read_csv(file = "All_Random_PK_TWFE.csv")
# Optional column renaming (disabled): strip the "author_live_downsample."
# prefix from every column name with sub().
# new_column_names <- sub("^author_live_downsample\\.", "", names(data))
# names(data) <- new_column_names

1.1 对于每个author按照p_date的顺序对于live_id进行排序(处理为panel结构)

# Build the per-author sequence index later used as the panel time index:
# live_id_order is the rank of live_id within each author_id.
# NOTE(review): the ranking uses live_id, not p_date; this presumably relies on
# live_id increasing with broadcast time -- confirm.
data <- data %>%
  group_by(author_id) %>%
  mutate(live_id_order = row_number(live_id)) %>%
  ungroup()
# Largest sequence index over all authors.
max(data[["live_id_order"]])
## [1] 2699

1.2 处理categorical variables

处理类别变量函数

# Add a zero-based numeric code column for a categorical column.
#
# data:   a data frame (or tibble).
# column: name (string) of the categorical column to encode.
#
# Returns `data` with an extra column "<column>_number". Codes follow the order
# in which each category first appears in `data[[column]]` (first seen -> 0,
# next new value -> 1, ...), so the mapping depends on row order.
assign_numbers <- function(data, column) {
  values <- data[[column]]
  # match() gives the 1-based position among first-seen categories; shift to 0.
  codes <- match(values, unique(values)) - 1
  data[[paste0(column, "_number")]] <- codes
  data
}
  1. 性别:gender
# Encode gender into gender_number and tabulate the resulting codes.
data <- assign_numbers(data = data, column = "gender")
table(data[["gender_number"]])
## 
##      0      1      2 
## 239427 253848     22
# Tabulate the original gender labels to cross-check the code counts.
table(data[["gender"]])
## 
##      F      M      U 
## 239427 253848     22
  2. 年龄:age_range
# Encode age_range into age_range_number and tabulate the resulting codes.
data <- assign_numbers(data = data, column = "age_range")
table(data[["age_range_number"]])
## 
##      0      1      2      3      4      5      6      7 
##  91376  63118  95561  72786 160369   8659   1405     23
# Tabulate the original age_range labels to cross-check the code counts.
table(data[["age_range"]])
## 
##    0-12   12-17   18-23   24-30   31-40   41-49     50+ UNKNOWN 
##    1405    8659   63118   91376  160369   95561   72786      23
  3. 地区:fre_country_region
# Encode fre_country_region into fre_country_region_number and tabulate codes.
data <- assign_numbers(data = data, column = "fre_country_region")
table(data[["fre_country_region_number"]])
## 
##      0      1      2 
## 383826 108811    660
# Tabulate the original fre_country_region labels to cross-check the codes.
table(data[["fre_country_region"]])
## 
## UNKNOWN    北方    南方 
##     660  383826  108811
  4. 城市:fre_city_level
# Encode fre_city_level into fre_city_level_number and tabulate the codes.
data <- assign_numbers(data = data, column = "fre_city_level")
table(data[["fre_city_level_number"]])
## 
##      0      1      2      3      4      5      6 
## 106830 143928 101048  75843  49724   1178  14746
# Tabulate the original fre_city_level labels to cross-check the codes.
table(data[["fre_city_level"]])
## 
##    UNKNOWN   二线城市   三线城市   四线城市   五线城市 新一线城市   一线城市 
##       1178      75843     106830     101048     143928      49724      14746
  5. 作者类型:author_type
# Encode author_type into author_type_number and tabulate the codes.
data <- assign_numbers(data = data, column = "author_type")
table(data[["author_type_number"]])
## 
##      0      1      2 
## 477119   5709  10469
# Tabulate the original author_type labels to cross-check the codes.
table(data[["author_type"]])
## 
## 电商主播 秀场主播 游戏主播 
##    10469   477119     5709
  6. 作者细分类型:author_category_type
# Encode author_category_type into author_category_type_number and tabulate.
data <- assign_numbers(data = data, column = "author_category_type")
table(data[["author_category_type_number"]])
## 
##      0      1      2      3      4 
## 222237  90025 131470  18939  30626
# Tabulate the original author_category_type labels to cross-check the codes.
table(data[["author_category_type"]])
## 
##        A        B        C        D 职业电商 
##    90025    30626   222237   131470    18939
  7. 主播收入能力:author_income_range
# Encode author_income_range into author_income_range_number and tabulate.
data <- assign_numbers(data = data, column = "author_income_range")
table(data[["author_income_range_number"]])
## 
##      0      1      2      3      4      5      6      7 
## 485425   3147   1715     64   1303    573   1044     26
# Tabulate the original author_income_range labels to cross-check the codes.
table(data[["author_income_range"]])
## 
##      (0,10]     (10,50] (1000,5000]    (50,500]  (500,1000]     (5000+) 
##        1044         573        1303        3147        1715          26 
##      无营收 
##          64
  8. 粉丝范围:fans_range
# Encode fans_range into fans_range_number and tabulate the resulting codes.
data <- assign_numbers(data = data, column = "fans_range")
table(data[["fans_range_number"]])
## 
##      0      1      2      3      4 
## 232556 164761  44031  41583  10366
# Tabulate the original fans_range labels to cross-check the code counts.
table(data[["fans_range"]])
## 
##    0-100   100-1k 10w-100w    1k-1w   1w-10w 
##    41583   232556    10366   164761    44031

2.查看panel data特征

  1. 查看数据treatment
library(fect)
## Registered S3 method overwritten by 'GGally':
##   method from   
##   +.gg   ggplot2
library(panelView)
## ## See bit.ly/panelview4r for more info.
## ## Report bugs -> yiqingxu@stanford.edu.
library(patchwork)


# Panel plot titled "Data: Treatment Status": is_pk_live by author_id over
# live_id_order, on a white background.
panelview(
  total_cost_amt ~ is_pk_live,
  data = data,
  index = c("author_id", "live_id_order"),
  axis.lab = "time",
  xlab = "live_id_order",
  ylab = "author_id",
  background = "white",
  main = "Data: Treatment Status"
)
## If the number of units is more than 300, we set "gridOff = TRUE".
## If the number of units is more than 500, we randomly select 500 units to present.
##         You can set "display.all = TRUE" to show all units.

# Drop authors with very long histories: keep only authors that have fewer
# than 200 rows in `data`.
result <- data %>%
  group_by(author_id) %>%
  filter(n() < 200) %>%
  distinct(author_id)

# Keep the rows whose author_id appears in the retained-author list.
data <- semi_join(data, result, by = "author_id")
  2. 查看数据outcome
# Panel plot of the outcome (type = "outcome"): total_cost_amt by author over
# live_id_order, black-and-white theme.
panelview(
  total_cost_amt ~ is_pk_live,
  data = data,
  index = c("author_id", "live_id_order"),
  axis.lab = "time",
  xlab = "live_id_order",
  ylab = "author",
  theme.bw = TRUE,
  type = "outcome",
  main = "Data: Outcome"
)
## If the number of units is more than 500, we randomly select 500 units to present.
##         You can set "display.all = TRUE" to show all units.
## Treatment has reversals.

查看下live_id_order的分布

# 加载所需的库
library(ggplot2)

# Histogram of live_id_order (the per-author sequence index), one bin per
# index value. Labels name the plotted variable: it is the within-author order,
# not the raw live ID.
ggplot(data, aes(x = live_id_order)) +
  geom_histogram(binwidth = 1, fill = "steelblue", color = "black") +
  labs(
    title = "Distribution of live_id_order",
    x = "live_id_order (per-author live sequence index)",
    y = "Frequency"
  ) +
  theme_minimal()

3. Panel data regression - TWFE:两个固定效应:author_id fixed、live_id_order fixed

3.1 直播间DV

library(plm) 
# Declare the panel structure for plm: author_id is the individual index and
# live_id_order is the time index (these are the two fixed-effect dimensions).
pdata <- pdata.frame(
  data,
  index = c("author_id", "live_id_order")
)

(1) Y:直播间打赏-total_cost_amt

# 加载所需的库
library(ggplot2)

# Histogram of log(total_cost_amt + 1) with unit-width bins.
ggplot(data = data, mapping = aes(x = log(total_cost_amt + 1))) +
  geom_histogram(binwidth = 1, fill = "steelblue", color = "black") +
  labs(
    title = "Distribution of Log-Transformed Total Cost Amount",
    x = "Log(total_cost_amt + 1)",
    y = "Frequency"
  ) +
  theme_minimal()

# Two-way within (fixed-effects) model of total_cost_amt on is_pk_live.
# NOTE(review): summary() is called without a vcov argument, so the reported
# standard errors are presumably not clustered by author -- confirm whether
# cluster-robust errors (e.g. plm::vcovHC) are wanted.
model <- plm(
  total_cost_amt ~ is_pk_live,
  data = pdata,
  model = "within",
  effect = "twoways"
)
summary(model)
## Twoways effects Within Model
## 
## Call:
## plm(formula = total_cost_amt ~ is_pk_live, data = pdata, effect = "twoways", 
##     model = "within")
## 
## Unbalanced Panel: n = 7634, T = 1-199, N = 306745
## 
## Residuals:
##        Min.     1st Qu.      Median     3rd Qu.        Max. 
## -41713.3557    -10.4416      1.8944     12.5543 638168.7174 
## 
## Coefficients:
##            Estimate Std. Error t-value  Pr(>|t|)    
## is_pk_live  337.495     58.088  5.8101 6.249e-09 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Total Sum of Squares:    7.6652e+11
## Residual Sum of Squares: 7.6644e+11
## R-Squared:      0.00011292
## Adj. R-Squared: -0.026086
## F-statistic: 33.7574 on 1 and 298912 DF, p-value: 6.2495e-09
# Two-way within model of log(total_cost_amt + 1) on is_pk_live.
model <- plm(
  log(total_cost_amt + 1) ~ is_pk_live,
  data = pdata,
  model = "within",
  effect = "twoways"
)
summary(model)
## Twoways effects Within Model
## 
## Call:
## plm(formula = log(total_cost_amt + 1) ~ is_pk_live, data = pdata, 
##     effect = "twoways", model = "within")
## 
## Unbalanced Panel: n = 7634, T = 1-199, N = 306745
## 
## Residuals:
##       Min.    1st Qu.     Median    3rd Qu.       Max. 
## -7.9593992 -0.3236615 -0.0806947  0.0031297  9.7837949 
## 
## Coefficients:
##            Estimate Std. Error t-value  Pr(>|t|)    
## is_pk_live 1.780572   0.039246  45.369 < 2.2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Total Sum of Squares:    352280
## Residual Sum of Squares: 349870
## R-Squared:      0.0068391
## Adj. R-Squared: -0.019183
## F-statistic: 2058.38 on 1 and 298912 DF, p-value: < 2.22e-16

(2) Y:直播间人均观看时长-avg_valid_play_duration

# Two-way within model of avg_valid_play_duration on is_pk_live.
model <- plm(
  avg_valid_play_duration ~ is_pk_live,
  data = pdata,
  model = "within",
  effect = "twoways"
)
summary(model)
## Twoways effects Within Model
## 
## Call:
## plm(formula = avg_valid_play_duration ~ is_pk_live, data = pdata, 
##     effect = "twoways", model = "within")
## 
## Unbalanced Panel: n = 7634, T = 1-199, N = 306745
## 
## Residuals:
##       Min.    1st Qu.     Median    3rd Qu.       Max. 
## -2626515.4   -31169.3    -8516.7    12972.9 10600508.0 
## 
## Coefficients:
##            Estimate Std. Error t-value  Pr(>|t|)    
## is_pk_live  28211.8     4931.4  5.7209 1.061e-08 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Total Sum of Squares:    5.5245e+15
## Residual Sum of Squares: 5.5239e+15
## R-Squared:      0.00010948
## Adj. R-Squared: -0.026089
## F-statistic: 32.7287 on 1 and 298912 DF, p-value: 1.0606e-08
# Two-way within model of log(avg_valid_play_duration + 1) on is_pk_live.
model <- plm(
  log(avg_valid_play_duration + 1) ~ is_pk_live,
  data = pdata,
  model = "within",
  effect = "twoways"
)
summary(model)
## Twoways effects Within Model
## 
## Call:
## plm(formula = log(avg_valid_play_duration + 1) ~ is_pk_live, 
##     data = pdata, effect = "twoways", model = "within")
## 
## Unbalanced Panel: n = 7634, T = 1-199, N = 306745
## 
## Residuals:
##      Min.   1st Qu.    Median   3rd Qu.      Max. 
## -14.32210  -0.40197   0.40838   1.31298  12.42230 
## 
## Coefficients:
##            Estimate Std. Error t-value  Pr(>|t|)    
## is_pk_live  0.93327    0.10144  9.2006 < 2.2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Total Sum of Squares:    2337900
## Residual Sum of Squares: 2337200
## R-Squared:      0.00028312
## Adj. R-Squared: -0.025911
## F-statistic: 84.6505 on 1 and 298912 DF, p-value: < 2.22e-16

(3) Y:直播间人均互动

人均评论:avg_comment_cnt

# Two-way within model of avg_comment_cnt on is_pk_live.
model <- plm(
  avg_comment_cnt ~ is_pk_live,
  data = pdata,
  model = "within",
  effect = "twoways"
)
summary(model)
## Twoways effects Within Model
## 
## Call:
## plm(formula = avg_comment_cnt ~ is_pk_live, data = pdata, effect = "twoways", 
##     model = "within")
## 
## Unbalanced Panel: n = 7634, T = 1-199, N = 306745
## 
## Residuals:
##       Min.    1st Qu.     Median    3rd Qu.       Max. 
## -122.43907   -1.27492   -0.39860    0.44053  264.52314 
## 
## Coefficients:
##            Estimate Std. Error t-value  Pr(>|t|)    
## is_pk_live  1.81013    0.12965  13.961 < 2.2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Total Sum of Squares:    3820900
## Residual Sum of Squares: 3818400
## R-Squared:      0.00065165
## Adj. R-Squared: -0.025533
## F-statistic: 194.914 on 1 and 298912 DF, p-value: < 2.22e-16
# Two-way within model of log(avg_comment_cnt + 1) on is_pk_live.
model <- plm(
  log(avg_comment_cnt + 1) ~ is_pk_live,
  data = pdata,
  model = "within",
  effect = "twoways"
)
summary(model)
## Twoways effects Within Model
## 
## Call:
## plm(formula = log(avg_comment_cnt + 1) ~ is_pk_live, data = pdata, 
##     effect = "twoways", model = "within")
## 
## Unbalanced Panel: n = 7634, T = 1-199, N = 306745
## 
## Residuals:
##      Min.   1st Qu.    Median   3rd Qu.      Max. 
## -3.537526 -0.411790 -0.076239  0.352216  5.082455 
## 
## Coefficients:
##            Estimate Std. Error t-value  Pr(>|t|)    
## is_pk_live  0.44793    0.02412  18.571 < 2.2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Total Sum of Squares:    132300
## Residual Sum of Squares: 132150
## R-Squared:      0.0011524
## Adj. R-Squared: -0.025019
## F-statistic: 344.877 on 1 and 298912 DF, p-value: < 2.22e-16

人均点赞:avg_like_cnt

# Two-way within model of avg_like_cnt on is_pk_live.
model <- plm(
  avg_like_cnt ~ is_pk_live,
  data = pdata,
  model = "within",
  effect = "twoways"
)
summary(model)
## Twoways effects Within Model
## 
## Call:
## plm(formula = avg_like_cnt ~ is_pk_live, data = pdata, effect = "twoways", 
##     model = "within")
## 
## Unbalanced Panel: n = 7634, T = 1-199, N = 306745
## 
## Residuals:
##        Min.     1st Qu.      Median     3rd Qu.        Max. 
## -2136.76022   -13.22080    -4.77567     0.92468 14991.80503 
## 
## Coefficients:
##            Estimate Std. Error t-value Pr(>|t|)  
## is_pk_live   8.0647     4.7326  1.7041  0.08837 .
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Total Sum of Squares:    5087500000
## Residual Sum of Squares: 5087500000
## R-Squared:      9.7147e-06
## Adj. R-Squared: -0.026192
## F-statistic: 2.90388 on 1 and 298912 DF, p-value: 0.088368
# Two-way within model of log(avg_like_cnt + 1) on is_pk_live.
model <- plm(
  log(avg_like_cnt + 1) ~ is_pk_live,
  data = pdata,
  model = "within",
  effect = "twoways"
)
summary(model)
## Twoways effects Within Model
## 
## Call:
## plm(formula = log(avg_like_cnt + 1) ~ is_pk_live, data = pdata, 
##     effect = "twoways", model = "within")
## 
## Unbalanced Panel: n = 7634, T = 1-199, N = 306745
## 
## Residuals:
##     Min.  1st Qu.   Median  3rd Qu.     Max. 
## -6.54652 -0.79951 -0.19769  0.66619  7.77202 
## 
## Coefficients:
##            Estimate Std. Error t-value  Pr(>|t|)    
## is_pk_live 0.521257   0.048394  10.771 < 2.2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Total Sum of Squares:    532190
## Residual Sum of Squares: 531980
## R-Squared:      0.00038798
## Adj. R-Squared: -0.025804
## F-statistic: 116.015 on 1 and 298912 DF, p-value: < 2.22e-16

人均转发:avg_share_success_cnt

# Two-way within model of avg_share_success_cnt on is_pk_live.
model <- plm(
  avg_share_success_cnt ~ is_pk_live,
  data = pdata,
  model = "within",
  effect = "twoways"
)
summary(model)
## Twoways effects Within Model
## 
## Call:
## plm(formula = avg_share_success_cnt ~ is_pk_live, data = pdata, 
##     effect = "twoways", model = "within")
## 
## Unbalanced Panel: n = 7634, T = 1-199, N = 306745
## 
## Residuals:
##        Min.     1st Qu.      Median     3rd Qu.        Max. 
## -5.8348e+00 -1.2106e-01 -3.1415e-02  2.0624e-04  1.4128e+02 
## 
## Coefficients:
##            Estimate Std. Error t-value  Pr(>|t|)    
## is_pk_live 0.106588   0.021989  4.8473 1.252e-06 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Total Sum of Squares:    109840
## Residual Sum of Squares: 109830
## R-Squared:      7.8599e-05
## Adj. R-Squared: -0.026121
## F-statistic: 23.4961 on 1 and 298912 DF, p-value: 1.2523e-06
# Two-way within model of log(avg_share_success_cnt + 1) on is_pk_live.
model <- plm(
  log(avg_share_success_cnt + 1) ~ is_pk_live,
  data = pdata,
  model = "within",
  effect = "twoways"
)
summary(model)
## Twoways effects Within Model
## 
## Call:
## plm(formula = log(avg_share_success_cnt + 1) ~ is_pk_live, data = pdata, 
##     effect = "twoways", model = "within")
## 
## Unbalanced Panel: n = 7634, T = 1-199, N = 306745
## 
## Residuals:
##        Min.     1st Qu.      Median     3rd Qu.        Max. 
## -1.61549509 -0.07401480 -0.02034752  0.00001718  4.69359931 
## 
## Coefficients:
##             Estimate Std. Error t-value  Pr(>|t|)    
## is_pk_live 0.0579358  0.0091823  6.3095 2.803e-10 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Total Sum of Squares:    19154
## Residual Sum of Squares: 19152
## R-Squared:      0.00013317
## Adj. R-Squared: -0.026065
## F-statistic: 39.8101 on 1 and 298912 DF, p-value: 2.8029e-10

(4) Y:关注主播数:follow_author_cnt

# Two-way within model of follow_author_cnt on is_pk_live.
model <- plm(
  follow_author_cnt ~ is_pk_live,
  data = pdata,
  model = "within",
  effect = "twoways"
)
summary(model)
## Twoways effects Within Model
## 
## Call:
## plm(formula = follow_author_cnt ~ is_pk_live, data = pdata, effect = "twoways", 
##     model = "within")
## 
## Unbalanced Panel: n = 7634, T = 1-199, N = 306745
## 
## Residuals:
##        Min.     1st Qu.      Median     3rd Qu.        Max. 
## -3246.44757    -1.95968     0.52166     2.06853 36026.20314 
## 
## Coefficients:
##            Estimate Std. Error t-value Pr(>|t|)
## is_pk_live   1.5690     5.9805  0.2624    0.793
## 
## Total Sum of Squares:    8124200000
## Residual Sum of Squares: 8124200000
## R-Squared:      2.3028e-07
## Adj. R-Squared: -0.026201
## F-statistic: 0.0688321 on 1 and 298912 DF, p-value: 0.79305
# Two-way within model of log(follow_author_cnt + 1) on is_pk_live.
model <- plm(
  log(follow_author_cnt + 1) ~ is_pk_live,
  data = pdata,
  model = "within",
  effect = "twoways"
)
summary(model)
## Twoways effects Within Model
## 
## Call:
## plm(formula = log(follow_author_cnt + 1) ~ is_pk_live, data = pdata, 
##     effect = "twoways", model = "within")
## 
## Unbalanced Panel: n = 7634, T = 1-199, N = 306745
## 
## Residuals:
##       Min.    1st Qu.     Median    3rd Qu.       Max. 
## -5.8849478 -0.1938182 -0.0605977  0.0099967  6.9698957 
## 
## Coefficients:
##            Estimate Std. Error t-value  Pr(>|t|)    
## is_pk_live 0.274081   0.021051   13.02 < 2.2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Total Sum of Squares:    100720
## Residual Sum of Squares: 100660
## R-Squared:      0.00056678
## Adj. R-Squared: -0.02562
## F-statistic: 169.512 on 1 and 298912 DF, p-value: < 2.22e-16

(5) Y: 取消关注主播数:cancel_follow_author_cnt

# Two-way within model of cancel_follow_author_cnt on is_pk_live.
model <- plm(
  cancel_follow_author_cnt ~ is_pk_live,
  data = pdata,
  model = "within",
  effect = "twoways"
)
summary(model)
## Twoways effects Within Model
## 
## Call:
## plm(formula = cancel_follow_author_cnt ~ is_pk_live, data = pdata, 
##     effect = "twoways", model = "within")
## 
## Unbalanced Panel: n = 7634, T = 1-199, N = 306745
## 
## Residuals:
##        Min.     1st Qu.      Median     3rd Qu.        Max. 
## -276.875884   -0.276687    0.010095    0.192248 5908.953669 
## 
## Coefficients:
##            Estimate Std. Error t-value Pr(>|t|)
## is_pk_live  0.88998    0.67272  1.3229   0.1859
## 
## Total Sum of Squares:    102800000
## Residual Sum of Squares: 102800000
## R-Squared:      5.8552e-06
## Adj. R-Squared: -0.026196
## F-statistic: 1.75019 on 1 and 298912 DF, p-value: 0.18585
# Two-way within model of log(cancel_follow_author_cnt + 1) on is_pk_live.
model <- plm(
  log(cancel_follow_author_cnt + 1) ~ is_pk_live,
  data = pdata,
  model = "within",
  effect = "twoways"
)
summary(model)
## Twoways effects Within Model
## 
## Call:
## plm(formula = log(cancel_follow_author_cnt + 1) ~ is_pk_live, 
##     data = pdata, effect = "twoways", model = "within")
## 
## Unbalanced Panel: n = 7634, T = 1-199, N = 306745
## 
## Residuals:
##       Min.    1st Qu.     Median    3rd Qu.       Max. 
## -3.7793821 -0.0869240 -0.0271058 -0.0012831  5.9109399 
## 
## Coefficients:
##            Estimate Std. Error t-value  Pr(>|t|)    
## is_pk_live 0.193082   0.012639  15.276 < 2.2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Total Sum of Squares:    36315
## Residual Sum of Squares: 36287
## R-Squared:      0.00078012
## Adj. R-Squared: -0.025401
## F-statistic: 233.369 on 1 and 298912 DF, p-value: < 2.22e-16

(6) Y:关注观众数:follow_user_cnt

# Two-way within model of follow_user_cnt on is_pk_live.
model <- plm(
  follow_user_cnt ~ is_pk_live,
  data = pdata,
  model = "within",
  effect = "twoways"
)
summary(model)
## Twoways effects Within Model
## 
## Call:
## plm(formula = follow_user_cnt ~ is_pk_live, data = pdata, effect = "twoways", 
##     model = "within")
## 
## Unbalanced Panel: n = 7634, T = 1-199, N = 306745
## 
## Residuals:
##        Min.     1st Qu.      Median     3rd Qu.        Max. 
## -2.6302e+03 -2.4911e-01 -7.4481e-02  5.4009e-02  1.5932e+03 
## 
## Coefficients:
##            Estimate Std. Error t-value Pr(>|t|)
## is_pk_live  0.48623    0.35644  1.3641   0.1725
## 
## Total Sum of Squares:    28859000
## Residual Sum of Squares: 28859000
## R-Squared:      6.2255e-06
## Adj. R-Squared: -0.026195
## F-statistic: 1.86088 on 1 and 298912 DF, p-value: 0.17252
# Two-way within model of log(follow_user_cnt + 1) on is_pk_live.
model <- plm(
  log(follow_user_cnt + 1) ~ is_pk_live,
  data = pdata,
  model = "within",
  effect = "twoways"
)
summary(model)
## Twoways effects Within Model
## 
## Call:
## plm(formula = log(follow_user_cnt + 1) ~ is_pk_live, data = pdata, 
##     effect = "twoways", model = "within")
## 
## Unbalanced Panel: n = 7634, T = 1-199, N = 306745
## 
## Residuals:
##       Min.    1st Qu.     Median    3rd Qu.       Max. 
## -3.7544762 -0.0931899 -0.0278857 -0.0015099  5.4796746 
## 
## Coefficients:
##            Estimate Std. Error t-value  Pr(>|t|)    
## is_pk_live 0.124366   0.013327  9.3322 < 2.2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Total Sum of Squares:    40353
## Residual Sum of Squares: 40341
## R-Squared:      0.00029127
## Adj. R-Squared: -0.025903
## F-statistic: 87.0901 on 1 and 298912 DF, p-value: < 2.22e-16

(7) Y:取消关注观众数:cancel_follow_user_cnt

# Two-way within model of cancel_follow_user_cnt on is_pk_live.
model <- plm(
  cancel_follow_user_cnt ~ is_pk_live,
  data = pdata,
  model = "within",
  effect = "twoways"
)
summary(model)
## Twoways effects Within Model
## 
## Call:
## plm(formula = cancel_follow_user_cnt ~ is_pk_live, data = pdata, 
##     effect = "twoways", model = "within")
## 
## Unbalanced Panel: n = 7634, T = 1-199, N = 306745
## 
## Residuals:
##        Min.     1st Qu.      Median     3rd Qu.        Max. 
## -33.2848128  -0.0716934  -0.0230475   0.0076503 257.3031156 
## 
## Coefficients:
##            Estimate Std. Error t-value  Pr(>|t|)    
## is_pk_live 0.144573   0.043185  3.3478 0.0008147 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Total Sum of Squares:    423630
## Residual Sum of Squares: 423610
## R-Squared:      3.7494e-05
## Adj. R-Squared: -0.026163
## F-statistic: 11.2077 on 1 and 298912 DF, p-value: 0.00081468
# Two-way within model of log(cancel_follow_user_cnt + 1) on is_pk_live.
model <- plm(
  log(cancel_follow_user_cnt + 1) ~ is_pk_live,
  data = pdata,
  model = "within",
  effect = "twoways"
)
summary(model)
## Twoways effects Within Model
## 
## Call:
## plm(formula = log(cancel_follow_user_cnt + 1) ~ is_pk_live, data = pdata, 
##     effect = "twoways", model = "within")
## 
## Unbalanced Panel: n = 7634, T = 1-199, N = 306745
## 
## Residuals:
##        Min.     1st Qu.      Median     3rd Qu.        Max. 
## -3.3747e+00 -4.0776e-02 -1.2482e-02  1.8624e-06  3.7374e+00 
## 
## Coefficients:
##             Estimate Std. Error t-value  Pr(>|t|)    
## is_pk_live 0.0553999  0.0080064  6.9195 4.542e-12 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Total Sum of Squares:    14563
## Residual Sum of Squares: 14561
## R-Squared:      0.00016015
## Adj. R-Squared: -0.026037
## F-statistic: 47.8793 on 1 and 298912 DF, p-value: 4.5418e-12

(9) 加入粉丝团次数:join_fans_group_cnt

# Two-way within model of join_fans_group_cnt on is_pk_live.
model <- plm(
  join_fans_group_cnt ~ is_pk_live,
  data = pdata,
  model = "within",
  effect = "twoways"
)
summary(model)
## Twoways effects Within Model
## 
## Call:
## plm(formula = join_fans_group_cnt ~ is_pk_live, data = pdata, 
##     effect = "twoways", model = "within")
## 
## Unbalanced Panel: n = 7634, T = 1-199, N = 306745
## 
## Residuals:
##        Min.     1st Qu.      Median     3rd Qu.        Max. 
## -1.6429e+02 -1.2639e-01  3.2932e-03  7.3497e-02  1.2138e+03 
## 
## Coefficients:
##            Estimate Std. Error t-value Pr(>|t|)
## is_pk_live  0.11064    0.24092  0.4592   0.6461
## 
## Total Sum of Squares:    13185000
## Residual Sum of Squares: 13185000
## R-Squared:      7.0554e-07
## Adj. R-Squared: -0.026201
## F-statistic: 0.210894 on 1 and 298912 DF, p-value: 0.64607
# Two-way within model of log(join_fans_group_cnt + 1) on is_pk_live.
model <- plm(
  log(join_fans_group_cnt + 1) ~ is_pk_live,
  data = pdata,
  model = "within",
  effect = "twoways"
)
summary(model)
## Twoways effects Within Model
## 
## Call:
## plm(formula = log(join_fans_group_cnt + 1) ~ is_pk_live, data = pdata, 
##     effect = "twoways", model = "within")
## 
## Unbalanced Panel: n = 7634, T = 1-199, N = 306745
## 
## Residuals:
##        Min.     1st Qu.      Median     3rd Qu.        Max. 
## -3.89391792 -0.03060061 -0.00756777  0.00070489  6.31682693 
## 
## Coefficients:
##             Estimate Std. Error t-value  Pr(>|t|)    
## is_pk_live 0.0441838  0.0088508  4.9921 5.977e-07 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Total Sum of Squares:    17796
## Residual Sum of Squares: 17794
## R-Squared:      8.3364e-05
## Adj. R-Squared: -0.026116
## F-statistic: 24.9206 on 1 and 298912 DF, p-value: 5.9773e-07

(10) 第一次看这个主播的用户:live_new_user_num

# Two-way within model of live_new_user_num on is_pk_live.
model <- plm(
  live_new_user_num ~ is_pk_live,
  data = pdata,
  model = "within",
  effect = "twoways"
)
summary(model)
## Twoways effects Within Model
## 
## Call:
## plm(formula = live_new_user_num ~ is_pk_live, data = pdata, effect = "twoways", 
##     model = "within")
## 
## Unbalanced Panel: n = 7634, T = 1-199, N = 306745
## 
## Residuals:
##        Min.     1st Qu.      Median     3rd Qu.        Max. 
## -59.8315020  -0.0193860   0.0010871   0.0131851 733.9818686 
## 
## Coefficients:
##              Estimate Std. Error t-value Pr(>|t|)
## is_pk_live -0.0035381  0.0710173 -0.0498   0.9603
## 
## Total Sum of Squares:    1145600
## Residual Sum of Squares: 1145600
## R-Squared:      8.3035e-09
## Adj. R-Squared: -0.026202
## F-statistic: 0.00248202 on 1 and 298912 DF, p-value: 0.96027
# Two-way within model of log(live_new_user_num + 1) on is_pk_live.
model <- plm(
  log(live_new_user_num + 1) ~ is_pk_live,
  data = pdata,
  model = "within",
  effect = "twoways"
)
summary(model)
## Twoways effects Within Model
## 
## Call:
## plm(formula = log(live_new_user_num + 1) ~ is_pk_live, data = pdata, 
##     effect = "twoways", model = "within")
## 
## Unbalanced Panel: n = 7634, T = 1-199, N = 306745
## 
## Residuals:
##        Min.     1st Qu.      Median     3rd Qu.        Max. 
## -2.45140365 -0.00620724 -0.00102865  0.00094096  4.45355428 
## 
## Coefficients:
##              Estimate Std. Error t-value Pr(>|t|)
## is_pk_live -0.0024353  0.0041857 -0.5818   0.5607
## 
## Total Sum of Squares:    3979.6
## Residual Sum of Squares: 3979.6
## R-Squared:      1.1325e-06
## Adj. R-Squared: -0.026201
## F-statistic: 0.338518 on 1 and 298912 DF, p-value: 0.56069

(11) 举报直播次数:report_live_cnt

# Two-way within model of report_live_cnt on is_pk_live.
model <- plm(
  report_live_cnt ~ is_pk_live,
  data = pdata,
  model = "within",
  effect = "twoways"
)
summary(model)
## Twoways effects Within Model
## 
## Call:
## plm(formula = report_live_cnt ~ is_pk_live, data = pdata, effect = "twoways", 
##     model = "within")
## 
## Unbalanced Panel: n = 7634, T = 1-199, N = 306745
## 
## Residuals:
##        Min.     1st Qu.      Median     3rd Qu.        Max. 
## -1.7448e+02 -3.3174e-02  3.2744e-03  4.8315e-02  1.3098e+03 
## 
## Coefficients:
##            Estimate Std. Error t-value Pr(>|t|)
## is_pk_live  0.17505    0.15601   1.122   0.2619
## 
## Total Sum of Squares:    5528900
## Residual Sum of Squares: 5528900
## R-Squared:      4.2115e-06
## Adj. R-Squared: -0.026197
## F-statistic: 1.25888 on 1 and 298912 DF, p-value: 0.26186
# Two-way within model of log(report_live_cnt + 1) on is_pk_live.
model <- plm(
  log(report_live_cnt + 1) ~ is_pk_live,
  data = pdata,
  model = "within",
  effect = "twoways"
)
summary(model)
## Twoways effects Within Model
## 
## Call:
## plm(formula = log(report_live_cnt + 1) ~ is_pk_live, data = pdata, 
##     effect = "twoways", model = "within")
## 
## Unbalanced Panel: n = 7634, T = 1-199, N = 306745
## 
## Residuals:
##       Min.    1st Qu.     Median    3rd Qu.       Max. 
## -4.7272071 -0.0093079 -0.0012947  0.0018795  6.1613834 
## 
## Coefficients:
##             Estimate Std. Error t-value Pr(>|t|)
## is_pk_live 0.0079300  0.0068356  1.1601    0.246
## 
## Total Sum of Squares:    10614
## Residual Sum of Squares: 10614
## R-Squared:      4.5025e-06
## Adj. R-Squared: -0.026197
## F-statistic: 1.34585 on 1 and 298912 DF, p-value: 0.24601

```