# NOTE(review): hard-coded, machine-specific working directory. The relative
# file name passed to read.csv() further down is resolved against it, so the
# script only runs on a machine where this folder exists.
setwd("C:/Users/ASUS/Desktop/快手")

数据:2022.01.01–2024.09.01 全部的random PK pair

1.prepare

# Load the cleaned PK-pair table (path is relative to the working directory).
data <- read.csv(file = "Within_Random_Pk_final_clean_1.csv")

自变量

### fan-size
# Classify every pair by whether `before_fans_count` and
# `other_before_fans_count` reach the 10,000-fan threshold:
#   0 = both below, 1 = only the `other_` count at/above,
#   2 = only the own count at/above, 3 = both at/above.
# The numeric codes are then turned into a labelled factor whose first level
# (code 0) acts as the reference category in the regressions.
# NOTE(review): the first label is "Small vs Small " WITH a trailing space. It
# is kept byte-identical here because other code may match on it; confirm
# whether the space is intentional.
data <- data %>%
  mutate(
    fans_piar_cat = case_when(
      (before_fans_count >= 10000) & (other_before_fans_count >= 10000) ~ 3,
      (before_fans_count >= 10000) & (other_before_fans_count < 10000) ~ 2,
      (before_fans_count < 10000) & (other_before_fans_count >= 10000) ~ 1,
      (before_fans_count < 10000) & (other_before_fans_count < 10000) ~ 0
    ),
    # `levels` is spelled out in full; the former `level =` only worked through
    # partial argument matching.
    fans_piar_cat = factor(
      fans_piar_cat,
      levels = c(0, 1, 2, 3),
      labels = c("Small vs Small ", "Small vs Big", "Big vs Small", "Big vs Big")
    )
  )

### Cooperation
# (1) Split by live-stream type: pairs whose two type tags are equal get 1,
#     every other pair (including a missing comparison) gets 0.
data <- data %>%
  mutate(
    is_cooperative = if_else(
      live_operation_tag == other_live_operation_tag,
      1,
      0,
      missing = 0
    )
  )

# Cell counts of the two explanatory variables (recorded output below each call).
table(data[["fans_piar_cat"]])
## 
## Small vs Small     Small vs Big    Big vs Small      Big vs Big 
##          149898           10906           10701           15621
table(data[["is_cooperative"]])
## 
##      0      1 
## 121925  65201

因变量

# table(data$total_cost_amt) # 119056 rows are 0
# (1) Gifts per fan (the + 1 keeps the denominator non-zero for a count of 0)
data[["avg_fan_total_cost_amt"]] <- with(data, total_cost_amt / (before_fans_count + 1))

# (1.1) Gifts per viewer = total gifts / total number of viewers
data[["avg_viewer_total_cost_amt"]] <- with(data, total_cost_amt / (valid_play_user_num + 1))

# (1.2) Gifts per unit of watch time = total gifts / total watch duration
data[["avg_time_total_cost_amt"]] <- with(data, total_cost_amt / (valid_play_duration + 1))

# (2) Fans gained
# data$follow_author_fans_count
# (3) Fans lost
# data$unfollow_author_fans_count
# (4) Net fans gained = gained - lost
data[["net_follow_fans"]] <- with(data, follow_author_fans_count - unfollow_author_fans_count)
summary(data[["net_follow_fans"]])
##     Min.  1st Qu.   Median     Mean  3rd Qu.     Max. 
## -254.000    0.000    0.000    0.958    0.000 5064.000
# (5) Fans attracted
# data$already_follow_other_fans_count
# (6) Fans drawn away by the other side
# data$other_already_follow_other_fans_count
# (7) Net fans attracted = attracted - drawn away
data[["net_attract_fans"]] <- with(
  data,
  already_follow_other_fans_count - other_already_follow_other_fans_count
)
summary(data[["net_attract_fans"]])
##      Min.   1st Qu.    Median      Mean   3rd Qu.      Max. 
## -1.25e+02  0.00e+00  0.00e+00 -1.27e-03  0.00e+00  8.60e+01

(1) plot_predictions_with_ci

library(ggplot2)
library(dplyr)

# Plot the mean model prediction, with an approximate 95% interval, for every
# combination of fan-size pair category and cooperation flag.
#
# Arguments:
#   model_1  Fitted model. `predict(model_1, newdata, se.fit = TRUE)` must
#            return an object exposing `fit` and `se.fit` through `$`.
#   data     Data frame holding the model's predictors plus the grouping
#            columns `fans_piar_cat` and `is_cooperative`.
# Value: a ggplot object (points with error bars, coloured by is_cooperative).
plot_predictions_with_ci <- function(model_1, data) {
  # Predictions and their standard errors for every row
  predictions <- predict(model_1, newdata = data, se.fit = TRUE)

  # Attach both to the (local copy of the) data frame
  data$predicted_y <- predictions$fit
  data$se_fit <- predictions$se.fit

  # Group means and interval bounds: mean prediction +/- 1.96 * mean standard error.
  # `.groups = "drop"` returns an ungrouped tibble and silences the
  # "summarise() has grouped output by ..." message.
  summary_data <- data %>%
    group_by(fans_piar_cat, is_cooperative) %>%
    summarise(
      mean_predicted_y = mean(predicted_y),
      ci_lower = mean_predicted_y - 1.96 * mean(se_fit),
      ci_upper = mean_predicted_y + 1.96 * mean(se_fit),
      .groups = "drop"
    )

  # Build and return the chart
  ggplot(
    summary_data,
    aes(
      x = factor(fans_piar_cat),
      y = mean_predicted_y,
      color = factor(is_cooperative)
    )
  ) +
    geom_point(size = 3) +
    geom_errorbar(aes(ymin = ci_lower, ymax = ci_upper), width = 0.2) +
    labs(
      title = "Mean Predicted y Values with Confidence Intervals",
      x = "Pair Categories",
      y = "Mean Predicted y",
      color = "Is Same Category"
    ) +
    theme_minimal()
}

(2) plot_predictions_fans

library(ggplot2)
library(dplyr)

# Plot the mean model prediction, with an approximate 95% interval, for every
# fan-size pair category.
#
# Arguments:
#   model_1  Fitted model. `predict(model_1, newdata, se.fit = TRUE)` must
#            return an object exposing `fit` and `se.fit` through `$`.
#   data     Data frame holding the model's predictors plus `fans_piar_cat`.
# Value: a ggplot object (one point with an error bar per category).
plot_predictions_fans <- function(model_1, data) {
  # Predictions and their standard errors for every row
  predictions <- predict(model_1, newdata = data, se.fit = TRUE)

  # Attach both to the (local copy of the) data frame
  data$predicted_y <- predictions$fit
  data$se_fit <- predictions$se.fit

  # Per-category mean and interval: mean prediction +/- 1.96 * mean standard error
  summary_data <- data %>%
    group_by(fans_piar_cat) %>%
    summarise(
      mean_predicted_y = mean(predicted_y),
      ci_lower = mean_predicted_y - 1.96 * mean(se_fit),
      ci_upper = mean_predicted_y + 1.96 * mean(se_fit),
      .groups = "drop"
    )

  # No colour aesthetic is mapped in this plot, so no colour legend title is
  # set (the former `color = "Is Same Category"` label had nothing to attach
  # to); the stray empty trailing argument in aes() is removed as well.
  ggplot(summary_data, aes(x = factor(fans_piar_cat), y = mean_predicted_y)) +
    geom_point(size = 3) +
    geom_errorbar(aes(ymin = ci_lower, ymax = ci_upper), width = 0.2) +
    labs(
      title = "Mean Predicted y Values with Confidence Intervals",
      x = "fans Pair",
      y = "Mean Predicted y"
    ) +
    theme_minimal()
}

(3) plot_predictions_cate

library(ggplot2)
library(dplyr)

# Plot the mean model prediction, with an approximate 95% interval, for each
# value of the cooperation flag.
#
# Arguments:
#   model_1  Fitted model. `predict(model_1, newdata, se.fit = TRUE)` must
#            return an object exposing `fit` and `se.fit` through `$`.
#   data     Data frame holding the model's predictors plus `is_cooperative`.
# Value: a ggplot object.
plot_predictions_cate <- function(model_1, data) {
  # Row-level predictions with standard errors, stored on the local data copy
  pred <- predict(model_1, newdata = data, se.fit = TRUE)
  data[["predicted_y"]] <- pred$fit
  data[["se_fit"]] <- pred$se.fit

  # One row per is_cooperative value: mean prediction and the interval
  # mean prediction +/- 1.96 * mean standard error
  by_flag <- group_by(data, is_cooperative)
  summary_data <- summarise(
    by_flag,
    mean_predicted_y = mean(predicted_y),
    ci_lower = mean_predicted_y - 1.96 * mean(se_fit),
    ci_upper = mean_predicted_y + 1.96 * mean(se_fit)
  )

  # Assemble the chart layer by layer
  base_layer <- ggplot(
    summary_data,
    aes(x = factor(is_cooperative), y = mean_predicted_y)
  )
  plot_labels <- labs(
    title = "Mean Predicted y Values with Confidence Intervals",
    x = "Categories",
    y = "Mean Predicted y",
    color = "Is Same Category"
  )

  base_layer +
    geom_point(size = 3) +
    geom_errorbar(aes(ymin = ci_lower, ymax = ci_upper), width = 0.2) +
    plot_labels +
    theme_minimal()
}

(4) plot_continue_cutoff

library(dplyr)
library(ggplot2)
library(scales)  # for comma formatting
## 
## 载入程序包:'scales'
## The following object is masked from 'package:fixest':
## 
##     pvalue
library(tidyr)
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ forcats   1.0.0     ✔ readr     2.1.5
## ✔ lubridate 1.9.3     ✔ stringr   1.5.1
## ✔ purrr     1.0.2     ✔ tibble    3.2.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ plm::between()      masks dplyr::between()
## ✖ readr::col_factor() masks scales::col_factor()
## ✖ purrr::discard()    masks scales::discard()
## ✖ dplyr::filter()     masks stats::filter()
## ✖ plm::lag()          masks dplyr::lag(), stats::lag()
## ✖ plm::lead()         masks dplyr::lead()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
# Sweep the fan-count cutoff that separates "Small" from "Big" accounts, refit
# the model at every cutoff, and plot how the fan-pair coefficients evolve.
#
# Arguments:
#   data           Data frame with `before_fans_count`,
#                  `other_before_fans_count`, `author_id` (used to cluster the
#                  standard errors) and every variable in `model_formula`.
#   model_formula  Formula passed to feols(). It must use `fans_piar_cat_temp`
#                  (bare or wrapped in factor()), which is rebuilt per cutoff.
#   cutoffs        Numeric vector of cutoffs. Default: 100 log-spaced values
#                  from 100 (0.1k) to 1e6 (1000k), as before.
#   show_pct_plot  If TRUE, also print the plot of category shares per cutoff.
#                  Default FALSE keeps the previous behaviour (only the
#                  coefficient plot is printed).
# Value: the coefficient plot; it is printed and returned invisibly.
plot_continue_cutoff <- function(data, model_formula,
                                 cutoffs = exp(seq(log(100), log(1e6), length.out = 100)),
                                 show_pct_plot = FALSE) {
  stopifnot(is.numeric(cutoffs), length(cutoffs) > 0)

  # "Small vs Small" is the reference category of the regressions.
  reference_level <- "Small vs Small"
  coef_levels <- c("Small vs Big", "Big vs Small", "Big vs Big")
  pair_levels <- c(reference_level, coef_levels)

  # One row per cutoff; columns are addressed by name so that no result depends
  # on the incidental ordering of a summary table.
  coefficients_matrix <- matrix(
    NA_real_,
    nrow = length(cutoffs), ncol = length(coef_levels),
    dimnames = list(NULL, coef_levels)
  )
  pct_matrix <- matrix(
    NA_real_,
    nrow = length(cutoffs), ncol = length(pair_levels),
    dimnames = list(NULL, pair_levels)
  )

  for (i in seq_along(cutoffs)) {
    cut_off <- cutoffs[i]

    # Rebuild the pair category for this cutoff. The levels are fixed up front,
    # so an empty category is a zero count rather than a missing level.
    data <- data %>%
      mutate(
        fans_piar_cat_temp = factor(
          case_when(
            (before_fans_count < cut_off) & (other_before_fans_count < cut_off) ~ "Small vs Small",
            (before_fans_count < cut_off) & (other_before_fans_count >= cut_off) ~ "Small vs Big",
            (before_fans_count >= cut_off) & (other_before_fans_count < cut_off) ~ "Big vs Small",
            (before_fans_count >= cut_off) & (other_before_fans_count >= cut_off) ~ "Big vs Big"
          ),
          levels = pair_levels
        )
      )

    # 1. Share of each category (in percent of all rows)
    pair_counts <- table(data$fans_piar_cat_temp)
    pct_matrix[i, ] <- as.numeric(pair_counts[pair_levels]) / nrow(data) * 100

    # Without any reference-category rows the coefficients have no common
    # baseline; leave this cutoff as NA instead of aborting the whole sweep.
    if (pair_counts[[reference_level]] == 0) {
      next
    }

    # 2. Coefficients of the model at this cutoff
    model <- feols(model_formula, data = data, vcov = ~author_id)
    estimates <- coef(model)

    for (level in coef_levels) {
      # Accept both `factor(fans_piar_cat_temp)` and the bare variable.
      candidates <- paste0(c("factor(fans_piar_cat_temp)", "fans_piar_cat_temp"), level)
      hit <- candidates[candidates %in% names(estimates)]
      if (length(hit) > 0) {
        coefficients_matrix[i, level] <- estimates[[hit[1]]]
      }
    }
  }

  ## 1. Share of the four categories across cutoffs
  pct_df <- data.frame(
    Cutoff = cutoffs,
    Big_vs_Big = pct_matrix[, "Big vs Big"],
    Big_vs_Small = pct_matrix[, "Big vs Small"],
    Small_vs_Big = pct_matrix[, "Small vs Big"],
    Small_vs_Small = pct_matrix[, "Small vs Small"]
  )

  pct_df_long <- pct_df %>%
    pivot_longer(
      cols = c("Big_vs_Big", "Big_vs_Small", "Small_vs_Big", "Small_vs_Small"),
      names_to = "Category", values_to = "Percentage"
    )

  pct_plot <- ggplot(pct_df_long, aes(x = Cutoff, y = Percentage, color = Category)) +
    geom_line() +
    scale_x_log10() + # Logarithmic scale for the cut-off
    labs(
      title = "Distribution of fans_piar_cat across Cutoffs",
      x = "Cutoff (log scale)", y = "Percentage",
      color = "Category"
    ) +
    theme_minimal()

  if (isTRUE(show_pct_plot)) {
    print(pct_plot)
  }

  ## 2. Model coefficients across cutoffs
  coef_df <- data.frame(
    Cutoff = cutoffs,
    Small_vs_Big = coefficients_matrix[, "Small vs Big"],
    Big_vs_Small = coefficients_matrix[, "Big vs Small"],
    Big_vs_Big = coefficients_matrix[, "Big vs Big"]
  )

  coef_df_long <- coef_df %>%
    pivot_longer(
      cols = c("Small_vs_Big", "Big_vs_Small", "Big_vs_Big"),
      names_to = "Category", values_to = "Coefficient"
    )

  coef_plot <- ggplot(coef_df_long, aes(x = Cutoff, y = Coefficient, color = Category)) +
    geom_line() +
    scale_x_log10() + # Logarithmic scale for the cut-off
    labs(
      title = "Evolution of Coefficients for fans_piar_cat across Cutoffs",
      x = "Cutoff (log scale)", y = "Coefficient",
      color = "Category"
    ) +
    theme_minimal()

  print(coef_plot)
  invisible(coef_plot)
}

2. DV:人均粉丝打赏:log(avg_fan_total_cost_amt+1)

(1) regression

# Pair category x cooperation model for the per-fan DV with p_date fixed
# effects; standard errors are clustered by author_id.
# `a * b` expands to `a + b + a:b`, i.e. the same terms as before.
model <- feols(
  log(avg_fan_total_cost_amt + 1) ~ factor(fans_piar_cat) * is_cooperative | p_date,
  data = data,
  vcov = ~author_id
)
summary(model)
## OLS estimation, Dep. Var.: log(avg_fan_total_cost_amt + 1)
## Observations: 187,126
## Fixed-effects: p_date: 975
## Standard-errors: Clustered (author_id) 
##                                                   Estimate Std. Error   t value
## factor(fans_piar_cat)Small vs Big                 0.028320   0.002953   9.58924
## factor(fans_piar_cat)Big vs Small                -0.024452   0.001229 -19.89522
## factor(fans_piar_cat)Big vs Big                  -0.025267   0.001082 -23.34982
## is_cooperative                                    0.009500   0.001349   7.04310
## factor(fans_piar_cat)Small vs Big:is_cooperative  0.005994   0.005015   1.19526
## factor(fans_piar_cat)Big vs Small:is_cooperative -0.011356   0.001836  -6.18592
## factor(fans_piar_cat)Big vs Big:is_cooperative   -0.007419   0.001793  -4.13852
##                                                    Pr(>|t|)    
## factor(fans_piar_cat)Small vs Big                 < 2.2e-16 ***
## factor(fans_piar_cat)Big vs Small                 < 2.2e-16 ***
## factor(fans_piar_cat)Big vs Big                   < 2.2e-16 ***
## is_cooperative                                   1.8878e-12 ***
## factor(fans_piar_cat)Small vs Big:is_cooperative 2.3199e-01    
## factor(fans_piar_cat)Big vs Small:is_cooperative 6.1895e-10 ***
## factor(fans_piar_cat)Big vs Big:is_cooperative   3.4974e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## RMSE: 0.219571     Adj. R2: 0.005954
##                  Within R2: 0.003705

(2) plot both

# Plot: refit the interaction model without fixed effects, then chart the
# group-mean predictions by pair category and cooperation flag.
model <- feols(
  log(avg_fan_total_cost_amt + 1) ~ factor(fans_piar_cat) * is_cooperative,
  data = data,
  vcov = ~author_id
)
summary(model)
## OLS estimation, Dep. Var.: log(avg_fan_total_cost_amt + 1)
## Observations: 187,126
## Standard-errors: Clustered (author_id) 
##                                                   Estimate Std. Error
## (Intercept)                                       0.038838   0.000731
## factor(fans_piar_cat)Small vs Big                 0.030463   0.002942
## factor(fans_piar_cat)Big vs Small                -0.021900   0.001162
## factor(fans_piar_cat)Big vs Big                  -0.021889   0.000972
## is_cooperative                                    0.010604   0.001350
## factor(fans_piar_cat)Small vs Big:is_cooperative  0.004824   0.005006
## factor(fans_piar_cat)Big vs Small:is_cooperative -0.012566   0.001778
## factor(fans_piar_cat)Big vs Big:is_cooperative   -0.008886   0.001735
##                                                     t value   Pr(>|t|)    
## (Intercept)                                       53.118905  < 2.2e-16 ***
## factor(fans_piar_cat)Small vs Big                 10.352865  < 2.2e-16 ***
## factor(fans_piar_cat)Big vs Small                -18.846991  < 2.2e-16 ***
## factor(fans_piar_cat)Big vs Big                  -22.517133  < 2.2e-16 ***
## is_cooperative                                     7.852427 4.1062e-15 ***
## factor(fans_piar_cat)Small vs Big:is_cooperative   0.963515 3.3529e-01    
## factor(fans_piar_cat)Big vs Small:is_cooperative  -7.067109 1.5885e-12 ***
## factor(fans_piar_cat)Big vs Big:is_cooperative    -5.120919 3.0441e-07 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## RMSE: 0.220413   Adj. R2: 0.003525
plot_predictions_with_ci(model_1 = model, data = data)
## `summarise()` has grouped output by 'fans_piar_cat'. You can override using the
## `.groups` argument.

### (3) plot fans

# Plot: pair-category-only model for the per-fan DV, then the per-category
# mean predictions.
model <- feols(
  log(avg_fan_total_cost_amt + 1) ~ factor(fans_piar_cat),
  data = data,
  vcov = ~author_id
)
summary(model)
## OLS estimation, Dep. Var.: log(avg_fan_total_cost_amt + 1)
## Observations: 187,126
## Standard-errors: Clustered (author_id) 
##                                    Estimate Std. Error  t value  Pr(>|t|)    
## (Intercept)                        0.042408   0.000624  67.9489 < 2.2e-16 ***
## factor(fans_piar_cat)Small vs Big  0.032760   0.002449  13.3781 < 2.2e-16 ***
## factor(fans_piar_cat)Big vs Small -0.026209   0.000898 -29.1923 < 2.2e-16 ***
## factor(fans_piar_cat)Big vs Big   -0.024738   0.000827 -29.9189 < 2.2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## RMSE: 0.220466   Adj. R2: 0.003064
plot_predictions_fans(model_1 = model, data = data)

(4) plot category

# Plot: cooperation-only model for the per-fan DV, then the mean predictions
# per is_cooperative value.
model <- feols(
  log(avg_fan_total_cost_amt + 1) ~ is_cooperative,
  data = data,
  vcov = ~author_id
)

summary(model)
## OLS estimation, Dep. Var.: log(avg_fan_total_cost_amt + 1)
## Observations: 187,126
## Standard-errors: Clustered (author_id) 
##                Estimate Std. Error  t value   Pr(>|t|)    
## (Intercept)    0.037701   0.000625 60.27622  < 2.2e-16 ***
## is_cooperative 0.008762   0.001113  7.87408 3.4544e-15 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## RMSE: 0.220767   Adj. R2: 3.521e-4
plot_predictions_cate(model_1 = model, data = data)

(5) plot continue cutoff

# Usage example: cutoff sweep for the per-fan DV.
# The formula must reference fans_piar_cat_temp, the column that
# plot_continue_cutoff() rebuilds for every cutoff.
model_formula <- log(avg_fan_total_cost_amt + 1) ~ factor(fans_piar_cat_temp)

# Run the sweep on the full data set with that formula
plot_continue_cutoff(data = data, model_formula = model_formula)

(6) plot_fan_pair_heatmap

library(ggplot2)
library(dplyr)
library(RColorBrewer)

# Step 1: Bin both fan counts into ranges (fewer, broader bins).
# Upper edges are inclusive. A count of exactly 0 belongs to "0-1k"; the former
# `> 0` lower bound let it fall through to the catch-all top bin. Missing counts
# stay NA instead of being labelled as the top bin.
# The `other_` side deliberately uses a single ">10k" bin above 10k.
data <- data %>%
  mutate(
    fans_new_range = case_when(
      is.na(before_fans_count) ~ NA_character_,
      before_fans_count <= 1000 ~ "0-1k",
      before_fans_count <= 10000 ~ "1k-10k",
      before_fans_count <= 100000 ~ "10k-100k",
      before_fans_count <= 1000000 ~ "100k-1M",
      TRUE ~ ">1M"
    ),
    other_fans_new_range = case_when(
      is.na(other_before_fans_count) ~ NA_character_,
      other_before_fans_count <= 1000 ~ "0-1k",
      other_before_fans_count <= 10000 ~ "1k-10k",
      TRUE ~ ">10k"
    )
  )

# Step 2: Set the correct order for the categories
data <- data %>%
  mutate(
    fans_new_range = factor(
      fans_new_range,
      levels = c("0-1k", "1k-10k", "10k-100k", "100k-1M", ">1M")
    ),
    other_fans_new_range = factor(
      other_fans_new_range,
      levels = c("0-1k", "1k-10k", ">10k")
    )
  )

# Step 3: Aggregate per cell: row count, mean of the log DV and its 95% CI.
# The CI is computed from the row-level values. Taking sd() of the
# already-summarised mean `Y` (a single number) always gave NA, and the t
# quantile needs n - 1 degrees of freedom. Cells with one observation get NA
# bounds.
agg_data <- data %>%
  mutate(log_dv = log(avg_fan_total_cost_amt + 1)) %>%
  group_by(fans_new_range, other_fans_new_range) %>%
  summarize(
    count = n(), # number of pairs in this cell
    n_obs = sum(!is.na(log_dv)), # non-missing values behind the mean
    Y = mean(log_dv, na.rm = TRUE),
    se = sd(log_dv, na.rm = TRUE) / sqrt(n_obs),
    ci_lower = Y - qt(0.975, df = pmax(n_obs - 1, 1)) * se,
    ci_upper = Y + qt(0.975, df = pmax(n_obs - 1, 1)) * se,
    .groups = "drop"
  ) %>%
  mutate(percentage = count / sum(count) * 100)

# Step 4: Plot the data in a heatmap-like format with custom colors and percentage labels.
# No error-bar layer: ci_lower / ci_upper are in the units of the DV, while the
# y axis of the tile plot is the discrete `other_fans_new_range`, so they cannot
# be drawn on it. The bounds remain available in `agg_data`.
# `oob = scales::squish` clamps means outside the 0-0.2 limits to the end
# colours instead of rendering them like missing data (white).
ggplot(agg_data, aes(x = fans_new_range, y = other_fans_new_range, fill = Y)) +
  geom_tile() +
  geom_text(aes(label = paste0(round(percentage, 2), "%")), color = "black", size = 4) +
  scale_fill_gradientn(
    colors = c("yellow", "orange", "darkorange", "red", "darkred"),
    values = scales::rescale(c(0, 0.05, 0.1, 0.15, 0.2)),
    limits = c(0, 0.2),
    oob = scales::squish,
    na.value = "white"
  ) +
  labs(
    title = "Average Fan Total Cost Amount by Fan Ranges",
    x = "Fans Range",
    y = "Other Fans Range",
    fill = "Avg Fan Total Cost Amt"
  ) +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

2.1. DV:人均viewer打赏:log(avg_viewer_total_cost_amt+1)

(1) regression

# Pair category x cooperation model for the per-viewer DV with p_date fixed
# effects; standard errors are clustered by author_id.
# `a * b` expands to `a + b + a:b`, i.e. the same terms as before.
model <- feols(
  log(avg_viewer_total_cost_amt + 1) ~ factor(fans_piar_cat) * is_cooperative | p_date,
  data = data,
  vcov = ~author_id
)
summary(model)
## OLS estimation, Dep. Var.: log(avg_viewer_total_cost_amt + 1)
## Observations: 187,126
## Fixed-effects: p_date: 975
## Standard-errors: Clustered (author_id) 
##                                                   Estimate Std. Error  t value
## factor(fans_piar_cat)Small vs Big                 0.193010   0.007996 24.13745
## factor(fans_piar_cat)Big vs Small                 0.169834   0.007816 21.72794
## factor(fans_piar_cat)Big vs Big                   0.207317   0.007011 29.57182
## is_cooperative                                    0.028324   0.002213 12.79926
## factor(fans_piar_cat)Small vs Big:is_cooperative  0.040228   0.013240  3.03835
## factor(fans_piar_cat)Big vs Small:is_cooperative -0.014137   0.011668 -1.21161
## factor(fans_piar_cat)Big vs Big:is_cooperative   -0.013210   0.010500 -1.25814
##                                                   Pr(>|t|)    
## factor(fans_piar_cat)Small vs Big                < 2.2e-16 ***
## factor(fans_piar_cat)Big vs Small                < 2.2e-16 ***
## factor(fans_piar_cat)Big vs Big                  < 2.2e-16 ***
## is_cooperative                                   < 2.2e-16 ***
## factor(fans_piar_cat)Small vs Big:is_cooperative 0.0023792 ** 
## factor(fans_piar_cat)Big vs Small:is_cooperative 0.2256648    
## factor(fans_piar_cat)Big vs Big:is_cooperative   0.2083438    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## RMSE: 0.438796     Adj. R2: 0.035929
##                  Within R2: 0.031064

(2) plot both

# Plot: refit the interaction model without fixed effects, then chart the
# group-mean predictions by pair category and cooperation flag.
model <- feols(
  log(avg_viewer_total_cost_amt + 1) ~ factor(fans_piar_cat) * is_cooperative,
  data = data,
  vcov = ~author_id
)
summary(model)
## OLS estimation, Dep. Var.: log(avg_viewer_total_cost_amt + 1)
## Observations: 187,126
## Standard-errors: Clustered (author_id) 
##                                                   Estimate Std. Error  t value
## (Intercept)                                       0.102590   0.001190 86.24594
## factor(fans_piar_cat)Small vs Big                 0.195631   0.008013 24.41559
## factor(fans_piar_cat)Big vs Small                 0.173282   0.007818 22.16328
## factor(fans_piar_cat)Big vs Big                   0.210926   0.006990 30.17703
## is_cooperative                                    0.028725   0.002204 13.03264
## factor(fans_piar_cat)Small vs Big:is_cooperative  0.040555   0.013262  3.05808
## factor(fans_piar_cat)Big vs Small:is_cooperative -0.015247   0.011672 -1.30638
## factor(fans_piar_cat)Big vs Big:is_cooperative   -0.014781   0.010492 -1.40879
##                                                   Pr(>|t|)    
## (Intercept)                                      < 2.2e-16 ***
## factor(fans_piar_cat)Small vs Big                < 2.2e-16 ***
## factor(fans_piar_cat)Big vs Small                < 2.2e-16 ***
## factor(fans_piar_cat)Big vs Big                  < 2.2e-16 ***
## is_cooperative                                   < 2.2e-16 ***
## factor(fans_piar_cat)Small vs Big:is_cooperative  0.002228 ** 
## factor(fans_piar_cat)Big vs Small:is_cooperative  0.191426    
## factor(fans_piar_cat)Big vs Big:is_cooperative    0.158900    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## RMSE: 0.44075   Adj. R2: 0.032387
plot_predictions_with_ci(model_1 = model, data = data)
## `summarise()` has grouped output by 'fans_piar_cat'. You can override using the
## `.groups` argument.

### (3) plot fans

# Plot: pair-category-only model for the per-viewer DV, then the per-category
# mean predictions.
model <- feols(
  log(avg_viewer_total_cost_amt + 1) ~ factor(fans_piar_cat),
  data = data,
  vcov = ~author_id
)
summary(model)
## OLS estimation, Dep. Var.: log(avg_viewer_total_cost_amt + 1)
## Observations: 187,126
## Standard-errors: Clustered (author_id) 
##                                   Estimate Std. Error  t value  Pr(>|t|)    
## (Intercept)                       0.112261   0.001032 108.7313 < 2.2e-16 ***
## factor(fans_piar_cat)Small vs Big 0.212310   0.006770  31.3603 < 2.2e-16 ***
## factor(fans_piar_cat)Big vs Small 0.168688   0.006311  26.7279 < 2.2e-16 ***
## factor(fans_piar_cat)Big vs Big   0.207106   0.005649  36.6600 < 2.2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## RMSE: 0.440999   Adj. R2: 0.031313
plot_predictions_fans(model_1 = model, data = data)

(4) plot category

# Plot: cooperation-only model for the per-viewer DV, then the mean predictions
# per is_cooperative value.
model <- feols(
  log(avg_viewer_total_cost_amt + 1) ~ is_cooperative,
  data = data,
  vcov = ~author_id
)

summary(model)
## OLS estimation, Dep. Var.: log(avg_viewer_total_cost_amt + 1)
## Observations: 187,126
## Standard-errors: Clustered (author_id) 
##                Estimate Std. Error  t value  Pr(>|t|)    
## (Intercept)    0.138597   0.001312 105.6472 < 2.2e-16 ***
## is_cooperative 0.037234   0.002269  16.4072 < 2.2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## RMSE: 0.447722   Adj. R2: 0.001562
plot_predictions_cate(model_1 = model, data = data)

(5) plot continue cutoff

# Usage example: cutoff sweep for the per-viewer DV.
# The formula must reference fans_piar_cat_temp, the column that
# plot_continue_cutoff() rebuilds for every cutoff.
model_formula <- log(avg_viewer_total_cost_amt + 1) ~ factor(fans_piar_cat_temp)

# Run the sweep on the full data set with that formula
plot_continue_cutoff(data = data, model_formula = model_formula)

(6) plot_fan_pair_heatmap

library(ggplot2)
library(dplyr)
library(RColorBrewer)

# Step 1: Bin both fan counts into three ranges.
# Upper edges are inclusive. A count of exactly 0 belongs to "0-1k"; the former
# `> 0` lower bound let it fall through to the catch-all ">10k" bin. Missing
# counts stay NA instead of being labelled ">10k".
data <- data %>%
  mutate(
    fans_new_range = case_when(
      is.na(before_fans_count) ~ NA_character_,
      before_fans_count <= 1000 ~ "0-1k",
      before_fans_count <= 10000 ~ "1k-10k",
      TRUE ~ ">10k"
    ),
    other_fans_new_range = case_when(
      is.na(other_before_fans_count) ~ NA_character_,
      other_before_fans_count <= 1000 ~ "0-1k",
      other_before_fans_count <= 10000 ~ "1k-10k",
      TRUE ~ ">10k"
    )
  )

# Step 2: Set the correct order for the categories.
# The former call closed factor() too early, so the leftover strings
# "10k-100k" and "100k-1M" were passed to mutate() as extra unnamed arguments
# and created two junk constant columns.
data <- data %>%
  mutate(
    fans_new_range = factor(
      fans_new_range,
      levels = c("0-1k", "1k-10k", ">10k")
    ),
    other_fans_new_range = factor(
      other_fans_new_range,
      levels = c("0-1k", "1k-10k", ">10k")
    )
  )

# Step 3: Aggregate per cell: row count, mean of the log DV and its 95% CI.
# The CI is computed from the row-level values. Taking sd() of the
# already-summarised mean `Y` (a single number) always gave NA, and the t
# quantile needs n - 1 degrees of freedom. Cells with one observation get NA
# bounds.
agg_data <- data %>%
  mutate(log_dv = log(avg_viewer_total_cost_amt + 1)) %>%
  group_by(fans_new_range, other_fans_new_range) %>%
  summarize(
    count = n(), # number of pairs in this cell
    n_obs = sum(!is.na(log_dv)), # non-missing values behind the mean
    Y = mean(log_dv, na.rm = TRUE),
    se = sd(log_dv, na.rm = TRUE) / sqrt(n_obs),
    ci_lower = Y - qt(0.975, df = pmax(n_obs - 1, 1)) * se,
    ci_upper = Y + qt(0.975, df = pmax(n_obs - 1, 1)) * se,
    .groups = "drop"
  ) %>%
  mutate(percentage = count / sum(count) * 100)

# Step 4: Plot the data in a heatmap-like format with custom colors and percentage labels.
# No error-bar layer: ci_lower / ci_upper are in the units of the DV, while the
# y axis of the tile plot is the discrete `other_fans_new_range`, so they cannot
# be drawn on it. The bounds remain available in `agg_data`.
# `oob = scales::squish` clamps means outside the 0-0.2 limits to the end
# colours. The regression output for this DV implies cell means above 0.2,
# which would otherwise be rendered like missing data (white).
ggplot(agg_data, aes(x = fans_new_range, y = other_fans_new_range, fill = Y)) +
  geom_tile() +
  geom_text(aes(label = paste0(round(percentage, 2), "%")), color = "black", size = 4) +
  scale_fill_gradientn(
    colors = c("yellow", "orange", "darkorange", "red", "darkred"),
    values = scales::rescale(c(0, 0.05, 0.1, 0.15, 0.2)),
    limits = c(0, 0.2),
    oob = scales::squish,
    na.value = "white"
  ) +
  labs(
    title = "Average viewer Total Cost Amount by Fan Ranges",
    x = "Fans Range",
    y = "Other Fans Range",
    fill = "Avg Viewer Total Cost Amt" # legend now names the per-viewer DV
  ) +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

2.2. DV:单位观看时长的打赏:log(avg_time_total_cost_amt+1)

(1) regression

# Pair category x cooperation model for the per-watch-time DV with p_date fixed
# effects; standard errors are clustered by author_id.
# `a * b` expands to `a + b + a:b`, i.e. the same terms as before.
model <- feols(
  log(avg_time_total_cost_amt + 1) ~ factor(fans_piar_cat) * is_cooperative | p_date,
  data = data,
  vcov = ~author_id
)
summary(model)
## OLS estimation, Dep. Var.: log(avg_time_total_cost_amt + 1)
## Observations: 187,126
## Fixed-effects: p_date: 975
## Standard-errors: Clustered (author_id) 
##                                                  Estimate Std. Error   t value
## factor(fans_piar_cat)Small vs Big                -2.6e-05    2.9e-05 -0.907748
## factor(fans_piar_cat)Big vs Small                -2.4e-05    3.2e-05 -0.757443
## factor(fans_piar_cat)Big vs Big                  -3.5e-05    3.0e-05 -1.190522
## is_cooperative                                    5.5e-05    5.6e-05  0.994852
## factor(fans_piar_cat)Small vs Big:is_cooperative -6.3e-05    6.1e-05 -1.031365
## factor(fans_piar_cat)Big vs Small:is_cooperative -6.8e-05    6.3e-05 -1.091454
## factor(fans_piar_cat)Big vs Big:is_cooperative   -4.5e-05    5.7e-05 -0.787784
##                                                  Pr(>|t|) 
## factor(fans_piar_cat)Small vs Big                 0.36401 
## factor(fans_piar_cat)Big vs Small                 0.44879 
## factor(fans_piar_cat)Big vs Big                   0.23384 
## is_cooperative                                    0.31981 
## factor(fans_piar_cat)Small vs Big:is_cooperative  0.30237 
## factor(fans_piar_cat)Big vs Small:is_cooperative  0.27507 
## factor(fans_piar_cat)Big vs Big:is_cooperative    0.43082 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## RMSE: 0.008894     Adj. R2: 1.905e-4
##                  Within R2: 1.157e-5

(2) plot both

# Plot: refit the interaction model without fixed effects, then chart the
# group-mean predictions by pair category and cooperation flag.
model <- feols(
  log(avg_time_total_cost_amt + 1) ~ factor(fans_piar_cat) * is_cooperative,
  data = data,
  vcov = ~author_id
)
summary(model)
## OLS estimation, Dep. Var.: log(avg_time_total_cost_amt + 1)
## Observations: 187,126
## Standard-errors: Clustered (author_id) 
##                                                  Estimate Std. Error   t value
## (Intercept)                                       4.1e-05    2.5e-05  1.637817
## factor(fans_piar_cat)Small vs Big                -1.6e-05    2.5e-05 -0.627271
## factor(fans_piar_cat)Big vs Small                -1.8e-05    2.5e-05 -0.714191
## factor(fans_piar_cat)Big vs Big                  -1.9e-05    2.5e-05 -0.747102
## is_cooperative                                    7.0e-05    6.3e-05  1.102692
## factor(fans_piar_cat)Small vs Big:is_cooperative -6.7e-05    6.3e-05 -1.061532
## factor(fans_piar_cat)Big vs Small:is_cooperative -7.0e-05    6.3e-05 -1.104477
## factor(fans_piar_cat)Big vs Big:is_cooperative   -6.6e-05    6.3e-05 -1.043247
##                                                  Pr(>|t|) 
## (Intercept)                                       0.10146 
## factor(fans_piar_cat)Small vs Big                 0.53048 
## factor(fans_piar_cat)Big vs Small                 0.47511 
## factor(fans_piar_cat)Big vs Big                   0.45500 
## is_cooperative                                    0.27016 
## factor(fans_piar_cat)Small vs Big:is_cooperative  0.28845 
## factor(fans_piar_cat)Big vs Small:is_cooperative  0.26939 
## factor(fans_piar_cat)Big vs Big:is_cooperative    0.29684 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## RMSE: 0.008918   Adj. R2: -2.314e-5
plot_predictions_with_ci(model_1 = model, data = data)
## `summarise()` has grouped output by 'fans_piar_cat'. You can override using the
## `.groups` argument.

### (3) plot fans

# Plot: pair-category-only model for the per-watch-time DV, then the
# per-category mean predictions.
model <- feols(
  log(avg_time_total_cost_amt + 1) ~ factor(fans_piar_cat),
  data = data,
  vcov = ~author_id
)
summary(model)
## OLS estimation, Dep. Var.: log(avg_time_total_cost_amt + 1)
## Observations: 187,126
## Standard-errors: Clustered (author_id) 
##                                   Estimate Std. Error  t value Pr(>|t|)    
## (Intercept)                        6.5e-05    2.6e-05  2.51408 0.011935 *  
## factor(fans_piar_cat)Small vs Big -3.8e-05    2.6e-05 -1.48955 0.136345    
## factor(fans_piar_cat)Big vs Small -4.2e-05    2.6e-05 -1.61444 0.106435    
## factor(fans_piar_cat)Big vs Big   -4.1e-05    2.6e-05 -1.58335 0.113344    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## RMSE: 0.008918   Adj. R2: -1.277e-5
plot_predictions_fans(model_1 = model, data = data)

(4) plot category

# Plot: cooperation-only model for the per-watch-time DV, then the mean
# predictions per is_cooperative value.
model <- feols(
  log(avg_time_total_cost_amt + 1) ~ is_cooperative,
  data = data,
  vcov = ~author_id
)

summary(model)
## OLS estimation, Dep. Var.: log(avg_time_total_cost_amt + 1)
## Observations: 187,126
## Standard-errors: Clustered (author_id) 
##                Estimate Std. Error t value Pr(>|t|)    
## (Intercept)     3.8e-05      2e-05 1.84924 0.064425 .  
## is_cooperative  5.4e-05      5e-05 1.08877 0.276256    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## RMSE: 0.008918   Adj. R2: 2.955e-6
plot_predictions_cate(model_1 = model, data = data)

(5) plot continue cutoff

# Usage example: cutoff sweep for the per-watch-time DV.
# The formula must reference fans_piar_cat_temp, the column that
# plot_continue_cutoff() rebuilds for every cutoff.
model_formula <- log(avg_time_total_cost_amt + 1) ~ factor(fans_piar_cat_temp)

# Run the sweep on the full data set with that formula
plot_continue_cutoff(data = data, model_formula = model_formula)

(6) plot_fan_pair_heatmap

library(ggplot2)
library(dplyr)
library(RColorBrewer)

# Step 1-2: Bin the fan counts into ordered ranges (this side: 5 bins; other
# side: 3 coarser bins, everything above 10k pooled into ">10k").
# cut() uses right-closed intervals, e.g. (1000, 10000] -> "1k-10k", and returns
# a factor whose levels follow the order of `labels`.
# Fix: the previous case_when() required count > 0 for the "0-1k" bin, so a
# count of 0 (and NA) fell through to the catch-all top bucket. Now 0 lands in
# "0-1k" and NA stays NA.
data <- data %>%
  mutate(
    fans_new_range = cut(
      before_fans_count,
      breaks = c(-Inf, 1000, 10000, 100000, 1000000, Inf),
      labels = c("0-1k", "1k-10k", "10k-100k", "100k-1M", ">1M")
    ),
    other_fans_new_range = cut(
      other_before_fans_count,
      breaks = c(-Inf, 1000, 10000, Inf),
      labels = c("0-1k", "1k-10k", ">10k")
    )
  )

# Step 3: Per fan-range pair, compute the cell size, the mean of the log
# outcome, and a 95% t-interval for that mean.
agg_data <- data %>%
  mutate(log_outcome = log(avg_time_total_cost_amt + 1)) %>%
  group_by(fans_new_range, other_fans_new_range) %>%
  summarize(
    count = n(),  # number of rows in this cell
    n_valid = sum(!is.na(log_outcome)),  # rows that contribute to the mean
    Y = mean(log_outcome, na.rm = TRUE),
    # The spread must come from the row-level values: sd() of the single
    # summarised mean Y is always NA, which left every interval empty.
    # df is floored at 1 only to avoid a qt() warning; with one valid row sd()
    # is NA anyway, so the interval stays NA.
    ci_half = qt(0.975, df = max(n_valid - 1, 1)) *
      sd(log_outcome, na.rm = TRUE) / sqrt(n_valid),
    ci_lower = Y - ci_half,
    ci_upper = Y + ci_half
  ) %>%
  ungroup() %>%
  select(-n_valid, -ci_half) %>%
  mutate(percentage = count / sum(count) * 100)
## `summarise()` has grouped output by 'fans_new_range'. You can override using
## the `.groups` argument.
# Step 4: Heatmap of the cell means; each tile is labelled with the cell's share
# of all rows. No geom_errorbar(): both axes are categorical, so ci_lower and
# ci_upper (values on the outcome scale) cannot be drawn as y positions. They
# stay available in agg_data.
ggplot(agg_data, aes(x = fans_new_range, y = other_fans_new_range, fill = Y)) +
  geom_tile() +
  geom_text(aes(label = paste0(round(percentage, 2), "%")), color = "black", size = 4) +
  scale_fill_gradientn(colors = c("yellow", "orange", "darkorange", "red", "darkred"),
                     values = scales::rescale(c(0, 0.05, 0.1, 0.15, 0.2)),
                     limits = c(0, 0.2),
                     na.value = "white") +
  labs(title = "Mean log(avg_time_total_cost_amt + 1) by Fan Ranges",
       x = "Fans Range",
       y = "Other Fans Range",
       fill = "Mean log(avg_time_total_cost_amt + 1)") +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

2.3.DV:总打赏:log(total_cost_amt+1)

(1) regression

# Main specification: fan-size pair category x cooperation, with p_date fixed
# effects; standard errors clustered by author_id.
model <- feols(
  fml = log(total_cost_amt + 1) ~
    factor(fans_piar_cat) + is_cooperative + factor(fans_piar_cat) * is_cooperative | p_date,
  data = data,
  vcov = ~author_id
)
summary(model)
## OLS estimation, Dep. Var.: log(total_cost_amt + 1)
## Observations: 187,126
## Fixed-effects: p_date: 975
## Standard-errors: Clustered (author_id) 
##                                                   Estimate Std. Error   t value
## factor(fans_piar_cat)Small vs Big                 1.504307   0.036000 41.786864
## factor(fans_piar_cat)Big vs Small                 1.798410   0.037754 47.634457
## factor(fans_piar_cat)Big vs Big                   2.468558   0.035790 68.974328
## is_cooperative                                    0.114965   0.009531 12.062736
## factor(fans_piar_cat)Small vs Big:is_cooperative  0.325324   0.055448  5.867161
## factor(fans_piar_cat)Big vs Small:is_cooperative  0.020468   0.054788  0.373589
## factor(fans_piar_cat)Big vs Big:is_cooperative   -0.247271   0.048455 -5.103118
##                                                    Pr(>|t|)    
## factor(fans_piar_cat)Small vs Big                 < 2.2e-16 ***
## factor(fans_piar_cat)Big vs Small                 < 2.2e-16 ***
## factor(fans_piar_cat)Big vs Big                   < 2.2e-16 ***
## is_cooperative                                    < 2.2e-16 ***
## factor(fans_piar_cat)Small vs Big:is_cooperative 4.4421e-09 ***
## factor(fans_piar_cat)Big vs Small:is_cooperative 7.0871e-01    
## factor(fans_piar_cat)Big vs Big:is_cooperative   3.3449e-07 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## RMSE: 1.93209     Adj. R2: 0.157868
##                 Within R2: 0.147539

(2) plot both

# Plotting model: same interaction specification, but without the p_date fixed
# effects; standard errors clustered by author_id.
model <- feols(
  fml = log(total_cost_amt + 1) ~
    factor(fans_piar_cat) + is_cooperative + factor(fans_piar_cat) * is_cooperative,
  data = data,
  vcov = ~author_id
)
summary(model)
## OLS estimation, Dep. Var.: log(total_cost_amt + 1)
## Observations: 187,126
## Standard-errors: Clustered (author_id) 
##                                                   Estimate Std. Error
## (Intercept)                                       0.788169   0.005481
## factor(fans_piar_cat)Small vs Big                 1.532570   0.036028
## factor(fans_piar_cat)Big vs Small                 1.828888   0.037758
## factor(fans_piar_cat)Big vs Big                   2.507547   0.035668
## is_cooperative                                    0.125272   0.009482
## factor(fans_piar_cat)Small vs Big:is_cooperative  0.316799   0.055539
## factor(fans_piar_cat)Big vs Small:is_cooperative  0.007394   0.054806
## factor(fans_piar_cat)Big vs Big:is_cooperative   -0.264253   0.048443
##                                                     t value   Pr(>|t|)    
## (Intercept)                                      143.812046  < 2.2e-16 ***
## factor(fans_piar_cat)Small vs Big                 42.537712  < 2.2e-16 ***
## factor(fans_piar_cat)Big vs Small                 48.437078  < 2.2e-16 ***
## factor(fans_piar_cat)Big vs Big                   70.301835  < 2.2e-16 ***
## is_cooperative                                    13.211182  < 2.2e-16 ***
## factor(fans_piar_cat)Small vs Big:is_cooperative   5.704068 1.1719e-08 ***
## factor(fans_piar_cat)Big vs Small:is_cooperative   0.134918 8.9268e-01    
## factor(fans_piar_cat)Big vs Big:is_cooperative    -5.454943 4.9062e-08 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## RMSE: 1.94299   Adj. R2: 0.15277
# Plot the interaction model with plot_predictions_with_ci(), a helper defined
# outside this section (it summarises by fans_piar_cat, per the message below;
# presumably predictions with confidence intervals -- TODO confirm).
plot_predictions_with_ci(model, data)
## `summarise()` has grouped output by 'fans_piar_cat'. You can override using the
## `.groups` argument.

### (3) plot fans

# Plotting model: fan-size pair category only; standard errors clustered by
# author_id.
model <- feols(
  fml = log(total_cost_amt + 1) ~ factor(fans_piar_cat),
  data = data,
  vcov = ~author_id
)
summary(model)
## OLS estimation, Dep. Var.: log(total_cost_amt + 1)
## Observations: 187,126
## Standard-errors: Clustered (author_id) 
##                                   Estimate Std. Error  t value  Pr(>|t|)    
## (Intercept)                       0.830345   0.004741 175.1391 < 2.2e-16 ***
## factor(fans_piar_cat)Small vs Big 1.658532   0.030948  53.5910 < 2.2e-16 ***
## factor(fans_piar_cat)Big vs Small 1.836686   0.032876  55.8674 < 2.2e-16 ***
## factor(fans_piar_cat)Big vs Big   2.407051   0.029675  81.1137 < 2.2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## RMSE: 1.94457   Adj. R2: 0.151415
# Plot the model fitted just above with plot_predictions_fans(), a helper defined
# outside this section (presumably predictions by fans_piar_cat -- TODO confirm).
plot_predictions_fans(model, data)

(4) plot category

# Plotting model: cooperation indicator only; standard errors clustered by
# author_id.
model <- feols(
  fml = log(total_cost_amt + 1) ~ is_cooperative,
  data = data,
  vcov = ~author_id
)
summary(model)
## OLS estimation, Dep. Var.: log(total_cost_amt + 1)
## Observations: 187,126
## Standard-errors: Clustered (author_id) 
##                Estimate Std. Error  t value  Pr(>|t|)    
## (Intercept)    1.159620   0.006686 173.4396 < 2.2e-16 ***
## is_cooperative 0.210532   0.010738  19.6065 < 2.2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## RMSE: 2.10857   Adj. R2: 0.002253
# Plot the model fitted just above with plot_predictions_cate(), a helper defined
# outside this section (presumably predictions by is_cooperative -- TODO confirm).
plot_predictions_cate(model, data)

(5) plot continue cutoff

# Usage example
# Define the formula for the model.
# NOTE(review): fans_piar_cat_temp is not created anywhere in the visible code;
# presumably plot_continue_cutoff() builds it internally -- confirm.
model_formula <- log(total_cost_amt + 1) ~ factor(fans_piar_cat_temp)

# Call the function with the data and formula
plot_continue_cutoff(data, model_formula)

(6) plot_fan_pair_heatmap

library(ggplot2)
library(dplyr)
library(RColorBrewer)

# Step 1-2: Bin both fan counts into the same five ordered ranges.
# cut() uses right-closed intervals, e.g. (1000, 10000] -> "1k-10k", and returns
# a factor whose levels follow the order of `labels`.
# Fix: the previous case_when() required count > 0 for the "0-1k" bin, so a
# count of 0 (and NA) fell through to the catch-all ">1M" bucket. Now 0 lands in
# "0-1k" and NA stays NA.
fan_range_breaks <- c(-Inf, 1000, 10000, 100000, 1000000, Inf)
fan_range_labels <- c("0-1k", "1k-10k", "10k-100k", "100k-1M", ">1M")
data <- data %>%
  mutate(
    fans_new_range = cut(
      before_fans_count,
      breaks = fan_range_breaks,
      labels = fan_range_labels
    ),
    other_fans_new_range = cut(
      other_before_fans_count,
      breaks = fan_range_breaks,
      labels = fan_range_labels
    )
  )

# Step 3: Per fan-range pair, compute the cell size, the mean of the log
# outcome, and a 95% t-interval for that mean.
agg_data <- data %>%
  mutate(log_outcome = log(total_cost_amt + 1)) %>%
  group_by(fans_new_range, other_fans_new_range) %>%
  summarize(
    count = n(),  # number of rows in this cell
    n_valid = sum(!is.na(log_outcome)),  # rows that contribute to the mean
    Y = mean(log_outcome, na.rm = TRUE),
    # The spread must come from the row-level values: sd() of the single
    # summarised mean Y is always NA, which left every interval empty.
    # df is floored at 1 only to avoid a qt() warning; with one valid row sd()
    # is NA anyway, so the interval stays NA.
    ci_half = qt(0.975, df = max(n_valid - 1, 1)) *
      sd(log_outcome, na.rm = TRUE) / sqrt(n_valid),
    ci_lower = Y - ci_half,
    ci_upper = Y + ci_half
  ) %>%
  ungroup() %>%
  select(-n_valid, -ci_half) %>%
  mutate(percentage = count / sum(count) * 100)
## `summarise()` has grouped output by 'fans_new_range'. You can override using
## the `.groups` argument.
# Step 4: Heatmap of the cell means; each tile is labelled with the cell's share
# of all rows. No geom_errorbar(): both axes are categorical, so ci_lower and
# ci_upper (values on the outcome scale) cannot be drawn as y positions. They
# stay available in agg_data.
ggplot(agg_data, aes(x = fans_new_range, y = other_fans_new_range, fill = Y)) +
  geom_tile() +
  geom_text(aes(label = paste0(round(percentage, 2), "%")), color = "black", size = 4) +
  scale_fill_gradientn(colors = c("yellow", "orange", "darkorange", "red", "darkred"),
                     values = scales::rescale(c(0, 0.05, 0.1, 0.15, 0.2)),
                     #limits = c(0, 0.2),
                     na.value = "white") +
  labs(title = "Mean log(total_cost_amt + 1) by Fan Ranges",
       x = "Fans Range",
       y = "Other Fans Range",
       fill = "Mean log(total_cost_amt + 1)") +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

3.DV:涨粉:log(follow_author_fans_count+1)

# Store log(follow_author_fans_count + 1) on data as log_Y and inspect its
# distribution.
data[["log_Y"]] <- log(data[["follow_author_fans_count"]] + 1)
summary(data[["log_Y"]])
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##  0.0000  0.0000  0.0000  0.2303  0.0000  8.5583

(1) regression

# Main specification: fan-size pair category x cooperation, with p_date fixed
# effects; standard errors clustered by author_id.
model <- feols(
  fml = log(follow_author_fans_count + 1) ~
    factor(fans_piar_cat) + is_cooperative + factor(fans_piar_cat) * is_cooperative | p_date,
  data = data,
  vcov = ~author_id
)
summary(model)
## OLS estimation, Dep. Var.: log(follow_author_fans_count + 1)
## Observations: 187,126
## Fixed-effects: p_date: 975
## Standard-errors: Clustered (author_id) 
##                                                   Estimate Std. Error   t value
## factor(fans_piar_cat)Small vs Big                 0.216227   0.009699 22.292826
## factor(fans_piar_cat)Big vs Small                 0.386941   0.012234 31.628547
## factor(fans_piar_cat)Big vs Big                   0.725524   0.014979 48.434570
## is_cooperative                                   -0.013079   0.002396 -5.458509
## factor(fans_piar_cat)Small vs Big:is_cooperative  0.009948   0.014939  0.665924
## factor(fans_piar_cat)Big vs Small:is_cooperative -0.070114   0.017784 -3.942486
## factor(fans_piar_cat)Big vs Big:is_cooperative    0.015164   0.021565  0.703182
##                                                    Pr(>|t|)    
## factor(fans_piar_cat)Small vs Big                 < 2.2e-16 ***
## factor(fans_piar_cat)Big vs Small                 < 2.2e-16 ***
## factor(fans_piar_cat)Big vs Big                   < 2.2e-16 ***
## is_cooperative                                   4.8087e-08 ***
## factor(fans_piar_cat)Small vs Big:is_cooperative 5.0546e-01    
## factor(fans_piar_cat)Big vs Small:is_cooperative 8.0676e-05 ***
## factor(fans_piar_cat)Big vs Big:is_cooperative   4.8194e-01    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## RMSE: 0.604995     Adj. R2: 0.118903
##                  Within R2: 0.109577

(2) plot both

# Plotting model: same interaction specification, but without the p_date fixed
# effects; standard errors clustered by author_id.
model <- feols(
  fml = log(follow_author_fans_count + 1) ~
    factor(fans_piar_cat) + is_cooperative + factor(fans_piar_cat) * is_cooperative,
  data = data,
  vcov = ~author_id
)
summary(model)
## OLS estimation, Dep. Var.: log(follow_author_fans_count + 1)
## Observations: 187,126
## Standard-errors: Clustered (author_id) 
##                                                   Estimate Std. Error   t value
## (Intercept)                                       0.136842   0.001451 94.332586
## factor(fans_piar_cat)Small vs Big                 0.223243   0.009678 23.067563
## factor(fans_piar_cat)Big vs Small                 0.394698   0.012330 32.011946
## factor(fans_piar_cat)Big vs Big                   0.738318   0.015075 48.976133
## is_cooperative                                   -0.007669   0.002347 -3.266780
## factor(fans_piar_cat)Small vs Big:is_cooperative  0.007409   0.014957  0.495348
## factor(fans_piar_cat)Big vs Small:is_cooperative -0.075341   0.017872 -4.215706
## factor(fans_piar_cat)Big vs Big:is_cooperative    0.012063   0.021690  0.556135
##                                                    Pr(>|t|)    
## (Intercept)                                       < 2.2e-16 ***
## factor(fans_piar_cat)Small vs Big                 < 2.2e-16 ***
## factor(fans_piar_cat)Big vs Small                 < 2.2e-16 ***
## factor(fans_piar_cat)Big vs Big                   < 2.2e-16 ***
## is_cooperative                                   1.0880e-03 ** 
## factor(fans_piar_cat)Small vs Big:is_cooperative 6.2035e-01    
## factor(fans_piar_cat)Big vs Small:is_cooperative 2.4914e-05 ***
## factor(fans_piar_cat)Big vs Big:is_cooperative   5.7812e-01    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## RMSE: 0.608317   Adj. R2: 0.113839
# Plot the interaction model with plot_predictions_with_ci(), a helper defined
# outside this section (it summarises by fans_piar_cat, per the message below;
# presumably predictions with confidence intervals -- TODO confirm).
plot_predictions_with_ci(model, data)
## `summarise()` has grouped output by 'fans_piar_cat'. You can override using the
## `.groups` argument.

### (3) plot fans

# Plotting model: fan-size pair category only; standard errors clustered by
# author_id.
model <- feols(
  fml = log(follow_author_fans_count + 1) ~ factor(fans_piar_cat),
  data = data,
  vcov = ~author_id
)
summary(model)
## OLS estimation, Dep. Var.: log(follow_author_fans_count + 1)
## Observations: 187,126
## Standard-errors: Clustered (author_id) 
##                                   Estimate Std. Error  t value  Pr(>|t|)    
## (Intercept)                       0.134260   0.001165 115.2873 < 2.2e-16 ***
## factor(fans_piar_cat)Small vs Big 0.225726   0.007764  29.0752 < 2.2e-16 ***
## factor(fans_piar_cat)Big vs Small 0.366010   0.009851  37.1533 < 2.2e-16 ***
## factor(fans_piar_cat)Big vs Big   0.742744   0.012505  59.3940 < 2.2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## RMSE: 0.608402   Adj. R2: 0.11361
# Plot the model fitted just above with plot_predictions_fans(), a helper defined
# outside this section (presumably predictions by fans_piar_cat -- TODO confirm).
plot_predictions_fans(model, data)

(4) plot category

# Plotting model: cooperation indicator only; standard errors clustered by
# author_id.
model <- feols(
  fml = log(follow_author_fans_count + 1) ~ is_cooperative,
  data = data,
  vcov = ~author_id
)
summary(model)
## OLS estimation, Dep. Var.: log(follow_author_fans_count + 1)
## Observations: 187,126
## Standard-errors: Clustered (author_id) 
##                Estimate Std. Error   t value   Pr(>|t|)    
## (Intercept)    0.225708   0.001925 117.25723  < 2.2e-16 ***
## is_cooperative 0.013324   0.003225   4.13158 3.6047e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## RMSE: 0.64619   Adj. R2: 9.116e-5
# Plot the model fitted just above with plot_predictions_cate(), a helper defined
# outside this section (presumably predictions by is_cooperative -- TODO confirm).
plot_predictions_cate(model, data)

(5) plot continue cutoff

# Usage example
# Define the formula for the model.
# NOTE(review): fans_piar_cat_temp is not created anywhere in the visible code;
# presumably plot_continue_cutoff() builds it internally -- confirm.
model_formula <- log(follow_author_fans_count+1) ~ factor(fans_piar_cat_temp)

# Call the function with the data and formula
plot_continue_cutoff(data, model_formula)

### (6) plot_fan_pair_heatmap

library(ggplot2)
library(dplyr)
library(RColorBrewer)

# Step 1-2: Bin both fan counts into the same five ordered ranges.
# cut() uses right-closed intervals, e.g. (1000, 10000] -> "1k-10k", and returns
# a factor whose levels follow the order of `labels`.
# Fix: the previous case_when() required count > 0 for the "0-1k" bin, so a
# count of 0 (and NA) fell through to the catch-all ">1M" bucket. Now 0 lands in
# "0-1k" and NA stays NA.
fan_range_breaks <- c(-Inf, 1000, 10000, 100000, 1000000, Inf)
fan_range_labels <- c("0-1k", "1k-10k", "10k-100k", "100k-1M", ">1M")
data <- data %>%
  mutate(
    fans_new_range = cut(
      before_fans_count,
      breaks = fan_range_breaks,
      labels = fan_range_labels
    ),
    other_fans_new_range = cut(
      other_before_fans_count,
      breaks = fan_range_breaks,
      labels = fan_range_labels
    )
  )

# Step 3: Per fan-range pair, compute the cell size, the mean of the log
# outcome, and a 95% t-interval for that mean.
agg_data <- data %>%
  mutate(log_outcome = log(follow_author_fans_count + 1)) %>%
  group_by(fans_new_range, other_fans_new_range) %>%
  summarize(
    count = n(),  # number of rows in this cell
    n_valid = sum(!is.na(log_outcome)),  # rows that contribute to the mean
    Y = mean(log_outcome, na.rm = TRUE),
    # The spread must come from the row-level values: sd() of the single
    # summarised mean Y is always NA, which left every interval empty.
    # df is floored at 1 only to avoid a qt() warning; with one valid row sd()
    # is NA anyway, so the interval stays NA.
    ci_half = qt(0.975, df = max(n_valid - 1, 1)) *
      sd(log_outcome, na.rm = TRUE) / sqrt(n_valid),
    ci_lower = Y - ci_half,
    ci_upper = Y + ci_half
  ) %>%
  ungroup() %>%
  select(-n_valid, -ci_half) %>%
  mutate(percentage = count / sum(count) * 100)
## `summarise()` has grouped output by 'fans_new_range'. You can override using
## the `.groups` argument.
# Step 4: Heatmap of the cell means; each tile is labelled with the cell's share
# of all rows. No geom_errorbar(): both axes are categorical, so ci_lower and
# ci_upper (values on the outcome scale) cannot be drawn as y positions. They
# stay available in agg_data.
ggplot(agg_data, aes(x = fans_new_range, y = other_fans_new_range, fill = Y)) +
  geom_tile() +
  geom_text(aes(label = paste0(round(percentage, 2), "%")), color = "black", size = 4) +
  scale_fill_gradientn(colors = c("yellow", "orange", "darkorange", "red", "darkred"),
                     values = scales::rescale(c(0, 0.05, 0.1, 0.15, 0.2)),
                     #limits = c(0, 0.2),
                     na.value = "white") +
  labs(title = "Mean log(follow_author_fans_count + 1) by Fan Ranges",
       x = "Fans Range",
       y = "Other Fans Range",
       fill = "Mean log(follow_author_fans_count + 1)") +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

4.DV:掉粉:log(unfollow_author_fans_count+1)

(1) regression

# Main specification: fan-size pair category x cooperation, with p_date fixed
# effects; standard errors clustered by author_id.
model <- feols(
  fml = log(unfollow_author_fans_count + 1) ~
    factor(fans_piar_cat) + is_cooperative + factor(fans_piar_cat) * is_cooperative | p_date,
  data = data,
  vcov = ~author_id
)
summary(model)
## OLS estimation, Dep. Var.: log(unfollow_author_fans_count + 1)
## Observations: 187,126
## Fixed-effects: p_date: 975
## Standard-errors: Clustered (author_id) 
##                                                   Estimate Std. Error   t value
## factor(fans_piar_cat)Small vs Big                 0.140763   0.006249 22.526716
## factor(fans_piar_cat)Big vs Small                 0.389921   0.009670 40.322244
## factor(fans_piar_cat)Big vs Big                   0.769609   0.013417 57.359359
## is_cooperative                                   -0.007749   0.001553 -4.990046
## factor(fans_piar_cat)Small vs Big:is_cooperative -0.001895   0.009635 -0.196661
## factor(fans_piar_cat)Big vs Small:is_cooperative -0.068933   0.013926 -4.950058
## factor(fans_piar_cat)Big vs Big:is_cooperative   -0.025124   0.018789 -1.337198
##                                                    Pr(>|t|)    
## factor(fans_piar_cat)Small vs Big                 < 2.2e-16 ***
## factor(fans_piar_cat)Big vs Small                 < 2.2e-16 ***
## factor(fans_piar_cat)Big vs Big                   < 2.2e-16 ***
## is_cooperative                                   6.0429e-07 ***
## factor(fans_piar_cat)Small vs Big:is_cooperative 8.4409e-01    
## factor(fans_piar_cat)Big vs Small:is_cooperative 7.4268e-07 ***
## factor(fans_piar_cat)Big vs Big:is_cooperative   1.8116e-01    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## RMSE: 0.455734     Adj. R2: 0.193824
##                  Within R2: 0.185614

(2) plot both

# Plotting model: same interaction specification, but without the p_date fixed
# effects; standard errors clustered by author_id.
model <- feols(
  fml = log(unfollow_author_fans_count + 1) ~
    factor(fans_piar_cat) + is_cooperative + factor(fans_piar_cat) * is_cooperative,
  data = data,
  vcov = ~author_id
)
summary(model)
## OLS estimation, Dep. Var.: log(unfollow_author_fans_count + 1)
## Observations: 187,126
## Standard-errors: Clustered (author_id) 
##                                                   Estimate Std. Error   t value
## (Intercept)                                       0.082809   0.000902 91.790781
## factor(fans_piar_cat)Small vs Big                 0.143810   0.006226 23.098161
## factor(fans_piar_cat)Big vs Small                 0.393307   0.009743 40.367304
## factor(fans_piar_cat)Big vs Big                   0.776209   0.013523 57.400702
## is_cooperative                                   -0.004170   0.001510 -2.761736
## factor(fans_piar_cat)Small vs Big:is_cooperative -0.003141   0.009627 -0.326306
## factor(fans_piar_cat)Big vs Small:is_cooperative -0.072186   0.013994 -5.158359
## factor(fans_piar_cat)Big vs Big:is_cooperative   -0.026794   0.018902 -1.417495
##                                                    Pr(>|t|)    
## (Intercept)                                       < 2.2e-16 ***
## factor(fans_piar_cat)Small vs Big                 < 2.2e-16 ***
## factor(fans_piar_cat)Big vs Small                 < 2.2e-16 ***
## factor(fans_piar_cat)Big vs Big                   < 2.2e-16 ***
## is_cooperative                                   5.7502e-03 ** 
## factor(fans_piar_cat)Small vs Big:is_cooperative 7.4419e-01    
## factor(fans_piar_cat)Big vs Small:is_cooperative 2.4942e-07 ***
## factor(fans_piar_cat)Big vs Big:is_cooperative   1.5634e-01    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## RMSE: 0.458147   Adj. R2: 0.189507
# Plot the interaction model with plot_predictions_with_ci(), a helper defined
# outside this section (it summarises by fans_piar_cat, per the message below;
# presumably predictions with confidence intervals -- TODO confirm).
plot_predictions_with_ci(model, data)
## `summarise()` has grouped output by 'fans_piar_cat'. You can override using the
## `.groups` argument.

### (3) plot fans

# Plotting model: fan-size pair category only; standard errors clustered by
# author_id.
model <- feols(
  fml = log(unfollow_author_fans_count + 1) ~ factor(fans_piar_cat),
  data = data,
  vcov = ~author_id
)
summary(model)
## OLS estimation, Dep. Var.: log(unfollow_author_fans_count + 1)
## Observations: 187,126
## Standard-errors: Clustered (author_id) 
##                                   Estimate Std. Error  t value  Pr(>|t|)    
## (Intercept)                       0.081405   0.000732 111.2117 < 2.2e-16 ***
## factor(fans_piar_cat)Small vs Big 0.142433   0.004980  28.5995 < 2.2e-16 ***
## factor(fans_piar_cat)Big vs Small 0.365949   0.007925  46.1736 < 2.2e-16 ***
## factor(fans_piar_cat)Big vs Big   0.764620   0.011323  67.5272 < 2.2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## RMSE: 0.458258   Adj. R2: 0.189132
# Plot the model fitted just above with plot_predictions_fans(), a helper defined
# outside this section (presumably predictions by fans_piar_cat -- TODO confirm).
plot_predictions_fans(model, data)

(4) plot category

# Plotting model: cooperation indicator only; standard errors clustered by
# author_id.
model <- feols(
  fml = log(unfollow_author_fans_count + 1) ~ is_cooperative,
  data = data,
  vcov = ~author_id
)
summary(model)
## OLS estimation, Dep. Var.: log(unfollow_author_fans_count + 1)
## Observations: 187,126
## Standard-errors: Clustered (author_id) 
##                Estimate Std. Error   t value   Pr(>|t|)    
## (Intercept)    0.170013   0.001504 113.04675  < 2.2e-16 ***
## is_cooperative 0.012771   0.002558   4.99273 5.9595e-07 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## RMSE: 0.508871   Adj. R2: 1.376e-4
# Plot the model fitted just above with plot_predictions_cate(), a helper defined
# outside this section (presumably predictions by is_cooperative -- TODO confirm).
plot_predictions_cate(model, data)

(5) plot continue cutoff

# Define the formula for the model.
# NOTE(review): fans_piar_cat_temp is not created anywhere in the visible code;
# presumably plot_continue_cutoff() builds it internally -- confirm.
model_formula <- log(unfollow_author_fans_count+1) ~ factor(fans_piar_cat_temp)

# Call the function with the data and formula
plot_continue_cutoff(data, model_formula)

### (6) plot_fan_pair_heatmap

library(ggplot2)
library(dplyr)
library(RColorBrewer)

# Step 1-2: Bin both fan counts into the same five ordered ranges.
# cut() uses right-closed intervals, e.g. (1000, 10000] -> "1k-10k", and returns
# a factor whose levels follow the order of `labels`.
# Fix: the previous case_when() required count > 0 for the "0-1k" bin, so a
# count of 0 (and NA) fell through to the catch-all ">1M" bucket. Now 0 lands in
# "0-1k" and NA stays NA.
fan_range_breaks <- c(-Inf, 1000, 10000, 100000, 1000000, Inf)
fan_range_labels <- c("0-1k", "1k-10k", "10k-100k", "100k-1M", ">1M")
data <- data %>%
  mutate(
    fans_new_range = cut(
      before_fans_count,
      breaks = fan_range_breaks,
      labels = fan_range_labels
    ),
    other_fans_new_range = cut(
      other_before_fans_count,
      breaks = fan_range_breaks,
      labels = fan_range_labels
    )
  )

# Step 3: Per fan-range pair, compute the cell size, the mean of the log
# outcome, and a 95% t-interval for that mean.
agg_data <- data %>%
  mutate(log_outcome = log(unfollow_author_fans_count + 1)) %>%
  group_by(fans_new_range, other_fans_new_range) %>%
  summarize(
    count = n(),  # number of rows in this cell
    n_valid = sum(!is.na(log_outcome)),  # rows that contribute to the mean
    Y = mean(log_outcome, na.rm = TRUE),
    # The spread must come from the row-level values: sd() of the single
    # summarised mean Y is always NA, which left every interval empty.
    # df is floored at 1 only to avoid a qt() warning; with one valid row sd()
    # is NA anyway, so the interval stays NA.
    ci_half = qt(0.975, df = max(n_valid - 1, 1)) *
      sd(log_outcome, na.rm = TRUE) / sqrt(n_valid),
    ci_lower = Y - ci_half,
    ci_upper = Y + ci_half
  ) %>%
  ungroup() %>%
  select(-n_valid, -ci_half) %>%
  mutate(percentage = count / sum(count) * 100)
## `summarise()` has grouped output by 'fans_new_range'. You can override using
## the `.groups` argument.
# Step 4: Heatmap of the cell means; each tile is labelled with the cell's share
# of all rows. No geom_errorbar(): both axes are categorical, so ci_lower and
# ci_upper (values on the outcome scale) cannot be drawn as y positions. They
# stay available in agg_data.
ggplot(agg_data, aes(x = fans_new_range, y = other_fans_new_range, fill = Y)) +
  geom_tile() +
  geom_text(aes(label = paste0(round(percentage, 2), "%")), color = "black", size = 4) +
  scale_fill_gradientn(colors = c("yellow", "orange", "darkorange", "red", "darkred"),
                     values = scales::rescale(c(0, 0.05, 0.1, 0.15, 0.2)),
                     #limits = c(0, 0.2),
                     na.value = "white") +
  labs(title = "Mean log(unfollow_author_fans_count + 1) by Fan Ranges",
       x = "Fans Range",
       y = "Other Fans Range",
       fill = "Mean log(unfollow_author_fans_count + 1)") +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

5.DV:net涨粉数:log(net_follow_fans+255)

# Inspect the distribution of net_follow_fans (follow_author_fans_count minus
# unfollow_author_fans_count, computed earlier in the file). The minimum shown
# below is -254, so the + 255 offset used in the models keeps the log argument >= 1.
summary(data$net_follow_fans)
##     Min.  1st Qu.   Median     Mean  3rd Qu.     Max. 
## -254.000    0.000    0.000    0.958    0.000 5064.000

(1) regression

# Main specification: fan-size pair category x cooperation, with p_date fixed
# effects; standard errors clustered by author_id.
model <- feols(
  fml = log(net_follow_fans + 255) ~
    factor(fans_piar_cat) + is_cooperative + factor(fans_piar_cat) * is_cooperative | p_date,
  data = data,
  vcov = ~author_id
)
summary(model)
## OLS estimation, Dep. Var.: log(net_follow_fans + 255)
## Observations: 187,126
## Fixed-effects: p_date: 975
## Standard-errors: Clustered (author_id) 
##                                                   Estimate Std. Error   t value
## factor(fans_piar_cat)Small vs Big                 0.003578   0.000537  6.669335
## factor(fans_piar_cat)Big vs Small                 0.005307   0.000626  8.482533
## factor(fans_piar_cat)Big vs Big                   0.009477   0.001101  8.606747
## is_cooperative                                   -0.000371   0.000088 -4.197884
## factor(fans_piar_cat)Small vs Big:is_cooperative  0.000141   0.000813  0.172847
## factor(fans_piar_cat)Big vs Small:is_cooperative -0.001609   0.000883 -1.823052
## factor(fans_piar_cat)Big vs Big:is_cooperative    0.003085   0.002082  1.481933
##                                                    Pr(>|t|)    
## factor(fans_piar_cat)Small vs Big                2.5781e-11 ***
## factor(fans_piar_cat)Big vs Small                 < 2.2e-16 ***
## factor(fans_piar_cat)Big vs Big                   < 2.2e-16 ***
## is_cooperative                                   2.6957e-05 ***
## factor(fans_piar_cat)Small vs Big:is_cooperative 8.6277e-01    
## factor(fans_piar_cat)Big vs Small:is_cooperative 6.8297e-02 .  
## factor(fans_piar_cat)Big vs Big:is_cooperative   1.3836e-01    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## RMSE: 0.040283     Adj. R2: 0.006926
##                  Within R2: 0.005989

(2) plot both

# Plotting model: same interaction specification, but without the p_date fixed
# effects; standard errors clustered by author_id.
model <- feols(
  fml = log(net_follow_fans + 255) ~
    factor(fans_piar_cat) + is_cooperative + factor(fans_piar_cat) * is_cooperative,
  data = data,
  vcov = ~author_id
)
summary(model)
## OLS estimation, Dep. Var.: log(net_follow_fans + 255)
## Observations: 187,126
## Standard-errors: Clustered (author_id) 
##                                                   Estimate Std. Error
## (Intercept)                                       5.542571   0.000054
## factor(fans_piar_cat)Small vs Big                 0.003604   0.000537
## factor(fans_piar_cat)Big vs Small                 0.005347   0.000626
## factor(fans_piar_cat)Big vs Big                   0.009591   0.001100
## is_cooperative                                   -0.000292   0.000082
## factor(fans_piar_cat)Small vs Big:is_cooperative  0.000137   0.000817
## factor(fans_piar_cat)Big vs Small:is_cooperative -0.001633   0.000883
## factor(fans_piar_cat)Big vs Big:is_cooperative    0.003079   0.002066
##                                                        t value   Pr(>|t|)    
## (Intercept)                                      102086.907310  < 2.2e-16 ***
## factor(fans_piar_cat)Small vs Big                     6.706917 1.9945e-11 ***
## factor(fans_piar_cat)Big vs Small                     8.536405  < 2.2e-16 ***
## factor(fans_piar_cat)Big vs Big                       8.719109  < 2.2e-16 ***
## is_cooperative                                       -3.562455 3.6751e-04 ***
## factor(fans_piar_cat)Small vs Big:is_cooperative      0.167652 8.6686e-01    
## factor(fans_piar_cat)Big vs Small:is_cooperative     -1.850558 6.4235e-02 .  
## factor(fans_piar_cat)Big vs Big:is_cooperative        1.490215 1.3617e-01    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## RMSE: 0.040404   Adj. R2: 0.006163
# Plot the interaction model with plot_predictions_with_ci(), a helper defined
# outside this section (it summarises by fans_piar_cat, per the message below;
# presumably predictions with confidence intervals -- TODO confirm).
plot_predictions_with_ci(model, data)
## `summarise()` has grouped output by 'fans_piar_cat'. You can override using the
## `.groups` argument.

### (3) plot fans

# Plotting model: fan-size pair category only; standard errors clustered by
# author_id.
model <- feols(
  fml = log(net_follow_fans + 255) ~ factor(fans_piar_cat),
  data = data,
  vcov = ~author_id
)
summary(model)
## OLS estimation, Dep. Var.: log(net_follow_fans + 255)
## Observations: 187,126
## Standard-errors: Clustered (author_id) 
##                                   Estimate Std. Error      t value  Pr(>|t|)
## (Intercept)                       5.542472   0.000042 133134.04211 < 2.2e-16
## factor(fans_piar_cat)Small vs Big 0.003643   0.000409      8.90530 < 2.2e-16
## factor(fans_piar_cat)Big vs Small 0.004720   0.000459     10.28949 < 2.2e-16
## factor(fans_piar_cat)Big vs Big   0.010859   0.000980     11.07953 < 2.2e-16
##                                      
## (Intercept)                       ***
## factor(fans_piar_cat)Small vs Big ***
## factor(fans_piar_cat)Big vs Small ***
## factor(fans_piar_cat)Big vs Big   ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## RMSE: 0.040407   Adj. R2: 0.006048
# Plot the model fitted just above with plot_predictions_fans(), a helper defined
# outside this section (presumably predictions by fans_piar_cat -- TODO confirm).
plot_predictions_fans(model, data)

(4) plot category

# Plotting model: cooperation indicator only; standard errors clustered by
# author_id.
model <- feols(
  fml = log(net_follow_fans + 255) ~ is_cooperative,
  data = data,
  vcov = ~author_id
)
summary(model)
## OLS estimation, Dep. Var.: log(net_follow_fans + 255)
## Observations: 187,126
## Standard-errors: Clustered (author_id) 
##                Estimate Std. Error    t value  Pr(>|t|)    
## (Intercept)    5.543776   0.000104 53436.9340 < 2.2e-16 ***
## is_cooperative 0.000244   0.000217     1.1219   0.26191    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## RMSE: 0.04053   Adj. R2: 2.859e-6
# Plot the model fitted just above with plot_predictions_cate(), a helper defined
# outside this section (presumably predictions by is_cooperative -- TODO confirm).
plot_predictions_cate(model, data)

(5) plot continue cutoff

# Define the formula for the model.
# NOTE(review): fans_piar_cat_temp is not created anywhere in the visible code;
# presumably plot_continue_cutoff() builds it internally -- confirm.
model_formula <- log(net_follow_fans+255) ~ factor(fans_piar_cat_temp)

# Call the function with the data and formula
plot_continue_cutoff(data, model_formula)

(6) plot_fan_pair_heatmap

library(ggplot2)
library(dplyr)
library(RColorBrewer)

# Steps 1-2: Bin both streamers' pre-PK fan counts into five ordered ranges.
# cut() returns the ordered factor directly, so labels and level order cannot drift apart.
# Bins: [0, 1k], (1k, 10k], (10k, 100k], (100k, 1M], (1M, Inf).
# Fix: the earlier case_when() required count > 0 and ended in `TRUE ~ ">1M"`, so
# streamers with 0 fans (and missing counts) were labelled ">1M". Zero now falls into
# "0-1k"; missing or negative counts stay NA instead of being silently misfiled.
fan_range_breaks <- c(0, 1000, 10000, 100000, 1000000, Inf)
fan_range_labels <- c("0-1k", "1k-10k", "10k-100k", "100k-1M", ">1M")

data <- data %>%
  mutate(
    fans_new_range = cut(
      before_fans_count,
      breaks = fan_range_breaks,
      labels = fan_range_labels,
      include.lowest = TRUE
    ),
    other_fans_new_range = cut(
      other_before_fans_count,
      breaks = fan_range_breaks,
      labels = fan_range_labels,
      include.lowest = TRUE
    )
  )

# Step 3: Per fan-range cell, compute the mean of the log-shifted DV and a 95% t-interval.
# The shift of 255 matches the regressions in this section (the heatmap previously used 605).
# Fixes relative to the earlier version:
#   * sd() was taken over Y, the already-aggregated scalar mean, which is always NA, so
#     every interval was NA. It is now taken over the row-level log values.
#   * qt() uses n - 1 degrees of freedom; cells with fewer than 2 usable rows get NA bounds.
#   * The ifelse(count > 0, ...) guards are gone: group_by() only returns cells with rows.
agg_data <- data %>%
  mutate(log_dv = log(net_follow_fans + 255)) %>%
  group_by(fans_new_range, other_fans_new_range) %>%
  summarize(
    count = n(),                      # rows in this cell
    n_used = sum(!is.na(log_dv)),     # rows that contribute to the mean
    Y = mean(log_dv, na.rm = TRUE),
    se = sd(log_dv, na.rm = TRUE) / sqrt(n_used),
    t_crit = if (n_used > 1) qt(0.975, df = n_used - 1) else NA_real_,
    ci_lower = Y - t_crit * se,
    ci_upper = Y + t_crit * se
  ) %>%
  ungroup() %>%
  mutate(percentage = count / sum(count) * 100)
## `summarise()` has grouped output by 'fans_new_range'. You can override using
## the `.groups` argument.
# Step 4: Heatmap of the cell means; each tile is labelled with the cell's share of all rows.
# The former geom_errorbar() layer was removed: it mapped the interval of the fill variable
# onto the categorical y axis, where it has no meaning. The interval remains available in
# agg_data (ci_lower / ci_upper). Title and legend now name the variable actually plotted.
ggplot(agg_data, aes(x = fans_new_range, y = other_fans_new_range, fill = Y)) +
  geom_tile() +
  geom_text(aes(label = paste0(round(percentage, 2), "%")), color = "black", size = 4) +
  scale_fill_gradientn(colors = c("yellow", "orange", "darkorange", "red", "darkred"),
                       values = scales::rescale(c(0, 0.05, 0.1, 0.15, 0.2)),
                       #limits = c(0, 0.2),
                       na.value = "white") +
  labs(title = "Mean log(net_follow_fans + 255) by Fan Ranges",
       x = "Fans Range",
       y = "Other Fans Range",
       fill = "Mean log(net_follow_fans + 255)") +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

6 DV:吸粉(PK及以后涨的来自对方的粉丝):log(already_follow_other_fans_count+1)

(1) regression

# Main specification: fan-pair category crossed with the same-category indicator, with
# p_date fixed effects and standard errors clustered by author. `a * b` expands to
# a + b + a:b, i.e. the same terms as listing both main effects plus the product.
model <- feols(
  fml = log(already_follow_other_fans_count + 1) ~
    factor(fans_piar_cat) * is_cooperative | p_date,
  data = data,
  vcov = ~ author_id
)
summary(model)
## OLS estimation, Dep. Var.: log(already_follow_other_fans_count + 1)
## Observations: 187,126
## Fixed-effects: p_date: 975
## Standard-errors: Clustered (author_id) 
##                                                  Estimate Std. Error   t value
## factor(fans_piar_cat)Small vs Big                0.008643   0.001337  6.465395
## factor(fans_piar_cat)Big vs Small                0.004347   0.000938  4.634133
## factor(fans_piar_cat)Big vs Big                  0.030643   0.002152 14.239919
## is_cooperative                                   0.000387   0.000282  1.369041
## factor(fans_piar_cat)Small vs Big:is_cooperative 0.007826   0.002452  3.192134
## factor(fans_piar_cat)Big vs Small:is_cooperative 0.001300   0.001701  0.764027
## factor(fans_piar_cat)Big vs Big:is_cooperative   0.018595   0.003871  4.803729
##                                                    Pr(>|t|)    
## factor(fans_piar_cat)Small vs Big                1.0133e-10 ***
## factor(fans_piar_cat)Big vs Small                3.5872e-06 ***
## factor(fans_piar_cat)Big vs Big                   < 2.2e-16 ***
## is_cooperative                                   1.7099e-01    
## factor(fans_piar_cat)Small vs Big:is_cooperative 1.4125e-03 ** 
## factor(fans_piar_cat)Big vs Small:is_cooperative 4.4485e-01    
## factor(fans_piar_cat)Big vs Big:is_cooperative   1.5588e-06 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## RMSE: 0.085446     Adj. R2: 0.017635
##                  Within R2: 0.016367

(2) plot both

# Plotting model: same terms as the main specification but without p_date fixed effects,
# so an intercept is estimated. Standard errors are clustered by author.
model <- feols(
  fml = log(already_follow_other_fans_count + 1) ~
    factor(fans_piar_cat) * is_cooperative,
  data = data,
  vcov = ~ author_id
)
summary(model)
## OLS estimation, Dep. Var.: log(already_follow_other_fans_count + 1)
## Observations: 187,126
## Standard-errors: Clustered (author_id) 
##                                                  Estimate Std. Error   t value
## (Intercept)                                      0.002803   0.000149 18.826269
## factor(fans_piar_cat)Small vs Big                0.008981   0.001339  6.706339
## factor(fans_piar_cat)Big vs Small                0.004662   0.000939  4.965460
## factor(fans_piar_cat)Big vs Big                  0.031277   0.002148 14.558698
## is_cooperative                                   0.000647   0.000276  2.343900
## factor(fans_piar_cat)Small vs Big:is_cooperative 0.007509   0.002457  3.055939
## factor(fans_piar_cat)Big vs Small:is_cooperative 0.001111   0.001696  0.655184
## factor(fans_piar_cat)Big vs Big:is_cooperative   0.018284   0.003868  4.726862
##                                                    Pr(>|t|)    
## (Intercept)                                       < 2.2e-16 ***
## factor(fans_piar_cat)Small vs Big                2.0024e-11 ***
## factor(fans_piar_cat)Big vs Small                6.8610e-07 ***
## factor(fans_piar_cat)Big vs Big                   < 2.2e-16 ***
## is_cooperative                                   1.9085e-02 *  
## factor(fans_piar_cat)Small vs Big:is_cooperative 2.2439e-03 ** 
## factor(fans_piar_cat)Big vs Small:is_cooperative 5.1235e-01    
## factor(fans_piar_cat)Big vs Big:is_cooperative   2.2821e-06 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## RMSE: 0.085698   Adj. R2: 0.016966
# Visualize the interaction model fitted above. plot_predictions_with_ci() is defined
# elsewhere in the file; the message echoed below shows that it aggregates with
# summarise() grouped by fans_piar_cat.
plot_predictions_with_ci(model, data)
## `summarise()` has grouped output by 'fans_piar_cat'. You can override using the
## `.groups` argument.

### (3) plot fans

# Plotting model: fans gained from the opponent (log, +1) on the fan-pair category only.
# No fixed effects; standard errors are clustered by author.
model <- feols(
  fml = log(already_follow_other_fans_count + 1) ~ factor(fans_piar_cat),
  data = data,
  vcov = ~ author_id
)
summary(model)
## OLS estimation, Dep. Var.: log(already_follow_other_fans_count + 1)
## Observations: 187,126
## Standard-errors: Clustered (author_id) 
##                                   Estimate Std. Error  t value   Pr(>|t|)    
## (Intercept)                       0.003020   0.000126 23.97696  < 2.2e-16 ***
## factor(fans_piar_cat)Small vs Big 0.011865   0.001147 10.34883  < 2.2e-16 ***
## factor(fans_piar_cat)Big vs Small 0.005107   0.000800  6.38546 1.7136e-10 ***
## factor(fans_piar_cat)Big vs Big   0.039004   0.001861 20.96187  < 2.2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## RMSE: 0.085747   Adj. R2: 0.015874
# Visualize the fan-pair-only model fitted above. plot_predictions_fans() is defined
# elsewhere in the file — presumably it plots predictions per fans_piar_cat (TODO confirm).
plot_predictions_fans(model, data)

(4) plot category

# Plotting model: fans gained from the opponent (log, +1) on the same-category
# indicator only. No fixed effects; standard errors are clustered by author.
model <- feols(
  fml = log(already_follow_other_fans_count + 1) ~ is_cooperative,
  data = data,
  vcov = ~ author_id
)

summary(model)
## OLS estimation, Dep. Var.: log(already_follow_other_fans_count + 1)
## Observations: 187,126
## Standard-errors: Clustered (author_id) 
##                Estimate Std. Error  t value  Pr(>|t|)    
## (Intercept)    0.005881   0.000220 26.67917 < 2.2e-16 ***
## is_cooperative 0.003957   0.000462  8.55892 < 2.2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## RMSE: 0.086415   Adj. R2: 4.706e-4
# Visualize the is_cooperative-only model fitted above. plot_predictions_cate() is defined
# elsewhere in the file — presumably it plots predictions per is_cooperative (TODO confirm).
plot_predictions_cate(model, data)

(5) plot continue cutoff

# Formula passed to plot_continue_cutoff(): the DV is log(already_follow_other_fans_count + 1)
# and the regressor is fans_piar_cat_temp, a column that is not created in this section
# (presumably rebuilt inside the helper for each candidate cutoff — TODO confirm).
model_formula <- log(already_follow_other_fans_count+1) ~ factor(fans_piar_cat_temp)

# Run the helper (defined elsewhere in the file) with the data and the formula above.
plot_continue_cutoff(data, model_formula)

(6) plot_fan_pair_heatmap

library(ggplot2)
library(dplyr)
library(RColorBrewer)

# Steps 1-2: Bin both streamers' pre-PK fan counts into five ordered ranges.
# cut() returns the ordered factor directly, so labels and level order cannot drift apart.
# Bins: [0, 1k], (1k, 10k], (10k, 100k], (100k, 1M], (1M, Inf).
# Fix: the earlier case_when() required count > 0 and ended in `TRUE ~ ">1M"`, so
# streamers with 0 fans (and missing counts) were labelled ">1M". Zero now falls into
# "0-1k"; missing or negative counts stay NA instead of being silently misfiled.
fan_range_breaks <- c(0, 1000, 10000, 100000, 1000000, Inf)
fan_range_labels <- c("0-1k", "1k-10k", "10k-100k", "100k-1M", ">1M")

data <- data %>%
  mutate(
    fans_new_range = cut(
      before_fans_count,
      breaks = fan_range_breaks,
      labels = fan_range_labels,
      include.lowest = TRUE
    ),
    other_fans_new_range = cut(
      other_before_fans_count,
      breaks = fan_range_breaks,
      labels = fan_range_labels,
      include.lowest = TRUE
    )
  )

# Step 3: Per fan-range cell, compute the mean of log(already_follow_other_fans_count + 1)
# and a 95% t-interval around it.
# Fixes relative to the earlier version:
#   * sd() was taken over Y, the already-aggregated scalar mean, which is always NA, so
#     every interval was NA. It is now taken over the row-level log values.
#   * qt() uses n - 1 degrees of freedom; cells with fewer than 2 usable rows get NA bounds.
#   * The ifelse(count > 0, ...) guards are gone: group_by() only returns cells with rows.
agg_data <- data %>%
  mutate(log_dv = log(already_follow_other_fans_count + 1)) %>%
  group_by(fans_new_range, other_fans_new_range) %>%
  summarize(
    count = n(),                      # rows in this cell
    n_used = sum(!is.na(log_dv)),     # rows that contribute to the mean
    Y = mean(log_dv, na.rm = TRUE),
    se = sd(log_dv, na.rm = TRUE) / sqrt(n_used),
    t_crit = if (n_used > 1) qt(0.975, df = n_used - 1) else NA_real_,
    ci_lower = Y - t_crit * se,
    ci_upper = Y + t_crit * se
  ) %>%
  ungroup() %>%
  mutate(percentage = count / sum(count) * 100)
## `summarise()` has grouped output by 'fans_new_range'. You can override using
## the `.groups` argument.
# Step 4: Heatmap of the cell means; each tile is labelled with the cell's share of all rows.
# The former geom_errorbar() layer was removed: it mapped the interval of the fill variable
# onto the categorical y axis, where it has no meaning. The interval remains available in
# agg_data (ci_lower / ci_upper). Title and legend now name the variable actually plotted.
ggplot(agg_data, aes(x = fans_new_range, y = other_fans_new_range, fill = Y)) +
  geom_tile() +
  geom_text(aes(label = paste0(round(percentage, 2), "%")), color = "black", size = 4) +
  scale_fill_gradientn(colors = c("yellow", "orange", "darkorange", "red", "darkred"),
                       values = scales::rescale(c(0, 0.05, 0.1, 0.15, 0.2)),
                       #limits = c(0, 0.2),
                       na.value = "white") +
  labs(title = "Mean log(already_follow_other_fans_count + 1) by Fan Ranges",
       x = "Fans Range",
       y = "Other Fans Range",
       fill = "Mean log(already_follow_other_fans_count + 1)") +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

7 DV:粉被对方吸走(PK及以后自己的粉丝跑到对方):log(other_already_follow_other_fans_count+1)

(1) regression

# Main specification: fan-pair category crossed with the same-category indicator, with
# p_date fixed effects and standard errors clustered by author. `a * b` expands to
# a + b + a:b, i.e. the same terms as listing both main effects plus the product.
model <- feols(
  fml = log(other_already_follow_other_fans_count + 1) ~
    factor(fans_piar_cat) * is_cooperative | p_date,
  data = data,
  vcov = ~ author_id
)
summary(model)
## OLS estimation, Dep. Var.: log(other_already_follow_other_fans_count + 1)
## Observations: 187,126
## Fixed-effects: p_date: 975
## Standard-errors: Clustered (author_id) 
##                                                   Estimate Std. Error   t value
## factor(fans_piar_cat)Small vs Big                 0.005874   0.000985  5.961712
## factor(fans_piar_cat)Big vs Small                 0.010257   0.001370  7.487383
## factor(fans_piar_cat)Big vs Big                   0.033760   0.002334 14.462731
## is_cooperative                                    0.001027   0.000270  3.805236
## factor(fans_piar_cat)Small vs Big:is_cooperative -0.000021   0.001726 -0.012270
## factor(fans_piar_cat)Big vs Small:is_cooperative  0.001833   0.002306  0.795209
## factor(fans_piar_cat)Big vs Big:is_cooperative    0.016188   0.003944  4.104578
##                                                    Pr(>|t|)    
## factor(fans_piar_cat)Small vs Big                2.5014e-09 ***
## factor(fans_piar_cat)Big vs Small                7.0625e-14 ***
## factor(fans_piar_cat)Big vs Big                   < 2.2e-16 ***
## is_cooperative                                   1.4172e-04 ***
## factor(fans_piar_cat)Small vs Big:is_cooperative 9.9021e-01    
## factor(fans_piar_cat)Big vs Small:is_cooperative 4.2649e-01    
## factor(fans_piar_cat)Big vs Big:is_cooperative   4.0526e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## RMSE: 0.085629     Adj. R2: 0.019143
##                  Within R2: 0.017624

(2) plot both

# Plotting model: same terms as the main specification but without p_date fixed effects,
# so an intercept is estimated. Standard errors are clustered by author.
model <- feols(
  fml = log(other_already_follow_other_fans_count + 1) ~
    factor(fans_piar_cat) * is_cooperative,
  data = data,
  vcov = ~ author_id
)
summary(model)
## OLS estimation, Dep. Var.: log(other_already_follow_other_fans_count + 1)
## Observations: 187,126
## Standard-errors: Clustered (author_id) 
##                                                   Estimate Std. Error   t value
## (Intercept)                                       0.002323   0.000129 17.963406
## factor(fans_piar_cat)Small vs Big                 0.005917   0.000978  6.051027
## factor(fans_piar_cat)Big vs Small                 0.010492   0.001377  7.619616
## factor(fans_piar_cat)Big vs Big                   0.034319   0.002341 14.660571
## is_cooperative                                    0.001340   0.000264  5.065940
## factor(fans_piar_cat)Small vs Big:is_cooperative -0.000100   0.001719 -0.057932
## factor(fans_piar_cat)Big vs Small:is_cooperative  0.001678   0.002301  0.728960
## factor(fans_piar_cat)Big vs Big:is_cooperative    0.015825   0.003945  4.011588
##                                                    Pr(>|t|)    
## (Intercept)                                       < 2.2e-16 ***
## factor(fans_piar_cat)Small vs Big                1.4425e-09 ***
## factor(fans_piar_cat)Big vs Small                2.5585e-14 ***
## factor(fans_piar_cat)Big vs Big                   < 2.2e-16 ***
## is_cooperative                                   4.0685e-07 ***
## factor(fans_piar_cat)Small vs Big:is_cooperative 9.5380e-01    
## factor(fans_piar_cat)Big vs Small:is_cooperative 4.6603e-01    
## factor(fans_piar_cat)Big vs Big:is_cooperative   6.0340e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## RMSE: 0.085894   Adj. R2: 0.018192
# Visualize the interaction model fitted above. plot_predictions_with_ci() is defined
# elsewhere in the file; the message echoed below shows that it aggregates with
# summarise() grouped by fans_piar_cat.
plot_predictions_with_ci(model, data)
## `summarise()` has grouped output by 'fans_piar_cat'. You can override using the
## `.groups` argument.

### (3) plot fans

# Plotting model: fans lost to the opponent (log, +1) on the fan-pair category only.
# No fixed effects; standard errors are clustered by author.
model <- feols(
  fml = log(other_already_follow_other_fans_count + 1) ~ factor(fans_piar_cat),
  data = data,
  vcov = ~ author_id
)
summary(model)
## OLS estimation, Dep. Var.: log(other_already_follow_other_fans_count + 1)
## Observations: 187,126
## Standard-errors: Clustered (author_id) 
##                                   Estimate Std. Error  t value   Pr(>|t|)    
## (Intercept)                       0.002774   0.000117 23.80911  < 2.2e-16 ***
## factor(fans_piar_cat)Small vs Big 0.005938   0.000819  7.24866 4.2284e-13 ***
## factor(fans_piar_cat)Big vs Small 0.011177   0.001109 10.08177  < 2.2e-16 ***
## factor(fans_piar_cat)Big vs Big   0.041071   0.001938 21.18932  < 2.2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## RMSE: 0.085932   Adj. R2: 0.017354
# Visualize the fan-pair-only model fitted above. plot_predictions_fans() is defined
# elsewhere in the file — presumably it plots predictions per fans_piar_cat (TODO confirm).
plot_predictions_fans(model, data)

(4) plot category

# Plotting model: fans lost to the opponent (log, +1) on the same-category
# indicator only. No fixed effects; standard errors are clustered by author.
model <- feols(
  fml = log(other_already_follow_other_fans_count + 1) ~ is_cooperative,
  data = data,
  vcov = ~ author_id
)

summary(model)
## OLS estimation, Dep. Var.: log(other_already_follow_other_fans_count + 1)
## Observations: 187,126
## Standard-errors: Clustered (author_id) 
##                Estimate Std. Error  t value  Pr(>|t|)    
## (Intercept)    0.005777   0.000227 25.49223 < 2.2e-16 ***
## is_cooperative 0.004050   0.000457  8.86859 < 2.2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## RMSE: 0.086667   Adj. R2: 4.901e-4
# Visualize the is_cooperative-only model fitted above. plot_predictions_cate() is defined
# elsewhere in the file — presumably it plots predictions per is_cooperative (TODO confirm).
plot_predictions_cate(model, data)

### (5) plot continue cutoff

# Formula passed to plot_continue_cutoff(): the DV is
# log(other_already_follow_other_fans_count + 1) and the regressor is fans_piar_cat_temp,
# a column that is not created in this section (presumably rebuilt inside the helper for
# each candidate cutoff — TODO confirm).
model_formula <- log(other_already_follow_other_fans_count+1) ~ factor(fans_piar_cat_temp)

# Run the helper (defined elsewhere in the file) with the data and the formula above.
plot_continue_cutoff(data, model_formula)

### (6) plot_fan_pair_heatmap

library(ggplot2)
library(dplyr)
library(RColorBrewer)

# Steps 1-2: Bin both streamers' pre-PK fan counts into five ordered ranges.
# cut() returns the ordered factor directly, so labels and level order cannot drift apart.
# Bins: [0, 1k], (1k, 10k], (10k, 100k], (100k, 1M], (1M, Inf).
# Fix: the earlier case_when() required count > 0 and ended in `TRUE ~ ">1M"`, so
# streamers with 0 fans (and missing counts) were labelled ">1M". Zero now falls into
# "0-1k"; missing or negative counts stay NA instead of being silently misfiled.
fan_range_breaks <- c(0, 1000, 10000, 100000, 1000000, Inf)
fan_range_labels <- c("0-1k", "1k-10k", "10k-100k", "100k-1M", ">1M")

data <- data %>%
  mutate(
    fans_new_range = cut(
      before_fans_count,
      breaks = fan_range_breaks,
      labels = fan_range_labels,
      include.lowest = TRUE
    ),
    other_fans_new_range = cut(
      other_before_fans_count,
      breaks = fan_range_breaks,
      labels = fan_range_labels,
      include.lowest = TRUE
    )
  )

# Step 3: Per fan-range cell, compute the mean of
# log(other_already_follow_other_fans_count + 1) and a 95% t-interval around it.
# Fixes relative to the earlier version:
#   * sd() was taken over Y, the already-aggregated scalar mean, which is always NA, so
#     every interval was NA. It is now taken over the row-level log values.
#   * qt() uses n - 1 degrees of freedom; cells with fewer than 2 usable rows get NA bounds.
#   * The ifelse(count > 0, ...) guards are gone: group_by() only returns cells with rows.
agg_data <- data %>%
  mutate(log_dv = log(other_already_follow_other_fans_count + 1)) %>%
  group_by(fans_new_range, other_fans_new_range) %>%
  summarize(
    count = n(),                      # rows in this cell
    n_used = sum(!is.na(log_dv)),     # rows that contribute to the mean
    Y = mean(log_dv, na.rm = TRUE),
    se = sd(log_dv, na.rm = TRUE) / sqrt(n_used),
    t_crit = if (n_used > 1) qt(0.975, df = n_used - 1) else NA_real_,
    ci_lower = Y - t_crit * se,
    ci_upper = Y + t_crit * se
  ) %>%
  ungroup() %>%
  mutate(percentage = count / sum(count) * 100)
## `summarise()` has grouped output by 'fans_new_range'. You can override using
## the `.groups` argument.
# Step 4: Heatmap of the cell means; each tile is labelled with the cell's share of all rows.
# The former geom_errorbar() layer was removed: it mapped the interval of the fill variable
# onto the categorical y axis, where it has no meaning. The interval remains available in
# agg_data (ci_lower / ci_upper). Title and legend now name the variable actually plotted.
ggplot(agg_data, aes(x = fans_new_range, y = other_fans_new_range, fill = Y)) +
  geom_tile() +
  geom_text(aes(label = paste0(round(percentage, 2), "%")), color = "black", size = 4) +
  scale_fill_gradientn(colors = c("yellow", "orange", "darkorange", "red", "darkred"),
                       values = scales::rescale(c(0, 0.05, 0.1, 0.15, 0.2)),
                       #limits = c(0, 0.2),
                       na.value = "white") +
  labs(title = "Mean log(other_already_follow_other_fans_count + 1) by Fan Ranges",
       x = "Fans Range",
       y = "Other Fans Range",
       fill = "Mean log(other_already_follow_other_fans_count + 1)") +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

8 DV:net吸粉:log(net_attract_fans+126)

# Inspect the distribution of net attracted fans. The minimum (-125) is why the models
# below add 126 before taking the log, which keeps the log argument at 1 or above.
summary(data[["net_attract_fans"]])
##      Min.   1st Qu.    Median      Mean   3rd Qu.      Max. 
## -1.25e+02  0.00e+00  0.00e+00 -1.27e-03  0.00e+00  8.60e+01
min(data[["net_attract_fans"]])
## [1] -125

(1) regression

# Main specification: fan-pair category crossed with the same-category indicator, with
# p_date fixed effects and standard errors clustered by author. `a * b` expands to
# a + b + a:b, i.e. the same terms as listing both main effects plus the product.
model <- feols(
  fml = log(net_attract_fans + 126) ~
    factor(fans_piar_cat) * is_cooperative | p_date,
  data = data,
  vcov = ~ author_id
)
summary(model)
## OLS estimation, Dep. Var.: log(net_attract_fans + 126)
## Observations: 187,126
## Fixed-effects: p_date: 975
## Standard-errors: Clustered (author_id) 
##                                                     Estimate Std. Error
## factor(fans_piar_cat)Small vs Big                 0.00007776   0.000037
## factor(fans_piar_cat)Big vs Small                -0.00009881   0.000042
## factor(fans_piar_cat)Big vs Big                  -0.00092973   0.000577
## is_cooperative                                    0.00000257   0.000015
## factor(fans_piar_cat)Small vs Big:is_cooperative  0.00011183   0.000068
## factor(fans_piar_cat)Big vs Small:is_cooperative  0.00002522   0.000053
## factor(fans_piar_cat)Big vs Big:is_cooperative    0.00089796   0.000595
##                                                    t value Pr(>|t|)    
## factor(fans_piar_cat)Small vs Big                 2.093658 0.036292 *  
## factor(fans_piar_cat)Big vs Small                -2.377336 0.017439 *  
## factor(fans_piar_cat)Big vs Big                  -1.609994 0.107401    
## is_cooperative                                    0.166904 0.867446    
## factor(fans_piar_cat)Small vs Big:is_cooperative  1.643641 0.100252    
## factor(fans_piar_cat)Big vs Small:is_cooperative  0.474084 0.635441    
## factor(fans_piar_cat)Big vs Big:is_cooperative    1.508435 0.131445    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## RMSE: 0.012331     Adj. R2: 2.593e-4
##                  Within R2: 2.7e-4

(2) plot both

# Plotting model: same terms as the main specification but without p_date fixed effects,
# so an intercept is estimated. Standard errors are clustered by author.
model <- feols(
  fml = log(net_attract_fans + 126) ~
    factor(fans_piar_cat) * is_cooperative,
  data = data,
  vcov = ~ author_id
)
summary(model)
## OLS estimation, Dep. Var.: log(net_attract_fans + 126)
## Observations: 187,126
## Standard-errors: Clustered (author_id) 
##                                                     Estimate Std. Error
## (Intercept)                                       4.83628931 0.00000260
## factor(fans_piar_cat)Small vs Big                 0.00007636 0.00003528
## factor(fans_piar_cat)Big vs Small                -0.00011840 0.00003481
## factor(fans_piar_cat)Big vs Big                  -0.00092383 0.00057468
## is_cooperative                                   -0.00000891 0.00000478
## factor(fans_piar_cat)Small vs Big:is_cooperative  0.00006706 0.00005427
## factor(fans_piar_cat)Big vs Small:is_cooperative  0.00003565 0.00004492
## factor(fans_piar_cat)Big vs Big:is_cooperative    0.00089459 0.00059835
##                                                        t value   Pr(>|t|)    
## (Intercept)                                       1.858124e+06  < 2.2e-16 ***
## factor(fans_piar_cat)Small vs Big                 2.164517e+00 0.03042622 *  
## factor(fans_piar_cat)Big vs Small                -3.401091e+00 0.00067134 ***
## factor(fans_piar_cat)Big vs Big                  -1.607573e+00 0.10793065    
## is_cooperative                                   -1.863118e+00 0.06244751 .  
## factor(fans_piar_cat)Small vs Big:is_cooperative  1.235646e+00 0.21659207    
## factor(fans_piar_cat)Big vs Small:is_cooperative  7.936100e-01 0.42742342    
## factor(fans_piar_cat)Big vs Big:is_cooperative    1.495095e+00 0.13489172    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## RMSE: 0.012363   Adj. R2: 2.263e-4
# Visualize the interaction model fitted above. plot_predictions_with_ci() is defined
# elsewhere in the file; the message echoed below shows that it aggregates with
# summarise() grouped by fans_piar_cat.
plot_predictions_with_ci(model, data)
## `summarise()` has grouped output by 'fans_piar_cat'. You can override using the
## `.groups` argument.

(3) plot fans

# Plotting model: net attracted fans (log, shifted by 126) on the fan-pair category only.
# No fixed effects; standard errors are clustered by author.
model <- feols(
  fml = log(net_attract_fans + 126) ~ factor(fans_piar_cat),
  data = data,
  vcov = ~ author_id
)
summary(model)
## OLS estimation, Dep. Var.: log(net_attract_fans + 126)
## Observations: 187,126
## Standard-errors: Clustered (author_id) 
##                                    Estimate Std. Error       t value   Pr(>|t|)
## (Intercept)                        4.836286 0.00000219 2204159.42803  < 2.2e-16
## factor(fans_piar_cat)Small vs Big  0.000101 0.00002677       3.79109 1.5004e-04
## factor(fans_piar_cat)Big vs Small -0.000105 0.00002422      -4.34848 1.3717e-05
## factor(fans_piar_cat)Big vs Big   -0.000549 0.00034081      -1.61142 1.0709e-01
##                                      
## (Intercept)                       ***
## factor(fans_piar_cat)Small vs Big ***
## factor(fans_piar_cat)Big vs Small ***
## factor(fans_piar_cat)Big vs Big      
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## RMSE: 0.012364   Adj. R2: 1.429e-4
# Visualize the fan-pair-only model fitted above. plot_predictions_fans() is defined
# elsewhere in the file — presumably it plots predictions per fans_piar_cat (TODO confirm).
plot_predictions_fans(model, data)

(4) plot category

# Plotting model: net attracted fans (log, shifted by 126) on the same-category
# indicator only. No fixed effects; standard errors are clustered by author.
model <- feols(
  fml = log(net_attract_fans + 126) ~ is_cooperative,
  data = data,
  vcov = ~ author_id
)

summary(model)
## OLS estimation, Dep. Var.: log(net_attract_fans + 126)
## Observations: 187,126
## Standard-errors: Clustered (author_id) 
##                Estimate Std. Error     t value  Pr(>|t|)    
## (Intercept)    4.836218    4.3e-05 112793.7598 < 2.2e-16 ***
## is_cooperative 0.000063    4.6e-05      1.3645   0.17241    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## RMSE: 0.012365   Adj. R2: 5.674e-7
# Visualize the is_cooperative-only model fitted above. plot_predictions_cate() is defined
# elsewhere in the file — presumably it plots predictions per is_cooperative (TODO confirm).
plot_predictions_cate(model, data)

### (5) plot continue cutoff

# Formula passed to plot_continue_cutoff(): the DV is log(net_attract_fans + 126) and the
# regressor is fans_piar_cat_temp, a column that is not created in this section
# (presumably rebuilt inside the helper for each candidate cutoff — TODO confirm).
model_formula <- log(net_attract_fans+126) ~ factor(fans_piar_cat_temp)

# Run the helper (defined elsewhere in the file) with the data and the formula above.
plot_continue_cutoff(data, model_formula)

(6) plot_fan_pair_heatmap

library(ggplot2)
library(dplyr)
library(RColorBrewer)

# Steps 1-2: Bin both streamers' pre-PK fan counts into five ordered ranges.
# cut() returns the ordered factor directly, so labels and level order cannot drift apart.
# Bins: [0, 1k], (1k, 10k], (10k, 100k], (100k, 1M], (1M, Inf).
# Fix: the earlier case_when() required count > 0 and ended in `TRUE ~ ">1M"`, so
# streamers with 0 fans (and missing counts) were labelled ">1M". Zero now falls into
# "0-1k"; missing or negative counts stay NA instead of being silently misfiled.
fan_range_breaks <- c(0, 1000, 10000, 100000, 1000000, Inf)
fan_range_labels <- c("0-1k", "1k-10k", "10k-100k", "100k-1M", ">1M")

data <- data %>%
  mutate(
    fans_new_range = cut(
      before_fans_count,
      breaks = fan_range_breaks,
      labels = fan_range_labels,
      include.lowest = TRUE
    ),
    other_fans_new_range = cut(
      other_before_fans_count,
      breaks = fan_range_breaks,
      labels = fan_range_labels,
      include.lowest = TRUE
    )
  )

# Step 3: Per fan-range cell, compute the mean of log(net_attract_fans + 126) and a
# 95% t-interval around it. The shift of 126 matches the regressions in this section.
# Fixes relative to the earlier version:
#   * sd() was taken over Y, the already-aggregated scalar mean, which is always NA, so
#     every interval was NA. It is now taken over the row-level log values.
#   * qt() uses n - 1 degrees of freedom; cells with fewer than 2 usable rows get NA bounds.
#   * The ifelse(count > 0, ...) guards are gone: group_by() only returns cells with rows.
agg_data <- data %>%
  mutate(log_dv = log(net_attract_fans + 126)) %>%
  group_by(fans_new_range, other_fans_new_range) %>%
  summarize(
    count = n(),                      # rows in this cell
    n_used = sum(!is.na(log_dv)),     # rows that contribute to the mean
    Y = mean(log_dv, na.rm = TRUE),
    se = sd(log_dv, na.rm = TRUE) / sqrt(n_used),
    t_crit = if (n_used > 1) qt(0.975, df = n_used - 1) else NA_real_,
    ci_lower = Y - t_crit * se,
    ci_upper = Y + t_crit * se
  ) %>%
  ungroup() %>%
  mutate(percentage = count / sum(count) * 100)
## `summarise()` has grouped output by 'fans_new_range'. You can override using
## the `.groups` argument.
# Step 4: Heatmap of the cell means; each tile is labelled with the cell's share of all rows.
# The former geom_errorbar() layer was removed: it mapped the interval of the fill variable
# onto the categorical y axis, where it has no meaning. The interval remains available in
# agg_data (ci_lower / ci_upper). Title and legend now name the variable actually plotted.
ggplot(agg_data, aes(x = fans_new_range, y = other_fans_new_range, fill = Y)) +
  geom_tile() +
  geom_text(aes(label = paste0(round(percentage, 2), "%")), color = "black", size = 4) +
  scale_fill_gradientn(colors = c("yellow", "orange", "darkorange", "red", "darkred"),
                       values = scales::rescale(c(0, 0.05, 0.1, 0.15, 0.2)),
                       #limits = c(0, 0.2),
                       na.value = "white") +
  labs(title = "Mean log(net_attract_fans + 126) by Fan Ranges",
       x = "Fans Range",
       y = "Other Fans Range",
       fill = "Mean log(net_attract_fans + 126)") +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))