# NOTE(review): hard-coded, machine-specific working directory; the relative
# read.csv() path used right after this resolves against it.
setwd("C:/Users/ASUS/Desktop/快手")
# Data: all random PK pairs from 2022-01-01 to 2024-09-01
# Load the cleaned random-PK pair data (path is relative to the working directory).
data <- read.csv("Within_Random_Pk_final_clean_1.csv")

### Fan-size pairing
# Encode (own size, opponent size) with a 10,000-fan threshold:
#   0 = small vs small, 1 = small vs big, 2 = big vs small, 3 = big vs big.
# A row with a missing fan count matches no branch and stays NA.
data <- data %>%
  mutate(
    fans_piar_cat = case_when(
      (before_fans_count < 10000) & (other_before_fans_count < 10000) ~ 0,
      (before_fans_count < 10000) & (other_before_fans_count >= 10000) ~ 1,
      (before_fans_count >= 10000) & (other_before_fans_count < 10000) ~ 2,
      (before_fans_count >= 10000) & (other_before_fans_count >= 10000) ~ 3
    )
  )

# `levels` is spelled out in full (the previous `level =` relied on partial
# argument matching).
# NOTE(review): the first label carries a trailing space; it is kept unchanged
# because other code may match on the exact string.
data <- data %>%
  mutate(
    fans_piar_cat = factor(
      fans_piar_cat,
      levels = c(0, 1, 2, 3),
      labels = c("Small vs Small ", "Small vs Big", "Big vs Small", "Big vs Big")
    )
  )

### Cooperation
# (1) Split by live-stream type: 1 when both streamers carry the same
# live_operation_tag, 0 otherwise. A comparison that evaluates to NA is also
# coded 0.
data <- data %>%
  mutate(
    is_cooperative = if_else(
      live_operation_tag == other_live_operation_tag, 1, 0,
      missing = 0
    )
  )
# Cell counts for the two grouping variables.
table(data$fans_piar_cat)
##
## Small vs Small Small vs Big Big vs Small Big vs Big
## 149898 10906 10701 15621
table(data$is_cooperative)
##
## 0 1
## 121925 65201
# table(data$total_cost_amt) # 119056 rows are 0
# (1)   gifting per fan              = total gifting / (fans before the PK + 1)
# (1.1) gifting per viewer           = total gifting / (number of viewers + 1)
# (1.2) gifting per unit watch time  = total gifting / (total watch duration + 1)
# The +1 in each denominator avoids division by zero.
data <- data %>%
  mutate(
    avg_fan_total_cost_amt = total_cost_amt / (before_fans_count + 1),
    avg_viewer_total_cost_amt = total_cost_amt / (valid_play_user_num + 1),
    avg_time_total_cost_amt = total_cost_amt / (valid_play_duration + 1)
  )
# (2) fans gained:  data$follow_author_fans_count
# (3) fans lost:    data$unfollow_author_fans_count
# (4) net fans gained = gained - lost
data <- data %>%
  mutate(net_follow_fans = follow_author_fans_count - unfollow_author_fans_count)
summary(data$net_follow_fans)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## -254.000 0.000 0.000 0.958 0.000 5064.000
# (5) fans attracted from the opponent: data$already_follow_other_fans_count
# (6) fans drawn away by the opponent:  data$other_already_follow_other_fans_count
# (7) net fans attracted = (5) - (6)
data <- data %>%
  mutate(
    net_attract_fans =
      already_follow_other_fans_count - other_already_follow_other_fans_count
  )
summary(data$net_attract_fans)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## -1.25e+02 0.00e+00 0.00e+00 -1.27e-03 0.00e+00 8.60e+01
library(ggplot2)
library(dplyr)
# 创建函数
# Plot mean model predictions with 95% bands for every combination of
# fans_piar_cat and is_cooperative.
#
# Args:
#   model_1: fitted model whose predict() method accepts `newdata` and
#     `se.fit = TRUE` and returns `fit` and `se.fit` components.
#   data: data frame holding the model variables plus `fans_piar_cat` and
#     `is_cooperative`.
# Returns: a ggplot object (points with error bars).
plot_predictions_with_ci <- function(model_1, data) {
  # Row-level predictions and their standard errors.
  pred <- predict(model_1, newdata = data, se.fit = TRUE)
  data$predicted_y <- pred$fit
  data$se_fit <- pred$se.fit

  # Band per group = mean prediction +/- 1.96 * mean of the row-level SEs.
  # `.groups = "drop"` returns ungrouped output and silences the
  # "grouped output" message summarise() emits for multi-variable groupings.
  summary_data <- data %>%
    group_by(fans_piar_cat, is_cooperative) %>%
    summarise(
      mean_predicted_y = mean(predicted_y),
      ci_lower = mean_predicted_y - 1.96 * mean(se_fit),
      ci_upper = mean_predicted_y + 1.96 * mean(se_fit),
      .groups = "drop"
    )

  ggplot(
    summary_data,
    aes(
      x = factor(fans_piar_cat),
      y = mean_predicted_y,
      color = factor(is_cooperative)
    )
  ) +
    geom_point(size = 3) +
    geom_errorbar(aes(ymin = ci_lower, ymax = ci_upper), width = 0.2) +
    labs(
      title = "Mean Predicted y Values with Confidence Intervals",
      x = "Pair Categories",
      y = "Mean Predicted y",
      color = "Is Same Category"
    ) +
    theme_minimal()
}
library(ggplot2)
library(dplyr)
# 创建函数
# Plot mean model predictions with 95% bands for each fans_piar_cat level.
#
# Args:
#   model_1: fitted model whose predict() method accepts `newdata` and
#     `se.fit = TRUE` and returns `fit` and `se.fit` components.
#   data: data frame holding the model variables plus `fans_piar_cat`.
# Returns: a ggplot object (points with error bars).
plot_predictions_fans <- function(model_1, data) {
  # Row-level predictions and their standard errors.
  pred <- predict(model_1, newdata = data, se.fit = TRUE)
  data$predicted_y <- pred$fit
  data$se_fit <- pred$se.fit

  # Band per group = mean prediction +/- 1.96 * mean of the row-level SEs.
  summary_data <- data %>%
    group_by(fans_piar_cat) %>%
    summarise(
      mean_predicted_y = mean(predicted_y),
      ci_lower = mean_predicted_y - 1.96 * mean(se_fit),
      ci_upper = mean_predicted_y + 1.96 * mean(se_fit)
    )

  # No colour aesthetic is mapped here, so no colour legend title is set
  # (labels for unmapped aesthetics are ignored, and recent ggplot2 releases
  # may warn about them).
  ggplot(summary_data, aes(x = factor(fans_piar_cat), y = mean_predicted_y)) +
    geom_point(size = 3) +
    geom_errorbar(aes(ymin = ci_lower, ymax = ci_upper), width = 0.2) +
    labs(
      title = "Mean Predicted y Values with Confidence Intervals",
      x = "fans Pair",
      y = "Mean Predicted y"
    ) +
    theme_minimal()
}
library(ggplot2)
library(dplyr)
# 创建函数
# Plot mean model predictions with 95% bands for each is_cooperative value.
#
# Args:
#   model_1: fitted model whose predict() method accepts `newdata` and
#     `se.fit = TRUE` and returns `fit` and `se.fit` components.
#   data: data frame holding the model variables plus `is_cooperative`.
# Returns: a ggplot object (points with error bars).
plot_predictions_cate <- function(model_1, data) {
  # Row-level predictions and their standard errors.
  pred <- predict(model_1, newdata = data, se.fit = TRUE)
  data$predicted_y <- pred$fit
  data$se_fit <- pred$se.fit

  # Band per group = mean prediction +/- 1.96 * mean of the row-level SEs.
  summary_data <- data %>%
    group_by(is_cooperative) %>%
    summarise(
      mean_predicted_y = mean(predicted_y),
      ci_lower = mean_predicted_y - 1.96 * mean(se_fit),
      ci_upper = mean_predicted_y + 1.96 * mean(se_fit)
    )

  # No colour aesthetic is mapped here, so no colour legend title is set
  # (labels for unmapped aesthetics are ignored, and recent ggplot2 releases
  # may warn about them).
  ggplot(summary_data, aes(x = factor(is_cooperative), y = mean_predicted_y)) +
    geom_point(size = 3) +
    geom_errorbar(aes(ymin = ci_lower, ymax = ci_upper), width = 0.2) +
    labs(
      title = "Mean Predicted y Values with Confidence Intervals",
      x = "Categories",
      y = "Mean Predicted y"
    ) +
    theme_minimal()
}
library(dplyr)
library(ggplot2)
library(scales) # for comma formatting
##
## 载入程序包:'scales'
## The following object is masked from 'package:fixest':
##
## pvalue
library(tidyr)
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ forcats 1.0.0 ✔ readr 2.1.5
## ✔ lubridate 1.9.3 ✔ stringr 1.5.1
## ✔ purrr 1.0.2 ✔ tibble 3.2.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ plm::between() masks dplyr::between()
## ✖ readr::col_factor() masks scales::col_factor()
## ✖ purrr::discard() masks scales::discard()
## ✖ dplyr::filter() masks stats::filter()
## ✖ plm::lag() masks dplyr::lag(), stats::lag()
## ✖ plm::lead() masks dplyr::lead()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
# 定义函数,输入参数为模型公式
# Sweep the big/small fan-count cut-off and plot how the pair-category
# coefficients change with it.
#
# Args:
#   data: data frame with before_fans_count, other_before_fans_count,
#     author_id and every variable referenced by model_formula.
#   model_formula: formula whose right-hand side contains
#     factor(fans_piar_cat_temp); that column is rebuilt here for each cut-off.
# Returns: the coefficient plot, invisibly, after printing it.
plot_continue_cutoff <- function(data, model_formula) {
  # Fixed level order; the first level is the regression reference category.
  pair_levels <- c("Small vs Small", "Small vs Big", "Big vs Small", "Big vs Big")
  coef_names <- paste0("factor(fans_piar_cat_temp)", pair_levels[-1])

  # Step 1: 100 log-spaced cut-offs from 100 to 1,000,000 fans.
  cutoffs <- exp(seq(log(100), log(1e6), length.out = 100))
  n_cutoffs <- length(cutoffs)

  # Results are stored and read back by category name, so nothing depends on
  # the row order of an intermediate summary table.
  coefficients_matrix <- matrix(
    NA_real_, nrow = n_cutoffs, ncol = 3,
    dimnames = list(NULL, pair_levels[-1])
  )
  pct_matrix <- matrix(
    NA_real_, nrow = n_cutoffs, ncol = 4,
    dimnames = list(NULL, pair_levels)
  )

  # Step 2: for each cut-off, rebuild the category, record the category
  # shares, fit the model and record the three contrasts.
  for (i in seq_along(cutoffs)) {
    cut_off <- cutoffs[i]

    # Explicit levels: an empty category no longer makes relevel() fail, and
    # rows with a missing fan count become NA instead of an extra category.
    data <- data %>%
      mutate(
        fans_piar_cat_temp = factor(
          case_when(
            (before_fans_count < cut_off) & (other_before_fans_count < cut_off) ~ "Small vs Small",
            (before_fans_count < cut_off) & (other_before_fans_count >= cut_off) ~ "Small vs Big",
            (before_fans_count >= cut_off) & (other_before_fans_count < cut_off) ~ "Big vs Small",
            (before_fans_count >= cut_off) & (other_before_fans_count >= cut_off) ~ "Big vs Big"
          ),
          levels = pair_levels
        )
      )

    # 1. Percentage of classified rows in each category (empty ones are 0).
    pct_matrix[i, ] <- 100 * as.numeric(prop.table(table(data$fans_piar_cat_temp)))

    # Without any reference-category rows the contrasts against
    # "Small vs Small" are undefined; leave this cut-off's coefficients NA.
    if (!isTRUE(pct_matrix[i, "Small vs Small"] > 0)) {
      next
    }

    # 2. Coefficients of the model at this cut-off. A contrast that is absent
    # from the fit (e.g. an empty category) is looked up by name and stays NA.
    model <- feols(model_formula, data = data, vcov = ~author_id)
    coefficients_matrix[i, ] <- coef(model)[coef_names]
  }

  ## 1. Category-share plot. It is built but not displayed; print(pct_plot)
  ## shows it.
  pct_df <- data.frame(
    Cutoff = cutoffs,
    Big_vs_Big = pct_matrix[, "Big vs Big"],
    Big_vs_Small = pct_matrix[, "Big vs Small"],
    Small_vs_Big = pct_matrix[, "Small vs Big"],
    Small_vs_Small = pct_matrix[, "Small vs Small"]
  )
  pct_df_long <- pct_df %>%
    pivot_longer(
      cols = c("Big_vs_Big", "Big_vs_Small", "Small_vs_Big", "Small_vs_Small"),
      names_to = "Category", values_to = "Percentage"
    )
  pct_plot <- ggplot(pct_df_long, aes(x = Cutoff, y = Percentage, color = Category)) +
    geom_line() +
    scale_x_log10() + # logarithmic scale for the cut-off
    labs(
      title = "Distribution of fans_piar_cat across Cutoffs",
      x = "Cutoff (log scale)", y = "Percentage",
      color = "Category"
    ) +
    theme_minimal()
  # print(pct_plot)

  ## 2. Coefficient plot.
  coef_df <- data.frame(
    Cutoff = cutoffs,
    Small_vs_Big = coefficients_matrix[, "Small vs Big"],
    Big_vs_Small = coefficients_matrix[, "Big vs Small"],
    Big_vs_Big = coefficients_matrix[, "Big vs Big"]
  )
  coef_df_long <- coef_df %>%
    pivot_longer(
      cols = c("Small_vs_Big", "Big_vs_Small", "Big_vs_Big"),
      names_to = "Category", values_to = "Coefficient"
    )
  coef_plot <- ggplot(coef_df_long, aes(x = Cutoff, y = Coefficient, color = Category)) +
    geom_line() +
    scale_x_log10() + # logarithmic scale for the cut-off
    labs(
      title = "Evolution of Coefficients for fans_piar_cat across Cutoffs",
      x = "Cutoff (log scale)", y = "Coefficient",
      color = "Category"
    ) +
    theme_minimal()
  print(coef_plot)
}
# Per-fan gifting: pair category x cooperation with p_date fixed effects;
# standard errors clustered by author_id.
model <- feols(
  log(avg_fan_total_cost_amt + 1) ~
    factor(fans_piar_cat) + is_cooperative + factor(fans_piar_cat) * is_cooperative | p_date,
  data = data,
  vcov = ~author_id
)
summary(model)
## OLS estimation, Dep. Var.: log(avg_fan_total_cost_amt + 1)
## Observations: 187,126
## Fixed-effects: p_date: 975
## Standard-errors: Clustered (author_id)
## Estimate Std. Error t value
## factor(fans_piar_cat)Small vs Big 0.028320 0.002953 9.58924
## factor(fans_piar_cat)Big vs Small -0.024452 0.001229 -19.89522
## factor(fans_piar_cat)Big vs Big -0.025267 0.001082 -23.34982
## is_cooperative 0.009500 0.001349 7.04310
## factor(fans_piar_cat)Small vs Big:is_cooperative 0.005994 0.005015 1.19526
## factor(fans_piar_cat)Big vs Small:is_cooperative -0.011356 0.001836 -6.18592
## factor(fans_piar_cat)Big vs Big:is_cooperative -0.007419 0.001793 -4.13852
## Pr(>|t|)
## factor(fans_piar_cat)Small vs Big < 2.2e-16 ***
## factor(fans_piar_cat)Big vs Small < 2.2e-16 ***
## factor(fans_piar_cat)Big vs Big < 2.2e-16 ***
## is_cooperative 1.8878e-12 ***
## factor(fans_piar_cat)Small vs Big:is_cooperative 2.3199e-01
## factor(fans_piar_cat)Big vs Small:is_cooperative 6.1895e-10 ***
## factor(fans_piar_cat)Big vs Big:is_cooperative 3.4974e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## RMSE: 0.219571 Adj. R2: 0.005954
## Within R2: 0.003705
# Plot: refit the interaction model without the p_date fixed effects.
model <- feols(
  log(avg_fan_total_cost_amt + 1) ~
    factor(fans_piar_cat) + is_cooperative + factor(fans_piar_cat) * is_cooperative,
  data = data,
  vcov = ~author_id
)
summary(model)
## OLS estimation, Dep. Var.: log(avg_fan_total_cost_amt + 1)
## Observations: 187,126
## Standard-errors: Clustered (author_id)
## Estimate Std. Error
## (Intercept) 0.038838 0.000731
## factor(fans_piar_cat)Small vs Big 0.030463 0.002942
## factor(fans_piar_cat)Big vs Small -0.021900 0.001162
## factor(fans_piar_cat)Big vs Big -0.021889 0.000972
## is_cooperative 0.010604 0.001350
## factor(fans_piar_cat)Small vs Big:is_cooperative 0.004824 0.005006
## factor(fans_piar_cat)Big vs Small:is_cooperative -0.012566 0.001778
## factor(fans_piar_cat)Big vs Big:is_cooperative -0.008886 0.001735
## t value Pr(>|t|)
## (Intercept) 53.118905 < 2.2e-16 ***
## factor(fans_piar_cat)Small vs Big 10.352865 < 2.2e-16 ***
## factor(fans_piar_cat)Big vs Small -18.846991 < 2.2e-16 ***
## factor(fans_piar_cat)Big vs Big -22.517133 < 2.2e-16 ***
## is_cooperative 7.852427 4.1062e-15 ***
## factor(fans_piar_cat)Small vs Big:is_cooperative 0.963515 3.3529e-01
## factor(fans_piar_cat)Big vs Small:is_cooperative -7.067109 1.5885e-12 ***
## factor(fans_piar_cat)Big vs Big:is_cooperative -5.120919 3.0441e-07 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## RMSE: 0.220413 Adj. R2: 0.003525
# Predicted per-fan gifting by pair category and cooperation.
plot_predictions_with_ci(model_1 = model, data = data)
## `summarise()` has grouped output by 'fans_piar_cat'. You can override using the
## `.groups` argument.
### (3) plot fans
# Plot: per-fan gifting on the pair category only.
model <- feols(
  log(avg_fan_total_cost_amt + 1) ~ factor(fans_piar_cat),
  data = data,
  vcov = ~author_id
)
summary(model)
## OLS estimation, Dep. Var.: log(avg_fan_total_cost_amt + 1)
## Observations: 187,126
## Standard-errors: Clustered (author_id)
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.042408 0.000624 67.9489 < 2.2e-16 ***
## factor(fans_piar_cat)Small vs Big 0.032760 0.002449 13.3781 < 2.2e-16 ***
## factor(fans_piar_cat)Big vs Small -0.026209 0.000898 -29.1923 < 2.2e-16 ***
## factor(fans_piar_cat)Big vs Big -0.024738 0.000827 -29.9189 < 2.2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## RMSE: 0.220466 Adj. R2: 0.003064
plot_predictions_fans(model_1 = model, data = data)
# Plot: per-fan gifting on the cooperation dummy only.
model <- feols(
  log(avg_fan_total_cost_amt + 1) ~ is_cooperative,
  data = data,
  vcov = ~author_id
)
summary(model)
## OLS estimation, Dep. Var.: log(avg_fan_total_cost_amt + 1)
## Observations: 187,126
## Standard-errors: Clustered (author_id)
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.037701 0.000625 60.27622 < 2.2e-16 ***
## is_cooperative 0.008762 0.001113 7.87408 3.4544e-15 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## RMSE: 0.220767 Adj. R2: 3.521e-4
plot_predictions_cate(model_1 = model, data = data)
# Usage example: cut-off sweep for per-fan gifting.
# fans_piar_cat_temp is created inside plot_continue_cutoff() for each cut-off.
model_formula <- log(avg_fan_total_cost_amt + 1) ~ factor(fans_piar_cat_temp)
# Run the sweep with the data and the formula.
plot_continue_cutoff(data = data, model_formula = model_formula)
library(ggplot2)
library(dplyr)
library(RColorBrewer)
# Step 1: Bin both streamers' fan counts. Every bin has an explicit condition,
# so a count of 0 lands in "0-1k" and a missing count stays NA instead of
# falling through to the top bin.
data <- data %>%
  mutate(
    fans_new_range = case_when(
      before_fans_count <= 1000 ~ "0-1k",
      before_fans_count <= 10000 ~ "1k-10k",
      before_fans_count <= 100000 ~ "10k-100k",
      before_fans_count <= 1000000 ~ "100k-1M",
      before_fans_count > 1000000 ~ ">1M"
    ),
    # The opponent side uses coarser bins: everything above 10k is merged.
    other_fans_new_range = case_when(
      other_before_fans_count <= 1000 ~ "0-1k",
      other_before_fans_count <= 10000 ~ "1k-10k",
      other_before_fans_count > 10000 ~ ">10k"
    )
  )
# Step 2: Set the correct order for the categories
data <- data %>%
  mutate(
    fans_new_range = factor(
      fans_new_range,
      levels = c("0-1k", "1k-10k", "10k-100k", "100k-1M", ">1M")
    ),
    other_fans_new_range = factor(
      other_fans_new_range,
      levels = c("0-1k", "1k-10k", ">10k")
    )
  )
# Step 3: Per cell, the mean of log(outcome + 1) and a 95% t-interval for it.
# The spread comes from the row-level values (not from the cell mean itself)
# and the t quantile uses n - 1 degrees of freedom; cells with fewer than two
# valid rows get NA bounds.
agg_data <- data %>%
  mutate(log_outcome = log(avg_fan_total_cost_amt + 1)) %>%
  group_by(fans_new_range, other_fans_new_range) %>%
  summarize(
    count = n(), # number of rows in the cell
    n_valid = sum(!is.na(log_outcome)),
    Y = mean(log_outcome, na.rm = TRUE),
    ci_half = if (n_valid > 1) {
      qt(0.975, df = n_valid - 1) * sd(log_outcome, na.rm = TRUE) / sqrt(n_valid)
    } else {
      NA_real_
    },
    ci_lower = Y - ci_half,
    ci_upper = Y + ci_half
  ) %>%
  ungroup() %>%
  dplyr::select(-n_valid, -ci_half) %>%
  mutate(percentage = count / sum(count) * 100)
## `summarise()` has grouped output by 'fans_new_range'. You can override using
## the `.groups` argument.
# Step 4: Heatmap of the cell means, labelled with each cell's share of rows.
# The interval bounds stay in agg_data but are not drawn: they are in outcome
# units and cannot be placed on the categorical y axis.
ggplot(agg_data, aes(x = fans_new_range, y = other_fans_new_range, fill = Y)) +
  geom_tile() +
  geom_text(aes(label = paste0(round(percentage, 2), "%")), color = "black", size = 4) +
  scale_fill_gradientn(
    colors = c("yellow", "orange", "darkorange", "red", "darkred"),
    values = scales::rescale(c(0, 0.05, 0.1, 0.15, 0.2)),
    limits = c(0, 0.2),
    na.value = "white"
  ) +
  labs(
    title = "Average Fan Total Cost Amount by Fan Ranges",
    x = "Fans Range",
    y = "Other Fans Range",
    fill = "Avg Fan Total Cost Amt"
  ) +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))
# Per-viewer gifting: pair category x cooperation with p_date fixed effects;
# standard errors clustered by author_id.
model <- feols(
  log(avg_viewer_total_cost_amt + 1) ~
    factor(fans_piar_cat) + is_cooperative + factor(fans_piar_cat) * is_cooperative | p_date,
  data = data,
  vcov = ~author_id
)
summary(model)
## OLS estimation, Dep. Var.: log(avg_viewer_total_cost_amt + 1)
## Observations: 187,126
## Fixed-effects: p_date: 975
## Standard-errors: Clustered (author_id)
## Estimate Std. Error t value
## factor(fans_piar_cat)Small vs Big 0.193010 0.007996 24.13745
## factor(fans_piar_cat)Big vs Small 0.169834 0.007816 21.72794
## factor(fans_piar_cat)Big vs Big 0.207317 0.007011 29.57182
## is_cooperative 0.028324 0.002213 12.79926
## factor(fans_piar_cat)Small vs Big:is_cooperative 0.040228 0.013240 3.03835
## factor(fans_piar_cat)Big vs Small:is_cooperative -0.014137 0.011668 -1.21161
## factor(fans_piar_cat)Big vs Big:is_cooperative -0.013210 0.010500 -1.25814
## Pr(>|t|)
## factor(fans_piar_cat)Small vs Big < 2.2e-16 ***
## factor(fans_piar_cat)Big vs Small < 2.2e-16 ***
## factor(fans_piar_cat)Big vs Big < 2.2e-16 ***
## is_cooperative < 2.2e-16 ***
## factor(fans_piar_cat)Small vs Big:is_cooperative 0.0023792 **
## factor(fans_piar_cat)Big vs Small:is_cooperative 0.2256648
## factor(fans_piar_cat)Big vs Big:is_cooperative 0.2083438
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## RMSE: 0.438796 Adj. R2: 0.035929
## Within R2: 0.031064
# Plot: refit the interaction model without the p_date fixed effects.
model <- feols(
  log(avg_viewer_total_cost_amt + 1) ~
    factor(fans_piar_cat) + is_cooperative + factor(fans_piar_cat) * is_cooperative,
  data = data,
  vcov = ~author_id
)
summary(model)
## OLS estimation, Dep. Var.: log(avg_viewer_total_cost_amt + 1)
## Observations: 187,126
## Standard-errors: Clustered (author_id)
## Estimate Std. Error t value
## (Intercept) 0.102590 0.001190 86.24594
## factor(fans_piar_cat)Small vs Big 0.195631 0.008013 24.41559
## factor(fans_piar_cat)Big vs Small 0.173282 0.007818 22.16328
## factor(fans_piar_cat)Big vs Big 0.210926 0.006990 30.17703
## is_cooperative 0.028725 0.002204 13.03264
## factor(fans_piar_cat)Small vs Big:is_cooperative 0.040555 0.013262 3.05808
## factor(fans_piar_cat)Big vs Small:is_cooperative -0.015247 0.011672 -1.30638
## factor(fans_piar_cat)Big vs Big:is_cooperative -0.014781 0.010492 -1.40879
## Pr(>|t|)
## (Intercept) < 2.2e-16 ***
## factor(fans_piar_cat)Small vs Big < 2.2e-16 ***
## factor(fans_piar_cat)Big vs Small < 2.2e-16 ***
## factor(fans_piar_cat)Big vs Big < 2.2e-16 ***
## is_cooperative < 2.2e-16 ***
## factor(fans_piar_cat)Small vs Big:is_cooperative 0.002228 **
## factor(fans_piar_cat)Big vs Small:is_cooperative 0.191426
## factor(fans_piar_cat)Big vs Big:is_cooperative 0.158900
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## RMSE: 0.44075 Adj. R2: 0.032387
# Predicted per-viewer gifting by pair category and cooperation.
plot_predictions_with_ci(model_1 = model, data = data)
## `summarise()` has grouped output by 'fans_piar_cat'. You can override using the
## `.groups` argument.
### (3) plot fans
# Plot: per-viewer gifting on the pair category only.
model <- feols(
  log(avg_viewer_total_cost_amt + 1) ~ factor(fans_piar_cat),
  data = data,
  vcov = ~author_id
)
summary(model)
## OLS estimation, Dep. Var.: log(avg_viewer_total_cost_amt + 1)
## Observations: 187,126
## Standard-errors: Clustered (author_id)
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.112261 0.001032 108.7313 < 2.2e-16 ***
## factor(fans_piar_cat)Small vs Big 0.212310 0.006770 31.3603 < 2.2e-16 ***
## factor(fans_piar_cat)Big vs Small 0.168688 0.006311 26.7279 < 2.2e-16 ***
## factor(fans_piar_cat)Big vs Big 0.207106 0.005649 36.6600 < 2.2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## RMSE: 0.440999 Adj. R2: 0.031313
plot_predictions_fans(model_1 = model, data = data)
# Plot: per-viewer gifting on the cooperation dummy only.
model <- feols(
  log(avg_viewer_total_cost_amt + 1) ~ is_cooperative,
  data = data,
  vcov = ~author_id
)
summary(model)
## OLS estimation, Dep. Var.: log(avg_viewer_total_cost_amt + 1)
## Observations: 187,126
## Standard-errors: Clustered (author_id)
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.138597 0.001312 105.6472 < 2.2e-16 ***
## is_cooperative 0.037234 0.002269 16.4072 < 2.2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## RMSE: 0.447722 Adj. R2: 0.001562
plot_predictions_cate(model_1 = model, data = data)
# Usage example: cut-off sweep for per-viewer gifting.
# fans_piar_cat_temp is created inside plot_continue_cutoff() for each cut-off.
model_formula <- log(avg_viewer_total_cost_amt + 1) ~ factor(fans_piar_cat_temp)
# Run the sweep with the data and the formula.
plot_continue_cutoff(data = data, model_formula = model_formula)
library(ggplot2)
library(dplyr)
library(RColorBrewer)
# Step 1: Bin both streamers' fan counts into the same three ranges. Every bin
# has an explicit condition, so a count of 0 lands in "0-1k" and a missing
# count stays NA instead of falling through to the top bin.
data <- data %>%
  mutate(
    fans_new_range = case_when(
      before_fans_count <= 1000 ~ "0-1k",
      before_fans_count <= 10000 ~ "1k-10k",
      before_fans_count > 10000 ~ ">10k"
    ),
    other_fans_new_range = case_when(
      other_before_fans_count <= 1000 ~ "0-1k",
      other_before_fans_count <= 10000 ~ "1k-10k",
      other_before_fans_count > 10000 ~ ">10k"
    )
  )
# Step 2: Set the correct order for the categories. (The stray "10k-100k" and
# "100k-1M" arguments previously passed to mutate() created two constant junk
# columns and are removed.)
data <- data %>%
  mutate(
    fans_new_range = factor(
      fans_new_range,
      levels = c("0-1k", "1k-10k", ">10k")
    ),
    other_fans_new_range = factor(
      other_fans_new_range,
      levels = c("0-1k", "1k-10k", ">10k")
    )
  )
# Step 3: Per cell, the mean of log(outcome + 1) and a 95% t-interval for it.
# The spread comes from the row-level values (not from the cell mean itself)
# and the t quantile uses n - 1 degrees of freedom; cells with fewer than two
# valid rows get NA bounds.
agg_data <- data %>%
  mutate(log_outcome = log(avg_viewer_total_cost_amt + 1)) %>%
  group_by(fans_new_range, other_fans_new_range) %>%
  summarize(
    count = n(), # number of rows in the cell
    n_valid = sum(!is.na(log_outcome)),
    Y = mean(log_outcome, na.rm = TRUE),
    ci_half = if (n_valid > 1) {
      qt(0.975, df = n_valid - 1) * sd(log_outcome, na.rm = TRUE) / sqrt(n_valid)
    } else {
      NA_real_
    },
    ci_lower = Y - ci_half,
    ci_upper = Y + ci_half
  ) %>%
  ungroup() %>%
  dplyr::select(-n_valid, -ci_half) %>%
  mutate(percentage = count / sum(count) * 100)
## `summarise()` has grouped output by 'fans_new_range'. You can override using
## the `.groups` argument.
# Step 4: Heatmap of the cell means, labelled with each cell's share of rows.
# The interval bounds stay in agg_data but are not drawn: they are in outcome
# units and cannot be placed on the categorical y axis.
ggplot(agg_data, aes(x = fans_new_range, y = other_fans_new_range, fill = Y)) +
  geom_tile() +
  geom_text(aes(label = paste0(round(percentage, 2), "%")), color = "black", size = 4) +
  scale_fill_gradientn(
    colors = c("yellow", "orange", "darkorange", "red", "darkred"),
    values = scales::rescale(c(0, 0.05, 0.1, 0.15, 0.2)),
    limits = c(0, 0.2),
    # Means above 0.2 are clamped to the darkest colour instead of being
    # censored to the white "missing" colour.
    oob = scales::squish,
    na.value = "white"
  ) +
  labs(
    title = "Average viewer Total Cost Amount by Fan Ranges",
    x = "Fans Range",
    y = "Other Fans Range",
    fill = "Avg Viewer Total Cost Amt"
  ) +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))
# Gifting per unit watch time: pair category x cooperation with p_date fixed
# effects; standard errors clustered by author_id.
model <- feols(
  log(avg_time_total_cost_amt + 1) ~
    factor(fans_piar_cat) + is_cooperative + factor(fans_piar_cat) * is_cooperative | p_date,
  data = data,
  vcov = ~author_id
)
summary(model)
## OLS estimation, Dep. Var.: log(avg_time_total_cost_amt + 1)
## Observations: 187,126
## Fixed-effects: p_date: 975
## Standard-errors: Clustered (author_id)
## Estimate Std. Error t value
## factor(fans_piar_cat)Small vs Big -2.6e-05 2.9e-05 -0.907748
## factor(fans_piar_cat)Big vs Small -2.4e-05 3.2e-05 -0.757443
## factor(fans_piar_cat)Big vs Big -3.5e-05 3.0e-05 -1.190522
## is_cooperative 5.5e-05 5.6e-05 0.994852
## factor(fans_piar_cat)Small vs Big:is_cooperative -6.3e-05 6.1e-05 -1.031365
## factor(fans_piar_cat)Big vs Small:is_cooperative -6.8e-05 6.3e-05 -1.091454
## factor(fans_piar_cat)Big vs Big:is_cooperative -4.5e-05 5.7e-05 -0.787784
## Pr(>|t|)
## factor(fans_piar_cat)Small vs Big 0.36401
## factor(fans_piar_cat)Big vs Small 0.44879
## factor(fans_piar_cat)Big vs Big 0.23384
## is_cooperative 0.31981
## factor(fans_piar_cat)Small vs Big:is_cooperative 0.30237
## factor(fans_piar_cat)Big vs Small:is_cooperative 0.27507
## factor(fans_piar_cat)Big vs Big:is_cooperative 0.43082
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## RMSE: 0.008894 Adj. R2: 1.905e-4
## Within R2: 1.157e-5
# Plot: refit the interaction model without the p_date fixed effects.
model <- feols(
  log(avg_time_total_cost_amt + 1) ~
    factor(fans_piar_cat) + is_cooperative + factor(fans_piar_cat) * is_cooperative,
  data = data,
  vcov = ~author_id
)
summary(model)
## OLS estimation, Dep. Var.: log(avg_time_total_cost_amt + 1)
## Observations: 187,126
## Standard-errors: Clustered (author_id)
## Estimate Std. Error t value
## (Intercept) 4.1e-05 2.5e-05 1.637817
## factor(fans_piar_cat)Small vs Big -1.6e-05 2.5e-05 -0.627271
## factor(fans_piar_cat)Big vs Small -1.8e-05 2.5e-05 -0.714191
## factor(fans_piar_cat)Big vs Big -1.9e-05 2.5e-05 -0.747102
## is_cooperative 7.0e-05 6.3e-05 1.102692
## factor(fans_piar_cat)Small vs Big:is_cooperative -6.7e-05 6.3e-05 -1.061532
## factor(fans_piar_cat)Big vs Small:is_cooperative -7.0e-05 6.3e-05 -1.104477
## factor(fans_piar_cat)Big vs Big:is_cooperative -6.6e-05 6.3e-05 -1.043247
## Pr(>|t|)
## (Intercept) 0.10146
## factor(fans_piar_cat)Small vs Big 0.53048
## factor(fans_piar_cat)Big vs Small 0.47511
## factor(fans_piar_cat)Big vs Big 0.45500
## is_cooperative 0.27016
## factor(fans_piar_cat)Small vs Big:is_cooperative 0.28845
## factor(fans_piar_cat)Big vs Small:is_cooperative 0.26939
## factor(fans_piar_cat)Big vs Big:is_cooperative 0.29684
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## RMSE: 0.008918 Adj. R2: -2.314e-5
# Predicted gifting per unit watch time by pair category and cooperation.
plot_predictions_with_ci(model_1 = model, data = data)
## `summarise()` has grouped output by 'fans_piar_cat'. You can override using the
## `.groups` argument.
### (3) plot fans
# Plot: gifting per unit watch time on the pair category only.
model <- feols(
  log(avg_time_total_cost_amt + 1) ~ factor(fans_piar_cat),
  data = data,
  vcov = ~author_id
)
summary(model)
## OLS estimation, Dep. Var.: log(avg_time_total_cost_amt + 1)
## Observations: 187,126
## Standard-errors: Clustered (author_id)
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 6.5e-05 2.6e-05 2.51408 0.011935 *
## factor(fans_piar_cat)Small vs Big -3.8e-05 2.6e-05 -1.48955 0.136345
## factor(fans_piar_cat)Big vs Small -4.2e-05 2.6e-05 -1.61444 0.106435
## factor(fans_piar_cat)Big vs Big -4.1e-05 2.6e-05 -1.58335 0.113344
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## RMSE: 0.008918 Adj. R2: -1.277e-5
plot_predictions_fans(model_1 = model, data = data)
# Plot: gifting per unit watch time on the cooperation dummy only.
model <- feols(
  log(avg_time_total_cost_amt + 1) ~ is_cooperative,
  data = data,
  vcov = ~author_id
)
summary(model)
## OLS estimation, Dep. Var.: log(avg_time_total_cost_amt + 1)
## Observations: 187,126
## Standard-errors: Clustered (author_id)
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 3.8e-05 2e-05 1.84924 0.064425 .
## is_cooperative 5.4e-05 5e-05 1.08877 0.276256
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## RMSE: 0.008918 Adj. R2: 2.955e-6
plot_predictions_cate(model_1 = model, data = data)
# Usage example: cut-off sweep for gifting per unit watch time.
# fans_piar_cat_temp is created inside plot_continue_cutoff() for each cut-off.
model_formula <- log(avg_time_total_cost_amt + 1) ~ factor(fans_piar_cat_temp)
# Run the sweep with the data and the formula.
plot_continue_cutoff(data = data, model_formula = model_formula)
library(ggplot2)
library(dplyr)
library(RColorBrewer)
# Step 1: Bin both streamers' fan counts. Every bin has an explicit condition,
# so a count of 0 lands in "0-1k" and a missing count stays NA instead of
# falling through to the top bin.
data <- data %>%
  mutate(
    fans_new_range = case_when(
      before_fans_count <= 1000 ~ "0-1k",
      before_fans_count <= 10000 ~ "1k-10k",
      before_fans_count <= 100000 ~ "10k-100k",
      before_fans_count <= 1000000 ~ "100k-1M",
      before_fans_count > 1000000 ~ ">1M"
    ),
    # The opponent side uses coarser bins: everything above 10k is merged.
    other_fans_new_range = case_when(
      other_before_fans_count <= 1000 ~ "0-1k",
      other_before_fans_count <= 10000 ~ "1k-10k",
      other_before_fans_count > 10000 ~ ">10k"
    )
  )
# Step 2: Set the correct order for the categories
data <- data %>%
  mutate(
    fans_new_range = factor(
      fans_new_range,
      levels = c("0-1k", "1k-10k", "10k-100k", "100k-1M", ">1M")
    ),
    other_fans_new_range = factor(
      other_fans_new_range,
      levels = c("0-1k", "1k-10k", ">10k")
    )
  )
# Step 3: Per cell, the mean of log(outcome + 1) and a 95% t-interval for it.
# The spread comes from the row-level values (not from the cell mean itself)
# and the t quantile uses n - 1 degrees of freedom; cells with fewer than two
# valid rows get NA bounds.
agg_data <- data %>%
  mutate(log_outcome = log(avg_time_total_cost_amt + 1)) %>%
  group_by(fans_new_range, other_fans_new_range) %>%
  summarize(
    count = n(), # number of rows in the cell
    n_valid = sum(!is.na(log_outcome)),
    Y = mean(log_outcome, na.rm = TRUE),
    ci_half = if (n_valid > 1) {
      qt(0.975, df = n_valid - 1) * sd(log_outcome, na.rm = TRUE) / sqrt(n_valid)
    } else {
      NA_real_
    },
    ci_lower = Y - ci_half,
    ci_upper = Y + ci_half
  ) %>%
  ungroup() %>%
  dplyr::select(-n_valid, -ci_half) %>%
  mutate(percentage = count / sum(count) * 100)
## `summarise()` has grouped output by 'fans_new_range'. You can override using
## the `.groups` argument.
# Step 4: Heatmap of the cell means, labelled with each cell's share of rows.
# The interval bounds stay in agg_data but are not drawn: they are in outcome
# units and cannot be placed on the categorical y axis.
# NOTE(review): the 0-0.2 fill limits were chosen for the per-fan outcome; if
# the cell means here are orders of magnitude smaller, every tile will get
# nearly the same colour. Confirm the intended range.
ggplot(agg_data, aes(x = fans_new_range, y = other_fans_new_range, fill = Y)) +
  geom_tile() +
  geom_text(aes(label = paste0(round(percentage, 2), "%")), color = "black", size = 4) +
  scale_fill_gradientn(
    colors = c("yellow", "orange", "darkorange", "red", "darkred"),
    values = scales::rescale(c(0, 0.05, 0.1, 0.15, 0.2)),
    limits = c(0, 0.2),
    na.value = "white"
  ) +
  labs(
    title = "Average Per-Watch-Time Total Cost Amount by Fan Ranges",
    x = "Fans Range",
    y = "Other Fans Range",
    fill = "Avg Time Total Cost Amt"
  ) +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))
# Total gifting: pair category x cooperation with p_date fixed effects;
# standard errors clustered by author_id.
model <- feols(
  log(total_cost_amt + 1) ~
    factor(fans_piar_cat) + is_cooperative + factor(fans_piar_cat) * is_cooperative | p_date,
  data = data,
  vcov = ~author_id
)
summary(model)
## OLS estimation, Dep. Var.: log(total_cost_amt + 1)
## Observations: 187,126
## Fixed-effects: p_date: 975
## Standard-errors: Clustered (author_id)
## Estimate Std. Error t value
## factor(fans_piar_cat)Small vs Big 1.504307 0.036000 41.786864
## factor(fans_piar_cat)Big vs Small 1.798410 0.037754 47.634457
## factor(fans_piar_cat)Big vs Big 2.468558 0.035790 68.974328
## is_cooperative 0.114965 0.009531 12.062736
## factor(fans_piar_cat)Small vs Big:is_cooperative 0.325324 0.055448 5.867161
## factor(fans_piar_cat)Big vs Small:is_cooperative 0.020468 0.054788 0.373589
## factor(fans_piar_cat)Big vs Big:is_cooperative -0.247271 0.048455 -5.103118
## Pr(>|t|)
## factor(fans_piar_cat)Small vs Big < 2.2e-16 ***
## factor(fans_piar_cat)Big vs Small < 2.2e-16 ***
## factor(fans_piar_cat)Big vs Big < 2.2e-16 ***
## is_cooperative < 2.2e-16 ***
## factor(fans_piar_cat)Small vs Big:is_cooperative 4.4421e-09 ***
## factor(fans_piar_cat)Big vs Small:is_cooperative 7.0871e-01
## factor(fans_piar_cat)Big vs Big:is_cooperative 3.3449e-07 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## RMSE: 1.93209 Adj. R2: 0.157868
## Within R2: 0.147539
# Plot: refit the interaction model without the p_date fixed effects.
model <- feols(
  log(total_cost_amt + 1) ~
    factor(fans_piar_cat) + is_cooperative + factor(fans_piar_cat) * is_cooperative,
  data = data,
  vcov = ~author_id
)
summary(model)
## OLS estimation, Dep. Var.: log(total_cost_amt + 1)
## Observations: 187,126
## Standard-errors: Clustered (author_id)
## Estimate Std. Error
## (Intercept) 0.788169 0.005481
## factor(fans_piar_cat)Small vs Big 1.532570 0.036028
## factor(fans_piar_cat)Big vs Small 1.828888 0.037758
## factor(fans_piar_cat)Big vs Big 2.507547 0.035668
## is_cooperative 0.125272 0.009482
## factor(fans_piar_cat)Small vs Big:is_cooperative 0.316799 0.055539
## factor(fans_piar_cat)Big vs Small:is_cooperative 0.007394 0.054806
## factor(fans_piar_cat)Big vs Big:is_cooperative -0.264253 0.048443
## t value Pr(>|t|)
## (Intercept) 143.812046 < 2.2e-16 ***
## factor(fans_piar_cat)Small vs Big 42.537712 < 2.2e-16 ***
## factor(fans_piar_cat)Big vs Small 48.437078 < 2.2e-16 ***
## factor(fans_piar_cat)Big vs Big 70.301835 < 2.2e-16 ***
## is_cooperative 13.211182 < 2.2e-16 ***
## factor(fans_piar_cat)Small vs Big:is_cooperative 5.704068 1.1719e-08 ***
## factor(fans_piar_cat)Big vs Small:is_cooperative 0.134918 8.9268e-01
## factor(fans_piar_cat)Big vs Big:is_cooperative -5.454943 4.9062e-08 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## RMSE: 1.94299 Adj. R2: 0.15277
# Predicted total gifting by pair category and cooperation.
plot_predictions_with_ci(model_1 = model, data = data)
## `summarise()` has grouped output by 'fans_piar_cat'. You can override using the
## `.groups` argument.
### (3) plot fans
# Plot: total gifting on the pair category only.
model <- feols(
  log(total_cost_amt + 1) ~ factor(fans_piar_cat),
  data = data,
  vcov = ~author_id
)
summary(model)
## OLS estimation, Dep. Var.: log(total_cost_amt + 1)
## Observations: 187,126
## Standard-errors: Clustered (author_id)
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.830345 0.004741 175.1391 < 2.2e-16 ***
## factor(fans_piar_cat)Small vs Big 1.658532 0.030948 53.5910 < 2.2e-16 ***
## factor(fans_piar_cat)Big vs Small 1.836686 0.032876 55.8674 < 2.2e-16 ***
## factor(fans_piar_cat)Big vs Big 2.407051 0.029675 81.1137 < 2.2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## RMSE: 1.94457 Adj. R2: 0.151415
plot_predictions_fans(model_1 = model, data = data)
# Plot: total gifting on the cooperation dummy only.
model <- feols(
  log(total_cost_amt + 1) ~ is_cooperative,
  data = data,
  vcov = ~author_id
)
summary(model)
## OLS estimation, Dep. Var.: log(total_cost_amt + 1)
## Observations: 187,126
## Standard-errors: Clustered (author_id)
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.159620 0.006686 173.4396 < 2.2e-16 ***
## is_cooperative 0.210532 0.010738 19.6065 < 2.2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## RMSE: 2.10857 Adj. R2: 0.002253
plot_predictions_cate(model_1 = model, data = data)
# Usage example: cut-off sweep for total gifting.
# fans_piar_cat_temp is created inside plot_continue_cutoff() for each cut-off.
model_formula <- log(total_cost_amt + 1) ~ factor(fans_piar_cat_temp)
# Run the sweep with the data and the formula.
plot_continue_cutoff(data = data, model_formula = model_formula)
library(ggplot2)
library(dplyr)
library(RColorBrewer)
# Step 1: Bin both streamers' fan counts into the same five ranges. Every bin
# has an explicit condition, so a count of 0 lands in "0-1k" and a missing
# count stays NA instead of falling through to the top bin.
data <- data %>%
  mutate(
    fans_new_range = case_when(
      before_fans_count <= 1000 ~ "0-1k",
      before_fans_count <= 10000 ~ "1k-10k",
      before_fans_count <= 100000 ~ "10k-100k",
      before_fans_count <= 1000000 ~ "100k-1M",
      before_fans_count > 1000000 ~ ">1M"
    ),
    other_fans_new_range = case_when(
      other_before_fans_count <= 1000 ~ "0-1k",
      other_before_fans_count <= 10000 ~ "1k-10k",
      other_before_fans_count <= 100000 ~ "10k-100k",
      other_before_fans_count <= 1000000 ~ "100k-1M",
      other_before_fans_count > 1000000 ~ ">1M"
    )
  )
# Step 2: Set the correct order for the categories
data <- data %>%
  mutate(
    fans_new_range = factor(
      fans_new_range,
      levels = c("0-1k", "1k-10k", "10k-100k", "100k-1M", ">1M")
    ),
    other_fans_new_range = factor(
      other_fans_new_range,
      levels = c("0-1k", "1k-10k", "10k-100k", "100k-1M", ">1M")
    )
  )
# Step 3: Per cell, the mean of log(outcome + 1) and a 95% t-interval for it.
# The spread comes from the row-level values (not from the cell mean itself)
# and the t quantile uses n - 1 degrees of freedom; cells with fewer than two
# valid rows get NA bounds.
agg_data <- data %>%
  mutate(log_outcome = log(total_cost_amt + 1)) %>%
  group_by(fans_new_range, other_fans_new_range) %>%
  summarize(
    count = n(), # number of rows in the cell
    n_valid = sum(!is.na(log_outcome)),
    Y = mean(log_outcome, na.rm = TRUE),
    ci_half = if (n_valid > 1) {
      qt(0.975, df = n_valid - 1) * sd(log_outcome, na.rm = TRUE) / sqrt(n_valid)
    } else {
      NA_real_
    },
    ci_lower = Y - ci_half,
    ci_upper = Y + ci_half
  ) %>%
  ungroup() %>%
  dplyr::select(-n_valid, -ci_half) %>%
  mutate(percentage = count / sum(count) * 100)
## `summarise()` has grouped output by 'fans_new_range'. You can override using
## the `.groups` argument.
# Step 4: Heatmap of the cell means, labelled with each cell's share of rows.
# The interval bounds stay in agg_data but are not drawn: they are in outcome
# units and cannot be placed on the categorical y axis.
ggplot(agg_data, aes(x = fans_new_range, y = other_fans_new_range, fill = Y)) +
  geom_tile() +
  geom_text(aes(label = paste0(round(percentage, 2), "%")), color = "black", size = 4) +
  scale_fill_gradientn(
    colors = c("yellow", "orange", "darkorange", "red", "darkred"),
    values = scales::rescale(c(0, 0.05, 0.1, 0.15, 0.2)),
    #limits = c(0, 0.2),
    na.value = "white"
  ) +
  labs(
    title = "Total Cost Amount by Fan Ranges",
    x = "Fans Range",
    y = "Other Fans Range",
    fill = "Total Cost Amt"
  ) +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))
# Distribution of the net follower change. The recorded minimum is -254, which
# presumably motivates the +255 shift applied before taking logs below.
summary(data$net_follow_fans)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## -254.000 0.000 0.000 0.958 0.000 5064.000
# Fan-size pairing x cooperation interaction with p_date fixed effects and
# standard errors clustered on author_id. `a * b` expands to a + b + a:b, i.e.
# the same terms as spelling out both main effects next to the product.
model <- feols(
  log(net_follow_fans + 255) ~ factor(fans_piar_cat) * is_cooperative | p_date,
  data = data,
  vcov = ~author_id
)
summary(model)
## OLS estimation, Dep. Var.: log(net_follow_fans + 255)
## Observations: 187,126
## Fixed-effects: p_date: 975
## Standard-errors: Clustered (author_id)
## Estimate Std. Error t value
## factor(fans_piar_cat)Small vs Big 0.003578 0.000537 6.669335
## factor(fans_piar_cat)Big vs Small 0.005307 0.000626 8.482533
## factor(fans_piar_cat)Big vs Big 0.009477 0.001101 8.606747
## is_cooperative -0.000371 0.000088 -4.197884
## factor(fans_piar_cat)Small vs Big:is_cooperative 0.000141 0.000813 0.172847
## factor(fans_piar_cat)Big vs Small:is_cooperative -0.001609 0.000883 -1.823052
## factor(fans_piar_cat)Big vs Big:is_cooperative 0.003085 0.002082 1.481933
## Pr(>|t|)
## factor(fans_piar_cat)Small vs Big 2.5781e-11 ***
## factor(fans_piar_cat)Big vs Small < 2.2e-16 ***
## factor(fans_piar_cat)Big vs Big < 2.2e-16 ***
## is_cooperative 2.6957e-05 ***
## factor(fans_piar_cat)Small vs Big:is_cooperative 8.6277e-01
## factor(fans_piar_cat)Big vs Small:is_cooperative 6.8297e-02 .
## factor(fans_piar_cat)Big vs Big:is_cooperative 1.3836e-01
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## RMSE: 0.040283 Adj. R2: 0.006926
## Within R2: 0.005989
# Plot: refit the interaction model without the p_date fixed effects; this is
# the fit handed to plot_predictions_with_ci() further down.
model <- feols(
  log(net_follow_fans + 255) ~ factor(fans_piar_cat) * is_cooperative,
  data = data,
  vcov = ~author_id
)
summary(model)
## OLS estimation, Dep. Var.: log(net_follow_fans + 255)
## Observations: 187,126
## Standard-errors: Clustered (author_id)
## Estimate Std. Error
## (Intercept) 5.542571 0.000054
## factor(fans_piar_cat)Small vs Big 0.003604 0.000537
## factor(fans_piar_cat)Big vs Small 0.005347 0.000626
## factor(fans_piar_cat)Big vs Big 0.009591 0.001100
## is_cooperative -0.000292 0.000082
## factor(fans_piar_cat)Small vs Big:is_cooperative 0.000137 0.000817
## factor(fans_piar_cat)Big vs Small:is_cooperative -0.001633 0.000883
## factor(fans_piar_cat)Big vs Big:is_cooperative 0.003079 0.002066
## t value Pr(>|t|)
## (Intercept) 102086.907310 < 2.2e-16 ***
## factor(fans_piar_cat)Small vs Big 6.706917 1.9945e-11 ***
## factor(fans_piar_cat)Big vs Small 8.536405 < 2.2e-16 ***
## factor(fans_piar_cat)Big vs Big 8.719109 < 2.2e-16 ***
## is_cooperative -3.562455 3.6751e-04 ***
## factor(fans_piar_cat)Small vs Big:is_cooperative 0.167652 8.6686e-01
## factor(fans_piar_cat)Big vs Small:is_cooperative -1.850558 6.4235e-02 .
## factor(fans_piar_cat)Big vs Big:is_cooperative 1.490215 1.3617e-01
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## RMSE: 0.040404 Adj. R2: 0.006163
# Draw the predictions of the interaction model fitted just above.
plot_predictions_with_ci(model, data)
## `summarise()` has grouped output by 'fans_piar_cat'. You can override using the
## `.groups` argument.
### (3) plot fans
# Plot: fan-size pairing as the only regressor, clustered on author_id.
model <- feols(
  log(net_follow_fans + 255) ~ factor(fans_piar_cat),
  data = data,
  vcov = ~author_id
)
summary(model)
## OLS estimation, Dep. Var.: log(net_follow_fans + 255)
## Observations: 187,126
## Standard-errors: Clustered (author_id)
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 5.542472 0.000042 133134.04211 < 2.2e-16
## factor(fans_piar_cat)Small vs Big 0.003643 0.000409 8.90530 < 2.2e-16
## factor(fans_piar_cat)Big vs Small 0.004720 0.000459 10.28949 < 2.2e-16
## factor(fans_piar_cat)Big vs Big 0.010859 0.000980 11.07953 < 2.2e-16
##
## (Intercept) ***
## factor(fans_piar_cat)Small vs Big ***
## factor(fans_piar_cat)Big vs Small ***
## factor(fans_piar_cat)Big vs Big ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## RMSE: 0.040407 Adj. R2: 0.006048
# Draw the predictions of the fan-size-only model fitted just above.
plot_predictions_fans(model, data)
# Plot: cooperation indicator as the only regressor, clustered on author_id.
model <- feols(
  log(net_follow_fans + 255) ~ is_cooperative,
  data = data,
  vcov = ~author_id
)
summary(model)
## OLS estimation, Dep. Var.: log(net_follow_fans + 255)
## Observations: 187,126
## Standard-errors: Clustered (author_id)
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 5.543776 0.000104 53436.9340 < 2.2e-16 ***
## is_cooperative 0.000244 0.000217 1.1219 0.26191
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## RMSE: 0.04053 Adj. R2: 2.859e-6
# Draw the predictions of the cooperation-only model fitted just above.
plot_predictions_cate(model, data)
# Formula handed to plot_continue_cutoff(). fans_piar_cat_temp is not created
# anywhere in this part of the script; presumably the helper builds it for each
# cutoff it tries (TODO confirm against the helper's definition).
model_formula <- log(net_follow_fans + 255) ~ factor(fans_piar_cat_temp)
# Run the cutoff plot for this outcome.
plot_continue_cutoff(data, model_formula)
library(ggplot2)
library(dplyr)
library(RColorBrewer)
# Heatmap of the mean of log(net_follow_fans + 255) for every pairing of own
# vs. opponent fan-size bin, labelled with each pairing's share of all rows.
#
# Step 1-2: Bin both fan counts into ordered factors. cut() builds right-closed
# intervals, so the boundaries are the same as in the former case_when() rules
# (<= 1000, <= 10000, ...), but a count of 0 now lands in "0-1k" instead of
# falling through to ">1M", and a missing count stays NA instead of being
# labelled ">1M".
fan_breaks <- c(-Inf, 1000, 10000, 100000, 1000000, Inf)
fan_labels <- c("0-1k", "1k-10k", "10k-100k", "100k-1M", ">1M")
data <- data %>%
  mutate(
    fans_new_range = cut(before_fans_count, breaks = fan_breaks, labels = fan_labels),
    other_fans_new_range = cut(other_before_fans_count, breaks = fan_breaks, labels = fan_labels)
  )

# Step 3: Per-cell mean and 95% confidence interval.
# The shift is 255, the same one used by every regression on net_follow_fans in
# this script (the heatmap previously used 605). The spread must come from the
# row-level values: inside summarize() the name Y already refers to the
# one-number group mean, so sd(Y) is always NA. The t quantile uses n - 1
# degrees of freedom, and cells with fewer than two usable rows get an NA
# interval.
agg_data <- data %>%
  mutate(heat_value = log(net_follow_fans + 255)) %>%
  group_by(fans_new_range, other_fans_new_range) %>%
  summarize(
    count = n(), # number of rows in this pairing
    n_valid = sum(!is.na(heat_value)),
    Y = if (n_valid > 0) mean(heat_value, na.rm = TRUE) else NA_real_,
    ci_half = if (n_valid > 1) {
      qt(0.975, df = n_valid - 1) * sd(heat_value, na.rm = TRUE) / sqrt(n_valid)
    } else {
      NA_real_
    },
    ci_lower = Y - ci_half,
    ci_upper = Y + ci_half,
    .groups = "drop"
  ) %>%
  mutate(percentage = count / sum(count) * 100)

# Step 4: Heatmap with percentage labels.
# No geom_errorbar layer: the y axis is the discrete opponent bin, so ymin/ymax
# given in outcome units would be read as axis positions rather than as an
# interval around the cell value. The interval stays available in agg_data
# (ci_lower / ci_upper).
ggplot(agg_data, aes(x = fans_new_range, y = other_fans_new_range, fill = Y)) +
  geom_tile() +
  geom_text(aes(label = paste0(round(percentage, 2), "%")), color = "black", size = 4) +
  scale_fill_gradientn(
    colors = c("yellow", "orange", "darkorange", "red", "darkred"),
    values = scales::rescale(c(0, 0.05, 0.1, 0.15, 0.2)),
    na.value = "white"
  ) +
  labs(
    title = "net_follow_fans by Fan Ranges",
    x = "Fans Range",
    y = "Other Fans Range",
    fill = "Mean log(net_follow_fans + 255)"
  ) +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))
# Fan-size pairing x cooperation interaction on the log of
# already_follow_other_fans_count + 1, with p_date fixed effects and standard
# errors clustered on author_id. `a * b` expands to a + b + a:b.
model <- feols(
  log(already_follow_other_fans_count + 1) ~
    factor(fans_piar_cat) * is_cooperative | p_date,
  data = data,
  vcov = ~author_id
)
summary(model)
## OLS estimation, Dep. Var.: log(already_follow_other_fans_count + 1)
## Observations: 187,126
## Fixed-effects: p_date: 975
## Standard-errors: Clustered (author_id)
## Estimate Std. Error t value
## factor(fans_piar_cat)Small vs Big 0.008643 0.001337 6.465395
## factor(fans_piar_cat)Big vs Small 0.004347 0.000938 4.634133
## factor(fans_piar_cat)Big vs Big 0.030643 0.002152 14.239919
## is_cooperative 0.000387 0.000282 1.369041
## factor(fans_piar_cat)Small vs Big:is_cooperative 0.007826 0.002452 3.192134
## factor(fans_piar_cat)Big vs Small:is_cooperative 0.001300 0.001701 0.764027
## factor(fans_piar_cat)Big vs Big:is_cooperative 0.018595 0.003871 4.803729
## Pr(>|t|)
## factor(fans_piar_cat)Small vs Big 1.0133e-10 ***
## factor(fans_piar_cat)Big vs Small 3.5872e-06 ***
## factor(fans_piar_cat)Big vs Big < 2.2e-16 ***
## is_cooperative 1.7099e-01
## factor(fans_piar_cat)Small vs Big:is_cooperative 1.4125e-03 **
## factor(fans_piar_cat)Big vs Small:is_cooperative 4.4485e-01
## factor(fans_piar_cat)Big vs Big:is_cooperative 1.5588e-06 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## RMSE: 0.085446 Adj. R2: 0.017635
## Within R2: 0.016367
# Plot: refit the interaction model without the p_date fixed effects; this is
# the fit handed to plot_predictions_with_ci() further down.
model <- feols(
  log(already_follow_other_fans_count + 1) ~
    factor(fans_piar_cat) * is_cooperative,
  data = data,
  vcov = ~author_id
)
summary(model)
## OLS estimation, Dep. Var.: log(already_follow_other_fans_count + 1)
## Observations: 187,126
## Standard-errors: Clustered (author_id)
## Estimate Std. Error t value
## (Intercept) 0.002803 0.000149 18.826269
## factor(fans_piar_cat)Small vs Big 0.008981 0.001339 6.706339
## factor(fans_piar_cat)Big vs Small 0.004662 0.000939 4.965460
## factor(fans_piar_cat)Big vs Big 0.031277 0.002148 14.558698
## is_cooperative 0.000647 0.000276 2.343900
## factor(fans_piar_cat)Small vs Big:is_cooperative 0.007509 0.002457 3.055939
## factor(fans_piar_cat)Big vs Small:is_cooperative 0.001111 0.001696 0.655184
## factor(fans_piar_cat)Big vs Big:is_cooperative 0.018284 0.003868 4.726862
## Pr(>|t|)
## (Intercept) < 2.2e-16 ***
## factor(fans_piar_cat)Small vs Big 2.0024e-11 ***
## factor(fans_piar_cat)Big vs Small 6.8610e-07 ***
## factor(fans_piar_cat)Big vs Big < 2.2e-16 ***
## is_cooperative 1.9085e-02 *
## factor(fans_piar_cat)Small vs Big:is_cooperative 2.2439e-03 **
## factor(fans_piar_cat)Big vs Small:is_cooperative 5.1235e-01
## factor(fans_piar_cat)Big vs Big:is_cooperative 2.2821e-06 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## RMSE: 0.085698 Adj. R2: 0.016966
# Draw the predictions of the interaction model fitted just above.
plot_predictions_with_ci(model, data)
## `summarise()` has grouped output by 'fans_piar_cat'. You can override using the
## `.groups` argument.
### (3) plot fans
# Plot: fan-size pairing as the only regressor, clustered on author_id.
model <- feols(
  log(already_follow_other_fans_count + 1) ~ factor(fans_piar_cat),
  data = data,
  vcov = ~author_id
)
summary(model)
## OLS estimation, Dep. Var.: log(already_follow_other_fans_count + 1)
## Observations: 187,126
## Standard-errors: Clustered (author_id)
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.003020 0.000126 23.97696 < 2.2e-16 ***
## factor(fans_piar_cat)Small vs Big 0.011865 0.001147 10.34883 < 2.2e-16 ***
## factor(fans_piar_cat)Big vs Small 0.005107 0.000800 6.38546 1.7136e-10 ***
## factor(fans_piar_cat)Big vs Big 0.039004 0.001861 20.96187 < 2.2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## RMSE: 0.085747 Adj. R2: 0.015874
# Draw the predictions of the fan-size-only model fitted just above.
plot_predictions_fans(model, data)
# Plot: cooperation indicator as the only regressor, clustered on author_id.
model <- feols(
  log(already_follow_other_fans_count + 1) ~ is_cooperative,
  data = data,
  vcov = ~author_id
)
summary(model)
## OLS estimation, Dep. Var.: log(already_follow_other_fans_count + 1)
## Observations: 187,126
## Standard-errors: Clustered (author_id)
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.005881 0.000220 26.67917 < 2.2e-16 ***
## is_cooperative 0.003957 0.000462 8.55892 < 2.2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## RMSE: 0.086415 Adj. R2: 4.706e-4
# Draw the predictions of the cooperation-only model fitted just above.
plot_predictions_cate(model, data)
# Formula handed to plot_continue_cutoff(). fans_piar_cat_temp is not created
# anywhere in this part of the script; presumably the helper builds it for each
# cutoff it tries (TODO confirm against the helper's definition).
model_formula <- log(already_follow_other_fans_count + 1) ~
  factor(fans_piar_cat_temp)
# Run the cutoff plot for this outcome.
plot_continue_cutoff(data, model_formula)
library(ggplot2)
library(dplyr)
library(RColorBrewer)
# Heatmap of the mean of log(already_follow_other_fans_count + 1) for every
# pairing of own vs. opponent fan-size bin, labelled with each pairing's share
# of all rows.
#
# Step 1-2: Bin both fan counts into ordered factors. cut() builds right-closed
# intervals, so the boundaries are the same as in the former case_when() rules
# (<= 1000, <= 10000, ...), but a count of 0 now lands in "0-1k" instead of
# falling through to ">1M", and a missing count stays NA instead of being
# labelled ">1M".
fan_breaks <- c(-Inf, 1000, 10000, 100000, 1000000, Inf)
fan_labels <- c("0-1k", "1k-10k", "10k-100k", "100k-1M", ">1M")
data <- data %>%
  mutate(
    fans_new_range = cut(before_fans_count, breaks = fan_breaks, labels = fan_labels),
    other_fans_new_range = cut(other_before_fans_count, breaks = fan_breaks, labels = fan_labels)
  )

# Step 3: Per-cell mean and 95% confidence interval.
# The spread must come from the row-level values: inside summarize() the name
# Y already refers to the one-number group mean, so sd(Y) is always NA. The
# t quantile uses n - 1 degrees of freedom, and cells with fewer than two
# usable rows get an NA interval.
agg_data <- data %>%
  mutate(heat_value = log(already_follow_other_fans_count + 1)) %>%
  group_by(fans_new_range, other_fans_new_range) %>%
  summarize(
    count = n(), # number of rows in this pairing
    n_valid = sum(!is.na(heat_value)),
    Y = if (n_valid > 0) mean(heat_value, na.rm = TRUE) else NA_real_,
    ci_half = if (n_valid > 1) {
      qt(0.975, df = n_valid - 1) * sd(heat_value, na.rm = TRUE) / sqrt(n_valid)
    } else {
      NA_real_
    },
    ci_lower = Y - ci_half,
    ci_upper = Y + ci_half,
    .groups = "drop"
  ) %>%
  mutate(percentage = count / sum(count) * 100)

# Step 4: Heatmap with percentage labels.
# No geom_errorbar layer: the y axis is the discrete opponent bin, so ymin/ymax
# given in outcome units would be read as axis positions rather than as an
# interval around the cell value. The interval stays available in agg_data
# (ci_lower / ci_upper).
ggplot(agg_data, aes(x = fans_new_range, y = other_fans_new_range, fill = Y)) +
  geom_tile() +
  geom_text(aes(label = paste0(round(percentage, 2), "%")), color = "black", size = 4) +
  scale_fill_gradientn(
    colors = c("yellow", "orange", "darkorange", "red", "darkred"),
    values = scales::rescale(c(0, 0.05, 0.1, 0.15, 0.2)),
    na.value = "white"
  ) +
  labs(
    title = "already_follow_other_fans_count by Fan Ranges",
    x = "Fans Range",
    y = "Other Fans Range",
    fill = "Mean log(already_follow_other_fans_count + 1)"
  ) +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))
# Fan-size pairing x cooperation interaction on the log of
# other_already_follow_other_fans_count + 1, with p_date fixed effects and
# standard errors clustered on author_id. `a * b` expands to a + b + a:b.
model <- feols(
  log(other_already_follow_other_fans_count + 1) ~
    factor(fans_piar_cat) * is_cooperative | p_date,
  data = data,
  vcov = ~author_id
)
summary(model)
## OLS estimation, Dep. Var.: log(other_already_follow_other_fans_count + 1)
## Observations: 187,126
## Fixed-effects: p_date: 975
## Standard-errors: Clustered (author_id)
## Estimate Std. Error t value
## factor(fans_piar_cat)Small vs Big 0.005874 0.000985 5.961712
## factor(fans_piar_cat)Big vs Small 0.010257 0.001370 7.487383
## factor(fans_piar_cat)Big vs Big 0.033760 0.002334 14.462731
## is_cooperative 0.001027 0.000270 3.805236
## factor(fans_piar_cat)Small vs Big:is_cooperative -0.000021 0.001726 -0.012270
## factor(fans_piar_cat)Big vs Small:is_cooperative 0.001833 0.002306 0.795209
## factor(fans_piar_cat)Big vs Big:is_cooperative 0.016188 0.003944 4.104578
## Pr(>|t|)
## factor(fans_piar_cat)Small vs Big 2.5014e-09 ***
## factor(fans_piar_cat)Big vs Small 7.0625e-14 ***
## factor(fans_piar_cat)Big vs Big < 2.2e-16 ***
## is_cooperative 1.4172e-04 ***
## factor(fans_piar_cat)Small vs Big:is_cooperative 9.9021e-01
## factor(fans_piar_cat)Big vs Small:is_cooperative 4.2649e-01
## factor(fans_piar_cat)Big vs Big:is_cooperative 4.0526e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## RMSE: 0.085629 Adj. R2: 0.019143
## Within R2: 0.017624
# Plot: refit the interaction model without the p_date fixed effects; this is
# the fit handed to plot_predictions_with_ci() further down.
model <- feols(
  log(other_already_follow_other_fans_count + 1) ~
    factor(fans_piar_cat) * is_cooperative,
  data = data,
  vcov = ~author_id
)
summary(model)
## OLS estimation, Dep. Var.: log(other_already_follow_other_fans_count + 1)
## Observations: 187,126
## Standard-errors: Clustered (author_id)
## Estimate Std. Error t value
## (Intercept) 0.002323 0.000129 17.963406
## factor(fans_piar_cat)Small vs Big 0.005917 0.000978 6.051027
## factor(fans_piar_cat)Big vs Small 0.010492 0.001377 7.619616
## factor(fans_piar_cat)Big vs Big 0.034319 0.002341 14.660571
## is_cooperative 0.001340 0.000264 5.065940
## factor(fans_piar_cat)Small vs Big:is_cooperative -0.000100 0.001719 -0.057932
## factor(fans_piar_cat)Big vs Small:is_cooperative 0.001678 0.002301 0.728960
## factor(fans_piar_cat)Big vs Big:is_cooperative 0.015825 0.003945 4.011588
## Pr(>|t|)
## (Intercept) < 2.2e-16 ***
## factor(fans_piar_cat)Small vs Big 1.4425e-09 ***
## factor(fans_piar_cat)Big vs Small 2.5585e-14 ***
## factor(fans_piar_cat)Big vs Big < 2.2e-16 ***
## is_cooperative 4.0685e-07 ***
## factor(fans_piar_cat)Small vs Big:is_cooperative 9.5380e-01
## factor(fans_piar_cat)Big vs Small:is_cooperative 4.6603e-01
## factor(fans_piar_cat)Big vs Big:is_cooperative 6.0340e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## RMSE: 0.085894 Adj. R2: 0.018192
# Draw the predictions of the interaction model fitted just above.
plot_predictions_with_ci(model, data)
## `summarise()` has grouped output by 'fans_piar_cat'. You can override using the
## `.groups` argument.
### (3) plot fans
# Plot: fan-size pairing as the only regressor, clustered on author_id.
model <- feols(
  log(other_already_follow_other_fans_count + 1) ~ factor(fans_piar_cat),
  data = data,
  vcov = ~author_id
)
summary(model)
## OLS estimation, Dep. Var.: log(other_already_follow_other_fans_count + 1)
## Observations: 187,126
## Standard-errors: Clustered (author_id)
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.002774 0.000117 23.80911 < 2.2e-16 ***
## factor(fans_piar_cat)Small vs Big 0.005938 0.000819 7.24866 4.2284e-13 ***
## factor(fans_piar_cat)Big vs Small 0.011177 0.001109 10.08177 < 2.2e-16 ***
## factor(fans_piar_cat)Big vs Big 0.041071 0.001938 21.18932 < 2.2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## RMSE: 0.085932 Adj. R2: 0.017354
# Draw the predictions of the fan-size-only model fitted just above.
plot_predictions_fans(model, data)
# Plot: cooperation indicator as the only regressor, clustered on author_id.
model <- feols(
  log(other_already_follow_other_fans_count + 1) ~ is_cooperative,
  data = data,
  vcov = ~author_id
)
summary(model)
## OLS estimation, Dep. Var.: log(other_already_follow_other_fans_count + 1)
## Observations: 187,126
## Standard-errors: Clustered (author_id)
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.005777 0.000227 25.49223 < 2.2e-16 ***
## is_cooperative 0.004050 0.000457 8.86859 < 2.2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## RMSE: 0.086667 Adj. R2: 4.901e-4
# Draw the predictions of the cooperation-only model fitted just above.
plot_predictions_cate(model, data)
### (5) plot continue cutoff
# Formula handed to plot_continue_cutoff(). fans_piar_cat_temp is not created
# anywhere in this part of the script; presumably the helper builds it for each
# cutoff it tries (TODO confirm against the helper's definition).
model_formula <- log(other_already_follow_other_fans_count + 1) ~
  factor(fans_piar_cat_temp)
# Run the cutoff plot for this outcome.
plot_continue_cutoff(data, model_formula)
### (6) plot_fan_pair_heatmap
library(ggplot2)
library(dplyr)
library(RColorBrewer)
# Heatmap of the mean of log(other_already_follow_other_fans_count + 1) for
# every pairing of own vs. opponent fan-size bin, labelled with each pairing's
# share of all rows.
#
# Step 1-2: Bin both fan counts into ordered factors. cut() builds right-closed
# intervals, so the boundaries are the same as in the former case_when() rules
# (<= 1000, <= 10000, ...), but a count of 0 now lands in "0-1k" instead of
# falling through to ">1M", and a missing count stays NA instead of being
# labelled ">1M".
fan_breaks <- c(-Inf, 1000, 10000, 100000, 1000000, Inf)
fan_labels <- c("0-1k", "1k-10k", "10k-100k", "100k-1M", ">1M")
data <- data %>%
  mutate(
    fans_new_range = cut(before_fans_count, breaks = fan_breaks, labels = fan_labels),
    other_fans_new_range = cut(other_before_fans_count, breaks = fan_breaks, labels = fan_labels)
  )

# Step 3: Per-cell mean and 95% confidence interval.
# The spread must come from the row-level values: inside summarize() the name
# Y already refers to the one-number group mean, so sd(Y) is always NA. The
# t quantile uses n - 1 degrees of freedom, and cells with fewer than two
# usable rows get an NA interval.
agg_data <- data %>%
  mutate(heat_value = log(other_already_follow_other_fans_count + 1)) %>%
  group_by(fans_new_range, other_fans_new_range) %>%
  summarize(
    count = n(), # number of rows in this pairing
    n_valid = sum(!is.na(heat_value)),
    Y = if (n_valid > 0) mean(heat_value, na.rm = TRUE) else NA_real_,
    ci_half = if (n_valid > 1) {
      qt(0.975, df = n_valid - 1) * sd(heat_value, na.rm = TRUE) / sqrt(n_valid)
    } else {
      NA_real_
    },
    ci_lower = Y - ci_half,
    ci_upper = Y + ci_half,
    .groups = "drop"
  ) %>%
  mutate(percentage = count / sum(count) * 100)

# Step 4: Heatmap with percentage labels.
# No geom_errorbar layer: the y axis is the discrete opponent bin, so ymin/ymax
# given in outcome units would be read as axis positions rather than as an
# interval around the cell value. The interval stays available in agg_data
# (ci_lower / ci_upper).
ggplot(agg_data, aes(x = fans_new_range, y = other_fans_new_range, fill = Y)) +
  geom_tile() +
  geom_text(aes(label = paste0(round(percentage, 2), "%")), color = "black", size = 4) +
  scale_fill_gradientn(
    colors = c("yellow", "orange", "darkorange", "red", "darkred"),
    values = scales::rescale(c(0, 0.05, 0.1, 0.15, 0.2)),
    na.value = "white"
  ) +
  labs(
    title = "other_already_follow_other_fans_count by Fan Ranges",
    x = "Fans Range",
    y = "Other Fans Range",
    fill = "Mean log(other_already_follow_other_fans_count + 1)"
  ) +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))
# Range check on the net attracted-fans measure. The recorded minimum is -125,
# which presumably motivates the +126 shift applied before taking logs below.
summary(data$net_attract_fans)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## -1.25e+02 0.00e+00 0.00e+00 -1.27e-03 0.00e+00 8.60e+01
min(data$net_attract_fans)
## [1] -125
# Fan-size pairing x cooperation interaction with p_date fixed effects and
# standard errors clustered on author_id. `a * b` expands to a + b + a:b, i.e.
# the same terms as spelling out both main effects next to the product.
model <- feols(
  log(net_attract_fans + 126) ~ factor(fans_piar_cat) * is_cooperative | p_date,
  data = data,
  vcov = ~author_id
)
summary(model)
## OLS estimation, Dep. Var.: log(net_attract_fans + 126)
## Observations: 187,126
## Fixed-effects: p_date: 975
## Standard-errors: Clustered (author_id)
## Estimate Std. Error
## factor(fans_piar_cat)Small vs Big 0.00007776 0.000037
## factor(fans_piar_cat)Big vs Small -0.00009881 0.000042
## factor(fans_piar_cat)Big vs Big -0.00092973 0.000577
## is_cooperative 0.00000257 0.000015
## factor(fans_piar_cat)Small vs Big:is_cooperative 0.00011183 0.000068
## factor(fans_piar_cat)Big vs Small:is_cooperative 0.00002522 0.000053
## factor(fans_piar_cat)Big vs Big:is_cooperative 0.00089796 0.000595
## t value Pr(>|t|)
## factor(fans_piar_cat)Small vs Big 2.093658 0.036292 *
## factor(fans_piar_cat)Big vs Small -2.377336 0.017439 *
## factor(fans_piar_cat)Big vs Big -1.609994 0.107401
## is_cooperative 0.166904 0.867446
## factor(fans_piar_cat)Small vs Big:is_cooperative 1.643641 0.100252
## factor(fans_piar_cat)Big vs Small:is_cooperative 0.474084 0.635441
## factor(fans_piar_cat)Big vs Big:is_cooperative 1.508435 0.131445
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## RMSE: 0.012331 Adj. R2: 2.593e-4
## Within R2: 2.7e-4
# Plot: refit the interaction model without the p_date fixed effects; this is
# the fit handed to plot_predictions_with_ci() further down.
model <- feols(
  log(net_attract_fans + 126) ~ factor(fans_piar_cat) * is_cooperative,
  data = data,
  vcov = ~author_id
)
summary(model)
## OLS estimation, Dep. Var.: log(net_attract_fans + 126)
## Observations: 187,126
## Standard-errors: Clustered (author_id)
## Estimate Std. Error
## (Intercept) 4.83628931 0.00000260
## factor(fans_piar_cat)Small vs Big 0.00007636 0.00003528
## factor(fans_piar_cat)Big vs Small -0.00011840 0.00003481
## factor(fans_piar_cat)Big vs Big -0.00092383 0.00057468
## is_cooperative -0.00000891 0.00000478
## factor(fans_piar_cat)Small vs Big:is_cooperative 0.00006706 0.00005427
## factor(fans_piar_cat)Big vs Small:is_cooperative 0.00003565 0.00004492
## factor(fans_piar_cat)Big vs Big:is_cooperative 0.00089459 0.00059835
## t value Pr(>|t|)
## (Intercept) 1.858124e+06 < 2.2e-16 ***
## factor(fans_piar_cat)Small vs Big 2.164517e+00 0.03042622 *
## factor(fans_piar_cat)Big vs Small -3.401091e+00 0.00067134 ***
## factor(fans_piar_cat)Big vs Big -1.607573e+00 0.10793065
## is_cooperative -1.863118e+00 0.06244751 .
## factor(fans_piar_cat)Small vs Big:is_cooperative 1.235646e+00 0.21659207
## factor(fans_piar_cat)Big vs Small:is_cooperative 7.936100e-01 0.42742342
## factor(fans_piar_cat)Big vs Big:is_cooperative 1.495095e+00 0.13489172
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## RMSE: 0.012363 Adj. R2: 2.263e-4
# Draw the predictions of the interaction model fitted just above.
plot_predictions_with_ci(model, data)
## `summarise()` has grouped output by 'fans_piar_cat'. You can override using the
## `.groups` argument.
# Plot: fan-size pairing as the only regressor, clustered on author_id.
model <- feols(
  log(net_attract_fans + 126) ~ factor(fans_piar_cat),
  data = data,
  vcov = ~author_id
)
summary(model)
## OLS estimation, Dep. Var.: log(net_attract_fans + 126)
## Observations: 187,126
## Standard-errors: Clustered (author_id)
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 4.836286 0.00000219 2204159.42803 < 2.2e-16
## factor(fans_piar_cat)Small vs Big 0.000101 0.00002677 3.79109 1.5004e-04
## factor(fans_piar_cat)Big vs Small -0.000105 0.00002422 -4.34848 1.3717e-05
## factor(fans_piar_cat)Big vs Big -0.000549 0.00034081 -1.61142 1.0709e-01
##
## (Intercept) ***
## factor(fans_piar_cat)Small vs Big ***
## factor(fans_piar_cat)Big vs Small ***
## factor(fans_piar_cat)Big vs Big
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## RMSE: 0.012364 Adj. R2: 1.429e-4
# Draw the predictions of the fan-size-only model fitted just above.
plot_predictions_fans(model, data)
# Plot: cooperation indicator as the only regressor, clustered on author_id.
model <- feols(
  log(net_attract_fans + 126) ~ is_cooperative,
  data = data,
  vcov = ~author_id
)
summary(model)
## OLS estimation, Dep. Var.: log(net_attract_fans + 126)
## Observations: 187,126
## Standard-errors: Clustered (author_id)
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 4.836218 4.3e-05 112793.7598 < 2.2e-16 ***
## is_cooperative 0.000063 4.6e-05 1.3645 0.17241
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## RMSE: 0.012365 Adj. R2: 5.674e-7
# Draw the predictions of the cooperation-only model fitted just above.
plot_predictions_cate(model, data)
### (5) plot continue cutoff
# Formula handed to plot_continue_cutoff(). fans_piar_cat_temp is not created
# anywhere in this part of the script; presumably the helper builds it for each
# cutoff it tries (TODO confirm against the helper's definition).
model_formula <- log(net_attract_fans + 126) ~ factor(fans_piar_cat_temp)
# Run the cutoff plot for this outcome.
plot_continue_cutoff(data, model_formula)
library(ggplot2)
library(dplyr)
library(RColorBrewer)
# Heatmap of the mean of log(net_attract_fans + 126) for every pairing of own
# vs. opponent fan-size bin, labelled with each pairing's share of all rows.
#
# Step 1-2: Bin both fan counts into ordered factors. cut() builds right-closed
# intervals, so the boundaries are the same as in the former case_when() rules
# (<= 1000, <= 10000, ...), but a count of 0 now lands in "0-1k" instead of
# falling through to ">1M", and a missing count stays NA instead of being
# labelled ">1M".
fan_breaks <- c(-Inf, 1000, 10000, 100000, 1000000, Inf)
fan_labels <- c("0-1k", "1k-10k", "10k-100k", "100k-1M", ">1M")
data <- data %>%
  mutate(
    fans_new_range = cut(before_fans_count, breaks = fan_breaks, labels = fan_labels),
    other_fans_new_range = cut(other_before_fans_count, breaks = fan_breaks, labels = fan_labels)
  )

# Step 3: Per-cell mean and 95% confidence interval.
# The spread must come from the row-level values: inside summarize() the name
# Y already refers to the one-number group mean, so sd(Y) is always NA. The
# t quantile uses n - 1 degrees of freedom, and cells with fewer than two
# usable rows get an NA interval.
agg_data <- data %>%
  mutate(heat_value = log(net_attract_fans + 126)) %>%
  group_by(fans_new_range, other_fans_new_range) %>%
  summarize(
    count = n(), # number of rows in this pairing
    n_valid = sum(!is.na(heat_value)),
    Y = if (n_valid > 0) mean(heat_value, na.rm = TRUE) else NA_real_,
    ci_half = if (n_valid > 1) {
      qt(0.975, df = n_valid - 1) * sd(heat_value, na.rm = TRUE) / sqrt(n_valid)
    } else {
      NA_real_
    },
    ci_lower = Y - ci_half,
    ci_upper = Y + ci_half,
    .groups = "drop"
  ) %>%
  mutate(percentage = count / sum(count) * 100)

# Step 4: Heatmap with percentage labels.
# No geom_errorbar layer: the y axis is the discrete opponent bin, so ymin/ymax
# given in outcome units would be read as axis positions rather than as an
# interval around the cell value. The interval stays available in agg_data
# (ci_lower / ci_upper).
ggplot(agg_data, aes(x = fans_new_range, y = other_fans_new_range, fill = Y)) +
  geom_tile() +
  geom_text(aes(label = paste0(round(percentage, 2), "%")), color = "black", size = 4) +
  scale_fill_gradientn(
    colors = c("yellow", "orange", "darkorange", "red", "darkred"),
    values = scales::rescale(c(0, 0.05, 0.1, 0.15, 0.2)),
    na.value = "white"
  ) +
  labs(
    title = "net_attract_fans by Fan Ranges",
    x = "Fans Range",
    y = "Other Fans Range",
    fill = "Mean log(net_attract_fans + 126)"
  ) +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))