# NOTE(review): setwd() with an absolute, machine-specific path makes this
# script non-portable. It is kept because the relative read.csv() path below
# is resolved against it.
setwd("C:/Users/ASUS/Desktop/快手")
# Data: all random PK pairs from 2022-01-01 to 2024-09-01.
data <- read.csv("Within_Random_Pk_final_clean_1.csv")
library(lubridate)
##
## 载入程序包:'lubridate'
## The following objects are masked from 'package:base':
##
## date, intersect, setdiff, union
# dplyr provides `%>%` and mutate(), which are needed from the next statement
# on. Attaching it here (a no-op if it is already attached) keeps the script
# runnable from a fresh session.
library(dplyr)
# Parse the yyyymmdd key into a Date, then split it into calendar parts that
# are used later as regression dummies.
data <- data %>%
  mutate(
    p_date = as.Date(as.character(p_date), format = "%Y%m%d"),
    year = year(p_date),
    month = month(p_date),
    day = day(p_date)
  )
table(data$year)
##
## 2022 2023 2024
## 70774 62762 53590
### fan-size
# Classify every PK pair by whether each side has at least 10,000 fans. The
# side measured by `before_fans_count` is named first in the label, the side
# measured by `other_before_fans_count` second. Rows with a missing fan count
# match no branch and stay NA.
data <- data %>%
  mutate(
    fans_piar_cat = case_when(
      (before_fans_count >= 10000) & (other_before_fans_count >= 10000) ~ 3,
      (before_fans_count >= 10000) & (other_before_fans_count < 10000) ~ 2,
      (before_fans_count < 10000) & (other_before_fans_count >= 10000) ~ 1,
      (before_fans_count < 10000) & (other_before_fans_count < 10000) ~ 0
    ),
    # `levels` is spelled out in full instead of relying on partial argument
    # matching; code 0 ("Small vs Small") is the reference level.
    # NOTE(review): the first label carries a trailing space. It is kept
    # byte-for-byte because other code may match on it; confirm and clean up.
    fans_piar_cat = factor(
      fans_piar_cat,
      levels = c(0, 1, 2, 3),
      labels = c("Small vs Small ", "Small vs Big", "Big vs Small", "Big vs Big")
    )
  )
### Cooperation
# (1) Split by live-stream type: pairs whose two operation tags are equal are
# coded 1, everything else 0. A missing tag on either side makes the
# comparison NA, which falls through to the 0 branch.
data <- data %>%
  mutate(is_cooperative = case_when(
    (live_operation_tag == other_live_operation_tag) ~ 1,
    TRUE ~ 0
  ))
table(data$fans_piar_cat)
##
## Small vs Small Small vs Big Big vs Small Big vs Big
## 149898 10906 10701 15621
table(data$is_cooperative)
##
## 0 1
## 121925 65201
# table(data$total_cost_amt) # 119056 rows are 0
# (1) Gift amount per fan; the +1 in the denominator avoids dividing by zero.
data[["avg_fan_total_cost_amt"]] <- with(
  data,
  total_cost_amt / (before_fans_count + 1)
)
# (2) Fans gained:  data$follow_author_fans_count
# (3) Fans lost:    data$unfollow_author_fans_count
# (4) Net fan gain = gained - lost.
data[["net_follow_fans"]] <- with(
  data,
  follow_author_fans_count - unfollow_author_fans_count
)
summary(data[["net_follow_fans"]])
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## -254.000 0.000 0.000 0.958 0.000 5064.000
# (5) Fans attracted:                data$already_follow_other_fans_count
# (6) Fans drawn away by the other:  data$other_already_follow_other_fans_count
# (7) Net attracted fans = (5) - (6).
data[["net_attract_fans"]] <- with(
  data,
  already_follow_other_fans_count - other_already_follow_other_fans_count
)
summary(data[["net_attract_fans"]])
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## -1.25e+02 0.00e+00 0.00e+00 -1.27e-03 0.00e+00 8.60e+01
library(ggplot2)
library(dplyr)
#' Plot mean predictions by fan-size pair and same-category flag
#'
#' Predicts from `model_1` on `data`, averages the fitted values within each
#' `fans_piar_cat` x `is_cooperative` cell and draws the cell means with an
#' error bar.
#'
#' @param model_1 A fitted model whose `predict(..., se.fit = TRUE)` result
#'   exposes `fit` and `se.fit` components.
#' @param data Data frame to predict on; must contain `fans_piar_cat`,
#'   `is_cooperative` and every variable used by `model_1`.
#' @return A ggplot object.
plot_predictions_with_ci <- function(model_1, data) {
  # Fitted values and their standard errors, one per row of `data`.
  predictions <- predict(model_1, newdata = data, se.fit = TRUE)
  data$predicted_y <- predictions$fit
  data$se_fit <- predictions$se.fit

  # NOTE(review): the band is the cell mean +/- 1.96 times the *average*
  # per-row standard error. That is not the standard error of the cell mean;
  # confirm this is the interval you want to report.
  summary_data <- data %>%
    group_by(fans_piar_cat, is_cooperative) %>%
    summarise(
      mean_predicted_y = mean(predicted_y),
      ci_lower = mean_predicted_y - 1.96 * mean(se_fit),
      ci_upper = mean_predicted_y + 1.96 * mean(se_fit),
      # Drop the grouping explicitly: returns a plain tibble and silences the
      # "summarise() has grouped output by ..." message.
      .groups = "drop"
    )

  ggplot(
    summary_data,
    aes(
      x = factor(fans_piar_cat),
      y = mean_predicted_y,
      color = factor(is_cooperative)
    )
  ) +
    geom_point(size = 3) +
    geom_errorbar(aes(ymin = ci_lower, ymax = ci_upper), width = 0.2) +
    labs(
      title = "Mean Predicted y Values with Confidence Intervals",
      x = "Pair Categories",
      y = "Mean Predicted y",
      color = "Is Same Category"
    ) +
    theme_minimal()
}
library(ggplot2)
library(dplyr)
#' Plot mean predictions by fan-size pair category
#'
#' Predicts from `model_1` on `data`, averages the fitted values within each
#' `fans_piar_cat` level and draws the level means with an error bar.
#'
#' @param model_1 A fitted model whose `predict(..., se.fit = TRUE)` result
#'   exposes `fit` and `se.fit` components.
#' @param data Data frame to predict on; must contain `fans_piar_cat` and
#'   every variable used by `model_1`.
#' @return A ggplot object.
plot_predictions_fans <- function(model_1, data) {
  # Fitted values and their standard errors, one per row of `data`.
  predictions <- predict(model_1, newdata = data, se.fit = TRUE)
  data$predicted_y <- predictions$fit
  data$se_fit <- predictions$se.fit

  # NOTE(review): the band is the level mean +/- 1.96 times the *average*
  # per-row standard error, not the standard error of the level mean.
  summary_data <- data %>%
    group_by(fans_piar_cat) %>%
    summarise(
      mean_predicted_y = mean(predicted_y),
      ci_lower = mean_predicted_y - 1.96 * mean(se_fit),
      ci_upper = mean_predicted_y + 1.96 * mean(se_fit),
      .groups = "drop"
    )

  # No colour aesthetic is mapped here, so no colour legend title is set
  # (the stray trailing comma in aes() is gone as well).
  ggplot(summary_data, aes(x = factor(fans_piar_cat), y = mean_predicted_y)) +
    geom_point(size = 3) +
    geom_errorbar(aes(ymin = ci_lower, ymax = ci_upper), width = 0.2) +
    labs(
      title = "Mean Predicted y Values with Confidence Intervals",
      x = "fans Pair",
      y = "Mean Predicted y"
    ) +
    theme_minimal()
}
library(ggplot2)
library(dplyr)
#' Plot mean predictions by same-category flag
#'
#' Predicts from `model_1` on `data`, averages the fitted values within each
#' value of `is_cooperative` and draws the group means with an error bar.
#'
#' @param model_1 A fitted model whose `predict(..., se.fit = TRUE)` result
#'   exposes `fit` and `se.fit` components.
#' @param data Data frame to predict on; must contain `is_cooperative` and
#'   every variable used by `model_1`.
#' @return A ggplot object.
plot_predictions_cate <- function(model_1, data) {
  # Fitted values and their standard errors, one per row of `data`.
  predictions <- predict(model_1, newdata = data, se.fit = TRUE)
  data$predicted_y <- predictions$fit
  data$se_fit <- predictions$se.fit

  # NOTE(review): the band is the group mean +/- 1.96 times the *average*
  # per-row standard error, not the standard error of the group mean.
  summary_data <- data %>%
    group_by(is_cooperative) %>%
    summarise(
      mean_predicted_y = mean(predicted_y),
      ci_lower = mean_predicted_y - 1.96 * mean(se_fit),
      ci_upper = mean_predicted_y + 1.96 * mean(se_fit),
      .groups = "drop"
    )

  # No colour aesthetic is mapped here, so no colour legend title is set.
  ggplot(summary_data, aes(x = factor(is_cooperative), y = mean_predicted_y)) +
    geom_point(size = 3) +
    geom_errorbar(aes(ymin = ci_lower, ymax = ci_upper), width = 0.2) +
    labs(
      title = "Mean Predicted y Values with Confidence Intervals",
      x = "Categories",
      y = "Mean Predicted y"
    ) +
    theme_minimal()
}
library(dplyr)
library(ggplot2)
library(scales) # for comma formatting
##
## 载入程序包:'scales'
## The following object is masked from 'package:fixest':
##
## pvalue
library(tidyr)
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ forcats 1.0.0 ✔ stringr 1.5.1
## ✔ purrr 1.0.2 ✔ tibble 3.2.1
## ✔ readr 2.1.5
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ plm::between() masks dplyr::between()
## ✖ readr::col_factor() masks scales::col_factor()
## ✖ purrr::discard() masks scales::discard()
## ✖ dplyr::filter() masks stats::filter()
## ✖ plm::lag() masks dplyr::lag(), stats::lag()
## ✖ plm::lead() masks dplyr::lead()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
#' Trace fan-size pair coefficients across "big streamer" cut-offs
#'
#' For every cut-off on a log-spaced grid this function
#' 1. relabels each row as Small/Big vs Small/Big in a temporary factor
#'    `fans_piar_cat_temp` (reference level "Small vs Small"),
#' 2. records the percentage of rows in each of the four categories,
#' 3. fits `model_formula` with `feols()` using standard errors clustered on
#'    `author_id`, and stores the three category coefficients.
#' The coefficient paths are then printed as a line chart.
#'
#' @param data Data frame with `before_fans_count`,
#'   `other_before_fans_count`, `author_id` and every variable used by
#'   `model_formula`.
#' @param model_formula Model formula. It must contain the term
#'   `factor(fans_piar_cat_temp)`, because coefficients are looked up under
#'   the names that term produces.
#' @param n_cutoffs Number of cut-offs on the grid.
#' @param cutoff_range Length-2 numeric: smallest and largest cut-off
#'   (both must be positive; the grid is evenly spaced in logs).
#' @param show_pct_plot If `TRUE`, also print the chart of category shares
#'   per cut-off before the coefficient chart.
#' @return The coefficient plot, invisibly (it is printed as a side effect).
plot_continue_cutoff <- function(data, model_formula, n_cutoffs = 100,
                                 cutoff_range = c(100, 1e6),
                                 show_pct_plot = FALSE) {
  stopifnot(
    is.numeric(cutoff_range), length(cutoff_range) == 2,
    all(cutoff_range > 0), n_cutoffs >= 1
  )

  # Fixed level order, reference category first. Declaring the levels up
  # front (rather than relevel() on whatever happens to be present) keeps
  # the loop from failing when a category is empty at an extreme cut-off.
  pair_levels <- c("Small vs Small", "Small vs Big", "Big vs Small", "Big vs Big")
  contrast_levels <- pair_levels[-1]
  coef_names <- paste0("factor(fans_piar_cat_temp)", contrast_levels)

  # Step 1: log-stepped grid of cut-offs (defaults: 0.1k to 1000k).
  cutoffs <- exp(seq(log(cutoff_range[1]), log(cutoff_range[2]),
                     length.out = n_cutoffs))

  # One row per cut-off; columns are addressed by name, never by position.
  coefficients_matrix <- matrix(
    NA_real_, nrow = n_cutoffs, ncol = length(contrast_levels),
    dimnames = list(NULL, contrast_levels)
  )
  pct_matrix <- matrix(
    NA_real_, nrow = n_cutoffs, ncol = length(pair_levels),
    dimnames = list(NULL, pair_levels)
  )

  # Step 2: relabel, tabulate and regress at every cut-off.
  for (i in seq_along(cutoffs)) {
    cut_off <- cutoffs[i]

    data <- data %>%
      mutate(fans_piar_cat_temp = factor(
        case_when(
          (before_fans_count < cut_off) & (other_before_fans_count < cut_off) ~ "Small vs Small",
          (before_fans_count < cut_off) & (other_before_fans_count >= cut_off) ~ "Small vs Big",
          (before_fans_count >= cut_off) & (other_before_fans_count < cut_off) ~ "Big vs Small",
          (before_fans_count >= cut_off) & (other_before_fans_count >= cut_off) ~ "Big vs Big"
        ),
        levels = pair_levels
      ))

    # 2a. Share of rows per category. tabulate() counts by factor code, so
    # empty categories yield 0 and NA rows are ignored in the numerator; the
    # denominator is all rows.
    n_by_level <- tabulate(data$fans_piar_cat_temp, nbins = length(pair_levels))
    names(n_by_level) <- pair_levels
    pct_matrix[i, ] <- n_by_level / nrow(data) * 100

    # 2b. Without any "Small vs Small" rows the model would silently switch
    # to another baseline, so leave this cut-off's coefficients as NA.
    if (n_by_level[["Small vs Small"]] == 0) {
      next
    }

    model <- feols(model_formula, data = data, vcov = ~author_id)
    # Lookup by name: a category that is absent at this cut-off has no
    # coefficient and therefore comes back as NA.
    coefficients_matrix[i, ] <- unname(stats::coef(model)[coef_names])
  }

  ## 1. Optional chart: category shares across cut-offs.
  if (show_pct_plot) {
    pct_df <- data.frame(
      Cutoff = cutoffs,
      Big_vs_Big = pct_matrix[, "Big vs Big"],
      Big_vs_Small = pct_matrix[, "Big vs Small"],
      Small_vs_Big = pct_matrix[, "Small vs Big"],
      Small_vs_Small = pct_matrix[, "Small vs Small"]
    )
    pct_df_long <- pct_df %>%
      pivot_longer(
        cols = c("Big_vs_Big", "Big_vs_Small", "Small_vs_Big", "Small_vs_Small"),
        names_to = "Category", values_to = "Percentage"
      )
    pct_plot <- ggplot(pct_df_long, aes(x = Cutoff, y = Percentage, color = Category)) +
      geom_line() +
      scale_x_log10() + # Logarithmic scale for the cut-off
      labs(
        title = "Distribution of fans_piar_cat across Cutoffs",
        x = "Cutoff (log scale)", y = "Percentage",
        color = "Category"
      ) +
      theme_minimal()
    print(pct_plot)
  }

  ## 2. Chart: model coefficients across cut-offs.
  coef_df <- data.frame(
    Cutoff = cutoffs,
    Small_vs_Big = coefficients_matrix[, "Small vs Big"],
    Big_vs_Small = coefficients_matrix[, "Big vs Small"],
    Big_vs_Big = coefficients_matrix[, "Big vs Big"]
  )
  coef_df_long <- coef_df %>%
    pivot_longer(
      cols = c("Small_vs_Big", "Big_vs_Small", "Big_vs_Big"),
      names_to = "Category", values_to = "Coefficient"
    )
  coef_plot <- ggplot(coef_df_long, aes(x = Cutoff, y = Coefficient, color = Category)) +
    geom_line() +
    scale_x_log10() + # Logarithmic scale for the cut-off
    labs(
      title = "Evolution of Coefficients for fans_piar_cat across Cutoffs",
      x = "Cutoff (log scale)", y = "Coefficient",
      color = "Category"
    ) +
    theme_minimal()

  print(coef_plot)
  invisible(coef_plot)
}
# Baseline specification: fan-size pair category, same-category flag and
# their interaction, with p_date fixed effects and standard errors clustered
# on author_id.
model <- feols(
  fml = log(avg_fan_total_cost_amt + 1) ~
    factor(fans_piar_cat) + is_cooperative +
    factor(fans_piar_cat) * is_cooperative | p_date,
  data = data,
  vcov = ~author_id
)
summary(model)
## OLS estimation, Dep. Var.: log(avg_fan_total_cost_amt + 1)
## Observations: 187,126
## Fixed-effects: p_date: 975
## Standard-errors: Clustered (author_id)
## Estimate Std. Error t value
## factor(fans_piar_cat)Small vs Big 0.028320 0.002953 9.58924
## factor(fans_piar_cat)Big vs Small -0.024452 0.001229 -19.89522
## factor(fans_piar_cat)Big vs Big -0.025267 0.001082 -23.34982
## is_cooperative 0.009500 0.001349 7.04310
## factor(fans_piar_cat)Small vs Big:is_cooperative 0.005994 0.005015 1.19526
## factor(fans_piar_cat)Big vs Small:is_cooperative -0.011356 0.001836 -6.18592
## factor(fans_piar_cat)Big vs Big:is_cooperative -0.007419 0.001793 -4.13852
## Pr(>|t|)
## factor(fans_piar_cat)Small vs Big < 2.2e-16 ***
## factor(fans_piar_cat)Big vs Small < 2.2e-16 ***
## factor(fans_piar_cat)Big vs Big < 2.2e-16 ***
## is_cooperative 1.8878e-12 ***
## factor(fans_piar_cat)Small vs Big:is_cooperative 2.3199e-01
## factor(fans_piar_cat)Big vs Small:is_cooperative 6.1895e-10 ***
## factor(fans_piar_cat)Big vs Big:is_cooperative 3.4974e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## RMSE: 0.219571 Adj. R2: 0.005954
## Within R2: 0.003705
# Interaction model with explicit controls: gender, age range, region and
# city tier, plus year / month / day dummies in place of a date fixed effect.
# Standard errors are clustered on author_id.
model <- feols(
  fml = log(avg_fan_total_cost_amt + 1) ~
    factor(fans_piar_cat) + is_cooperative +
    factor(fans_piar_cat) * is_cooperative +
    factor(gender) + factor(age_range) +
    factor(fre_country_region) + factor(fre_city_level) +
    # Disabled `other_*` covariates, kept for reference:
    # + factor(other_gender) + factor(other_age_range) + factor(other_fre_country_region) + factor(other_fre_city_level)
    factor(year) + factor(month) + factor(day),
  data = data,
  vcov = ~author_id
)
## The variable 'factor(age_range)UNKNOWN' has been removed because of collinearity (see $collin.var).
summary(model)
## OLS estimation, Dep. Var.: log(avg_fan_total_cost_amt + 1)
## Observations: 187,126
## Standard-errors: Clustered (author_id)
## Estimate Std. Error
## (Intercept) 0.066203 0.019448
## factor(fans_piar_cat)Small vs Big 0.008367 0.002997
## factor(fans_piar_cat)Big vs Small -0.045004 0.001561
## factor(fans_piar_cat)Big vs Big -0.051660 0.001540
## is_cooperative 0.006563 0.001345
## factor(gender)M -0.039010 0.001180
## factor(gender)U 0.292412 0.133747
## factor(age_range)12-17 0.021377 0.007483
## factor(age_range)18-23 0.053525 0.007513
## factor(age_range)24-30 0.051666 0.007626
## factor(age_range)31-40 0.031801 0.007386
## factor(age_range)41-49 0.013767 0.007355
## factor(age_range)50+ 0.002807 0.007355
## factor(fre_country_region)北方 -0.041156 0.022279
## factor(fre_country_region)南方 -0.041903 0.022288
## factor(fre_city_level)二线城市 0.012897 0.013582
## factor(fre_city_level)三线城市 0.007232 0.013530
## factor(fre_city_level)四线城市 -0.000059 0.013504
## factor(fre_city_level)五线城市 -0.009105 0.013479
## factor(fre_city_level)新一线城市 0.021371 0.013664
## factor(fre_city_level)一线城市 0.017251 0.014061
## factor(year)2023 0.004493 0.001348
## factor(year)2024 0.013177 0.001380
## factor(month)2 -0.000255 0.001949
## factor(month)3 0.000738 0.001997
## factor(month)4 -0.001468 0.002026
## factor(month)5 0.001690 0.002125
## factor(month)6 0.006913 0.002445
## factor(month)7 0.007019 0.002401
## factor(month)8 0.006427 0.002310
## factor(month)9 0.013150 0.002822
## factor(month)10 0.009460 0.002653
## factor(month)11 0.012504 0.002873
## factor(month)12 0.012079 0.002815
## factor(day)2 -0.005636 0.003441
## factor(day)3 -0.003023 0.003757
## factor(day)4 0.000929 0.003923
## factor(day)5 -0.000941 0.003636
## factor(day)6 -0.000674 0.003713
## factor(day)7 0.001550 0.003956
## factor(day)8 0.002622 0.003864
## factor(day)9 -0.001116 0.003682
## factor(day)10 0.000415 0.003644
## factor(day)11 0.003487 0.003897
## factor(day)12 0.002749 0.003815
## factor(day)13 0.000264 0.003703
## factor(day)14 0.000428 0.003796
## factor(day)15 -0.001892 0.003747
## factor(day)16 -0.001897 0.003751
## factor(day)17 0.001724 0.003922
## factor(day)18 0.000130 0.004072
## factor(day)19 0.002061 0.003951
## factor(day)20 0.002545 0.003901
## factor(day)21 0.002128 0.003953
## factor(day)22 0.005388 0.003995
## factor(day)23 0.000344 0.003760
## factor(day)24 0.003004 0.004109
## factor(day)25 0.002873 0.004098
## factor(day)26 0.002366 0.004030
## factor(day)27 0.005108 0.003856
## factor(day)28 0.002550 0.003991
## factor(day)29 0.002064 0.003907
## factor(day)30 0.004922 0.004103
## factor(day)31 0.003176 0.004682
## factor(fans_piar_cat)Small vs Big:is_cooperative 0.001254 0.004969
## factor(fans_piar_cat)Big vs Small:is_cooperative -0.014218 0.001841
## factor(fans_piar_cat)Big vs Big:is_cooperative -0.008283 0.001766
## t value Pr(>|t|)
## (Intercept) 3.404030 6.6416e-04 ***
## factor(fans_piar_cat)Small vs Big 2.791611 5.2453e-03 **
## factor(fans_piar_cat)Big vs Small -28.827757 < 2.2e-16 ***
## factor(fans_piar_cat)Big vs Big -33.555334 < 2.2e-16 ***
## is_cooperative 4.878503 1.0700e-06 ***
## factor(gender)M -33.046585 < 2.2e-16 ***
## factor(gender)U 2.186309 2.8794e-02 *
## factor(age_range)12-17 2.856853 4.2792e-03 **
## factor(age_range)18-23 7.124053 1.0525e-12 ***
## factor(age_range)24-30 6.774568 1.2522e-11 ***
## factor(age_range)31-40 4.305790 1.6649e-05 ***
## factor(age_range)41-49 1.871933 6.1218e-02 .
## factor(age_range)50+ 0.381734 7.0266e-01
## factor(fre_country_region)北方 -1.847313 6.4704e-02 .
## factor(fre_country_region)南方 -1.880081 6.0099e-02 .
## factor(fre_city_level)二线城市 0.949569 3.4233e-01
## factor(fre_city_level)三线城市 0.534496 5.9300e-01
## factor(fre_city_level)四线城市 -0.004347 9.9653e-01
## factor(fre_city_level)五线城市 -0.675481 4.9937e-01
## factor(fre_city_level)新一线城市 1.564045 1.1781e-01
## factor(fre_city_level)一线城市 1.226821 2.1989e-01
## factor(year)2023 3.332984 8.5940e-04 ***
## factor(year)2024 9.546450 < 2.2e-16 ***
## factor(month)2 -0.130948 8.9582e-01
## factor(month)3 0.369642 7.1165e-01
## factor(month)4 -0.724914 4.6851e-01
## factor(month)5 0.795199 4.2650e-01
## factor(month)6 2.827305 4.6948e-03 **
## factor(month)7 2.923447 3.4623e-03 **
## factor(month)8 2.782137 5.4009e-03 **
## factor(month)9 4.660088 3.1633e-06 ***
## factor(month)10 3.565834 3.6281e-04 ***
## factor(month)11 4.352708 1.3455e-05 ***
## factor(month)12 4.291000 1.7798e-05 ***
## factor(day)2 -1.637769 1.0147e-01
## factor(day)3 -0.804820 4.2092e-01
## factor(day)4 0.236791 8.1282e-01
## factor(day)5 -0.258809 7.9578e-01
## factor(day)6 -0.181512 8.5597e-01
## factor(day)7 0.391765 6.9523e-01
## factor(day)8 0.678605 4.9739e-01
## factor(day)9 -0.302976 7.6191e-01
## factor(day)10 0.113892 9.0932e-01
## factor(day)11 0.894806 3.7089e-01
## factor(day)12 0.720729 4.7108e-01
## factor(day)13 0.071368 9.4311e-01
## factor(day)14 0.112696 9.1027e-01
## factor(day)15 -0.505051 6.1352e-01
## factor(day)16 -0.505818 6.1299e-01
## factor(day)17 0.439721 6.6014e-01
## factor(day)18 0.031893 9.7456e-01
## factor(day)19 0.521714 6.0187e-01
## factor(day)20 0.652564 5.1404e-01
## factor(day)21 0.538227 5.9042e-01
## factor(day)22 1.348725 1.7743e-01
## factor(day)23 0.091525 9.2708e-01
## factor(day)24 0.731047 4.6475e-01
## factor(day)25 0.701122 4.8323e-01
## factor(day)26 0.587191 5.5708e-01
## factor(day)27 1.324829 1.8523e-01
## factor(day)28 0.638974 5.2284e-01
## factor(day)29 0.528110 5.9742e-01
## factor(day)30 1.199601 2.3030e-01
## factor(day)31 0.678334 4.9756e-01
## factor(fans_piar_cat)Small vs Big:is_cooperative 0.252446 8.0070e-01
## factor(fans_piar_cat)Big vs Small:is_cooperative -7.724018 1.1338e-14 ***
## factor(fans_piar_cat)Big vs Big:is_cooperative -4.691015 2.7208e-06 ***
## ... 1 variable was removed because of collinearity (factor(age_range)UNKNOWN)
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## RMSE: 0.218157 Adj. R2: 0.023516
# Plotting: fit the interaction model with controls that feeds the
# pair-category x same-category prediction plot. Standard errors are
# clustered on author_id.
model <- feols(
  fml = log(avg_fan_total_cost_amt + 1) ~
    factor(fans_piar_cat) + is_cooperative +
    factor(fans_piar_cat) * is_cooperative +
    factor(gender) + factor(age_range) +
    factor(fre_country_region) + factor(fre_city_level) +
    # Disabled `other_*` covariates, kept for reference:
    # + factor(other_gender) + factor(other_age_range) + factor(other_fre_country_region) + factor(other_fre_city_level)
    factor(year) + factor(month) + factor(day),
  data = data,
  vcov = ~author_id
)
## The variable 'factor(age_range)UNKNOWN' has been removed because of collinearity (see $collin.var).
summary(model)
## OLS estimation, Dep. Var.: log(avg_fan_total_cost_amt + 1)
## Observations: 187,126
## Standard-errors: Clustered (author_id)
## Estimate Std. Error
## (Intercept) 0.066203 0.019448
## factor(fans_piar_cat)Small vs Big 0.008367 0.002997
## factor(fans_piar_cat)Big vs Small -0.045004 0.001561
## factor(fans_piar_cat)Big vs Big -0.051660 0.001540
## is_cooperative 0.006563 0.001345
## factor(gender)M -0.039010 0.001180
## factor(gender)U 0.292412 0.133747
## factor(age_range)12-17 0.021377 0.007483
## factor(age_range)18-23 0.053525 0.007513
## factor(age_range)24-30 0.051666 0.007626
## factor(age_range)31-40 0.031801 0.007386
## factor(age_range)41-49 0.013767 0.007355
## factor(age_range)50+ 0.002807 0.007355
## factor(fre_country_region)北方 -0.041156 0.022279
## factor(fre_country_region)南方 -0.041903 0.022288
## factor(fre_city_level)二线城市 0.012897 0.013582
## factor(fre_city_level)三线城市 0.007232 0.013530
## factor(fre_city_level)四线城市 -0.000059 0.013504
## factor(fre_city_level)五线城市 -0.009105 0.013479
## factor(fre_city_level)新一线城市 0.021371 0.013664
## factor(fre_city_level)一线城市 0.017251 0.014061
## factor(year)2023 0.004493 0.001348
## factor(year)2024 0.013177 0.001380
## factor(month)2 -0.000255 0.001949
## factor(month)3 0.000738 0.001997
## factor(month)4 -0.001468 0.002026
## factor(month)5 0.001690 0.002125
## factor(month)6 0.006913 0.002445
## factor(month)7 0.007019 0.002401
## factor(month)8 0.006427 0.002310
## factor(month)9 0.013150 0.002822
## factor(month)10 0.009460 0.002653
## factor(month)11 0.012504 0.002873
## factor(month)12 0.012079 0.002815
## factor(day)2 -0.005636 0.003441
## factor(day)3 -0.003023 0.003757
## factor(day)4 0.000929 0.003923
## factor(day)5 -0.000941 0.003636
## factor(day)6 -0.000674 0.003713
## factor(day)7 0.001550 0.003956
## factor(day)8 0.002622 0.003864
## factor(day)9 -0.001116 0.003682
## factor(day)10 0.000415 0.003644
## factor(day)11 0.003487 0.003897
## factor(day)12 0.002749 0.003815
## factor(day)13 0.000264 0.003703
## factor(day)14 0.000428 0.003796
## factor(day)15 -0.001892 0.003747
## factor(day)16 -0.001897 0.003751
## factor(day)17 0.001724 0.003922
## factor(day)18 0.000130 0.004072
## factor(day)19 0.002061 0.003951
## factor(day)20 0.002545 0.003901
## factor(day)21 0.002128 0.003953
## factor(day)22 0.005388 0.003995
## factor(day)23 0.000344 0.003760
## factor(day)24 0.003004 0.004109
## factor(day)25 0.002873 0.004098
## factor(day)26 0.002366 0.004030
## factor(day)27 0.005108 0.003856
## factor(day)28 0.002550 0.003991
## factor(day)29 0.002064 0.003907
## factor(day)30 0.004922 0.004103
## factor(day)31 0.003176 0.004682
## factor(fans_piar_cat)Small vs Big:is_cooperative 0.001254 0.004969
## factor(fans_piar_cat)Big vs Small:is_cooperative -0.014218 0.001841
## factor(fans_piar_cat)Big vs Big:is_cooperative -0.008283 0.001766
## t value Pr(>|t|)
## (Intercept) 3.404030 6.6416e-04 ***
## factor(fans_piar_cat)Small vs Big 2.791611 5.2453e-03 **
## factor(fans_piar_cat)Big vs Small -28.827757 < 2.2e-16 ***
## factor(fans_piar_cat)Big vs Big -33.555334 < 2.2e-16 ***
## is_cooperative 4.878503 1.0700e-06 ***
## factor(gender)M -33.046585 < 2.2e-16 ***
## factor(gender)U 2.186309 2.8794e-02 *
## factor(age_range)12-17 2.856853 4.2792e-03 **
## factor(age_range)18-23 7.124053 1.0525e-12 ***
## factor(age_range)24-30 6.774568 1.2522e-11 ***
## factor(age_range)31-40 4.305790 1.6649e-05 ***
## factor(age_range)41-49 1.871933 6.1218e-02 .
## factor(age_range)50+ 0.381734 7.0266e-01
## factor(fre_country_region)北方 -1.847313 6.4704e-02 .
## factor(fre_country_region)南方 -1.880081 6.0099e-02 .
## factor(fre_city_level)二线城市 0.949569 3.4233e-01
## factor(fre_city_level)三线城市 0.534496 5.9300e-01
## factor(fre_city_level)四线城市 -0.004347 9.9653e-01
## factor(fre_city_level)五线城市 -0.675481 4.9937e-01
## factor(fre_city_level)新一线城市 1.564045 1.1781e-01
## factor(fre_city_level)一线城市 1.226821 2.1989e-01
## factor(year)2023 3.332984 8.5940e-04 ***
## factor(year)2024 9.546450 < 2.2e-16 ***
## factor(month)2 -0.130948 8.9582e-01
## factor(month)3 0.369642 7.1165e-01
## factor(month)4 -0.724914 4.6851e-01
## factor(month)5 0.795199 4.2650e-01
## factor(month)6 2.827305 4.6948e-03 **
## factor(month)7 2.923447 3.4623e-03 **
## factor(month)8 2.782137 5.4009e-03 **
## factor(month)9 4.660088 3.1633e-06 ***
## factor(month)10 3.565834 3.6281e-04 ***
## factor(month)11 4.352708 1.3455e-05 ***
## factor(month)12 4.291000 1.7798e-05 ***
## factor(day)2 -1.637769 1.0147e-01
## factor(day)3 -0.804820 4.2092e-01
## factor(day)4 0.236791 8.1282e-01
## factor(day)5 -0.258809 7.9578e-01
## factor(day)6 -0.181512 8.5597e-01
## factor(day)7 0.391765 6.9523e-01
## factor(day)8 0.678605 4.9739e-01
## factor(day)9 -0.302976 7.6191e-01
## factor(day)10 0.113892 9.0932e-01
## factor(day)11 0.894806 3.7089e-01
## factor(day)12 0.720729 4.7108e-01
## factor(day)13 0.071368 9.4311e-01
## factor(day)14 0.112696 9.1027e-01
## factor(day)15 -0.505051 6.1352e-01
## factor(day)16 -0.505818 6.1299e-01
## factor(day)17 0.439721 6.6014e-01
## factor(day)18 0.031893 9.7456e-01
## factor(day)19 0.521714 6.0187e-01
## factor(day)20 0.652564 5.1404e-01
## factor(day)21 0.538227 5.9042e-01
## factor(day)22 1.348725 1.7743e-01
## factor(day)23 0.091525 9.2708e-01
## factor(day)24 0.731047 4.6475e-01
## factor(day)25 0.701122 4.8323e-01
## factor(day)26 0.587191 5.5708e-01
## factor(day)27 1.324829 1.8523e-01
## factor(day)28 0.638974 5.2284e-01
## factor(day)29 0.528110 5.9742e-01
## factor(day)30 1.199601 2.3030e-01
## factor(day)31 0.678334 4.9756e-01
## factor(fans_piar_cat)Small vs Big:is_cooperative 0.252446 8.0070e-01
## factor(fans_piar_cat)Big vs Small:is_cooperative -7.724018 1.1338e-14 ***
## factor(fans_piar_cat)Big vs Big:is_cooperative -4.691015 2.7208e-06 ***
## ... 1 variable was removed because of collinearity (factor(age_range)UNKNOWN)
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## RMSE: 0.218157 Adj. R2: 0.023516
# Draw the cell means for the model fitted just above.
plot_predictions_with_ci(model_1 = model, data = data)
## `summarise()` has grouped output by 'fans_piar_cat'. You can override using the
## `.groups` argument.
# Plotting: model with the fan-size pair category only (no same-category
# term), same controls, standard errors clustered on author_id.
model <- feols(
  fml = log(avg_fan_total_cost_amt + 1) ~
    factor(fans_piar_cat) +
    factor(gender) + factor(age_range) +
    factor(fre_country_region) + factor(fre_city_level) +
    # Disabled `other_*` covariates, kept for reference:
    # + factor(other_gender) + factor(other_age_range) + factor(other_fre_country_region) + factor(other_fre_city_level)
    factor(year) + factor(month) + factor(day),
  data = data,
  vcov = ~author_id
)
## The variable 'factor(age_range)UNKNOWN' has been removed because of collinearity (see $collin.var).
summary(model)
## OLS estimation, Dep. Var.: log(avg_fan_total_cost_amt + 1)
## Observations: 187,126
## Standard-errors: Clustered (author_id)
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.068384 0.019408 3.523404 4.2616e-04
## factor(fans_piar_cat)Small vs Big 0.009142 0.002543 3.594536 3.2507e-04
## factor(fans_piar_cat)Big vs Small -0.050052 0.001405 -35.631732 < 2.2e-16
## factor(fans_piar_cat)Big vs Big -0.054577 0.001469 -37.140371 < 2.2e-16
## factor(gender)M -0.039116 0.001178 -33.209206 < 2.2e-16
## factor(gender)U 0.294141 0.133760 2.199016 2.7878e-02
## factor(age_range)12-17 0.021798 0.007481 2.913602 3.5734e-03
## factor(age_range)18-23 0.053571 0.007511 7.132518 9.8972e-13
## factor(age_range)24-30 0.051241 0.007620 6.724561 1.7672e-11
## factor(age_range)31-40 0.031296 0.007378 4.242073 2.2159e-05
## factor(age_range)41-49 0.013266 0.007347 1.805652 7.0975e-02
## factor(age_range)50+ 0.002229 0.007347 0.303357 7.6162e-01
## factor(fre_country_region)北方 -0.041152 0.022270 -1.847918 6.4616e-02
## factor(fre_country_region)南方 -0.041739 0.022280 -1.873393 6.1016e-02
## factor(fre_city_level)二线城市 0.013018 0.013581 0.958568 3.3778e-01
## factor(fre_city_level)三线城市 0.007402 0.013530 0.547113 5.8430e-01
## factor(fre_city_level)四线城市 0.000110 0.013504 0.008156 9.9349e-01
## factor(fre_city_level)五线城市 -0.008997 0.013479 -0.667498 5.0445e-01
## factor(fre_city_level)新一线城市 0.021486 0.013663 1.572564 1.1582e-01
## factor(fre_city_level)一线城市 0.017313 0.014061 1.231236 2.1824e-01
## factor(year)2023 0.004727 0.001347 3.508073 4.5149e-04
## factor(year)2024 0.013701 0.001380 9.929015 < 2.2e-16
## factor(month)2 -0.000283 0.001949 -0.145135 8.8460e-01
## factor(month)3 0.000694 0.001997 0.347632 7.2812e-01
## factor(month)4 -0.001541 0.002026 -0.760970 4.4668e-01
## factor(month)5 0.001680 0.002124 0.790736 4.2910e-01
## factor(month)6 0.006949 0.002445 2.841521 4.4905e-03
## factor(month)7 0.006913 0.002400 2.880567 3.9701e-03
## factor(month)8 0.006379 0.002310 2.761382 5.7564e-03
## factor(month)9 0.013166 0.002822 4.665980 3.0740e-06
## factor(month)10 0.009481 0.002654 3.572960 3.5307e-04
## factor(month)11 0.012628 0.002874 4.393625 1.1155e-05
## factor(month)12 0.012298 0.002814 4.370667 1.2395e-05
## factor(day)2 -0.005621 0.003443 -1.632693 1.0254e-01
## factor(day)3 -0.003056 0.003757 -0.813383 4.1600e-01
## factor(day)4 0.000960 0.003924 0.244699 8.0669e-01
## factor(day)5 -0.000940 0.003636 -0.258373 7.9612e-01
## factor(day)6 -0.000662 0.003713 -0.178194 8.5857e-01
## factor(day)7 0.001574 0.003956 0.397850 6.9074e-01
## factor(day)8 0.002731 0.003865 0.706494 4.7988e-01
## factor(day)9 -0.001017 0.003685 -0.276025 7.8253e-01
## factor(day)10 0.000488 0.003645 0.133767 8.9359e-01
## factor(day)11 0.003548 0.003899 0.909982 3.6283e-01
## factor(day)12 0.002837 0.003816 0.743409 4.5724e-01
## factor(day)13 0.000277 0.003704 0.074730 9.4043e-01
## factor(day)14 0.000482 0.003796 0.126894 8.9902e-01
## factor(day)15 -0.001824 0.003747 -0.486925 6.2631e-01
## factor(day)16 -0.001860 0.003752 -0.495843 6.2001e-01
## factor(day)17 0.001765 0.003922 0.449967 6.5274e-01
## factor(day)18 0.000201 0.004073 0.049417 9.6059e-01
## factor(day)19 0.002120 0.003951 0.536535 5.9159e-01
## factor(day)20 0.002641 0.003901 0.676941 4.9844e-01
## factor(day)21 0.002198 0.003953 0.556057 5.7817e-01
## factor(day)22 0.005411 0.003995 1.354474 1.7559e-01
## factor(day)23 0.000441 0.003759 0.117330 9.0660e-01
## factor(day)24 0.002997 0.004109 0.729314 4.6581e-01
## factor(day)25 0.003038 0.004097 0.741421 4.5844e-01
## factor(day)26 0.002402 0.004031 0.595746 5.5135e-01
## factor(day)27 0.005119 0.003857 1.327080 1.8448e-01
## factor(day)28 0.002549 0.003992 0.638488 5.2316e-01
## factor(day)29 0.002041 0.003909 0.522161 6.0156e-01
## factor(day)30 0.004904 0.004104 1.194963 2.3210e-01
## factor(day)31 0.003167 0.004683 0.676267 4.9887e-01
##
## (Intercept) ***
## factor(fans_piar_cat)Small vs Big ***
## factor(fans_piar_cat)Big vs Small ***
## factor(fans_piar_cat)Big vs Big ***
## factor(gender)M ***
## factor(gender)U *
## factor(age_range)12-17 **
## factor(age_range)18-23 ***
## factor(age_range)24-30 ***
## factor(age_range)31-40 ***
## factor(age_range)41-49 .
## factor(age_range)50+
## factor(fre_country_region)北方 .
## factor(fre_country_region)南方 .
## factor(fre_city_level)二线城市
## factor(fre_city_level)三线城市
## factor(fre_city_level)四线城市
## factor(fre_city_level)五线城市
## factor(fre_city_level)新一线城市
## factor(fre_city_level)一线城市
## factor(year)2023 ***
## factor(year)2024 ***
## factor(month)2
## factor(month)3
## factor(month)4
## factor(month)5
## factor(month)6 **
## factor(month)7 **
## factor(month)8 **
## factor(month)9 ***
## factor(month)10 ***
## factor(month)11 ***
## factor(month)12 ***
## factor(day)2
## factor(day)3
## factor(day)4
## factor(day)5
## factor(day)6
## factor(day)7
## factor(day)8
## factor(day)9
## factor(day)10
## factor(day)11
## factor(day)12
## factor(day)13
## factor(day)14
## factor(day)15
## factor(day)16
## factor(day)17
## factor(day)18
## factor(day)19
## factor(day)20
## factor(day)21
## factor(day)22
## factor(day)23
## factor(day)24
## factor(day)25
## factor(day)26
## factor(day)27
## factor(day)28
## factor(day)29
## factor(day)30
## factor(day)31
## ... 1 variable was removed because of collinearity (factor(age_range)UNKNOWN)
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## RMSE: 0.218178 Adj. R2: 0.023347
# Draw the per-category means for the model fitted just above.
plot_predictions_fans(model_1 = model, data = data)
# Plotting: model with the same-category flag only (no fan-size pair term),
# same controls, standard errors clustered on author_id.
model <- feols(
  fml = log(avg_fan_total_cost_amt + 1) ~
    is_cooperative +
    factor(gender) + factor(age_range) +
    factor(fre_country_region) + factor(fre_city_level) +
    # Disabled `other_*` covariates, kept for reference:
    # + factor(other_gender) + factor(other_age_range) + factor(other_fre_country_region) + factor(other_fre_city_level)
    factor(year) + factor(month) + factor(day),
  data = data,
  vcov = ~author_id
)
## The variable 'factor(age_range)UNKNOWN' has been removed because of collinearity (see $collin.var).
summary(model)
## OLS estimation, Dep. Var.: log(avg_fan_total_cost_amt + 1)
## Observations: 187,126
## Standard-errors: Clustered (author_id)
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.06381479 0.019532 3.267245 1.0862e-03
## is_cooperative 0.00367797 0.001113 3.303369 9.5552e-04
## factor(gender)M -0.03089462 0.001036 -29.831280 < 2.2e-16
## factor(gender)U 0.29945432 0.133756 2.238803 2.5170e-02
## factor(age_range)12-17 0.02023694 0.007458 2.713471 6.6590e-03
## factor(age_range)18-23 0.04963659 0.007481 6.635434 3.2459e-11
## factor(age_range)24-30 0.04104490 0.007543 5.441735 5.2843e-08
## factor(age_range)31-40 0.02131703 0.007335 2.906368 3.6570e-03
## factor(age_range)41-49 0.00743428 0.007322 1.015364 3.0993e-01
## factor(age_range)50+ -0.00061152 0.007326 -0.083475 9.3347e-01
## factor(fre_country_region)北方 -0.03693007 0.022348 -1.652521 9.8430e-02
## factor(fre_country_region)南方 -0.03516559 0.022355 -1.573046 1.1571e-01
## factor(fre_city_level)二线城市 0.00816332 0.013554 0.602277 5.4699e-01
## factor(fre_city_level)三线城市 0.00308546 0.013506 0.228455 8.1929e-01
## factor(fre_city_level)四线城市 -0.00262725 0.013479 -0.194917 8.4546e-01
## factor(fre_city_level)五线城市 -0.01001552 0.013454 -0.744426 4.5662e-01
## factor(fre_city_level)新一线城市 0.01643953 0.013637 1.205499 2.2801e-01
## factor(fre_city_level)一线城市 0.01244858 0.014046 0.886257 3.7548e-01
## factor(year)2023 0.00208618 0.001339 1.557640 1.1932e-01
## factor(year)2024 0.01051154 0.001361 7.723611 1.1374e-14
## factor(month)2 0.00000126 0.001950 0.000644 9.9949e-01
## factor(month)3 -0.00046130 0.002001 -0.230486 8.1771e-01
## factor(month)4 -0.00249831 0.002028 -1.231849 2.1801e-01
## factor(month)5 0.00053051 0.002130 0.249059 8.0332e-01
## factor(month)6 0.00500592 0.002448 2.044885 4.0868e-02
## factor(month)7 0.00521640 0.002403 2.170549 2.9967e-02
## factor(month)8 0.00468181 0.002312 2.025083 4.2861e-02
## factor(month)9 0.01024520 0.002819 3.633802 2.7936e-04
## factor(month)10 0.00702740 0.002655 2.646523 8.1332e-03
## factor(month)11 0.01002734 0.002874 3.488704 4.8550e-04
## factor(month)12 0.01027431 0.002818 3.645386 2.6708e-04
## factor(day)2 -0.00531141 0.003449 -1.540032 1.2355e-01
## factor(day)3 -0.00271427 0.003765 -0.720898 4.7097e-01
## factor(day)4 0.00086996 0.003936 0.221033 8.2507e-01
## factor(day)5 -0.00122096 0.003647 -0.334746 7.3782e-01
## factor(day)6 -0.00130332 0.003723 -0.350063 7.2629e-01
## factor(day)7 0.00118125 0.003968 0.297712 7.6592e-01
## factor(day)8 0.00202447 0.003878 0.522021 6.0166e-01
## factor(day)9 -0.00121574 0.003694 -0.329081 7.4209e-01
## factor(day)10 0.00033998 0.003656 0.092981 9.2592e-01
## factor(day)11 0.00308421 0.003911 0.788646 4.3032e-01
## factor(day)12 0.00250352 0.003826 0.654393 5.1286e-01
## factor(day)13 -0.00033695 0.003716 -0.090683 9.2774e-01
## factor(day)14 -0.00019106 0.003808 -0.050168 9.5999e-01
## factor(day)15 -0.00218583 0.003760 -0.581358 5.6100e-01
## factor(day)16 -0.00226209 0.003762 -0.601328 5.4762e-01
## factor(day)17 0.00128490 0.003933 0.326702 7.4389e-01
## factor(day)18 0.00003617 0.004082 0.008860 9.9293e-01
## factor(day)19 0.00137852 0.003966 0.347619 7.2813e-01
## factor(day)20 0.00200316 0.003913 0.511885 6.0873e-01
## factor(day)21 0.00115751 0.003969 0.291651 7.7055e-01
## factor(day)22 0.00505949 0.004009 1.262127 2.0690e-01
## factor(day)23 -0.00028669 0.003773 -0.075994 9.3942e-01
## factor(day)24 0.00230856 0.004123 0.559932 5.7553e-01
## factor(day)25 0.00258331 0.004111 0.628457 5.2971e-01
## factor(day)26 0.00204345 0.004045 0.505169 6.1344e-01
## factor(day)27 0.00435071 0.003870 1.124273 2.6090e-01
## factor(day)28 0.00244545 0.004003 0.610935 5.4124e-01
## factor(day)29 0.00181735 0.003920 0.463625 6.4292e-01
## factor(day)30 0.00458427 0.004118 1.113240 2.6561e-01
## factor(day)31 0.00313742 0.004694 0.668326 5.0393e-01
##
## (Intercept) **
## is_cooperative ***
## factor(gender)M ***
## factor(gender)U *
## factor(age_range)12-17 **
## factor(age_range)18-23 ***
## factor(age_range)24-30 ***
## factor(age_range)31-40 **
## factor(age_range)41-49
## factor(age_range)50+
## factor(fre_country_region)北方 .
## factor(fre_country_region)南方
## factor(fre_city_level)二线城市
## factor(fre_city_level)三线城市
## factor(fre_city_level)四线城市
## factor(fre_city_level)五线城市
## factor(fre_city_level)新一线城市
## factor(fre_city_level)一线城市
## factor(year)2023
## factor(year)2024 ***
## factor(month)2
## factor(month)3
## factor(month)4
## factor(month)5
## factor(month)6 *
## factor(month)7 *
## factor(month)8 *
## factor(month)9 ***
## factor(month)10 **
## factor(month)11 ***
## factor(month)12 ***
## factor(day)2
## factor(day)3
## factor(day)4
## factor(day)5
## factor(day)6
## factor(day)7
## factor(day)8
## factor(day)9
## factor(day)10
## factor(day)11
## factor(day)12
## factor(day)13
## factor(day)14
## factor(day)15
## factor(day)16
## factor(day)17
## factor(day)18
## factor(day)19
## factor(day)20
## factor(day)21
## factor(day)22
## factor(day)23
## factor(day)24
## factor(day)25
## factor(day)26
## factor(day)27
## factor(day)28
## factor(day)29
## factor(day)30
## factor(day)31
## ... 1 variable was removed because of collinearity (factor(age_range)UNKNOWN)
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## RMSE: 0.218881 Adj. R2: 0.017048
# Draw the per-flag means for the model fitted just above.
plot_predictions_cate(model_1 = model, data = data)
# Usage example for plot_continue_cutoff().
# The formula refers to `fans_piar_cat_temp`, the factor that the function
# rebuilds inside its loop for every cut-off.
model_formula <- log(avg_fan_total_cost_amt + 1) ~ factor(fans_piar_cat_temp)
# Run the sweep with the data and the formula defined above.
plot_continue_cutoff(data = data, model_formula = model_formula)
library(ggplot2)
library(dplyr)
library(RColorBrewer)
# Step 1 + 2: Bin both fan counts into ordered ranges (fewer, broader bins).
# cut() uses right-closed intervals, so the boundaries match the labels:
# [0, 1k], (1k, 10k], (10k, 100k], (100k, 1M], (1M, Inf). The result is
# already a factor with the levels in display order.
# A count of 0 now lands in "0-1k" and a missing (or negative) count stays
# NA; previously both fell through to the top bucket (">1M" / ">10k").
data <- data %>%
  mutate(
    fans_new_range = cut(
      before_fans_count,
      breaks = c(0, 1000, 10000, 100000, 1000000, Inf),
      labels = c("0-1k", "1k-10k", "10k-100k", "100k-1M", ">1M"),
      include.lowest = TRUE
    ),
    # The other side uses a coarser top bucket: everything above 10k is
    # pooled (the finer "10k-100k" / "100k-1M" split is disabled here).
    other_fans_new_range = cut(
      other_before_fans_count,
      breaks = c(0, 1000, 10000, Inf),
      labels = c("0-1k", "1k-10k", ">10k"),
      include.lowest = TRUE
    )
  )
# Step 3: Aggregate the data to get, per (own range, opponent range) cell,
# the mean of log(per-fan gift amount + 1) and its 95% confidence interval.
#
# The interval is built from the spread of the row-level values. Taking sd()
# of the already-averaged Y (one number per group) is always NA, which is why
# the CI columns used to be entirely missing. The t quantile uses n - 1
# degrees of freedom, and cells with fewer than two usable rows get NA bounds.
agg_data <- data %>%
  group_by(fans_new_range, other_fans_new_range) %>%
  summarize(
    count = n(), # number of rows in this combination
    n_valid = sum(!is.na(avg_fan_total_cost_amt)), # rows that enter the mean
    Y = mean(log(avg_fan_total_cost_amt + 1), na.rm = TRUE),
    ci_half_width = if (n_valid > 1) {
      qt(0.975, df = n_valid - 1) *
        sd(log(avg_fan_total_cost_amt + 1), na.rm = TRUE) / sqrt(n_valid)
    } else {
      NA_real_
    },
    ci_lower = Y - ci_half_width,
    ci_upper = Y + ci_half_width
  ) %>%
  ungroup() %>%
  # Drop the scratch columns so agg_data keeps its original shape.
  dplyr::select(-n_valid, -ci_half_width) %>%
  mutate(percentage = count / sum(count) * 100)
## `summarise()` has grouped output by 'fans_new_range'. You can override using
## the `.groups` argument.
# Step 4: Plot the data in a heatmap-like format with custom colors and percentage labels
# No error-bar layer: the y axis is a discrete fan-range category, so numeric
# CI bounds cannot be drawn on it. The bounds stay available in agg_data.
ggplot(agg_data, aes(x = fans_new_range, y = other_fans_new_range, fill = Y)) +
  geom_tile() +
  geom_text(aes(label = paste0(round(percentage, 2), "%")), color = "black", size = 4) +
  scale_fill_gradientn(colors = c("yellow", "orange", "darkorange", "red", "darkred"),
                       values = scales::rescale(c(0, 0.05, 0.1, 0.15, 0.2)),
                       limits = c(0, 0.2),
                       na.value = "white") +
  labs(title = "Average Fan Total Cost Amount by Fan Ranges",
       x = "Fans Range",
       y = "Other Fans Range",
       fill = "Avg Fan Total Cost Amt") +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))
# OLS of log(total gift amount + 1) on pair type, the cooperation dummy and
# their interaction, with streamer demographics and calendar dummies as
# controls. Standard errors are clustered by author_id.
model <- feols(
  log(total_cost_amt + 1) ~
    factor(fans_piar_cat) * is_cooperative + # both main effects + interaction
    factor(gender) + factor(age_range) +
    factor(fre_country_region) + factor(fre_city_level) +
    # + factor(other_gender) + factor(other_age_range) + factor(other_fre_country_region) + factor(other_fre_city_level)
    factor(year) + factor(month) + factor(day),
  data = data,
  vcov = ~author_id
)
## The variable 'factor(age_range)UNKNOWN' has been removed because of collinearity (see $collin.var).
summary(model)
## OLS estimation, Dep. Var.: log(total_cost_amt + 1)
## Observations: 187,126
## Standard-errors: Clustered (author_id)
## Estimate Std. Error
## (Intercept) 0.946752 0.118456
## factor(fans_piar_cat)Small vs Big 1.175104 0.034836
## factor(fans_piar_cat)Big vs Small 1.433165 0.037619
## factor(fans_piar_cat)Big vs Big 2.016155 0.036024
## is_cooperative 0.089400 0.009227
## factor(gender)M -0.800444 0.010040
## factor(gender)U -0.382726 0.220123
## factor(age_range)12-17 0.421540 0.048436
## factor(age_range)18-23 0.751100 0.048530
## factor(age_range)24-30 0.700914 0.051039
## factor(age_range)31-40 0.569908 0.048332
## factor(age_range)41-49 0.392864 0.048216
## factor(age_range)50+ 0.194960 0.047784
## factor(fre_country_region)北方 -0.454406 0.150537
## factor(fre_country_region)南方 -0.606789 0.150449
## factor(fre_city_level)二线城市 0.400471 0.108261
## factor(fre_city_level)三线城市 0.338823 0.107882
## factor(fre_city_level)四线城市 0.275644 0.107622
## factor(fre_city_level)五线城市 0.139283 0.107327
## factor(fre_city_level)新一线城市 0.489520 0.109561
## factor(fre_city_level)一线城市 0.504028 0.111541
## factor(year)2023 0.025192 0.012179
## factor(year)2024 0.094322 0.012837
## factor(month)2 -0.061355 0.018042
## factor(month)3 -0.005677 0.018649
## factor(month)4 -0.094968 0.018618
## factor(month)5 -0.004574 0.019541
## factor(month)6 0.067418 0.021718
## factor(month)7 0.028815 0.020633
## factor(month)8 0.068303 0.020702
## factor(month)9 0.073425 0.024311
## factor(month)10 0.102026 0.024327
## factor(month)11 0.098977 0.024560
## factor(month)12 0.099978 0.024322
## factor(day)2 -0.045527 0.032096
## factor(day)3 0.035125 0.032488
## factor(day)4 0.035842 0.032676
## factor(day)5 0.011473 0.032344
## factor(day)6 -0.007399 0.032737
## factor(day)7 0.026873 0.033287
## factor(day)8 0.069408 0.033395
## factor(day)9 0.009740 0.033099
## factor(day)10 0.082757 0.033322
## factor(day)11 0.052540 0.033004
## factor(day)12 0.068486 0.033202
## factor(day)13 0.021987 0.033041
## factor(day)14 -0.005744 0.032422
## factor(day)15 0.002498 0.033066
## factor(day)16 0.038727 0.033174
## factor(day)17 0.039300 0.033056
## factor(day)18 -0.011807 0.033161
## factor(day)19 0.028270 0.034022
## factor(day)20 0.041665 0.033859
## factor(day)21 0.013008 0.033354
## factor(day)22 0.070428 0.033570
## factor(day)23 0.034805 0.032982
## factor(day)24 0.045333 0.033249
## factor(day)25 0.033034 0.033852
## factor(day)26 0.007194 0.033564
## factor(day)27 0.047575 0.033139
## factor(day)28 0.029615 0.033339
## factor(day)29 0.038894 0.033713
## factor(day)30 0.079654 0.034756
## factor(day)31 0.026344 0.038715
## factor(fans_piar_cat)Small vs Big:is_cooperative 0.221333 0.053837
## factor(fans_piar_cat)Big vs Small:is_cooperative -0.056332 0.053850
## factor(fans_piar_cat)Big vs Big:is_cooperative -0.291590 0.047951
## t value Pr(>|t|)
## (Intercept) 7.992462 1.3316e-15 ***
## factor(fans_piar_cat)Small vs Big 33.732449 < 2.2e-16 ***
## factor(fans_piar_cat)Big vs Small 38.096818 < 2.2e-16 ***
## factor(fans_piar_cat)Big vs Big 55.967297 < 2.2e-16 ***
## is_cooperative 9.689182 < 2.2e-16 ***
## factor(gender)M -79.725170 < 2.2e-16 ***
## factor(gender)U -1.738688 8.2092e-02 .
## factor(age_range)12-17 8.702951 < 2.2e-16 ***
## factor(age_range)18-23 15.477046 < 2.2e-16 ***
## factor(age_range)24-30 13.732843 < 2.2e-16 ***
## factor(age_range)31-40 11.791483 < 2.2e-16 ***
## factor(age_range)41-49 8.148034 3.7257e-16 ***
## factor(age_range)50+ 4.080037 4.5051e-05 ***
## factor(fre_country_region)北方 -3.018562 2.5402e-03 **
## factor(fre_country_region)南方 -4.033192 5.5050e-05 ***
## factor(fre_city_level)二线城市 3.699129 2.1641e-04 ***
## factor(fre_city_level)三线城市 3.140681 1.6859e-03 **
## factor(fre_city_level)四线城市 2.561227 1.0431e-02 *
## factor(fre_city_level)五线城市 1.297749 1.9438e-01
## factor(fre_city_level)新一线城市 4.468029 7.8999e-06 ***
## factor(fre_city_level)一线城市 4.518782 6.2242e-06 ***
## factor(year)2023 2.068459 3.8599e-02 *
## factor(year)2024 7.347903 2.0231e-13 ***
## factor(month)2 -3.400673 6.7237e-04 ***
## factor(month)3 -0.304385 7.6083e-01
## factor(month)4 -5.100794 3.3862e-07 ***
## factor(month)5 -0.234060 8.1494e-01
## factor(month)6 3.104297 1.9077e-03 **
## factor(month)7 1.396525 1.6256e-01
## factor(month)8 3.299405 9.6912e-04 ***
## factor(month)9 3.020259 2.5260e-03 **
## factor(month)10 4.193917 2.7433e-05 ***
## factor(month)11 4.030050 5.5791e-05 ***
## factor(month)12 4.110578 3.9487e-05 ***
## factor(day)2 -1.418466 1.5606e-01
## factor(day)3 1.081180 2.7962e-01
## factor(day)4 1.096877 2.7270e-01
## factor(day)5 0.354728 7.2279e-01
## factor(day)6 -0.225999 8.2120e-01
## factor(day)7 0.807318 4.1948e-01
## factor(day)8 2.078406 3.7674e-02 *
## factor(day)9 0.294270 7.6855e-01
## factor(day)10 2.483533 1.3010e-02 *
## factor(day)11 1.591933 1.1140e-01
## factor(day)12 2.062702 3.9143e-02 *
## factor(day)13 0.665434 5.0577e-01
## factor(day)14 -0.177165 8.5938e-01
## factor(day)15 0.075546 9.3978e-01
## factor(day)16 1.167382 2.4306e-01
## factor(day)17 1.188906 2.3448e-01
## factor(day)18 -0.356062 7.2179e-01
## factor(day)19 0.830922 4.0602e-01
## factor(day)20 1.230532 2.1850e-01
## factor(day)21 0.389986 6.9655e-01
## factor(day)22 2.097937 3.5912e-02 *
## factor(day)23 1.055271 2.9130e-01
## factor(day)24 1.363443 1.7274e-01
## factor(day)25 0.975833 3.2915e-01
## factor(day)26 0.214346 8.3028e-01
## factor(day)27 1.435603 1.5112e-01
## factor(day)28 0.888275 3.7439e-01
## factor(day)29 1.153676 2.4863e-01
## factor(day)30 2.291818 2.1917e-02 *
## factor(day)31 0.680463 4.9621e-01
## factor(fans_piar_cat)Small vs Big:is_cooperative 4.111147 3.9390e-05 ***
## factor(fans_piar_cat)Big vs Small:is_cooperative -1.046095 2.9552e-01
## factor(fans_piar_cat)Big vs Big:is_cooperative -6.081020 1.1969e-09 ***
## ... 1 variable was removed because of collinearity (factor(age_range)UNKNOWN)
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## RMSE: 1.88591 Adj. R2: 0.201569
# Plotting
# Refit the pair-type x cooperation model on log(total gift amount + 1) so
# that `model` holds the specification the plot below is drawn from.
model <- feols(
  log(total_cost_amt + 1) ~
    factor(fans_piar_cat) * is_cooperative + # both main effects + interaction
    factor(gender) + factor(age_range) +
    factor(fre_country_region) + factor(fre_city_level) +
    # + factor(other_gender) + factor(other_age_range) + factor(other_fre_country_region) + factor(other_fre_city_level)
    factor(year) + factor(month) + factor(day),
  data = data,
  vcov = ~author_id
)
## The variable 'factor(age_range)UNKNOWN' has been removed because of collinearity (see $collin.var).
summary(model)
## OLS estimation, Dep. Var.: log(total_cost_amt + 1)
## Observations: 187,126
## Standard-errors: Clustered (author_id)
## Estimate Std. Error
## (Intercept) 0.946752 0.118456
## factor(fans_piar_cat)Small vs Big 1.175104 0.034836
## factor(fans_piar_cat)Big vs Small 1.433165 0.037619
## factor(fans_piar_cat)Big vs Big 2.016155 0.036024
## is_cooperative 0.089400 0.009227
## factor(gender)M -0.800444 0.010040
## factor(gender)U -0.382726 0.220123
## factor(age_range)12-17 0.421540 0.048436
## factor(age_range)18-23 0.751100 0.048530
## factor(age_range)24-30 0.700914 0.051039
## factor(age_range)31-40 0.569908 0.048332
## factor(age_range)41-49 0.392864 0.048216
## factor(age_range)50+ 0.194960 0.047784
## factor(fre_country_region)北方 -0.454406 0.150537
## factor(fre_country_region)南方 -0.606789 0.150449
## factor(fre_city_level)二线城市 0.400471 0.108261
## factor(fre_city_level)三线城市 0.338823 0.107882
## factor(fre_city_level)四线城市 0.275644 0.107622
## factor(fre_city_level)五线城市 0.139283 0.107327
## factor(fre_city_level)新一线城市 0.489520 0.109561
## factor(fre_city_level)一线城市 0.504028 0.111541
## factor(year)2023 0.025192 0.012179
## factor(year)2024 0.094322 0.012837
## factor(month)2 -0.061355 0.018042
## factor(month)3 -0.005677 0.018649
## factor(month)4 -0.094968 0.018618
## factor(month)5 -0.004574 0.019541
## factor(month)6 0.067418 0.021718
## factor(month)7 0.028815 0.020633
## factor(month)8 0.068303 0.020702
## factor(month)9 0.073425 0.024311
## factor(month)10 0.102026 0.024327
## factor(month)11 0.098977 0.024560
## factor(month)12 0.099978 0.024322
## factor(day)2 -0.045527 0.032096
## factor(day)3 0.035125 0.032488
## factor(day)4 0.035842 0.032676
## factor(day)5 0.011473 0.032344
## factor(day)6 -0.007399 0.032737
## factor(day)7 0.026873 0.033287
## factor(day)8 0.069408 0.033395
## factor(day)9 0.009740 0.033099
## factor(day)10 0.082757 0.033322
## factor(day)11 0.052540 0.033004
## factor(day)12 0.068486 0.033202
## factor(day)13 0.021987 0.033041
## factor(day)14 -0.005744 0.032422
## factor(day)15 0.002498 0.033066
## factor(day)16 0.038727 0.033174
## factor(day)17 0.039300 0.033056
## factor(day)18 -0.011807 0.033161
## factor(day)19 0.028270 0.034022
## factor(day)20 0.041665 0.033859
## factor(day)21 0.013008 0.033354
## factor(day)22 0.070428 0.033570
## factor(day)23 0.034805 0.032982
## factor(day)24 0.045333 0.033249
## factor(day)25 0.033034 0.033852
## factor(day)26 0.007194 0.033564
## factor(day)27 0.047575 0.033139
## factor(day)28 0.029615 0.033339
## factor(day)29 0.038894 0.033713
## factor(day)30 0.079654 0.034756
## factor(day)31 0.026344 0.038715
## factor(fans_piar_cat)Small vs Big:is_cooperative 0.221333 0.053837
## factor(fans_piar_cat)Big vs Small:is_cooperative -0.056332 0.053850
## factor(fans_piar_cat)Big vs Big:is_cooperative -0.291590 0.047951
## t value Pr(>|t|)
## (Intercept) 7.992462 1.3316e-15 ***
## factor(fans_piar_cat)Small vs Big 33.732449 < 2.2e-16 ***
## factor(fans_piar_cat)Big vs Small 38.096818 < 2.2e-16 ***
## factor(fans_piar_cat)Big vs Big 55.967297 < 2.2e-16 ***
## is_cooperative 9.689182 < 2.2e-16 ***
## factor(gender)M -79.725170 < 2.2e-16 ***
## factor(gender)U -1.738688 8.2092e-02 .
## factor(age_range)12-17 8.702951 < 2.2e-16 ***
## factor(age_range)18-23 15.477046 < 2.2e-16 ***
## factor(age_range)24-30 13.732843 < 2.2e-16 ***
## factor(age_range)31-40 11.791483 < 2.2e-16 ***
## factor(age_range)41-49 8.148034 3.7257e-16 ***
## factor(age_range)50+ 4.080037 4.5051e-05 ***
## factor(fre_country_region)北方 -3.018562 2.5402e-03 **
## factor(fre_country_region)南方 -4.033192 5.5050e-05 ***
## factor(fre_city_level)二线城市 3.699129 2.1641e-04 ***
## factor(fre_city_level)三线城市 3.140681 1.6859e-03 **
## factor(fre_city_level)四线城市 2.561227 1.0431e-02 *
## factor(fre_city_level)五线城市 1.297749 1.9438e-01
## factor(fre_city_level)新一线城市 4.468029 7.8999e-06 ***
## factor(fre_city_level)一线城市 4.518782 6.2242e-06 ***
## factor(year)2023 2.068459 3.8599e-02 *
## factor(year)2024 7.347903 2.0231e-13 ***
## factor(month)2 -3.400673 6.7237e-04 ***
## factor(month)3 -0.304385 7.6083e-01
## factor(month)4 -5.100794 3.3862e-07 ***
## factor(month)5 -0.234060 8.1494e-01
## factor(month)6 3.104297 1.9077e-03 **
## factor(month)7 1.396525 1.6256e-01
## factor(month)8 3.299405 9.6912e-04 ***
## factor(month)9 3.020259 2.5260e-03 **
## factor(month)10 4.193917 2.7433e-05 ***
## factor(month)11 4.030050 5.5791e-05 ***
## factor(month)12 4.110578 3.9487e-05 ***
## factor(day)2 -1.418466 1.5606e-01
## factor(day)3 1.081180 2.7962e-01
## factor(day)4 1.096877 2.7270e-01
## factor(day)5 0.354728 7.2279e-01
## factor(day)6 -0.225999 8.2120e-01
## factor(day)7 0.807318 4.1948e-01
## factor(day)8 2.078406 3.7674e-02 *
## factor(day)9 0.294270 7.6855e-01
## factor(day)10 2.483533 1.3010e-02 *
## factor(day)11 1.591933 1.1140e-01
## factor(day)12 2.062702 3.9143e-02 *
## factor(day)13 0.665434 5.0577e-01
## factor(day)14 -0.177165 8.5938e-01
## factor(day)15 0.075546 9.3978e-01
## factor(day)16 1.167382 2.4306e-01
## factor(day)17 1.188906 2.3448e-01
## factor(day)18 -0.356062 7.2179e-01
## factor(day)19 0.830922 4.0602e-01
## factor(day)20 1.230532 2.1850e-01
## factor(day)21 0.389986 6.9655e-01
## factor(day)22 2.097937 3.5912e-02 *
## factor(day)23 1.055271 2.9130e-01
## factor(day)24 1.363443 1.7274e-01
## factor(day)25 0.975833 3.2915e-01
## factor(day)26 0.214346 8.3028e-01
## factor(day)27 1.435603 1.5112e-01
## factor(day)28 0.888275 3.7439e-01
## factor(day)29 1.153676 2.4863e-01
## factor(day)30 2.291818 2.1917e-02 *
## factor(day)31 0.680463 4.9621e-01
## factor(fans_piar_cat)Small vs Big:is_cooperative 4.111147 3.9390e-05 ***
## factor(fans_piar_cat)Big vs Small:is_cooperative -1.046095 2.9552e-01
## factor(fans_piar_cat)Big vs Big:is_cooperative -6.081020 1.1969e-09 ***
## ... 1 variable was removed because of collinearity (factor(age_range)UNKNOWN)
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## RMSE: 1.88591 Adj. R2: 0.201569
# Draw the interaction model's predictions with the plot_predictions_with_ci()
# helper (defined outside this section; presumably adds confidence intervals,
# per its name — confirm against its definition).
plot_predictions_with_ci(model, data)
## `summarise()` has grouped output by 'fans_piar_cat'. You can override using the
## `.groups` argument.
### (3) plot fans
# Plotting
# Pair-type-only specification: log(total gift amount + 1) on the fan-size
# pair category plus demographic and calendar controls, clustered by author_id.
model <- feols(
  log(total_cost_amt + 1) ~
    factor(fans_piar_cat) +
    factor(gender) + factor(age_range) +
    factor(fre_country_region) + factor(fre_city_level) +
    # + factor(other_gender) + factor(other_age_range) + factor(other_fre_country_region) + factor(other_fre_city_level)
    factor(year) + factor(month) + factor(day),
  data = data,
  vcov = ~author_id
)
## The variable 'factor(age_range)UNKNOWN' has been removed because of collinearity (see $collin.var).
summary(model)
## OLS estimation, Dep. Var.: log(total_cost_amt + 1)
## Observations: 187,126
## Standard-errors: Clustered (author_id)
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.978422 0.118102 8.284572 < 2.2e-16
## factor(fans_piar_cat)Small vs Big 1.262808 0.030145 41.891434 < 2.2e-16
## factor(fans_piar_cat)Big vs Small 1.415540 0.033320 42.483585 < 2.2e-16
## factor(fans_piar_cat)Big vs Big 1.900671 0.030254 62.823431 < 2.2e-16
## factor(gender)M -0.803219 0.010065 -79.807131 < 2.2e-16
## factor(gender)U -0.361473 0.220712 -1.637759 1.0147e-01
## factor(age_range)12-17 0.427888 0.048411 8.838605 < 2.2e-16
## factor(age_range)18-23 0.752824 0.048507 15.519972 < 2.2e-16
## factor(age_range)24-30 0.695738 0.051012 13.638597 < 2.2e-16
## factor(age_range)31-40 0.563947 0.048290 11.678382 < 2.2e-16
## factor(age_range)41-49 0.386273 0.048175 8.018080 1.0814e-15
## factor(age_range)50+ 0.187039 0.047748 3.917186 8.9627e-05
## factor(fre_country_region)北方 -0.455441 0.150791 -3.020342 2.5253e-03
## factor(fre_country_region)南方 -0.605623 0.150698 -4.018792 5.8525e-05
## factor(fre_city_level)二线城市 0.400760 0.108908 3.679808 2.3349e-04
## factor(fre_city_level)三线城市 0.340599 0.108540 3.138006 1.7013e-03
## factor(fre_city_level)四线城市 0.277109 0.108277 2.559259 1.0490e-02
## factor(fre_city_level)五线城市 0.139687 0.107987 1.293552 1.9582e-01
## factor(fre_city_level)新一线城市 0.489663 0.110213 4.442870 8.8827e-06
## factor(fre_city_level)一线城市 0.504464 0.112174 4.497152 6.8919e-06
## factor(year)2023 0.028665 0.012185 2.352538 1.8647e-02
## factor(year)2024 0.101850 0.012809 7.951690 1.8520e-15
## factor(month)2 -0.061690 0.018051 -3.417502 6.3215e-04
## factor(month)3 -0.006236 0.018657 -0.334240 7.3820e-01
## factor(month)4 -0.096554 0.018619 -5.185780 2.1538e-07
## factor(month)5 -0.004818 0.019558 -0.246323 8.0543e-01
## factor(month)6 0.067520 0.021734 3.106619 1.8927e-03
## factor(month)7 0.027182 0.020639 1.317032 1.8783e-01
## factor(month)8 0.067171 0.020727 3.240745 1.1924e-03
## factor(month)9 0.073570 0.024325 3.024416 2.4915e-03
## factor(month)10 0.102084 0.024351 4.192215 2.7640e-05
## factor(month)11 0.099890 0.024565 4.066424 4.7763e-05
## factor(month)12 0.102632 0.024332 4.217920 2.4670e-05
## factor(day)2 -0.044847 0.032108 -1.396765 1.6249e-01
## factor(day)3 0.034823 0.032487 1.071900 2.8377e-01
## factor(day)4 0.036567 0.032693 1.118474 2.6337e-01
## factor(day)5 0.012510 0.032366 0.386525 6.9911e-01
## factor(day)6 -0.007505 0.032748 -0.229170 8.1874e-01
## factor(day)7 0.026843 0.033293 0.806249 4.2010e-01
## factor(day)8 0.070937 0.033396 2.124114 3.3662e-02
## factor(day)9 0.012171 0.033121 0.367457 7.1328e-01
## factor(day)10 0.083709 0.033328 2.511674 1.2017e-02
## factor(day)11 0.053376 0.033015 1.616715 1.0594e-01
## factor(day)12 0.070459 0.033220 2.120936 3.3929e-02
## factor(day)13 0.022759 0.033042 0.688806 4.9095e-01
## factor(day)14 -0.004713 0.032448 -0.145250 8.8451e-01
## factor(day)15 0.004294 0.033076 0.129818 8.9671e-01
## factor(day)16 0.039831 0.033192 1.200030 2.3013e-01
## factor(day)17 0.040634 0.033064 1.228940 2.1910e-01
## factor(day)18 -0.010520 0.033158 -0.317261 7.5105e-01
## factor(day)19 0.029326 0.034046 0.861365 3.8904e-01
## factor(day)20 0.043537 0.033870 1.285393 1.9866e-01
## factor(day)21 0.014089 0.033370 0.422225 6.7286e-01
## factor(day)22 0.071549 0.033592 2.129954 3.3177e-02
## factor(day)23 0.036611 0.032993 1.109663 2.6715e-01
## factor(day)24 0.045872 0.033279 1.378410 1.6808e-01
## factor(day)25 0.035575 0.033859 1.050654 2.9342e-01
## factor(day)26 0.008107 0.033584 0.241402 8.0924e-01
## factor(day)27 0.047911 0.033164 1.444682 1.4855e-01
## factor(day)28 0.030503 0.033347 0.914706 3.6035e-01
## factor(day)29 0.038703 0.033739 1.147159 2.5132e-01
## factor(day)30 0.078847 0.034796 2.265966 2.3455e-02
## factor(day)31 0.027195 0.038727 0.702237 4.8253e-01
##
## (Intercept) ***
## factor(fans_piar_cat)Small vs Big ***
## factor(fans_piar_cat)Big vs Small ***
## factor(fans_piar_cat)Big vs Big ***
## factor(gender)M ***
## factor(gender)U
## factor(age_range)12-17 ***
## factor(age_range)18-23 ***
## factor(age_range)24-30 ***
## factor(age_range)31-40 ***
## factor(age_range)41-49 ***
## factor(age_range)50+ ***
## factor(fre_country_region)北方 **
## factor(fre_country_region)南方 ***
## factor(fre_city_level)二线城市 ***
## factor(fre_city_level)三线城市 **
## factor(fre_city_level)四线城市 *
## factor(fre_city_level)五线城市
## factor(fre_city_level)新一线城市 ***
## factor(fre_city_level)一线城市 ***
## factor(year)2023 *
## factor(year)2024 ***
## factor(month)2 ***
## factor(month)3
## factor(month)4 ***
## factor(month)5
## factor(month)6 **
## factor(month)7
## factor(month)8 **
## factor(month)9 **
## factor(month)10 ***
## factor(month)11 ***
## factor(month)12 ***
## factor(day)2
## factor(day)3
## factor(day)4
## factor(day)5
## factor(day)6
## factor(day)7
## factor(day)8 *
## factor(day)9
## factor(day)10 *
## factor(day)11
## factor(day)12 *
## factor(day)13
## factor(day)14
## factor(day)15
## factor(day)16
## factor(day)17
## factor(day)18
## factor(day)19
## factor(day)20
## factor(day)21
## factor(day)22 *
## factor(day)23
## factor(day)24
## factor(day)25
## factor(day)26
## factor(day)27
## factor(day)28
## factor(day)29
## factor(day)30 *
## factor(day)31
## ... 1 variable was removed because of collinearity (factor(age_range)UNKNOWN)
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## RMSE: 1.88685 Adj. R2: 0.200784
# Plot the pair-type-only model with the plot_predictions_fans() helper
# (defined outside this section).
plot_predictions_fans(model, data)
# Plotting
# Cooperation-only specification: log(total gift amount + 1) on the
# is_cooperative dummy plus demographic and calendar controls, with standard
# errors clustered by author_id.
model <- feols(
  log(total_cost_amt + 1) ~
    is_cooperative +
    factor(gender) + factor(age_range) +
    factor(fre_country_region) + factor(fre_city_level) +
    # + factor(other_gender) + factor(other_age_range) + factor(other_fre_country_region) + factor(other_fre_city_level)
    factor(year) + factor(month) + factor(day),
  data = data,
  vcov = ~author_id
)
## The variable 'factor(age_range)UNKNOWN' has been removed because of collinearity (see $collin.var).
summary(model)
## OLS estimation, Dep. Var.: log(total_cost_amt + 1)
## Observations: 187,126
## Standard-errors: Clustered (author_id)
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.028121 0.123580 8.319441 < 2.2e-16
## is_cooperative 0.126833 0.010114 12.540397 < 2.2e-16
## factor(gender)M -1.143450 0.010947 -104.454006 < 2.2e-16
## factor(gender)U -0.602824 0.223932 -2.692000 7.1032e-03
## factor(age_range)12-17 0.470789 0.051963 9.060101 < 2.2e-16
## factor(age_range)18-23 0.948594 0.051948 18.260625 < 2.2e-16
## factor(age_range)24-30 1.166395 0.054341 21.464245 < 2.2e-16
## factor(age_range)31-40 0.997723 0.051857 19.239921 < 2.2e-16
## factor(age_range)41-49 0.654844 0.051834 12.633451 < 2.2e-16
## factor(age_range)50+ 0.347546 0.051398 6.761828 1.3674e-11
## factor(fre_country_region)北方 -0.549686 0.180595 -3.043754 2.3369e-03
## factor(fre_country_region)南方 -0.799181 0.180380 -4.430533 9.4064e-06
## factor(fre_city_level)二线城市 0.578078 0.144159 4.009994 6.0748e-05
## factor(fre_city_level)三线城市 0.488447 0.143985 3.392343 6.9315e-04
## factor(fre_city_level)四线城市 0.366934 0.143843 2.550945 1.0744e-02
## factor(fre_city_level)五线城市 0.163583 0.143639 1.138850 2.5477e-01
## factor(fre_city_level)新一线城市 0.682887 0.145132 4.705278 2.5374e-06
## factor(fre_city_level)一线城市 0.688180 0.147391 4.669086 3.0279e-06
## factor(year)2023 0.124777 0.012617 9.889729 < 2.2e-16
## factor(year)2024 0.205715 0.013358 15.400499 < 2.2e-16
## factor(month)2 -0.076380 0.019161 -3.986187 6.7174e-05
## factor(month)3 0.040513 0.019575 2.069650 3.8487e-02
## factor(month)4 -0.051500 0.019623 -2.624472 8.6792e-03
## factor(month)5 0.051812 0.020601 2.515039 1.1903e-02
## factor(month)6 0.150903 0.022913 6.585988 4.5329e-11
## factor(month)7 0.099215 0.021671 4.578179 4.6940e-06
## factor(month)8 0.142373 0.021784 6.535726 6.3494e-11
## factor(month)9 0.193240 0.025397 7.608738 2.7832e-14
## factor(month)10 0.201973 0.025461 7.932638 2.1594e-15
## factor(month)11 0.201412 0.025753 7.820794 5.2815e-15
## factor(month)12 0.178231 0.025563 6.972355 3.1290e-12
## factor(day)2 -0.068836 0.033736 -2.040417 4.1311e-02
## factor(day)3 0.013732 0.034197 0.401546 6.8802e-01
## factor(day)4 0.041044 0.034235 1.198893 2.3057e-01
## factor(day)5 0.012336 0.033943 0.363431 7.1628e-01
## factor(day)6 0.018577 0.034313 0.541399 5.8823e-01
## factor(day)7 0.047412 0.034869 1.359698 1.7393e-01
## factor(day)8 0.083913 0.034921 2.402967 1.6264e-02
## factor(day)9 0.016257 0.034600 0.469848 6.3846e-01
## factor(day)10 0.088220 0.034981 2.521917 1.1673e-02
## factor(day)11 0.069953 0.034498 2.027727 4.2590e-02
## factor(day)12 0.082192 0.034766 2.364114 1.8074e-02
## factor(day)13 0.047086 0.034718 1.356261 1.7502e-01
## factor(day)14 0.015079 0.034017 0.443289 6.5756e-01
## factor(day)15 0.015607 0.034752 0.449103 6.5336e-01
## factor(day)16 0.051097 0.034671 1.473771 1.4055e-01
## factor(day)17 0.050944 0.034773 1.465061 1.4291e-01
## factor(day)18 0.002629 0.034804 0.075529 9.3979e-01
## factor(day)19 0.050680 0.035574 1.424613 1.5427e-01
## factor(day)20 0.061504 0.035430 1.735934 8.2578e-02
## factor(day)21 0.052921 0.034895 1.516603 1.2937e-01
## factor(day)22 0.085425 0.035101 2.433722 1.4946e-02
## factor(day)23 0.055525 0.034530 1.608011 1.0783e-01
## factor(day)24 0.065155 0.034713 1.876942 6.0528e-02
## factor(day)25 0.045888 0.035444 1.294687 1.9543e-01
## factor(day)26 0.022220 0.034955 0.635674 5.2499e-01
## factor(day)27 0.065234 0.034740 1.877755 6.0417e-02
## factor(day)28 0.036764 0.035077 1.048106 2.9459e-01
## factor(day)29 0.045714 0.035486 1.288211 1.9767e-01
## factor(day)30 0.074960 0.036314 2.064217 3.8999e-02
## factor(day)31 0.020096 0.040821 0.492293 6.2251e-01
##
## (Intercept) ***
## is_cooperative ***
## factor(gender)M ***
## factor(gender)U **
## factor(age_range)12-17 ***
## factor(age_range)18-23 ***
## factor(age_range)24-30 ***
## factor(age_range)31-40 ***
## factor(age_range)41-49 ***
## factor(age_range)50+ ***
## factor(fre_country_region)北方 **
## factor(fre_country_region)南方 ***
## factor(fre_city_level)二线城市 ***
## factor(fre_city_level)三线城市 ***
## factor(fre_city_level)四线城市 *
## factor(fre_city_level)五线城市
## factor(fre_city_level)新一线城市 ***
## factor(fre_city_level)一线城市 ***
## factor(year)2023 ***
## factor(year)2024 ***
## factor(month)2 ***
## factor(month)3 *
## factor(month)4 **
## factor(month)5 *
## factor(month)6 ***
## factor(month)7 ***
## factor(month)8 ***
## factor(month)9 ***
## factor(month)10 ***
## factor(month)11 ***
## factor(month)12 ***
## factor(day)2 *
## factor(day)3
## factor(day)4
## factor(day)5
## factor(day)6
## factor(day)7
## factor(day)8 *
## factor(day)9
## factor(day)10 *
## factor(day)11 *
## factor(day)12 *
## factor(day)13
## factor(day)14
## factor(day)15
## factor(day)16
## factor(day)17
## factor(day)18
## factor(day)19
## factor(day)20 .
## factor(day)21
## factor(day)22 *
## factor(day)23
## factor(day)24 .
## factor(day)25
## factor(day)26
## factor(day)27 .
## factor(day)28
## factor(day)29
## factor(day)30 *
## factor(day)31
## ... 1 variable was removed because of collinearity (factor(age_range)UNKNOWN)
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## RMSE: 1.9745 Adj. R2: 0.124825
# Plot the cooperation-only model with the plot_predictions_cate() helper
# (defined outside this section).
plot_predictions_cate(model, data)
# Usage example
# Define the formula for the model: log of the total gift amount
# (+1 keeps zero amounts finite) regressed on a pair-category factor.
# NOTE(review): fans_piar_cat_temp is not created anywhere in this section —
# presumably plot_continue_cutoff() builds it internally; confirm.
model_formula <- log(total_cost_amt + 1) ~ factor(fans_piar_cat_temp)
# Call the function with the data and formula
plot_continue_cutoff(data, model_formula)
library(ggplot2)
library(dplyr)
library(RColorBrewer)
# Step 1 + 2: Bin both streamers' fan counts into the same five ordered ranges.
#
# cut() with include.lowest = TRUE makes the first bin [0, 1000], so streamers
# with exactly 0 fans land in "0-1k". The previous case_when() required
# count > 0 and sent every unmatched row (0 fans, NA) to the catch-all ">1M"
# bin. Missing or negative counts now stay NA instead of being mislabelled.
#
# cut() returns a factor whose levels follow `labels`, so the categories are
# already in the right order for plotting.
fan_breaks <- c(0, 1000, 10000, 100000, 1000000, Inf)
fan_labels <- c("0-1k", "1k-10k", "10k-100k", "100k-1M", ">1M")
data <- data %>%
  mutate(
    fans_new_range = cut(
      before_fans_count,
      breaks = fan_breaks,
      labels = fan_labels,
      include.lowest = TRUE
    ),
    other_fans_new_range = cut(
      other_before_fans_count,
      breaks = fan_breaks,
      labels = fan_labels,
      include.lowest = TRUE
    )
  )
# Step 3: Aggregate the data to get, per (own range, opponent range) cell,
# the mean of log(total gift amount + 1) and its 95% confidence interval.
#
# The interval is built from the spread of the row-level values. Taking sd()
# of the already-averaged Y (one number per group) is always NA, which is why
# the CI columns used to be entirely missing. The t quantile uses n - 1
# degrees of freedom, and cells with fewer than two usable rows get NA bounds.
agg_data <- data %>%
  group_by(fans_new_range, other_fans_new_range) %>%
  summarize(
    count = n(), # number of rows in this combination
    n_valid = sum(!is.na(total_cost_amt)), # rows that enter the mean
    Y = mean(log(total_cost_amt + 1), na.rm = TRUE),
    ci_half_width = if (n_valid > 1) {
      qt(0.975, df = n_valid - 1) *
        sd(log(total_cost_amt + 1), na.rm = TRUE) / sqrt(n_valid)
    } else {
      NA_real_
    },
    ci_lower = Y - ci_half_width,
    ci_upper = Y + ci_half_width
  ) %>%
  ungroup() %>%
  # Drop the scratch columns so agg_data keeps its original shape.
  dplyr::select(-n_valid, -ci_half_width) %>%
  mutate(percentage = count / sum(count) * 100)
## `summarise()` has grouped output by 'fans_new_range'. You can override using
## the `.groups` argument.
# Step 4: Plot the data in a heatmap-like format with custom colors and percentage labels
# No error-bar layer: the y axis is a discrete fan-range category, so numeric
# CI bounds cannot be drawn on it. The bounds stay available in agg_data.
# Title and legend name the quantity actually plotted (mean log total amount);
# the earlier text was carried over from the per-fan heatmap.
ggplot(agg_data, aes(x = fans_new_range, y = other_fans_new_range, fill = Y)) +
  geom_tile() +
  geom_text(aes(label = paste0(round(percentage, 2), "%")), color = "black", size = 4) +
  scale_fill_gradientn(colors = c("yellow", "orange", "darkorange", "red", "darkred"),
                       values = scales::rescale(c(0, 0.05, 0.1, 0.15, 0.2)),
                       #limits = c(0, 0.2),
                       na.value = "white") +
  labs(title = "Average Log Total Cost Amount by Fan Ranges",
       x = "Fans Range",
       y = "Other Fans Range",
       fill = "Mean log(Total Cost Amt + 1)") +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))
# Distribution of net follower change. The minimum printed below is -254, so
# adding 255 in the regression keeps the argument of log() at 1 or above.
summary(data$net_follow_fans)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## -254.000 0.000 0.000 0.958 0.000 5064.000
# OLS of log(net_follow_fans + 255) on pair type, cooperation and their
# interaction (`a * b` expands to a + b + a:b), with streamer demographics and
# calendar dummies as controls. Standard errors are clustered by author_id.
model <- feols(
  log(net_follow_fans + 255) ~
    factor(fans_piar_cat) * is_cooperative +
    factor(gender) +
    factor(age_range) +
    factor(fre_country_region) +
    factor(fre_city_level) +
    # Opponent-side controls, currently switched off:
    # + factor(other_gender) + factor(other_age_range) + factor(other_fre_country_region) + factor(other_fre_city_level)
    factor(year) +
    factor(month) +
    factor(day),
  data = data,
  vcov = ~author_id
)
## The variable 'factor(age_range)UNKNOWN' has been removed because of collinearity (see $collin.var).
summary(model)
## OLS estimation, Dep. Var.: log(net_follow_fans + 255)
## Observations: 187,126
## Standard-errors: Clustered (author_id)
## Estimate Std. Error
## (Intercept) 5.54926853 0.005357
## factor(fans_piar_cat)Small vs Big 0.00310937 0.000559
## factor(fans_piar_cat)Big vs Small 0.00471999 0.000628
## factor(fans_piar_cat)Big vs Big 0.00870183 0.001105
## is_cooperative -0.00032314 0.000084
## factor(gender)M -0.00001403 0.000216
## factor(gender)U -0.00782178 0.005364
## factor(age_range)12-17 -0.00068278 0.000319
## factor(age_range)18-23 0.00018221 0.000361
## factor(age_range)24-30 0.00034418 0.000433
## factor(age_range)31-40 0.00073711 0.000383
## factor(age_range)41-49 0.00035842 0.000382
## factor(age_range)50+ -0.00077930 0.000373
## factor(fre_country_region)北方 -0.00723873 0.005506
## factor(fre_country_region)南方 -0.00839764 0.005497
## factor(fre_city_level)二线城市 0.00071500 0.001204
## factor(fre_city_level)三线城市 0.00068485 0.001199
## factor(fre_city_level)四线城市 -0.00001873 0.001181
## factor(fre_city_level)五线城市 -0.00056538 0.001182
## factor(fre_city_level)新一线城市 0.00154712 0.001251
## factor(fre_city_level)一线城市 0.00204473 0.001359
## factor(year)2023 0.00156544 0.000269
## factor(year)2024 0.00151425 0.000230
## factor(month)2 0.00017248 0.000316
## factor(month)3 -0.00035825 0.000318
## factor(month)4 -0.00010432 0.000348
## factor(month)5 0.00034042 0.000391
## factor(month)6 0.00008786 0.000413
## factor(month)7 0.00047157 0.000419
## factor(month)8 0.00021534 0.000383
## factor(month)9 -0.00006035 0.000378
## factor(month)10 -0.00020540 0.000433
## factor(month)11 -0.00023685 0.000758
## factor(month)12 0.00034162 0.000479
## factor(day)2 -0.00046852 0.000525
## factor(day)3 -0.00055164 0.000513
## factor(day)4 -0.00051286 0.000598
## factor(day)5 0.00034641 0.000778
## factor(day)6 -0.00062493 0.000578
## factor(day)7 -0.00092072 0.000541
## factor(day)8 -0.00020119 0.000681
## factor(day)9 -0.00103093 0.000537
## factor(day)10 -0.00072589 0.000624
## factor(day)11 -0.00091129 0.000562
## factor(day)12 -0.00033193 0.000614
## factor(day)13 -0.00079138 0.000521
## factor(day)14 -0.00072192 0.000657
## factor(day)15 -0.00078020 0.000810
## factor(day)16 -0.00043008 0.000654
## factor(day)17 0.00002203 0.000625
## factor(day)18 -0.00007449 0.000627
## factor(day)19 0.00037366 0.000631
## factor(day)20 -0.00022798 0.000652
## factor(day)21 0.00038472 0.000724
## factor(day)22 -0.00000760 0.000603
## factor(day)23 -0.00018045 0.000595
## factor(day)24 -0.00085202 0.001140
## factor(day)25 -0.00019171 0.000663
## factor(day)26 0.00003886 0.000587
## factor(day)27 0.00011023 0.000587
## factor(day)28 -0.00052999 0.000739
## factor(day)29 -0.00009297 0.000683
## factor(day)30 0.00041275 0.000905
## factor(day)31 -0.00061214 0.000601
## factor(fans_piar_cat)Small vs Big:is_cooperative 0.00002739 0.000815
## factor(fans_piar_cat)Big vs Small:is_cooperative -0.00165071 0.000882
## factor(fans_piar_cat)Big vs Big:is_cooperative 0.00305420 0.002065
## t value Pr(>|t|)
## (Intercept) 1035.795363 < 2.2e-16 ***
## factor(fans_piar_cat)Small vs Big 5.566626 2.6014e-08 ***
## factor(fans_piar_cat)Big vs Small 7.517540 5.6111e-14 ***
## factor(fans_piar_cat)Big vs Big 7.873351 3.4745e-15 ***
## is_cooperative -3.858366 1.1419e-04 ***
## factor(gender)M -0.064829 9.4831e-01
## factor(gender)U -1.458236 1.4478e-01
## factor(age_range)12-17 -2.141862 3.2206e-02 *
## factor(age_range)18-23 0.504780 6.1371e-01
## factor(age_range)24-30 0.795647 4.2624e-01
## factor(age_range)31-40 1.927020 5.3979e-02 .
## factor(age_range)41-49 0.939200 3.4763e-01
## factor(age_range)50+ -2.090572 3.6568e-02 *
## factor(fre_country_region)北方 -1.314758 1.8859e-01
## factor(fre_country_region)南方 -1.527587 1.2662e-01
## factor(fre_city_level)二线城市 0.593810 5.5264e-01
## factor(fre_city_level)三线城市 0.571250 5.6783e-01
## factor(fre_city_level)四线城市 -0.015857 9.8735e-01
## factor(fre_city_level)五线城市 -0.478310 6.3243e-01
## factor(fre_city_level)新一线城市 1.236275 2.1636e-01
## factor(fre_city_level)一线城市 1.505066 1.3231e-01
## factor(year)2023 5.824228 5.7489e-09 ***
## factor(year)2024 6.588751 4.4494e-11 ***
## factor(month)2 0.545113 5.8568e-01
## factor(month)3 -1.126621 2.5990e-01
## factor(month)4 -0.300045 7.6414e-01
## factor(month)5 0.870749 3.8389e-01
## factor(month)6 0.212602 8.3164e-01
## factor(month)7 1.126359 2.6002e-01
## factor(month)8 0.562896 5.7351e-01
## factor(month)9 -0.159857 8.7299e-01
## factor(month)10 -0.474804 6.3493e-01
## factor(month)11 -0.312371 7.5476e-01
## factor(month)12 0.713009 4.7584e-01
## factor(day)2 -0.892052 3.7237e-01
## factor(day)3 -1.075404 2.8220e-01
## factor(day)4 -0.857166 3.9135e-01
## factor(day)5 0.445504 6.5596e-01
## factor(day)6 -1.081916 2.7929e-01
## factor(day)7 -1.702888 8.8591e-02 .
## factor(day)8 -0.295563 7.6756e-01
## factor(day)9 -1.919458 5.4928e-02 .
## factor(day)10 -1.163700 2.4455e-01
## factor(day)11 -1.622030 1.0480e-01
## factor(day)12 -0.540457 5.8888e-01
## factor(day)13 -1.519092 1.2874e-01
## factor(day)14 -1.098756 2.7188e-01
## factor(day)15 -0.963710 3.3519e-01
## factor(day)16 -0.657136 5.1109e-01
## factor(day)17 0.035226 9.7190e-01
## factor(day)18 -0.118870 9.0538e-01
## factor(day)19 0.591880 5.5393e-01
## factor(day)20 -0.349660 7.2659e-01
## factor(day)21 0.531612 5.9500e-01
## factor(day)22 -0.012610 9.8994e-01
## factor(day)23 -0.303263 7.6169e-01
## factor(day)24 -0.747352 4.5485e-01
## factor(day)25 -0.289263 7.7238e-01
## factor(day)26 0.066170 9.4724e-01
## factor(day)27 0.187823 8.5102e-01
## factor(day)28 -0.716860 4.7346e-01
## factor(day)29 -0.136156 8.9170e-01
## factor(day)30 0.456153 6.4828e-01
## factor(day)31 -1.018342 3.0852e-01
## factor(fans_piar_cat)Small vs Big:is_cooperative 0.033595 9.7320e-01
## factor(fans_piar_cat)Big vs Small:is_cooperative -1.870989 6.1349e-02 .
## factor(fans_piar_cat)Big vs Big:is_cooperative 1.479063 1.3913e-01
## ... 1 variable was removed because of collinearity (factor(age_range)UNKNOWN)
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## RMSE: 0.040382 Adj. R2: 0.00695
# Plotting: fit the pair type x cooperation model on log(net_follow_fans + 255)
# that is handed to plot_predictions_with_ci() after the summary printout.
# `a * b` expands to a + b + a:b; standard errors are clustered by author_id.
model <- feols(
  log(net_follow_fans + 255) ~
    factor(fans_piar_cat) * is_cooperative +
    factor(gender) +
    factor(age_range) +
    factor(fre_country_region) +
    factor(fre_city_level) +
    # Opponent-side controls, currently switched off:
    # + factor(other_gender) + factor(other_age_range) + factor(other_fre_country_region) + factor(other_fre_city_level)
    factor(year) +
    factor(month) +
    factor(day),
  data = data,
  vcov = ~author_id
)
## The variable 'factor(age_range)UNKNOWN' has been removed because of collinearity (see $collin.var).
summary(model)
## OLS estimation, Dep. Var.: log(net_follow_fans + 255)
## Observations: 187,126
## Standard-errors: Clustered (author_id)
## Estimate Std. Error
## (Intercept) 5.54926853 0.005357
## factor(fans_piar_cat)Small vs Big 0.00310937 0.000559
## factor(fans_piar_cat)Big vs Small 0.00471999 0.000628
## factor(fans_piar_cat)Big vs Big 0.00870183 0.001105
## is_cooperative -0.00032314 0.000084
## factor(gender)M -0.00001403 0.000216
## factor(gender)U -0.00782178 0.005364
## factor(age_range)12-17 -0.00068278 0.000319
## factor(age_range)18-23 0.00018221 0.000361
## factor(age_range)24-30 0.00034418 0.000433
## factor(age_range)31-40 0.00073711 0.000383
## factor(age_range)41-49 0.00035842 0.000382
## factor(age_range)50+ -0.00077930 0.000373
## factor(fre_country_region)北方 -0.00723873 0.005506
## factor(fre_country_region)南方 -0.00839764 0.005497
## factor(fre_city_level)二线城市 0.00071500 0.001204
## factor(fre_city_level)三线城市 0.00068485 0.001199
## factor(fre_city_level)四线城市 -0.00001873 0.001181
## factor(fre_city_level)五线城市 -0.00056538 0.001182
## factor(fre_city_level)新一线城市 0.00154712 0.001251
## factor(fre_city_level)一线城市 0.00204473 0.001359
## factor(year)2023 0.00156544 0.000269
## factor(year)2024 0.00151425 0.000230
## factor(month)2 0.00017248 0.000316
## factor(month)3 -0.00035825 0.000318
## factor(month)4 -0.00010432 0.000348
## factor(month)5 0.00034042 0.000391
## factor(month)6 0.00008786 0.000413
## factor(month)7 0.00047157 0.000419
## factor(month)8 0.00021534 0.000383
## factor(month)9 -0.00006035 0.000378
## factor(month)10 -0.00020540 0.000433
## factor(month)11 -0.00023685 0.000758
## factor(month)12 0.00034162 0.000479
## factor(day)2 -0.00046852 0.000525
## factor(day)3 -0.00055164 0.000513
## factor(day)4 -0.00051286 0.000598
## factor(day)5 0.00034641 0.000778
## factor(day)6 -0.00062493 0.000578
## factor(day)7 -0.00092072 0.000541
## factor(day)8 -0.00020119 0.000681
## factor(day)9 -0.00103093 0.000537
## factor(day)10 -0.00072589 0.000624
## factor(day)11 -0.00091129 0.000562
## factor(day)12 -0.00033193 0.000614
## factor(day)13 -0.00079138 0.000521
## factor(day)14 -0.00072192 0.000657
## factor(day)15 -0.00078020 0.000810
## factor(day)16 -0.00043008 0.000654
## factor(day)17 0.00002203 0.000625
## factor(day)18 -0.00007449 0.000627
## factor(day)19 0.00037366 0.000631
## factor(day)20 -0.00022798 0.000652
## factor(day)21 0.00038472 0.000724
## factor(day)22 -0.00000760 0.000603
## factor(day)23 -0.00018045 0.000595
## factor(day)24 -0.00085202 0.001140
## factor(day)25 -0.00019171 0.000663
## factor(day)26 0.00003886 0.000587
## factor(day)27 0.00011023 0.000587
## factor(day)28 -0.00052999 0.000739
## factor(day)29 -0.00009297 0.000683
## factor(day)30 0.00041275 0.000905
## factor(day)31 -0.00061214 0.000601
## factor(fans_piar_cat)Small vs Big:is_cooperative 0.00002739 0.000815
## factor(fans_piar_cat)Big vs Small:is_cooperative -0.00165071 0.000882
## factor(fans_piar_cat)Big vs Big:is_cooperative 0.00305420 0.002065
## t value Pr(>|t|)
## (Intercept) 1035.795363 < 2.2e-16 ***
## factor(fans_piar_cat)Small vs Big 5.566626 2.6014e-08 ***
## factor(fans_piar_cat)Big vs Small 7.517540 5.6111e-14 ***
## factor(fans_piar_cat)Big vs Big 7.873351 3.4745e-15 ***
## is_cooperative -3.858366 1.1419e-04 ***
## factor(gender)M -0.064829 9.4831e-01
## factor(gender)U -1.458236 1.4478e-01
## factor(age_range)12-17 -2.141862 3.2206e-02 *
## factor(age_range)18-23 0.504780 6.1371e-01
## factor(age_range)24-30 0.795647 4.2624e-01
## factor(age_range)31-40 1.927020 5.3979e-02 .
## factor(age_range)41-49 0.939200 3.4763e-01
## factor(age_range)50+ -2.090572 3.6568e-02 *
## factor(fre_country_region)北方 -1.314758 1.8859e-01
## factor(fre_country_region)南方 -1.527587 1.2662e-01
## factor(fre_city_level)二线城市 0.593810 5.5264e-01
## factor(fre_city_level)三线城市 0.571250 5.6783e-01
## factor(fre_city_level)四线城市 -0.015857 9.8735e-01
## factor(fre_city_level)五线城市 -0.478310 6.3243e-01
## factor(fre_city_level)新一线城市 1.236275 2.1636e-01
## factor(fre_city_level)一线城市 1.505066 1.3231e-01
## factor(year)2023 5.824228 5.7489e-09 ***
## factor(year)2024 6.588751 4.4494e-11 ***
## factor(month)2 0.545113 5.8568e-01
## factor(month)3 -1.126621 2.5990e-01
## factor(month)4 -0.300045 7.6414e-01
## factor(month)5 0.870749 3.8389e-01
## factor(month)6 0.212602 8.3164e-01
## factor(month)7 1.126359 2.6002e-01
## factor(month)8 0.562896 5.7351e-01
## factor(month)9 -0.159857 8.7299e-01
## factor(month)10 -0.474804 6.3493e-01
## factor(month)11 -0.312371 7.5476e-01
## factor(month)12 0.713009 4.7584e-01
## factor(day)2 -0.892052 3.7237e-01
## factor(day)3 -1.075404 2.8220e-01
## factor(day)4 -0.857166 3.9135e-01
## factor(day)5 0.445504 6.5596e-01
## factor(day)6 -1.081916 2.7929e-01
## factor(day)7 -1.702888 8.8591e-02 .
## factor(day)8 -0.295563 7.6756e-01
## factor(day)9 -1.919458 5.4928e-02 .
## factor(day)10 -1.163700 2.4455e-01
## factor(day)11 -1.622030 1.0480e-01
## factor(day)12 -0.540457 5.8888e-01
## factor(day)13 -1.519092 1.2874e-01
## factor(day)14 -1.098756 2.7188e-01
## factor(day)15 -0.963710 3.3519e-01
## factor(day)16 -0.657136 5.1109e-01
## factor(day)17 0.035226 9.7190e-01
## factor(day)18 -0.118870 9.0538e-01
## factor(day)19 0.591880 5.5393e-01
## factor(day)20 -0.349660 7.2659e-01
## factor(day)21 0.531612 5.9500e-01
## factor(day)22 -0.012610 9.8994e-01
## factor(day)23 -0.303263 7.6169e-01
## factor(day)24 -0.747352 4.5485e-01
## factor(day)25 -0.289263 7.7238e-01
## factor(day)26 0.066170 9.4724e-01
## factor(day)27 0.187823 8.5102e-01
## factor(day)28 -0.716860 4.7346e-01
## factor(day)29 -0.136156 8.9170e-01
## factor(day)30 0.456153 6.4828e-01
## factor(day)31 -1.018342 3.0852e-01
## factor(fans_piar_cat)Small vs Big:is_cooperative 0.033595 9.7320e-01
## factor(fans_piar_cat)Big vs Small:is_cooperative -1.870989 6.1349e-02 .
## factor(fans_piar_cat)Big vs Big:is_cooperative 1.479063 1.3913e-01
## ... 1 variable was removed because of collinearity (factor(age_range)UNKNOWN)
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## RMSE: 0.040382 Adj. R2: 0.00695
# Pass the interaction model fitted above and the full data set to the project
# helper plot_predictions_with_ci(), which is defined outside this section.
# NOTE(review): judging by its name it plots predictions with confidence
# intervals — confirm against the helper's definition.
plot_predictions_with_ci(model, data)
## `summarise()` has grouped output by 'fans_piar_cat'. You can override using the
## `.groups` argument.
### (3) plot fans
# Plotting: specification with pair type only (no cooperation term) on
# log(net_follow_fans + 255), keeping the same demographic and calendar
# controls. Standard errors are clustered by author_id.
model <- feols(
  log(net_follow_fans + 255) ~
    factor(fans_piar_cat) +
    factor(gender) +
    factor(age_range) +
    factor(fre_country_region) +
    factor(fre_city_level) +
    # Opponent-side controls, currently switched off:
    # + factor(other_gender) + factor(other_age_range) + factor(other_fre_country_region) + factor(other_fre_city_level)
    factor(year) +
    factor(month) +
    factor(day),
  data = data,
  vcov = ~author_id
)
## The variable 'factor(age_range)UNKNOWN' has been removed because of collinearity (see $collin.var).
summary(model)
## OLS estimation, Dep. Var.: log(net_follow_fans + 255)
## Observations: 187,126
## Standard-errors: Clustered (author_id)
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 5.549132 0.005359 1035.490282 < 2.2e-16
## factor(fans_piar_cat)Small vs Big 0.003104 0.000431 7.196560 6.2025e-13
## factor(fans_piar_cat)Big vs Small 0.004081 0.000465 8.779429 < 2.2e-16
## factor(fans_piar_cat)Big vs Big 0.009954 0.001010 9.856011 < 2.2e-16
## factor(gender)M -0.000011 0.000215 -0.052650 9.5801e-01
## factor(gender)U -0.007876 0.005363 -1.468551 1.4196e-01
## factor(age_range)12-17 -0.000707 0.000319 -2.214903 2.6768e-02
## factor(age_range)18-23 0.000174 0.000361 0.480919 6.3058e-01
## factor(age_range)24-30 0.000359 0.000433 0.829099 4.0705e-01
## factor(age_range)31-40 0.000759 0.000383 1.981636 4.7522e-02
## factor(age_range)41-49 0.000383 0.000382 1.002433 3.1614e-01
## factor(age_range)50+ -0.000756 0.000372 -2.033610 4.1993e-02
## factor(fre_country_region)北方 -0.007213 0.005503 -1.310774 1.8994e-01
## factor(fre_country_region)南方 -0.008381 0.005494 -1.525386 1.2716e-01
## factor(fre_city_level)二线城市 0.000724 0.001191 0.607741 5.4336e-01
## factor(fre_city_level)三线城市 0.000682 0.001186 0.575019 5.6528e-01
## factor(fre_city_level)四线城市 -0.000018 0.001168 -0.015191 9.8788e-01
## factor(fre_city_level)五线城市 -0.000562 0.001169 -0.480639 6.3077e-01
## factor(fre_city_level)新一线城市 0.001558 0.001240 1.256152 2.0906e-01
## factor(fre_city_level)一线城市 0.002039 0.001347 1.513488 1.3016e-01
## factor(year)2023 0.001559 0.000268 5.826452 5.6728e-09
## factor(year)2024 0.001490 0.000228 6.539270 6.2008e-11
## factor(month)2 0.000175 0.000316 0.553212 5.8012e-01
## factor(month)3 -0.000352 0.000318 -1.107659 2.6801e-01
## factor(month)4 -0.000095 0.000347 -0.273031 7.8483e-01
## factor(month)5 0.000338 0.000390 0.867066 3.8591e-01
## factor(month)6 0.000089 0.000413 0.214654 8.3004e-01
## factor(month)7 0.000471 0.000418 1.126789 2.5983e-01
## factor(month)8 0.000217 0.000383 0.566499 5.7106e-01
## factor(month)9 -0.000067 0.000378 -0.176947 8.5955e-01
## factor(month)10 -0.000212 0.000433 -0.490411 6.2384e-01
## factor(month)11 -0.000239 0.000758 -0.314948 7.5280e-01
## factor(month)12 0.000332 0.000479 0.693351 4.8809e-01
## factor(day)2 -0.000473 0.000525 -0.900578 3.6781e-01
## factor(day)3 -0.000558 0.000513 -1.087242 2.7693e-01
## factor(day)4 -0.000511 0.000598 -0.854806 3.9266e-01
## factor(day)5 0.000336 0.000778 0.431362 6.6621e-01
## factor(day)6 -0.000630 0.000578 -1.090076 2.7568e-01
## factor(day)7 -0.000919 0.000541 -1.699545 8.9219e-02
## factor(day)8 -0.000205 0.000681 -0.301572 7.6298e-01
## factor(day)9 -0.001046 0.000537 -1.950079 5.1168e-02
## factor(day)10 -0.000728 0.000624 -1.166305 2.4349e-01
## factor(day)11 -0.000916 0.000562 -1.631417 1.0280e-01
## factor(day)12 -0.000347 0.000614 -0.565376 5.7182e-01
## factor(day)13 -0.000798 0.000521 -1.531515 1.2564e-01
## factor(day)14 -0.000722 0.000656 -1.099250 2.7166e-01
## factor(day)15 -0.000796 0.000809 -0.984669 3.2479e-01
## factor(day)16 -0.000432 0.000654 -0.659999 5.0926e-01
## factor(day)17 0.000015 0.000625 0.023265 9.8144e-01
## factor(day)18 -0.000076 0.000627 -0.121691 9.0314e-01
## factor(day)19 0.000368 0.000631 0.583463 5.5958e-01
## factor(day)20 -0.000231 0.000651 -0.355211 7.2243e-01
## factor(day)21 0.000379 0.000724 0.523846 6.0039e-01
## factor(day)22 -0.000013 0.000603 -0.021656 9.8272e-01
## factor(day)23 -0.000190 0.000595 -0.318698 7.4996e-01
## factor(day)24 -0.000854 0.001142 -0.747705 4.5464e-01
## factor(day)25 -0.000197 0.000662 -0.297913 7.6577e-01
## factor(day)26 0.000036 0.000588 0.061718 9.5079e-01
## factor(day)27 0.000109 0.000587 0.185711 8.5267e-01
## factor(day)28 -0.000541 0.000739 -0.731073 4.6474e-01
## factor(day)29 -0.000090 0.000684 -0.132203 8.9482e-01
## factor(day)30 0.000419 0.000904 0.463330 6.4313e-01
## factor(day)31 -0.000622 0.000601 -1.034911 3.0071e-01
##
## (Intercept) ***
## factor(fans_piar_cat)Small vs Big ***
## factor(fans_piar_cat)Big vs Small ***
## factor(fans_piar_cat)Big vs Big ***
## factor(gender)M
## factor(gender)U
## factor(age_range)12-17 *
## factor(age_range)18-23
## factor(age_range)24-30
## factor(age_range)31-40 *
## factor(age_range)41-49
## factor(age_range)50+ *
## factor(fre_country_region)北方
## factor(fre_country_region)南方
## factor(fre_city_level)二线城市
## factor(fre_city_level)三线城市
## factor(fre_city_level)四线城市
## factor(fre_city_level)五线城市
## factor(fre_city_level)新一线城市
## factor(fre_city_level)一线城市
## factor(year)2023 ***
## factor(year)2024 ***
## factor(month)2
## factor(month)3
## factor(month)4
## factor(month)5
## factor(month)6
## factor(month)7
## factor(month)8
## factor(month)9
## factor(month)10
## factor(month)11
## factor(month)12
## factor(day)2
## factor(day)3
## factor(day)4
## factor(day)5
## factor(day)6
## factor(day)7 .
## factor(day)8
## factor(day)9 .
## factor(day)10
## factor(day)11
## factor(day)12
## factor(day)13
## factor(day)14
## factor(day)15
## factor(day)16
## factor(day)17
## factor(day)18
## factor(day)19
## factor(day)20
## factor(day)21
## factor(day)22
## factor(day)23
## factor(day)24
## factor(day)25
## factor(day)26
## factor(day)27
## factor(day)28
## factor(day)29
## factor(day)30
## factor(day)31
## ... 1 variable was removed because of collinearity (factor(age_range)UNKNOWN)
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## RMSE: 0.040385 Adj. R2: 0.006835
# Pass the pair-type-only model fitted above and the full data set to the
# project helper plot_predictions_fans(), which is defined outside this section.
# NOTE(review): presumably it plots predictions per fan-size pair type —
# confirm against the helper's definition.
plot_predictions_fans(model, data)
# Plotting: specification with the cooperation indicator only (no pair-type
# term) on log(net_follow_fans + 255), keeping the same demographic and
# calendar controls. Standard errors are clustered by author_id.
model <- feols(
  log(net_follow_fans + 255) ~
    is_cooperative +
    factor(gender) +
    factor(age_range) +
    factor(fre_country_region) +
    factor(fre_city_level) +
    # Opponent-side controls, currently switched off:
    # + factor(other_gender) + factor(other_age_range) + factor(other_fre_country_region) + factor(other_fre_city_level)
    factor(year) +
    factor(month) +
    factor(day),
  data = data,
  vcov = ~author_id
)
## The variable 'factor(age_range)UNKNOWN' has been removed because of collinearity (see $collin.var).
summary(model)
## OLS estimation, Dep. Var.: log(net_follow_fans + 255)
## Observations: 187,126
## Standard-errors: Clustered (author_id)
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 5.549494 0.005389 1029.853782 < 2.2e-16
## is_cooperative 0.000103 0.000223 0.460942 6.4484e-01
## factor(gender)M -0.001422 0.000199 -7.161130 8.0363e-13
## factor(gender)U -0.008808 0.005391 -1.633691 1.0233e-01
## factor(age_range)12-17 -0.000500 0.000319 -1.571005 1.1618e-01
## factor(age_range)18-23 0.000931 0.000357 2.607692 9.1164e-03
## factor(age_range)24-30 0.002236 0.000421 5.306501 1.1190e-07
## factor(age_range)31-40 0.002533 0.000403 6.292268 3.1368e-10
## factor(age_range)41-49 0.001432 0.000388 3.689136 2.2509e-04
## factor(age_range)50+ -0.000209 0.000380 -0.551295 5.8143e-01
## factor(fre_country_region)北方 -0.007646 0.005578 -1.370571 1.7051e-01
## factor(fre_country_region)南方 -0.009231 0.005574 -1.656110 9.7702e-02
## factor(fre_city_level)二线城市 0.001471 0.001407 1.045190 2.9594e-01
## factor(fre_city_level)三线城市 0.001304 0.001403 0.929336 3.5272e-01
## factor(fre_city_level)四线城市 0.000350 0.001387 0.252503 8.0065e-01
## factor(fre_city_level)五线城市 -0.000489 0.001388 -0.352380 7.2455e-01
## factor(fre_city_level)新一线城市 0.002349 0.001451 1.618204 1.0562e-01
## factor(fre_city_level)一线城市 0.002794 0.001542 1.811894 7.0005e-02
## factor(year)2023 0.002014 0.000271 7.442173 9.9544e-14
## factor(year)2024 0.002003 0.000233 8.596568 < 2.2e-16
## factor(month)2 0.000112 0.000317 0.353387 7.2380e-01
## factor(month)3 -0.000161 0.000318 -0.505497 6.1321e-01
## factor(month)4 0.000075 0.000349 0.214920 8.2983e-01
## factor(month)5 0.000551 0.000394 1.398855 1.6186e-01
## factor(month)6 0.000422 0.000415 1.016626 3.0933e-01
## factor(month)7 0.000763 0.000422 1.808403 7.0546e-02
## factor(month)8 0.000522 0.000386 1.352222 1.7631e-01
## factor(month)9 0.000433 0.000376 1.151356 2.4959e-01
## factor(month)10 0.000225 0.000434 0.519240 6.0359e-01
## factor(month)11 0.000200 0.000769 0.260703 7.9432e-01
## factor(month)12 0.000668 0.000482 1.384626 1.6617e-01
## factor(day)2 -0.000565 0.000527 -1.072679 2.8342e-01
## factor(day)3 -0.000639 0.000515 -1.240319 2.1486e-01
## factor(day)4 -0.000478 0.000600 -0.796300 4.2586e-01
## factor(day)5 0.000364 0.000780 0.466283 6.4101e-01
## factor(day)6 -0.000516 0.000579 -0.890972 3.7295e-01
## factor(day)7 -0.000837 0.000541 -1.546558 1.2197e-01
## factor(day)8 -0.000101 0.000684 -0.147326 8.8287e-01
## factor(day)9 -0.001011 0.000537 -1.882289 5.9799e-02
## factor(day)10 -0.000701 0.000625 -1.120284 2.6259e-01
## factor(day)11 -0.000835 0.000562 -1.484686 1.3763e-01
## factor(day)12 -0.000288 0.000616 -0.468270 6.3959e-01
## factor(day)13 -0.000705 0.000522 -1.350547 1.7684e-01
## factor(day)14 -0.000617 0.000659 -0.936668 3.4893e-01
## factor(day)15 -0.000742 0.000810 -0.915850 3.5975e-01
## factor(day)16 -0.000380 0.000656 -0.578289 5.6307e-01
## factor(day)17 0.000072 0.000627 0.115477 9.0807e-01
## factor(day)18 -0.000029 0.000629 -0.046096 9.6323e-01
## factor(day)19 0.000486 0.000633 0.767057 4.4305e-01
## factor(day)20 -0.000158 0.000654 -0.242346 8.0851e-01
## factor(day)21 0.000557 0.000727 0.766007 4.4367e-01
## factor(day)22 0.000046 0.000606 0.075357 9.3993e-01
## factor(day)23 -0.000101 0.000596 -0.169384 8.6549e-01
## factor(day)24 -0.000757 0.001144 -0.662062 5.0793e-01
## factor(day)25 -0.000134 0.000664 -0.201326 8.4044e-01
## factor(day)26 0.000088 0.000590 0.149359 8.8127e-01
## factor(day)27 0.000192 0.000590 0.325632 7.4470e-01
## factor(day)28 -0.000508 0.000740 -0.685576 4.9298e-01
## factor(day)29 -0.000064 0.000686 -0.092946 9.2595e-01
## factor(day)30 0.000413 0.000907 0.455409 6.4882e-01
## factor(day)31 -0.000635 0.000604 -1.052234 2.9269e-01
##
## (Intercept) ***
## is_cooperative
## factor(gender)M ***
## factor(gender)U
## factor(age_range)12-17
## factor(age_range)18-23 **
## factor(age_range)24-30 ***
## factor(age_range)31-40 ***
## factor(age_range)41-49 ***
## factor(age_range)50+
## factor(fre_country_region)北方
## factor(fre_country_region)南方 .
## factor(fre_city_level)二线城市
## factor(fre_city_level)三线城市
## factor(fre_city_level)四线城市
## factor(fre_city_level)五线城市
## factor(fre_city_level)新一线城市
## factor(fre_city_level)一线城市 .
## factor(year)2023 ***
## factor(year)2024 ***
## factor(month)2
## factor(month)3
## factor(month)4
## factor(month)5
## factor(month)6
## factor(month)7 .
## factor(month)8
## factor(month)9
## factor(month)10
## factor(month)11
## factor(month)12
## factor(day)2
## factor(day)3
## factor(day)4
## factor(day)5
## factor(day)6
## factor(day)7
## factor(day)8
## factor(day)9 .
## factor(day)10
## factor(day)11
## factor(day)12
## factor(day)13
## factor(day)14
## factor(day)15
## factor(day)16
## factor(day)17
## factor(day)18
## factor(day)19
## factor(day)20
## factor(day)21
## factor(day)22
## factor(day)23
## factor(day)24
## factor(day)25
## factor(day)26
## factor(day)27
## factor(day)28
## factor(day)29
## factor(day)30
## factor(day)31
## ... 1 variable was removed because of collinearity (factor(age_range)UNKNOWN)
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## RMSE: 0.040472 Adj. R2: 0.002567
# Pass the cooperation-only model fitted above and the full data set to the
# project helper plot_predictions_cate(), which is defined outside this section.
plot_predictions_cate(model, data)
# Formula handed to plot_continue_cutoff(): log-shifted net follower change
# regressed on a pair-type factor.
# NOTE(review): fans_piar_cat_temp is not created anywhere in this section;
# presumably the helper builds it from the data for each cutoff it tries —
# confirm against the helper's definition.
model_formula <- log(net_follow_fans+255) ~ factor(fans_piar_cat_temp)
# Run the helper on the full data set with that formula.
plot_continue_cutoff(data, model_formula)
library(ggplot2)
library(dplyr)
library(RColorBrewer)
# Step 1: Bin the streamer's and the opponent's pre-PK fan counts into the
# same order-of-magnitude ranges (fewer, broader bins).
# Every upper bound is inclusive and the conditions are tried top to bottom,
# so each row lands in the first range whose upper bound it does not exceed.
# A count of exactly 0 belongs to "0-1k": the former `> 0` lower bound let it
# fall through to the catch-all and be labelled ">1M". Missing counts stay NA
# instead of being labelled ">1M" as well.
data <- data %>% mutate(
  fans_new_range = case_when(
    is.na(before_fans_count) ~ NA_character_,
    before_fans_count <= 1000 ~ "0-1k",
    before_fans_count <= 10000 ~ "1k-10k",
    before_fans_count <= 100000 ~ "10k-100k",
    before_fans_count <= 1000000 ~ "100k-1M",
    TRUE ~ ">1M"
  ),
  other_fans_new_range = case_when(
    is.na(other_before_fans_count) ~ NA_character_,
    other_before_fans_count <= 1000 ~ "0-1k",
    other_before_fans_count <= 10000 ~ "1k-10k",
    other_before_fans_count <= 100000 ~ "10k-100k",
    other_before_fans_count <= 1000000 ~ "100k-1M",
    TRUE ~ ">1M"
  )
)
# Step 2: Convert both range labels to factors whose levels run from the
# smallest bin to the largest, so grouped summaries and plot axes keep that
# order instead of sorting the labels alphabetically.
data <- data %>%
  mutate(across(
    c(fans_new_range, other_fans_new_range),
    function(range_label) {
      factor(
        range_label,
        levels = c("0-1k", "1k-10k", "10k-100k", "100k-1M", ">1M")
      )
    }
  ))
# Step 3: For every (own range, opponent range) cell compute the number of PK
# pairs, the mean of log(net_follow_fans + 605), a 95% t-based confidence
# interval for that mean, and the cell's share of all pairs.
#
# The spread has to come from the row-level log values. Inside summarize() the
# name Y already refers to the single group mean, so the earlier sd(Y) was
# always NA and both CI bounds came out NA. The t quantile now uses n - 1
# degrees of freedom, where n counts the non-missing values in the cell.
#
# NOTE(review): the shift here is 605, while the regressions on the same
# variable use 255. Both keep the log argument positive for the minimum of
# -254 reported by summary(data$net_follow_fans); confirm which is intended.
agg_data <- data %>%
  mutate(log_y = log(net_follow_fans + 605)) %>%
  group_by(fans_new_range, other_fans_new_range) %>%
  summarize(
    count = n(), # number of pairs in the cell
    n_obs = sum(!is.na(log_y)), # values that actually enter the mean
    Y = mean(log_y, na.rm = TRUE),
    sd_y = sd(log_y, na.rm = TRUE)
  ) %>%
  ungroup() %>%
  mutate(
    # An interval needs at least two observations; otherwise the bounds are NA.
    # pmax() keeps qt() away from zero degrees of freedom in those cells.
    ci_half_width = ifelse(
      n_obs > 1,
      qt(0.975, pmax(n_obs - 1, 1)) * sd_y / sqrt(n_obs),
      NA_real_
    ),
    ci_lower = Y - ci_half_width,
    ci_upper = Y + ci_half_width,
    percentage = count / sum(count) * 100
  )
## `summarise()` has grouped output by 'fans_new_range'. You can override using
## the `.groups` argument.
# Step 4: Heatmap with one tile per (own range, opponent range) cell. The fill
# is the cell mean of log(net_follow_fans + 605); the text is the cell's share
# of all pairs.
#
# The error-bar layer was removed: its ymin/ymax were confidence bounds of the
# fill value, but the y axis here is the categorical opponent range, so the
# bars would be placed in category units and would not show the interval.
# ci_lower / ci_upper remain in agg_data for a table or a separate plot.
#
# The title and legend were copied from the total-cost heatmap and named the
# wrong variable; they now describe the quantity that is actually plotted.
ggplot(agg_data, aes(x = fans_new_range, y = other_fans_new_range, fill = Y)) +
  geom_tile() +
  geom_text(
    aes(label = paste0(round(percentage, 2), "%")),
    color = "black",
    size = 4
  ) +
  scale_fill_gradientn(
    colors = c("yellow", "orange", "darkorange", "red", "darkred"),
    # rescale() turns these into five evenly spaced colour stops on [0, 1].
    values = scales::rescale(c(0, 0.05, 0.1, 0.15, 0.2)),
    # limits = c(0, 0.2),
    na.value = "white"
  ) +
  labs(
    title = "Mean log(Net Follow Fans + 605) by Fan Ranges",
    x = "Fans Range",
    y = "Other Fans Range",
    fill = "Mean log(net_follow_fans + 605)"
  ) +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))
# OLS of log(already_follow_other_fans_count + 1) on the fan-size pairing, the
# cooperation dummy and their interaction, with streamer demographics and
# year / month / day-of-month dummies as controls. Standard errors are
# clustered by author_id.
# `a * b` expands to a + b + a:b, i.e. both main effects plus the interaction.
model <- feols(
  log(already_follow_other_fans_count + 1) ~
    factor(fans_piar_cat) * is_cooperative +
    factor(gender) + factor(age_range) +
    factor(fre_country_region) + factor(fre_city_level) +
    # Opponent-side controls, currently switched off:
    # factor(other_gender) + factor(other_age_range) +
    # factor(other_fre_country_region) + factor(other_fre_city_level) +
    factor(year) + factor(month) + factor(day),
  data = data,
  vcov = ~author_id
)
## The variable 'factor(age_range)UNKNOWN' has been removed because of collinearity (see $collin.var).
summary(model)
## OLS estimation, Dep. Var.: log(already_follow_other_fans_count + 1)
## Observations: 187,126
## Standard-errors: Clustered (author_id)
## Estimate Std. Error t value
## (Intercept) 0.012543 0.008703 1.441257
## factor(fans_piar_cat)Small vs Big 0.007542 0.001332 5.662437
## factor(fans_piar_cat)Big vs Small 0.003002 0.000960 3.126249
## factor(fans_piar_cat)Big vs Big 0.029169 0.002164 13.481440
## is_cooperative 0.000364 0.000283 1.284058
## factor(gender)M -0.003184 0.000424 -7.509626
## factor(gender)U -0.017483 0.008691 -2.011674
## factor(age_range)12-17 0.000357 0.001129 0.315936
## factor(age_range)18-23 0.002070 0.001185 1.747685
## factor(age_range)24-30 0.000131 0.001271 0.102733
## factor(age_range)31-40 0.000329 0.001166 0.281759
## factor(age_range)41-49 0.001559 0.001183 1.317468
## factor(age_range)50+ 0.002042 0.001159 1.761609
## factor(fre_country_region)北方 -0.014638 0.009209 -1.589523
## factor(fre_country_region)南方 -0.016840 0.009199 -1.830689
## factor(fre_city_level)二线城市 0.003531 0.003241 1.089356
## factor(fre_city_level)三线城市 0.001732 0.003199 0.541338
## factor(fre_city_level)四线城市 0.000455 0.003185 0.142931
## factor(fre_city_level)五线城市 -0.000278 0.003174 -0.087462
## factor(fre_city_level)新一线城市 0.002612 0.003258 0.801774
## factor(fre_city_level)一线城市 0.002252 0.003396 0.663357
## factor(year)2023 0.003475 0.000473 7.345921
## factor(year)2024 0.004904 0.000495 9.902526
## factor(month)2 0.001422 0.000737 1.928356
## factor(month)3 0.000072 0.000695 0.103288
## factor(month)4 0.001419 0.000729 1.945850
## factor(month)5 0.002018 0.000862 2.340318
## factor(month)6 0.001174 0.000968 1.212865
## factor(month)7 0.000343 0.000797 0.429885
## factor(month)8 0.000883 0.000786 1.123286
## factor(month)9 0.002424 0.001013 2.392301
## factor(month)10 0.002921 0.001040 2.808087
## factor(month)11 0.003798 0.001092 3.479235
## factor(month)12 0.003214 0.001134 2.834673
## factor(day)2 0.001678 0.001323 1.268489
## factor(day)3 0.001776 0.001332 1.333321
## factor(day)4 -0.000141 0.001256 -0.112536
## factor(day)5 0.003832 0.001656 2.313581
## factor(day)6 0.002082 0.001377 1.512104
## factor(day)7 0.000104 0.001294 0.080396
## factor(day)8 0.000326 0.001454 0.224374
## factor(day)9 0.000881 0.001226 0.718320
## factor(day)10 0.000606 0.001277 0.474265
## factor(day)11 0.000575 0.001317 0.436792
## factor(day)12 0.002369 0.001457 1.625451
## factor(day)13 0.001254 0.001316 0.953283
## factor(day)14 0.000449 0.001361 0.329900
## factor(day)15 0.003364 0.001705 1.972807
## factor(day)16 0.000708 0.001246 0.568331
## factor(day)17 0.002168 0.001507 1.438839
## factor(day)18 0.002503 0.001549 1.615628
## factor(day)19 0.002530 0.001500 1.687246
## factor(day)20 -0.000095 0.001341 -0.071101
## factor(day)21 0.001106 0.001331 0.830937
## factor(day)22 0.000819 0.001339 0.611863
## factor(day)23 0.001119 0.001293 0.865475
## factor(day)24 -0.000148 0.001263 -0.117423
## factor(day)25 0.002494 0.001382 1.804238
## factor(day)26 0.001203 0.001318 0.913357
## factor(day)27 0.003693 0.001449 2.549146
## factor(day)28 0.000622 0.001405 0.442772
## factor(day)29 0.002096 0.001449 1.446850
## factor(day)30 0.000717 0.001385 0.517845
## factor(day)31 0.002292 0.001614 1.420245
## factor(fans_piar_cat)Small vs Big:is_cooperative 0.007344 0.002460 2.985758
## factor(fans_piar_cat)Big vs Small:is_cooperative 0.001055 0.001702 0.620170
## factor(fans_piar_cat)Big vs Big:is_cooperative 0.018318 0.003864 4.740119
## Pr(>|t|)
## (Intercept) 1.4951e-01
## factor(fans_piar_cat)Small vs Big 1.4950e-08 ***
## factor(fans_piar_cat)Big vs Small 1.7708e-03 **
## factor(fans_piar_cat)Big vs Big < 2.2e-16 ***
## is_cooperative 1.9912e-01
## factor(gender)M 5.9608e-14 ***
## factor(gender)U 4.4256e-02 *
## factor(age_range)12-17 7.5205e-01
## factor(age_range)18-23 8.0521e-02 .
## factor(age_range)24-30 9.1817e-01
## factor(age_range)31-40 7.7813e-01
## factor(age_range)41-49 1.8768e-01
## factor(age_range)50+ 7.8137e-02 .
## factor(fre_country_region)北方 1.1194e-01
## factor(fre_country_region)南方 6.7149e-02 .
## factor(fre_city_level)二线城市 2.7600e-01
## factor(fre_city_level)三线城市 5.8828e-01
## factor(fre_city_level)四线城市 8.8634e-01
## factor(fre_city_level)五线城市 9.3030e-01
## factor(fre_city_level)新一线城市 4.2268e-01
## factor(fre_city_level)一线城市 5.0710e-01
## factor(year)2023 2.0533e-13 ***
## factor(year)2024 < 2.2e-16 ***
## factor(month)2 5.3813e-02 .
## factor(month)3 9.1773e-01
## factor(month)4 5.1675e-02 .
## factor(month)5 1.9269e-02 *
## factor(month)6 2.2518e-01
## factor(month)7 6.6728e-01
## factor(month)8 2.6132e-01
## factor(month)9 1.6744e-02 *
## factor(month)10 4.9843e-03 **
## factor(month)11 5.0298e-04 ***
## factor(month)12 4.5879e-03 **
## factor(day)2 2.0463e-01
## factor(day)3 1.8243e-01
## factor(day)4 9.1040e-01
## factor(day)5 2.0692e-02 *
## factor(day)6 1.3051e-01
## factor(day)7 9.3592e-01
## factor(day)8 8.2247e-01
## factor(day)9 4.7256e-01
## factor(day)10 6.3531e-01
## factor(day)11 6.6226e-01
## factor(day)12 1.0407e-01
## factor(day)13 3.4045e-01
## factor(day)14 7.4148e-01
## factor(day)15 4.8519e-02 *
## factor(day)16 5.6981e-01
## factor(day)17 1.5020e-01
## factor(day)18 1.0618e-01
## factor(day)19 9.1558e-02 .
## factor(day)20 9.4332e-01
## factor(day)21 4.0601e-01
## factor(day)22 5.4063e-01
## factor(day)23 3.8678e-01
## factor(day)24 9.0652e-01
## factor(day)25 7.1196e-02 .
## factor(day)26 3.6106e-01
## factor(day)27 1.0800e-02 *
## factor(day)28 6.5793e-01
## factor(day)29 1.4794e-01
## factor(day)30 6.0457e-01
## factor(day)31 1.5554e-01
## factor(fans_piar_cat)Small vs Big:is_cooperative 2.8292e-03 **
## factor(fans_piar_cat)Big vs Small:is_cooperative 5.3515e-01
## factor(fans_piar_cat)Big vs Big:is_cooperative 2.1378e-06 ***
## ... 1 variable was removed because of collinearity (factor(age_range)UNKNOWN)
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## RMSE: 0.085628 Adj. R2: 0.018267
# Plot
# Same specification as the preceding fit: log(already_follow_other_fans_count
# + 1) on fan-size pairing x cooperation plus demographic and calendar
# dummies, clustered by author_id. It is refit here ahead of the prediction
# plot.
model <- feols(
  log(already_follow_other_fans_count + 1) ~
    factor(fans_piar_cat) * is_cooperative +
    factor(gender) + factor(age_range) +
    factor(fre_country_region) + factor(fre_city_level) +
    # Opponent-side controls, currently switched off:
    # factor(other_gender) + factor(other_age_range) +
    # factor(other_fre_country_region) + factor(other_fre_city_level) +
    factor(year) + factor(month) + factor(day),
  data = data,
  vcov = ~author_id
)
## The variable 'factor(age_range)UNKNOWN' has been removed because of collinearity (see $collin.var).
summary(model)
## OLS estimation, Dep. Var.: log(already_follow_other_fans_count + 1)
## Observations: 187,126
## Standard-errors: Clustered (author_id)
## Estimate Std. Error t value
## (Intercept) 0.012543 0.008703 1.441257
## factor(fans_piar_cat)Small vs Big 0.007542 0.001332 5.662437
## factor(fans_piar_cat)Big vs Small 0.003002 0.000960 3.126249
## factor(fans_piar_cat)Big vs Big 0.029169 0.002164 13.481440
## is_cooperative 0.000364 0.000283 1.284058
## factor(gender)M -0.003184 0.000424 -7.509626
## factor(gender)U -0.017483 0.008691 -2.011674
## factor(age_range)12-17 0.000357 0.001129 0.315936
## factor(age_range)18-23 0.002070 0.001185 1.747685
## factor(age_range)24-30 0.000131 0.001271 0.102733
## factor(age_range)31-40 0.000329 0.001166 0.281759
## factor(age_range)41-49 0.001559 0.001183 1.317468
## factor(age_range)50+ 0.002042 0.001159 1.761609
## factor(fre_country_region)北方 -0.014638 0.009209 -1.589523
## factor(fre_country_region)南方 -0.016840 0.009199 -1.830689
## factor(fre_city_level)二线城市 0.003531 0.003241 1.089356
## factor(fre_city_level)三线城市 0.001732 0.003199 0.541338
## factor(fre_city_level)四线城市 0.000455 0.003185 0.142931
## factor(fre_city_level)五线城市 -0.000278 0.003174 -0.087462
## factor(fre_city_level)新一线城市 0.002612 0.003258 0.801774
## factor(fre_city_level)一线城市 0.002252 0.003396 0.663357
## factor(year)2023 0.003475 0.000473 7.345921
## factor(year)2024 0.004904 0.000495 9.902526
## factor(month)2 0.001422 0.000737 1.928356
## factor(month)3 0.000072 0.000695 0.103288
## factor(month)4 0.001419 0.000729 1.945850
## factor(month)5 0.002018 0.000862 2.340318
## factor(month)6 0.001174 0.000968 1.212865
## factor(month)7 0.000343 0.000797 0.429885
## factor(month)8 0.000883 0.000786 1.123286
## factor(month)9 0.002424 0.001013 2.392301
## factor(month)10 0.002921 0.001040 2.808087
## factor(month)11 0.003798 0.001092 3.479235
## factor(month)12 0.003214 0.001134 2.834673
## factor(day)2 0.001678 0.001323 1.268489
## factor(day)3 0.001776 0.001332 1.333321
## factor(day)4 -0.000141 0.001256 -0.112536
## factor(day)5 0.003832 0.001656 2.313581
## factor(day)6 0.002082 0.001377 1.512104
## factor(day)7 0.000104 0.001294 0.080396
## factor(day)8 0.000326 0.001454 0.224374
## factor(day)9 0.000881 0.001226 0.718320
## factor(day)10 0.000606 0.001277 0.474265
## factor(day)11 0.000575 0.001317 0.436792
## factor(day)12 0.002369 0.001457 1.625451
## factor(day)13 0.001254 0.001316 0.953283
## factor(day)14 0.000449 0.001361 0.329900
## factor(day)15 0.003364 0.001705 1.972807
## factor(day)16 0.000708 0.001246 0.568331
## factor(day)17 0.002168 0.001507 1.438839
## factor(day)18 0.002503 0.001549 1.615628
## factor(day)19 0.002530 0.001500 1.687246
## factor(day)20 -0.000095 0.001341 -0.071101
## factor(day)21 0.001106 0.001331 0.830937
## factor(day)22 0.000819 0.001339 0.611863
## factor(day)23 0.001119 0.001293 0.865475
## factor(day)24 -0.000148 0.001263 -0.117423
## factor(day)25 0.002494 0.001382 1.804238
## factor(day)26 0.001203 0.001318 0.913357
## factor(day)27 0.003693 0.001449 2.549146
## factor(day)28 0.000622 0.001405 0.442772
## factor(day)29 0.002096 0.001449 1.446850
## factor(day)30 0.000717 0.001385 0.517845
## factor(day)31 0.002292 0.001614 1.420245
## factor(fans_piar_cat)Small vs Big:is_cooperative 0.007344 0.002460 2.985758
## factor(fans_piar_cat)Big vs Small:is_cooperative 0.001055 0.001702 0.620170
## factor(fans_piar_cat)Big vs Big:is_cooperative 0.018318 0.003864 4.740119
## Pr(>|t|)
## (Intercept) 1.4951e-01
## factor(fans_piar_cat)Small vs Big 1.4950e-08 ***
## factor(fans_piar_cat)Big vs Small 1.7708e-03 **
## factor(fans_piar_cat)Big vs Big < 2.2e-16 ***
## is_cooperative 1.9912e-01
## factor(gender)M 5.9608e-14 ***
## factor(gender)U 4.4256e-02 *
## factor(age_range)12-17 7.5205e-01
## factor(age_range)18-23 8.0521e-02 .
## factor(age_range)24-30 9.1817e-01
## factor(age_range)31-40 7.7813e-01
## factor(age_range)41-49 1.8768e-01
## factor(age_range)50+ 7.8137e-02 .
## factor(fre_country_region)北方 1.1194e-01
## factor(fre_country_region)南方 6.7149e-02 .
## factor(fre_city_level)二线城市 2.7600e-01
## factor(fre_city_level)三线城市 5.8828e-01
## factor(fre_city_level)四线城市 8.8634e-01
## factor(fre_city_level)五线城市 9.3030e-01
## factor(fre_city_level)新一线城市 4.2268e-01
## factor(fre_city_level)一线城市 5.0710e-01
## factor(year)2023 2.0533e-13 ***
## factor(year)2024 < 2.2e-16 ***
## factor(month)2 5.3813e-02 .
## factor(month)3 9.1773e-01
## factor(month)4 5.1675e-02 .
## factor(month)5 1.9269e-02 *
## factor(month)6 2.2518e-01
## factor(month)7 6.6728e-01
## factor(month)8 2.6132e-01
## factor(month)9 1.6744e-02 *
## factor(month)10 4.9843e-03 **
## factor(month)11 5.0298e-04 ***
## factor(month)12 4.5879e-03 **
## factor(day)2 2.0463e-01
## factor(day)3 1.8243e-01
## factor(day)4 9.1040e-01
## factor(day)5 2.0692e-02 *
## factor(day)6 1.3051e-01
## factor(day)7 9.3592e-01
## factor(day)8 8.2247e-01
## factor(day)9 4.7256e-01
## factor(day)10 6.3531e-01
## factor(day)11 6.6226e-01
## factor(day)12 1.0407e-01
## factor(day)13 3.4045e-01
## factor(day)14 7.4148e-01
## factor(day)15 4.8519e-02 *
## factor(day)16 5.6981e-01
## factor(day)17 1.5020e-01
## factor(day)18 1.0618e-01
## factor(day)19 9.1558e-02 .
## factor(day)20 9.4332e-01
## factor(day)21 4.0601e-01
## factor(day)22 5.4063e-01
## factor(day)23 3.8678e-01
## factor(day)24 9.0652e-01
## factor(day)25 7.1196e-02 .
## factor(day)26 3.6106e-01
## factor(day)27 1.0800e-02 *
## factor(day)28 6.5793e-01
## factor(day)29 1.4794e-01
## factor(day)30 6.0457e-01
## factor(day)31 1.5554e-01
## factor(fans_piar_cat)Small vs Big:is_cooperative 2.8292e-03 **
## factor(fans_piar_cat)Big vs Small:is_cooperative 5.3515e-01
## factor(fans_piar_cat)Big vs Big:is_cooperative 2.1378e-06 ***
## ... 1 variable was removed because of collinearity (factor(age_range)UNKNOWN)
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## RMSE: 0.085628 Adj. R2: 0.018267
# Plot the interaction model fitted just above with the plot_predictions_with_ci()
# helper. The helper is not defined in this section; presumably it draws
# predicted values with confidence intervals per fans_piar_cat group -- TODO
# confirm against its definition.
plot_predictions_with_ci(model, data)
## `summarise()` has grouped output by 'fans_piar_cat'. You can override using the
## `.groups` argument.
### (3) plot fans
# Plot
# Main-effects model: log(already_follow_other_fans_count + 1) on the fan-size
# pairing only (no cooperation term), keeping the same demographic and
# calendar dummies. Standard errors are clustered by author_id.
model <- feols(
  log(already_follow_other_fans_count + 1) ~
    factor(fans_piar_cat) +
    factor(gender) + factor(age_range) +
    factor(fre_country_region) + factor(fre_city_level) +
    # Opponent-side controls, currently switched off:
    # factor(other_gender) + factor(other_age_range) +
    # factor(other_fre_country_region) + factor(other_fre_city_level) +
    factor(year) + factor(month) + factor(day),
  data = data,
  vcov = ~author_id
)
## The variable 'factor(age_range)UNKNOWN' has been removed because of collinearity (see $collin.var).
summary(model)
## OLS estimation, Dep. Var.: log(already_follow_other_fans_count + 1)
## Observations: 187,126
## Standard-errors: Clustered (author_id)
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.012404 0.008686 1.428126 1.5326e-01
## factor(fans_piar_cat)Small vs Big 0.010276 0.001150 8.938962 < 2.2e-16 ***
## factor(fans_piar_cat)Big vs Small 0.003333 0.000833 4.004106 6.2281e-05 ***
## factor(fans_piar_cat)Big vs Big 0.036784 0.001871 19.658412 < 2.2e-16 ***
## factor(gender)M -0.003370 0.000421 -7.998926 1.2636e-15 ***
## factor(gender)U -0.017149 0.008673 -1.977275 4.8012e-02 *
## factor(age_range)12-17 0.000426 0.001131 0.376723 7.0638e-01
## factor(age_range)18-23 0.002117 0.001185 1.786638 7.3998e-02 .
## factor(age_range)24-30 0.000130 0.001271 0.102643 9.1825e-01
## factor(age_range)31-40 0.000369 0.001165 0.316900 7.5132e-01
## factor(age_range)41-49 0.001563 0.001182 1.321735 1.8626e-01
## factor(age_range)50+ 0.001958 0.001158 1.690981 9.0842e-02 .
## factor(fre_country_region)北方 -0.014360 0.009196 -1.561666 1.1837e-01
## factor(fre_country_region)南方 -0.016570 0.009185 -1.804045 7.1226e-02 .
## factor(fre_city_level)二线城市 0.003644 0.003250 1.121191 2.6221e-01
## factor(fre_city_level)三线城市 0.001758 0.003207 0.548236 5.8353e-01
## factor(fre_city_level)四线城市 0.000497 0.003193 0.155707 8.7626e-01
## factor(fre_city_level)五线城市 -0.000234 0.003182 -0.073471 9.4143e-01
## factor(fre_city_level)新一线城市 0.002680 0.003267 0.820325 4.1203e-01
## factor(fre_city_level)一线城市 0.002234 0.003403 0.656397 5.1157e-01
## factor(year)2023 0.003562 0.000474 7.517860 5.5974e-14 ***
## factor(year)2024 0.004997 0.000494 10.112994 < 2.2e-16 ***
## factor(month)2 0.001419 0.000737 1.924942 5.4238e-02 .
## factor(month)3 0.000086 0.000695 0.124506 9.0091e-01
## factor(month)4 0.001376 0.000730 1.885647 5.9344e-02 .
## factor(month)5 0.001899 0.000861 2.203937 2.7530e-02 *
## factor(month)6 0.001124 0.000968 1.161603 2.4540e-01
## factor(month)7 0.000225 0.000797 0.282881 7.7727e-01
## factor(month)8 0.000776 0.000786 0.987412 3.2344e-01
## factor(month)9 0.002307 0.001013 2.278653 2.2689e-02 *
## factor(month)10 0.002841 0.001041 2.728905 6.3552e-03 **
## factor(month)11 0.003766 0.001092 3.449609 5.6155e-04 ***
## factor(month)12 0.003231 0.001136 2.845132 4.4399e-03 **
## factor(day)2 0.001744 0.001323 1.317489 1.8768e-01
## factor(day)3 0.001750 0.001332 1.313490 1.8902e-01
## factor(day)4 -0.000087 0.001257 -0.068991 9.4500e-01
## factor(day)5 0.003854 0.001658 2.324723 2.0088e-02 *
## factor(day)6 0.002064 0.001379 1.496932 1.3441e-01
## factor(day)7 0.000177 0.001293 0.136673 8.9129e-01
## factor(day)8 0.000386 0.001454 0.265285 7.9079e-01
## factor(day)9 0.000897 0.001226 0.732079 4.6412e-01
## factor(day)10 0.000651 0.001277 0.510018 6.1004e-01
## factor(day)11 0.000621 0.001318 0.471391 6.3736e-01
## factor(day)12 0.002387 0.001459 1.636023 1.0184e-01
## factor(day)13 0.001312 0.001316 0.996473 3.1902e-01
## factor(day)14 0.000504 0.001362 0.370444 7.1105e-01
## factor(day)15 0.003344 0.001706 1.959777 5.0024e-02 .
## factor(day)16 0.000775 0.001247 0.621931 5.3399e-01
## factor(day)17 0.002188 0.001506 1.453315 1.4614e-01
## factor(day)18 0.002555 0.001550 1.648953 9.9159e-02 .
## factor(day)19 0.002590 0.001499 1.727277 8.4120e-02 .
## factor(day)20 -0.000056 0.001340 -0.041610 9.6681e-01
## factor(day)21 0.001158 0.001335 0.867867 3.8547e-01
## factor(day)22 0.000872 0.001338 0.651455 5.1475e-01
## factor(day)23 0.001147 0.001293 0.887243 3.7495e-01
## factor(day)24 -0.000053 0.001263 -0.041900 9.6658e-01
## factor(day)25 0.002585 0.001383 1.868636 6.1675e-02 .
## factor(day)26 0.001264 0.001318 0.958530 3.3780e-01
## factor(day)27 0.003733 0.001450 2.575063 1.0023e-02 *
## factor(day)28 0.000624 0.001405 0.443946 6.5708e-01
## factor(day)29 0.002196 0.001450 1.514501 1.2990e-01
## factor(day)30 0.000790 0.001386 0.570059 5.6864e-01
## factor(day)31 0.002309 0.001614 1.430600 1.5255e-01
## ... 1 variable was removed because of collinearity (factor(age_range)UNKNOWN)
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## RMSE: 0.085674 Adj. R2: 0.017224
# Plot the fan-size-only model fitted just above with the
# plot_predictions_fans() helper (not defined in this section; presumably it
# shows predictions by fans_piar_cat -- TODO confirm against its definition).
plot_predictions_fans(model, data)
# Plot
# Main-effects model: log(already_follow_other_fans_count + 1) on the
# cooperation dummy only (no fan-size pairing), keeping the same demographic
# and calendar dummies. Standard errors are clustered by author_id.
model <- feols(
  log(already_follow_other_fans_count + 1) ~
    is_cooperative +
    factor(gender) + factor(age_range) +
    factor(fre_country_region) + factor(fre_city_level) +
    # Opponent-side controls, currently switched off:
    # factor(other_gender) + factor(other_age_range) +
    # factor(other_fre_country_region) + factor(other_fre_city_level) +
    factor(year) + factor(month) + factor(day),
  data = data,
  vcov = ~author_id
)
## The variable 'factor(age_range)UNKNOWN' has been removed because of collinearity (see $collin.var).
summary(model)
## OLS estimation, Dep. Var.: log(already_follow_other_fans_count + 1)
## Observations: 187,126
## Standard-errors: Clustered (author_id)
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.012413 0.008760 1.417026 1.5648e-01
## is_cooperative 0.003372 0.000465 7.244498 4.3602e-13 ***
## factor(gender)M -0.007648 0.000409 -18.716054 < 2.2e-16 ***
## factor(gender)U -0.020809 0.008747 -2.378968 1.7362e-02 *
## factor(age_range)12-17 0.000835 0.001146 0.729013 4.6599e-01
## factor(age_range)18-23 0.004394 0.001202 3.656426 2.5584e-04 ***
## factor(age_range)24-30 0.006133 0.001288 4.761997 1.9186e-06 ***
## factor(age_range)31-40 0.006121 0.001192 5.134925 2.8259e-07 ***
## factor(age_range)41-49 0.004967 0.001204 4.126895 3.6788e-05 ***
## factor(age_range)50+ 0.003741 0.001173 3.189202 1.4269e-03 **
## factor(fre_country_region)北方 -0.015539 0.009235 -1.682638 9.2447e-02 .
## factor(fre_country_region)南方 -0.019155 0.009230 -2.075294 3.7961e-02 *
## factor(fre_city_level)二线城市 0.005807 0.003192 1.819378 6.8856e-02 .
## factor(fre_city_level)三线城市 0.003495 0.003141 1.112732 2.6583e-01
## factor(fre_city_level)四线城市 0.001430 0.003126 0.457445 6.4735e-01
## factor(fre_city_level)五线城市 -0.000249 0.003115 -0.080096 9.3616e-01
## factor(fre_city_level)新一线城市 0.004971 0.003206 1.550846 1.2094e-01
## factor(fre_city_level)一线城市 0.004439 0.003342 1.328508 1.8401e-01
## factor(year)2023 0.004985 0.000487 10.228477 < 2.2e-16 ***
## factor(year)2024 0.006470 0.000514 12.595457 < 2.2e-16 ***
## factor(month)2 0.001221 0.000740 1.649754 9.8995e-02 .
## factor(month)3 0.000695 0.000697 0.997870 3.1834e-01
## factor(month)4 0.001938 0.000735 2.638382 8.3311e-03 **
## factor(month)5 0.002585 0.000871 2.967278 3.0049e-03 **
## factor(month)6 0.002143 0.000980 2.186731 2.8764e-02 *
## factor(month)7 0.001203 0.000803 1.498906 1.3390e-01
## factor(month)8 0.001796 0.000792 2.267508 2.3361e-02 *
## factor(month)9 0.003904 0.001027 3.799665 1.4495e-04 ***
## factor(month)10 0.004285 0.001053 4.068711 4.7297e-05 ***
## factor(month)11 0.005128 0.001101 4.659163 3.1776e-06 ***
## factor(month)12 0.004202 0.001149 3.657047 2.5522e-04 ***
## factor(day)2 0.001381 0.001332 1.036638 2.9991e-01
## factor(day)3 0.001489 0.001339 1.111758 2.6624e-01
## factor(day)4 0.000047 0.001261 0.037185 9.7034e-01
## factor(day)5 0.003956 0.001665 2.375380 1.7532e-02 *
## factor(day)6 0.002422 0.001386 1.747057 8.0629e-02 .
## factor(day)7 0.000397 0.001298 0.305506 7.5998e-01
## factor(day)8 0.000734 0.001465 0.501113 6.1629e-01
## factor(day)9 0.000982 0.001229 0.799080 4.2425e-01
## factor(day)10 0.000707 0.001282 0.551467 5.8131e-01
## factor(day)11 0.000848 0.001321 0.641882 5.2095e-01
## factor(day)12 0.002523 0.001469 1.717192 8.5946e-02 .
## factor(day)13 0.001523 0.001324 1.150450 2.4996e-01
## factor(day)14 0.000834 0.001366 0.610119 5.4178e-01
## factor(day)15 0.003471 0.001715 2.023740 4.2999e-02 *
## factor(day)16 0.000891 0.001253 0.711201 4.7696e-01
## factor(day)17 0.002342 0.001520 1.540716 1.2339e-01
## factor(day)18 0.002660 0.001558 1.706643 8.7890e-02 .
## factor(day)19 0.002950 0.001504 1.961860 4.9781e-02 *
## factor(day)20 0.000077 0.001347 0.057133 9.5444e-01
## factor(day)21 0.001683 0.001347 1.249706 2.1141e-01
## factor(day)22 0.001017 0.001345 0.756106 4.4959e-01
## factor(day)23 0.001339 0.001298 1.031317 3.0239e-01
## factor(day)24 0.000218 0.001265 0.172306 8.6320e-01
## factor(day)25 0.002707 0.001392 1.944528 5.1834e-02 .
## factor(day)26 0.001355 0.001327 1.020820 3.0734e-01
## factor(day)27 0.003935 0.001458 2.699422 6.9468e-03 **
## factor(day)28 0.000740 0.001409 0.525154 5.9948e-01
## factor(day)29 0.002223 0.001457 1.525352 1.2717e-01
## factor(day)30 0.000701 0.001393 0.503144 6.1486e-01
## factor(day)31 0.002260 0.001623 1.392169 1.6387e-01
## ... 1 variable was removed because of collinearity (factor(age_range)UNKNOWN)
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## RMSE: 0.086198 Adj. R2: 0.005182
# Plot the cooperation-only model fitted just above with the
# plot_predictions_cate() helper (not defined in this section).
plot_predictions_cate(model, data)
# Define the formula for the model
# NOTE(review): fans_piar_cat_temp is not created anywhere in this section;
# presumably plot_continue_cutoff() builds it internally before fitting --
# confirm against the helper's definition.
model_formula <- log(already_follow_other_fans_count+1) ~ factor(fans_piar_cat_temp)
# Call the function with the data and formula
plot_continue_cutoff(data, model_formula)
library(ggplot2)
library(dplyr)
library(RColorBrewer)
# Step 1: Bin before_fans_count and other_before_fans_count into five ranges
# (fewer, broader bins) with upper-inclusive edges at 1k, 10k, 100k and 1M.
# The rules are evaluated top to bottom, so each one only needs its upper edge.
# A count of 0 lands in "0-1k" and a missing count stays NA; requiring `> 0` in
# the first rule would push both into the catch-all ">1M" bin.
data <- data %>%
  mutate(
    fans_new_range = case_when(
      is.na(before_fans_count) ~ NA_character_,
      before_fans_count <= 1000 ~ "0-1k",
      before_fans_count <= 10000 ~ "1k-10k",
      before_fans_count <= 100000 ~ "10k-100k",
      before_fans_count <= 1000000 ~ "100k-1M",
      TRUE ~ ">1M"
    ),
    other_fans_new_range = case_when(
      is.na(other_before_fans_count) ~ NA_character_,
      other_before_fans_count <= 1000 ~ "0-1k",
      other_before_fans_count <= 10000 ~ "1k-10k",
      other_before_fans_count <= 100000 ~ "10k-100k",
      other_before_fans_count <= 1000000 ~ "100k-1M",
      TRUE ~ ">1M"
    )
  )
# Step 2: Convert both range columns to factors whose levels run from the
# smallest bin to the largest, so tables and plot axes follow size order
# rather than alphabetical order.
data <- data %>%
  mutate(
    across(
      c(fans_new_range, other_fans_new_range),
      ~ factor(.x, levels = c("0-1k", "1k-10k", "10k-100k", "100k-1M", ">1M"))
    )
  )
# Step 3: Aggregate per (own range, opponent range) cell: number of rows, mean
# of log(already_follow_other_fans_count + 1), and a 95% t-interval for that
# mean.
# The standard error has to come from the row-level log values: sd() of the
# already-averaged Y (a single number per cell) is always NA, which left every
# interval empty. The t quantile uses n - 1 degrees of freedom.
agg_data <- data %>%
  mutate(log_y = log(already_follow_other_fans_count + 1)) %>%
  group_by(fans_new_range, other_fans_new_range) %>%
  summarize(
    count = n(),                    # rows in the cell
    n_valid = sum(!is.na(log_y)),   # rows that actually enter the mean
    Y = mean(log_y, na.rm = TRUE),
    se = sd(log_y, na.rm = TRUE) / sqrt(n_valid),
    # pmax() keeps qt() defined for cells with fewer than two valid rows;
    # se is NA there, so the interval stays NA.
    ci_lower = Y - qt(0.975, df = pmax(n_valid - 1, 1)) * se,
    ci_upper = Y + qt(0.975, df = pmax(n_valid - 1, 1)) * se
  ) %>%
  ungroup() %>%
  mutate(percentage = count / sum(count) * 100)
## `summarise()` has grouped output by 'fans_new_range'. You can override using
## the `.groups` argument.
# Step 4: Heatmap of the cell means; each tile is labelled with the cell's
# share of all rows.
# No geom_errorbar() layer: the y axis is categorical, so ymin/ymax given in
# outcome units cannot be drawn on it. The interval stays available in
# agg_data (ci_lower / ci_upper).
ggplot(agg_data, aes(x = fans_new_range, y = other_fans_new_range, fill = Y)) +
  geom_tile() +
  geom_text(
    aes(label = paste0(round(percentage, 2), "%")),
    color = "black",
    size = 4
  ) +
  scale_fill_gradientn(
    colors = c("yellow", "orange", "darkorange", "red", "darkred"),
    values = scales::rescale(c(0, 0.05, 0.1, 0.15, 0.2)),
    # limits = c(0, 0.2),
    na.value = "white"
  ) +
  labs(
    # Title and legend name the quantity that is actually plotted.
    title = "Mean log(already_follow_other_fans_count + 1) by Fan Ranges",
    x = "Fans Range",
    y = "Other Fans Range",
    fill = "mean log(already_follow_other_fans_count + 1)"
  ) +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))
# OLS of log(other_already_follow_other_fans_count + 1) on the fan-size
# pairing, the cooperation dummy and their interaction, with streamer
# demographics and year / month / day-of-month dummies as controls. Standard
# errors are clustered by author_id.
# `a * b` expands to a + b + a:b, i.e. both main effects plus the interaction.
model <- feols(
  log(other_already_follow_other_fans_count + 1) ~
    factor(fans_piar_cat) * is_cooperative +
    factor(gender) + factor(age_range) +
    factor(fre_country_region) + factor(fre_city_level) +
    # Opponent-side controls, currently switched off:
    # factor(other_gender) + factor(other_age_range) +
    # factor(other_fre_country_region) + factor(other_fre_city_level) +
    factor(year) + factor(month) + factor(day),
  data = data,
  vcov = ~author_id
)
## The variable 'factor(age_range)UNKNOWN' has been removed because of collinearity (see $collin.var).
summary(model)
## OLS estimation, Dep. Var.: log(other_already_follow_other_fans_count + 1)
## Observations: 187,126
## Standard-errors: Clustered (author_id)
## Estimate Std. Error
## (Intercept) 0.00142947 0.004870
## factor(fans_piar_cat)Small vs Big 0.00525226 0.000987
## factor(fans_piar_cat)Big vs Small 0.00982237 0.001392
## factor(fans_piar_cat)Big vs Big 0.03340080 0.002344
## is_cooperative 0.00106367 0.000271
## factor(gender)M -0.00116302 0.000398
## factor(gender)U -0.00503465 0.004756
## factor(age_range)12-17 0.00076300 0.000326
## factor(age_range)18-23 0.00267245 0.000499
## factor(age_range)24-30 0.00209327 0.000718
## factor(age_range)31-40 0.00142397 0.000476
## factor(age_range)41-49 0.00193993 0.000494
## factor(age_range)50+ 0.00356298 0.000449
## factor(fre_country_region)北方 -0.00845192 0.004844
## factor(fre_country_region)南方 -0.00875047 0.004845
## factor(fre_city_level)二线城市 0.00487764 0.001133
## factor(fre_city_level)三线城市 0.00514957 0.001047
## factor(fre_city_level)四线城市 0.00355252 0.001002
## factor(fre_city_level)五线城市 0.00383602 0.000970
## factor(fre_city_level)新一线城市 0.00489070 0.001188
## factor(fre_city_level)一线城市 0.00822818 0.001841
## factor(year)2023 0.00301433 0.000478
## factor(year)2024 0.00439741 0.000496
## factor(month)2 0.00041128 0.000751
## factor(month)3 0.00016363 0.000794
## factor(month)4 -0.00057465 0.000758
## factor(month)5 -0.00009426 0.000881
## factor(month)6 0.00112378 0.001083
## factor(month)7 -0.00085572 0.000835
## factor(month)8 0.00017656 0.000888
## factor(month)9 -0.00077557 0.000934
## factor(month)10 0.00189188 0.001076
## factor(month)11 0.00263517 0.001154
## factor(month)12 0.00339837 0.001292
## factor(day)2 0.00228727 0.001543
## factor(day)3 0.00186958 0.001363
## factor(day)4 0.00066265 0.001356
## factor(day)5 0.00166993 0.001366
## factor(day)6 0.00165769 0.001520
## factor(day)7 -0.00023268 0.001262
## factor(day)8 0.00206686 0.001369
## factor(day)9 0.00116708 0.001335
## factor(day)10 0.00185670 0.001355
## factor(day)11 0.00107228 0.001321
## factor(day)12 0.00191638 0.001362
## factor(day)13 -0.00179614 0.001163
## factor(day)14 0.00012811 0.001322
## factor(day)15 0.00244936 0.001754
## factor(day)16 -0.00048231 0.001170
## factor(day)17 0.00243740 0.001588
## factor(day)18 0.00131186 0.001608
## factor(day)19 0.00079750 0.001348
## factor(day)20 0.00057949 0.001341
## factor(day)21 0.00017687 0.001318
## factor(day)22 0.00118581 0.001313
## factor(day)23 -0.00042043 0.001222
## factor(day)24 0.00176580 0.001417
## factor(day)25 0.00128433 0.001397
## factor(day)26 0.00216651 0.001404
## factor(day)27 0.00042943 0.001370
## factor(day)28 0.00000900 0.001300
## factor(day)29 -0.00020217 0.001408
## factor(day)30 0.00133720 0.001337
## factor(day)31 0.00095531 0.001569
## factor(fans_piar_cat)Small vs Big:is_cooperative 0.00000459 0.001729
## factor(fans_piar_cat)Big vs Small:is_cooperative 0.00177927 0.002298
## factor(fans_piar_cat)Big vs Big:is_cooperative 0.01598802 0.003943
## t value Pr(>|t|)
## (Intercept) 0.293537 7.6911e-01
## factor(fans_piar_cat)Small vs Big 5.319285 1.0432e-07 ***
## factor(fans_piar_cat)Big vs Small 7.055642 1.7251e-12 ***
## factor(fans_piar_cat)Big vs Big 14.247912 < 2.2e-16 ***
## is_cooperative 3.923511 8.7305e-05 ***
## factor(gender)M -2.921249 3.4868e-03 **
## factor(gender)U -1.058656 2.8976e-01
## factor(age_range)12-17 2.338415 1.9367e-02 *
## factor(age_range)18-23 5.352916 8.6669e-08 ***
## factor(age_range)24-30 2.916161 3.5442e-03 **
## factor(age_range)31-40 2.994194 2.7521e-03 **
## factor(age_range)41-49 3.927383 8.5912e-05 ***
## factor(age_range)50+ 7.939252 2.0473e-15 ***
## factor(fre_country_region)北方 -1.744994 8.0988e-02 .
## factor(fre_country_region)南方 -1.806153 7.0896e-02 .
## factor(fre_city_level)二线城市 4.305696 1.6656e-05 ***
## factor(fre_city_level)三线城市 4.919148 8.7009e-07 ***
## factor(fre_city_level)四线城市 3.545697 3.9169e-04 ***
## factor(fre_city_level)五线城市 3.954016 7.6884e-05 ***
## factor(fre_city_level)新一线城市 4.115221 3.8701e-05 ***
## factor(fre_city_level)一线城市 4.468612 7.8784e-06 ***
## factor(year)2023 6.306117 2.8688e-10 ***
## factor(year)2024 8.857501 < 2.2e-16 ***
## factor(month)2 0.547789 5.8384e-01
## factor(month)3 0.206149 8.3667e-01
## factor(month)4 -0.758048 4.4842e-01
## factor(month)5 -0.107025 9.1477e-01
## factor(month)6 1.037535 2.9949e-01
## factor(month)7 -1.024674 3.0552e-01
## factor(month)8 0.198763 8.4245e-01
## factor(month)9 -0.830817 4.0608e-01
## factor(month)10 1.758911 7.8594e-02 .
## factor(month)11 2.282669 2.2451e-02 *
## factor(month)12 2.629407 8.5542e-03 **
## factor(day)2 1.482041 1.3833e-01
## factor(day)3 1.371589 1.7019e-01
## factor(day)4 0.488839 6.2496e-01
## factor(day)5 1.222712 2.2144e-01
## factor(day)6 1.090703 2.7541e-01
## factor(day)7 -0.184359 8.5373e-01
## factor(day)8 1.510097 1.3102e-01
## factor(day)9 0.874220 3.8200e-01
## factor(day)10 1.370363 1.7058e-01
## factor(day)11 0.811954 4.1682e-01
## factor(day)12 1.406911 1.5946e-01
## factor(day)13 -1.545001 1.2235e-01
## factor(day)14 0.096943 9.2277e-01
## factor(day)15 1.396101 1.6269e-01
## factor(day)16 -0.412282 6.8013e-01
## factor(day)17 1.535343 1.2470e-01
## factor(day)18 0.815740 4.1465e-01
## factor(day)19 0.591447 5.5422e-01
## factor(day)20 0.432173 6.6562e-01
## factor(day)21 0.134217 8.9323e-01
## factor(day)22 0.903347 3.6634e-01
## factor(day)23 -0.344053 7.3081e-01
## factor(day)24 1.246223 2.1268e-01
## factor(day)25 0.919577 3.5780e-01
## factor(day)26 1.543357 1.2275e-01
## factor(day)27 0.313405 7.5397e-01
## factor(day)28 0.006929 9.9447e-01
## factor(day)29 -0.143638 8.8579e-01
## factor(day)30 1.000435 3.1710e-01
## factor(day)31 0.608834 5.4264e-01
## factor(fans_piar_cat)Small vs Big:is_cooperative 0.002658 9.9788e-01
## factor(fans_piar_cat)Big vs Small:is_cooperative 0.774375 4.3871e-01
## factor(fans_piar_cat)Big vs Big:is_cooperative 4.054705 5.0222e-05 ***
## ... 1 variable was removed because of collinearity (factor(age_range)UNKNOWN)
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## RMSE: 0.085849 Adj. R2: 0.018912
# Plot: fit the interaction specification (fan-size pair category crossed with
# the same-category indicator) that is passed to the prediction plot.
# `a * b` in a formula expands to a + b + a:b, so the two main effects do not
# need to be listed separately.
model <- feols(
  log(other_already_follow_other_fans_count + 1) ~
    factor(fans_piar_cat) * is_cooperative +
    factor(gender) + factor(age_range) +
    factor(fre_country_region) + factor(fre_city_level) +
    # Opponent-side controls, currently disabled:
    # factor(other_gender) + factor(other_age_range) +
    # factor(other_fre_country_region) + factor(other_fre_city_level) +
    factor(year) + factor(month) + factor(day),
  data = data,
  vcov = ~author_id # standard errors clustered by author_id
)
## The variable 'factor(age_range)UNKNOWN' has been removed because of collinearity (see $collin.var).
summary(model)
## OLS estimation, Dep. Var.: log(other_already_follow_other_fans_count + 1)
## Observations: 187,126
## Standard-errors: Clustered (author_id)
## Estimate Std. Error
## (Intercept) 0.00142947 0.004870
## factor(fans_piar_cat)Small vs Big 0.00525226 0.000987
## factor(fans_piar_cat)Big vs Small 0.00982237 0.001392
## factor(fans_piar_cat)Big vs Big 0.03340080 0.002344
## is_cooperative 0.00106367 0.000271
## factor(gender)M -0.00116302 0.000398
## factor(gender)U -0.00503465 0.004756
## factor(age_range)12-17 0.00076300 0.000326
## factor(age_range)18-23 0.00267245 0.000499
## factor(age_range)24-30 0.00209327 0.000718
## factor(age_range)31-40 0.00142397 0.000476
## factor(age_range)41-49 0.00193993 0.000494
## factor(age_range)50+ 0.00356298 0.000449
## factor(fre_country_region)北方 -0.00845192 0.004844
## factor(fre_country_region)南方 -0.00875047 0.004845
## factor(fre_city_level)二线城市 0.00487764 0.001133
## factor(fre_city_level)三线城市 0.00514957 0.001047
## factor(fre_city_level)四线城市 0.00355252 0.001002
## factor(fre_city_level)五线城市 0.00383602 0.000970
## factor(fre_city_level)新一线城市 0.00489070 0.001188
## factor(fre_city_level)一线城市 0.00822818 0.001841
## factor(year)2023 0.00301433 0.000478
## factor(year)2024 0.00439741 0.000496
## factor(month)2 0.00041128 0.000751
## factor(month)3 0.00016363 0.000794
## factor(month)4 -0.00057465 0.000758
## factor(month)5 -0.00009426 0.000881
## factor(month)6 0.00112378 0.001083
## factor(month)7 -0.00085572 0.000835
## factor(month)8 0.00017656 0.000888
## factor(month)9 -0.00077557 0.000934
## factor(month)10 0.00189188 0.001076
## factor(month)11 0.00263517 0.001154
## factor(month)12 0.00339837 0.001292
## factor(day)2 0.00228727 0.001543
## factor(day)3 0.00186958 0.001363
## factor(day)4 0.00066265 0.001356
## factor(day)5 0.00166993 0.001366
## factor(day)6 0.00165769 0.001520
## factor(day)7 -0.00023268 0.001262
## factor(day)8 0.00206686 0.001369
## factor(day)9 0.00116708 0.001335
## factor(day)10 0.00185670 0.001355
## factor(day)11 0.00107228 0.001321
## factor(day)12 0.00191638 0.001362
## factor(day)13 -0.00179614 0.001163
## factor(day)14 0.00012811 0.001322
## factor(day)15 0.00244936 0.001754
## factor(day)16 -0.00048231 0.001170
## factor(day)17 0.00243740 0.001588
## factor(day)18 0.00131186 0.001608
## factor(day)19 0.00079750 0.001348
## factor(day)20 0.00057949 0.001341
## factor(day)21 0.00017687 0.001318
## factor(day)22 0.00118581 0.001313
## factor(day)23 -0.00042043 0.001222
## factor(day)24 0.00176580 0.001417
## factor(day)25 0.00128433 0.001397
## factor(day)26 0.00216651 0.001404
## factor(day)27 0.00042943 0.001370
## factor(day)28 0.00000900 0.001300
## factor(day)29 -0.00020217 0.001408
## factor(day)30 0.00133720 0.001337
## factor(day)31 0.00095531 0.001569
## factor(fans_piar_cat)Small vs Big:is_cooperative 0.00000459 0.001729
## factor(fans_piar_cat)Big vs Small:is_cooperative 0.00177927 0.002298
## factor(fans_piar_cat)Big vs Big:is_cooperative 0.01598802 0.003943
## t value Pr(>|t|)
## (Intercept) 0.293537 7.6911e-01
## factor(fans_piar_cat)Small vs Big 5.319285 1.0432e-07 ***
## factor(fans_piar_cat)Big vs Small 7.055642 1.7251e-12 ***
## factor(fans_piar_cat)Big vs Big 14.247912 < 2.2e-16 ***
## is_cooperative 3.923511 8.7305e-05 ***
## factor(gender)M -2.921249 3.4868e-03 **
## factor(gender)U -1.058656 2.8976e-01
## factor(age_range)12-17 2.338415 1.9367e-02 *
## factor(age_range)18-23 5.352916 8.6669e-08 ***
## factor(age_range)24-30 2.916161 3.5442e-03 **
## factor(age_range)31-40 2.994194 2.7521e-03 **
## factor(age_range)41-49 3.927383 8.5912e-05 ***
## factor(age_range)50+ 7.939252 2.0473e-15 ***
## factor(fre_country_region)北方 -1.744994 8.0988e-02 .
## factor(fre_country_region)南方 -1.806153 7.0896e-02 .
## factor(fre_city_level)二线城市 4.305696 1.6656e-05 ***
## factor(fre_city_level)三线城市 4.919148 8.7009e-07 ***
## factor(fre_city_level)四线城市 3.545697 3.9169e-04 ***
## factor(fre_city_level)五线城市 3.954016 7.6884e-05 ***
## factor(fre_city_level)新一线城市 4.115221 3.8701e-05 ***
## factor(fre_city_level)一线城市 4.468612 7.8784e-06 ***
## factor(year)2023 6.306117 2.8688e-10 ***
## factor(year)2024 8.857501 < 2.2e-16 ***
## factor(month)2 0.547789 5.8384e-01
## factor(month)3 0.206149 8.3667e-01
## factor(month)4 -0.758048 4.4842e-01
## factor(month)5 -0.107025 9.1477e-01
## factor(month)6 1.037535 2.9949e-01
## factor(month)7 -1.024674 3.0552e-01
## factor(month)8 0.198763 8.4245e-01
## factor(month)9 -0.830817 4.0608e-01
## factor(month)10 1.758911 7.8594e-02 .
## factor(month)11 2.282669 2.2451e-02 *
## factor(month)12 2.629407 8.5542e-03 **
## factor(day)2 1.482041 1.3833e-01
## factor(day)3 1.371589 1.7019e-01
## factor(day)4 0.488839 6.2496e-01
## factor(day)5 1.222712 2.2144e-01
## factor(day)6 1.090703 2.7541e-01
## factor(day)7 -0.184359 8.5373e-01
## factor(day)8 1.510097 1.3102e-01
## factor(day)9 0.874220 3.8200e-01
## factor(day)10 1.370363 1.7058e-01
## factor(day)11 0.811954 4.1682e-01
## factor(day)12 1.406911 1.5946e-01
## factor(day)13 -1.545001 1.2235e-01
## factor(day)14 0.096943 9.2277e-01
## factor(day)15 1.396101 1.6269e-01
## factor(day)16 -0.412282 6.8013e-01
## factor(day)17 1.535343 1.2470e-01
## factor(day)18 0.815740 4.1465e-01
## factor(day)19 0.591447 5.5422e-01
## factor(day)20 0.432173 6.6562e-01
## factor(day)21 0.134217 8.9323e-01
## factor(day)22 0.903347 3.6634e-01
## factor(day)23 -0.344053 7.3081e-01
## factor(day)24 1.246223 2.1268e-01
## factor(day)25 0.919577 3.5780e-01
## factor(day)26 1.543357 1.2275e-01
## factor(day)27 0.313405 7.5397e-01
## factor(day)28 0.006929 9.9447e-01
## factor(day)29 -0.143638 8.8579e-01
## factor(day)30 1.000435 3.1710e-01
## factor(day)31 0.608834 5.4264e-01
## factor(fans_piar_cat)Small vs Big:is_cooperative 0.002658 9.9788e-01
## factor(fans_piar_cat)Big vs Small:is_cooperative 0.774375 4.3871e-01
## factor(fans_piar_cat)Big vs Big:is_cooperative 4.054705 5.0222e-05 ***
## ... 1 variable was removed because of collinearity (factor(age_range)UNKNOWN)
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## RMSE: 0.085849 Adj. R2: 0.018912
# Plot the interaction model fitted above. plot_predictions_with_ci() is defined
# outside this chunk; presumably it draws group-level predictions with
# confidence intervals — TODO confirm against its definition.
plot_predictions_with_ci(model, data)
## `summarise()` has grouped output by 'fans_piar_cat'. You can override using the
## `.groups` argument.
### (3) plot fans
# Plot: specification with the fan-size pair category as the only regressor of
# interest (no is_cooperative term); gender, age range, region, city level and
# calendar dummies stay in as controls.
model <- feols(
  log(other_already_follow_other_fans_count + 1) ~
    factor(fans_piar_cat) +
    factor(gender) + factor(age_range) +
    factor(fre_country_region) + factor(fre_city_level) +
    # Opponent-side controls, currently disabled:
    # factor(other_gender) + factor(other_age_range) +
    # factor(other_fre_country_region) + factor(other_fre_city_level) +
    factor(year) + factor(month) + factor(day),
  data = data,
  vcov = ~author_id # standard errors clustered by author_id
)
## The variable 'factor(age_range)UNKNOWN' has been removed because of collinearity (see $collin.var).
summary(model)
## OLS estimation, Dep. Var.: log(other_already_follow_other_fans_count + 1)
## Observations: 187,126
## Standard-errors: Clustered (author_id)
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.00153853 0.004880 0.315305 7.5253e-01
## factor(fans_piar_cat)Small vs Big 0.00524951 0.000841 6.244221 4.2700e-10
## factor(fans_piar_cat)Big vs Small 0.01048445 0.001131 9.272824 < 2.2e-16
## factor(fans_piar_cat)Big vs Big 0.04012882 0.001944 20.645236 < 2.2e-16
## factor(gender)M -0.00130667 0.000397 -3.291204 9.9781e-04
## factor(gender)U -0.00452330 0.004763 -0.949583 3.4233e-01
## factor(age_range)12-17 0.00086638 0.000329 2.635800 8.3947e-03
## factor(age_range)18-23 0.00270399 0.000501 5.396423 6.8080e-08
## factor(age_range)24-30 0.00204444 0.000718 2.848084 4.3989e-03
## factor(age_range)31-40 0.00138853 0.000476 2.920005 3.5008e-03
## factor(age_range)41-49 0.00188287 0.000493 3.815446 1.3599e-04
## factor(age_range)50+ 0.00343553 0.000448 7.665953 1.7852e-14
## factor(fre_country_region)北方 -0.00821030 0.004865 -1.687497 9.1510e-02
## factor(fre_country_region)南方 -0.00849600 0.004866 -1.745861 8.0837e-02
## factor(fre_city_level)二线城市 0.00500816 0.001189 4.213629 2.5144e-05
## factor(fre_city_level)三线城市 0.00519727 0.001107 4.697014 2.6422e-06
## factor(fre_city_level)四线城市 0.00361449 0.001065 3.395416 6.8541e-04
## factor(fre_city_level)五线城市 0.00390350 0.001034 3.774530 1.6037e-04
## factor(fre_city_level)新一线城市 0.00497352 0.001243 4.002726 6.2646e-05
## factor(fre_city_level)一线城市 0.00823838 0.001876 4.391361 1.1272e-05
## factor(year)2023 0.00310038 0.000478 6.484069 8.9545e-11
## factor(year)2024 0.00452732 0.000495 9.144679 < 2.2e-16
## factor(month)2 0.00040003 0.000751 0.532999 5.9404e-01
## factor(month)3 0.00016019 0.000794 0.201801 8.4007e-01
## factor(month)4 -0.00062317 0.000758 -0.822266 4.1093e-01
## factor(month)5 -0.00020330 0.000880 -0.230944 8.1736e-01
## factor(month)6 0.00108322 0.001084 0.999445 3.1758e-01
## factor(month)7 -0.00096198 0.000835 -1.151798 2.4941e-01
## factor(month)8 0.00008333 0.000888 0.093889 9.2520e-01
## factor(month)9 -0.00087206 0.000933 -0.934756 3.4992e-01
## factor(month)10 0.00183875 0.001075 1.709972 8.7273e-02
## factor(month)11 0.00263057 0.001155 2.276769 2.2801e-02
## factor(month)12 0.00344615 0.001294 2.663552 7.7328e-03
## factor(day)2 0.00234976 0.001544 1.522308 1.2793e-01
## factor(day)3 0.00185818 0.001362 1.364650 1.7236e-01
## factor(day)4 0.00070155 0.001355 0.517683 6.0468e-01
## factor(day)5 0.00168839 0.001365 1.236993 2.1609e-01
## factor(day)6 0.00166323 0.001519 1.094870 2.7358e-01
## factor(day)7 -0.00015371 0.001261 -0.121898 9.0298e-01
## factor(day)8 0.00213295 0.001368 1.558956 1.1901e-01
## factor(day)9 0.00118769 0.001334 0.890465 3.7322e-01
## factor(day)10 0.00190738 0.001354 1.408884 1.5887e-01
## factor(day)11 0.00113122 0.001320 0.856930 3.9148e-01
## factor(day)12 0.00195012 0.001362 1.432225 1.5208e-01
## factor(day)13 -0.00173981 0.001162 -1.497857 1.3417e-01
## factor(day)14 0.00017016 0.001322 0.128719 8.9758e-01
## factor(day)15 0.00244363 0.001755 1.392328 1.6382e-01
## factor(day)16 -0.00043248 0.001170 -0.369502 7.1175e-01
## factor(day)17 0.00245019 0.001586 1.545234 1.2229e-01
## factor(day)18 0.00135609 0.001609 0.842790 3.9935e-01
## factor(day)19 0.00086318 0.001349 0.639828 5.2229e-01
## factor(day)20 0.00060483 0.001340 0.451320 6.5176e-01
## factor(day)21 0.00023780 0.001318 0.180358 8.5687e-01
## factor(day)22 0.00122529 0.001311 0.934389 3.5010e-01
## factor(day)23 -0.00038611 0.001221 -0.316322 7.5176e-01
## factor(day)24 0.00184548 0.001415 1.303771 1.9231e-01
## factor(day)25 0.00137545 0.001398 0.983960 3.2514e-01
## factor(day)26 0.00221817 0.001404 1.580208 1.1406e-01
## factor(day)27 0.00046701 0.001371 0.340619 7.3339e-01
## factor(day)28 0.00000997 0.001299 0.007672 9.9388e-01
## factor(day)29 -0.00011273 0.001406 -0.080166 9.3611e-01
## factor(day)30 0.00141234 0.001337 1.056637 2.9068e-01
## factor(day)31 0.00096783 0.001571 0.615871 5.3798e-01
##
## (Intercept)
## factor(fans_piar_cat)Small vs Big ***
## factor(fans_piar_cat)Big vs Small ***
## factor(fans_piar_cat)Big vs Big ***
## factor(gender)M ***
## factor(gender)U
## factor(age_range)12-17 **
## factor(age_range)18-23 ***
## factor(age_range)24-30 **
## factor(age_range)31-40 **
## factor(age_range)41-49 ***
## factor(age_range)50+ ***
## factor(fre_country_region)北方 .
## factor(fre_country_region)南方 .
## factor(fre_city_level)二线城市 ***
## factor(fre_city_level)三线城市 ***
## factor(fre_city_level)四线城市 ***
## factor(fre_city_level)五线城市 ***
## factor(fre_city_level)新一线城市 ***
## factor(fre_city_level)一线城市 ***
## factor(year)2023 ***
## factor(year)2024 ***
## factor(month)2
## factor(month)3
## factor(month)4
## factor(month)5
## factor(month)6
## factor(month)7
## factor(month)8
## factor(month)9
## factor(month)10 .
## factor(month)11 *
## factor(month)12 **
## factor(day)2
## factor(day)3
## factor(day)4
## factor(day)5
## factor(day)6
## factor(day)7
## factor(day)8
## factor(day)9
## factor(day)10
## factor(day)11
## factor(day)12
## factor(day)13
## factor(day)14
## factor(day)15
## factor(day)16
## factor(day)17
## factor(day)18
## factor(day)19
## factor(day)20
## factor(day)21
## factor(day)22
## factor(day)23
## factor(day)24
## factor(day)25
## factor(day)26
## factor(day)27
## factor(day)28
## factor(day)29
## factor(day)30
## factor(day)31
## ... 1 variable was removed because of collinearity (factor(age_range)UNKNOWN)
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## RMSE: 0.085886 Adj. R2: 0.018104
# Plot the fan-pair-only model fitted above. plot_predictions_fans() is defined
# outside this chunk; presumably it shows predictions per fans_piar_cat level —
# TODO confirm against its definition.
plot_predictions_fans(model, data)
# Plot: specification with the same-category indicator (is_cooperative) as the
# only regressor of interest; the fan-size pair category is left out and the
# remaining controls are unchanged.
model <- feols(
  log(other_already_follow_other_fans_count + 1) ~
    is_cooperative +
    factor(gender) + factor(age_range) +
    factor(fre_country_region) + factor(fre_city_level) +
    # Opponent-side controls, currently disabled:
    # factor(other_gender) + factor(other_age_range) +
    # factor(other_fre_country_region) + factor(other_fre_city_level) +
    factor(year) + factor(month) + factor(day),
  data = data,
  vcov = ~author_id # standard errors clustered by author_id
)
## The variable 'factor(age_range)UNKNOWN' has been removed because of collinearity (see $collin.var).
summary(model)
## OLS estimation, Dep. Var.: log(other_already_follow_other_fans_count + 1)
## Observations: 187,126
## Standard-errors: Clustered (author_id)
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.001900 0.004974 0.381939 7.0251e-01
## is_cooperative 0.003535 0.000460 7.681816 1.5775e-14 ***
## factor(gender)M -0.006080 0.000410 -14.841172 < 2.2e-16 ***
## factor(gender)U -0.008819 0.004874 -1.809513 7.0373e-02 .
## factor(age_range)12-17 0.001356 0.000368 3.679633 2.3365e-04 ***
## factor(age_range)18-23 0.005140 0.000541 9.498123 < 2.2e-16 ***
## factor(age_range)24-30 0.008650 0.000764 11.322517 < 2.2e-16 ***
## factor(age_range)31-40 0.007805 0.000547 14.265981 < 2.2e-16 ***
## factor(age_range)41-49 0.005695 0.000538 10.579529 < 2.2e-16 ***
## factor(age_range)50+ 0.005435 0.000483 11.256989 < 2.2e-16 ***
## factor(fre_country_region)北方 -0.009960 0.004873 -2.043919 4.0963e-02 *
## factor(fre_country_region)南方 -0.011817 0.004876 -2.423588 1.5369e-02 *
## factor(fre_city_level)二线城市 0.007535 0.000779 9.667110 < 2.2e-16 ***
## factor(fre_city_level)三线城市 0.007294 0.000622 11.718572 < 2.2e-16 ***
## factor(fre_city_level)四线城市 0.004788 0.000536 8.924414 < 2.2e-16 ***
## factor(fre_city_level)五线城市 0.004021 0.000465 8.638475 < 2.2e-16 ***
## factor(fre_city_level)新一线城市 0.007639 0.000849 8.995166 < 2.2e-16 ***
## factor(fre_city_level)一线城市 0.010818 0.001700 6.362853 1.9858e-10 ***
## factor(year)2023 0.004655 0.000499 9.337807 < 2.2e-16 ***
## factor(year)2024 0.006153 0.000518 11.877987 < 2.2e-16 ***
## factor(month)2 0.000203 0.000756 0.268995 7.8793e-01
## factor(month)3 0.000860 0.000798 1.077015 2.8148e-01
## factor(month)4 0.000018 0.000759 0.023087 9.8158e-01
## factor(month)5 0.000561 0.000888 0.631341 5.2782e-01
## factor(month)6 0.002231 0.001097 2.034574 4.1895e-02 *
## factor(month)7 0.000156 0.000837 0.185744 8.5265e-01
## factor(month)8 0.001223 0.000896 1.364614 1.7238e-01
## factor(month)9 0.000928 0.000935 0.991909 3.2124e-01
## factor(month)10 0.003433 0.001085 3.165208 1.5500e-03 **
## factor(month)11 0.004156 0.001169 3.555597 3.7723e-04 ***
## factor(month)12 0.004524 0.001309 3.457123 5.4612e-04 ***
## factor(day)2 0.001987 0.001551 1.281017 2.0019e-01
## factor(day)3 0.001597 0.001365 1.169665 2.4214e-01
## factor(day)4 0.000817 0.001357 0.602250 5.4701e-01
## factor(day)5 0.001819 0.001369 1.328598 1.8398e-01
## factor(day)6 0.002053 0.001528 1.344145 1.7890e-01
## factor(day)7 0.000062 0.001265 0.049056 9.6087e-01
## factor(day)8 0.002522 0.001377 1.831596 6.7014e-02 .
## factor(day)9 0.001261 0.001336 0.944126 3.4511e-01
## factor(day)10 0.001953 0.001360 1.436374 1.5090e-01
## factor(day)11 0.001367 0.001327 1.030169 3.0293e-01
## factor(day)12 0.002080 0.001368 1.521320 1.2818e-01
## factor(day)13 -0.001495 0.001159 -1.289779 1.9713e-01
## factor(day)14 0.000542 0.001327 0.408689 6.8277e-01
## factor(day)15 0.002585 0.001764 1.465117 1.4289e-01
## factor(day)16 -0.000289 0.001168 -0.247591 8.0445e-01
## factor(day)17 0.002647 0.001595 1.659899 9.6937e-02 .
## factor(day)18 0.001437 0.001617 0.888437 3.7431e-01
## factor(day)19 0.001261 0.001353 0.932636 3.5101e-01
## factor(day)20 0.000795 0.001344 0.591599 5.5412e-01
## factor(day)21 0.000811 0.001321 0.613593 5.3949e-01
## factor(day)22 0.001382 0.001314 1.051487 2.9304e-01
## factor(day)23 -0.000136 0.001223 -0.111366 9.1133e-01
## factor(day)24 0.002167 0.001425 1.520669 1.2834e-01
## factor(day)25 0.001504 0.001403 1.071880 2.8378e-01
## factor(day)26 0.002326 0.001409 1.650389 9.8865e-02 .
## factor(day)27 0.000755 0.001376 0.548672 5.8323e-01
## factor(day)28 0.000113 0.001301 0.086583 9.3100e-01
## factor(day)29 -0.000073 0.001408 -0.051688 9.5878e-01
## factor(day)30 0.001392 0.001343 1.036510 2.9997e-01
## factor(day)31 0.000931 0.001580 0.589410 5.5559e-01
## ... 1 variable was removed because of collinearity (factor(age_range)UNKNOWN)
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## RMSE: 0.086502 Adj. R2: 0.003969
# Plot the is_cooperative-only model fitted above. plot_predictions_cate() is
# defined outside this chunk; presumably it shows predictions by cooperation
# status — TODO confirm against its definition.
plot_predictions_cate(model, data)
### (5) plot continue cutoff
# Formula handed to plot_continue_cutoff(): log(1 + count) of the outcome
# regressed on the pair category `fans_piar_cat_temp`.
# NOTE(review): `fans_piar_cat_temp` is not created anywhere in the visible
# code — presumably plot_continue_cutoff() builds it per cutoff; confirm.
model_formula <- log(other_already_follow_other_fans_count+1) ~ factor(fans_piar_cat_temp)
# Run the cutoff plot on the full data set with that formula
plot_continue_cutoff(data, model_formula)
### (6) plot_fan_pair_heatmap
library(ggplot2)
library(dplyr)
library(RColorBrewer)
# Step 1: Bin both `before_fans_count` and `other_before_fans_count` into five
# ranges (fewer, broader bins).
# cut() builds right-closed intervals and include.lowest = TRUE closes the
# first one at 0, so the bins are [0, 1k], (1k, 10k], (10k, 100k],
# (100k, 1M] and (1M, Inf]. A count of exactly 0 therefore lands in "0-1k";
# the previous `> 0` conditions combined with a catch-all `TRUE ~ ">1M"`
# pushed zero (and any missing) counts into the ">1M" bin. Missing or negative
# counts now become NA instead.
fan_range_breaks <- c(0, 1000, 10000, 100000, 1000000, Inf)
fan_range_labels <- c("0-1k", "1k-10k", "10k-100k", "100k-1M", ">1M")
# Step 2: cut() returns a factor whose levels follow `fan_range_labels`, which
# gives the heatmap axes the intended order without a separate factor() call.
data <- data %>%
  mutate(
    fans_new_range = cut(
      before_fans_count,
      breaks = fan_range_breaks,
      labels = fan_range_labels,
      include.lowest = TRUE
    ),
    other_fans_new_range = cut(
      other_before_fans_count,
      breaks = fan_range_breaks,
      labels = fan_range_labels,
      include.lowest = TRUE
    )
  )
# Half-width of a t-based confidence interval for the mean of `x`.
# Missing values are dropped first; returns NA when fewer than two values
# remain, because the standard deviation is undefined in that case.
mean_ci_half_width <- function(x, level = 0.95) {
  x <- x[!is.na(x)]
  n_valid <- length(x)
  if (n_valid < 2) {
    return(NA_real_)
  }
  qt(1 - (1 - level) / 2, df = n_valid - 1) * sd(x) / sqrt(n_valid)
}
# Step 3: Aggregate each (own range, opponent range) cell: number of rows, mean
# of the log outcome, and a 95% confidence interval for that mean.
# The interval has to be built from the spread of the row-level values. The
# earlier version took sd() of the already-averaged `Y`, i.e. of a single
# number, which is NA, so every interval came out missing; it also used n
# rather than n - 1 degrees of freedom.
agg_data <- data %>%
  mutate(log_outcome = log(other_already_follow_other_fans_count + 1)) %>%
  group_by(fans_new_range, other_fans_new_range) %>%
  summarize(
    count = n(), # number of rows in this cell
    Y = mean(log_outcome, na.rm = TRUE),
    ci_lower = Y - mean_ci_half_width(log_outcome),
    ci_upper = Y + mean_ci_half_width(log_outcome)
  ) %>%
  ungroup() %>%
  mutate(percentage = count / sum(count) * 100) # share of all rows, in percent
## `summarise()` has grouped output by 'fans_new_range'. You can override using
## the `.groups` argument.
# Step 4: Heatmap of the cell means, each tile labelled with its share of rows.
# There is deliberately no error-bar layer: both axes are discrete fan ranges,
# so ymin/ymax values on the outcome scale have no meaningful position on a
# tile plot. The intervals remain available in `agg_data` (ci_lower, ci_upper).
ggplot(agg_data, aes(x = fans_new_range, y = other_fans_new_range, fill = Y)) +
  geom_tile() +
  geom_text(
    aes(label = paste0(round(percentage, 2), "%")),
    color = "black",
    size = 4
  ) +
  scale_fill_gradientn(
    colors = c("yellow", "orange", "darkorange", "red", "darkred"),
    values = scales::rescale(c(0, 0.05, 0.1, 0.15, 0.2)),
    # limits = c(0, 0.2),
    na.value = "white"
  ) +
  labs(
    title = "other_already_follow_other_fans_count by Fan Ranges",
    x = "Fans Range",
    y = "Other Fans Range",
    # The legend names the quantity actually mapped to `fill`
    fill = "mean log(other_already_follow_other_fans_count + 1)"
  ) +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))
# Distribution of net attracted fans (already_follow_other_fans_count minus
# other_already_follow_other_fans_count, as defined at the top of the file).
summary(data$net_attract_fans)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## -1.25e+02 0.00e+00 0.00e+00 -1.27e-03 0.00e+00 8.60e+01
# The minimum (-125) is what the +126 shift in the log() models below is based
# on: it moves the smallest value to 1 so the logarithm is defined for all rows.
min(data$net_attract_fans)
## [1] -125
# Interaction specification for net attracted fans.
# The outcome can be negative (minimum -125), so it is shifted by a hard-coded
# 126 before taking logs; the constant must be revisited if the data change.
# `a * b` in a formula expands to a + b + a:b, so the two main effects do not
# need to be listed separately.
model <- feols(
  log(net_attract_fans + 126) ~
    factor(fans_piar_cat) * is_cooperative +
    factor(gender) + factor(age_range) +
    factor(fre_country_region) + factor(fre_city_level) +
    # Opponent-side controls, currently disabled:
    # factor(other_gender) + factor(other_age_range) +
    # factor(other_fre_country_region) + factor(other_fre_city_level) +
    factor(year) + factor(month) + factor(day),
  data = data,
  vcov = ~author_id # standard errors clustered by author_id
)
## The variable 'factor(age_range)UNKNOWN' has been removed because of collinearity (see $collin.var).
summary(model)
## OLS estimation, Dep. Var.: log(net_attract_fans + 126)
## Observations: 187,126
## Standard-errors: Clustered (author_id)
## Estimate Std. Error
## (Intercept) 4.836787129 0.00030783
## factor(fans_piar_cat)Small vs Big 0.000064332 0.00003705
## factor(fans_piar_cat)Big vs Small -0.000145602 0.00003721
## factor(fans_piar_cat)Big vs Big -0.000953758 0.00057946
## is_cooperative -0.000004298 0.00000790
## factor(gender)M -0.000146977 0.00011438
## factor(gender)U -0.000498071 0.00028493
## factor(age_range)12-17 0.000019303 0.00003094
## factor(age_range)18-23 0.000035342 0.00005083
## factor(age_range)24-30 -0.000175559 0.00014196
## factor(age_range)31-40 0.000076848 0.00008668
## factor(age_range)41-49 0.000027623 0.00007550
## factor(age_range)50+ 0.000047684 0.00006751
## factor(fre_country_region)北方 -0.000343349 0.00027291
## factor(fre_country_region)南方 -0.000334491 0.00026819
## factor(fre_city_level)二线城市 -0.000203281 0.00020355
## factor(fre_city_level)三线城市 -0.000021180 0.00004802
## factor(fre_city_level)四线城市 -0.000058940 0.00004960
## factor(fre_city_level)五线城市 -0.000058706 0.00004446
## factor(fre_city_level)新一线城市 0.000021832 0.00005962
## factor(fre_city_level)一线城市 -0.000095514 0.00007105
## factor(year)2023 -0.000012890 0.00003802
## factor(year)2024 -0.000043611 0.00008842
## factor(month)2 0.000065801 0.00004677
## factor(month)3 0.000041882 0.00005152
## factor(month)4 0.000062326 0.00005560
## factor(month)5 0.000030578 0.00010085
## factor(month)6 -0.000315039 0.00033816
## factor(month)7 0.000090432 0.00006101
## factor(month)8 0.000060186 0.00005680
## factor(month)9 0.000114449 0.00006994
## factor(month)10 0.000069764 0.00007224
## factor(month)11 0.000066086 0.00007558
## factor(month)12 0.000013517 0.00008123
## factor(day)2 -0.000111483 0.00007623
## factor(day)3 -0.000025019 0.00004262
## factor(day)4 -0.000030722 0.00003881
## factor(day)5 0.000145418 0.00010342
## factor(day)6 -0.000053112 0.00004258
## factor(day)7 -0.000000858 0.00002811
## factor(day)8 0.000020911 0.00003508
## factor(day)9 -0.000019909 0.00003001
## factor(day)10 -0.000024261 0.00002857
## factor(day)11 -0.000011340 0.00003175
## factor(day)12 0.000023958 0.00003099
## factor(day)13 0.000034812 0.00002540
## factor(day)14 0.000009669 0.00003313
## factor(day)15 -0.000802282 0.00080756
## factor(day)16 0.000008048 0.00002488
## factor(day)17 -0.000254121 0.00026247
## factor(day)18 -0.000132837 0.00012747
## factor(day)19 0.000034682 0.00003508
## factor(day)20 0.000004351 0.00004693
## factor(day)21 0.000001111 0.00003070
## factor(day)22 0.000005915 0.00002947
## factor(day)23 0.000020573 0.00002680
## factor(day)24 -0.000057254 0.00004387
## factor(day)25 -0.000004302 0.00002891
## factor(day)26 -0.000032536 0.00003375
## factor(day)27 0.000016009 0.00003519
## factor(day)28 0.000022928 0.00003927
## factor(day)29 -0.000084480 0.00014724
## factor(day)30 0.000006632 0.00002976
## factor(day)31 -0.000016571 0.00004660
## factor(fans_piar_cat)Small vs Big:is_cooperative 0.000045225 0.00005803
## factor(fans_piar_cat)Big vs Small:is_cooperative 0.000022285 0.00004617
## factor(fans_piar_cat)Big vs Big:is_cooperative 0.000883245 0.00058867
## t value Pr(>|t|)
## (Intercept) 15712.667850 < 2.2e-16 ***
## factor(fans_piar_cat)Small vs Big 1.736207 8.2529e-02 .
## factor(fans_piar_cat)Big vs Small -3.912504 9.1382e-05 ***
## factor(fans_piar_cat)Big vs Big -1.645947 9.9777e-02 .
## is_cooperative -0.544047 5.8641e-01
## factor(gender)M -1.285034 1.9878e-01
## factor(gender)U -1.748067 8.0454e-02 .
## factor(age_range)12-17 0.623793 5.3276e-01
## factor(age_range)18-23 0.695315 4.8686e-01
## factor(age_range)24-30 -1.236721 2.1619e-01
## factor(age_range)31-40 0.886568 3.7531e-01
## factor(age_range)41-49 0.365851 7.1448e-01
## factor(age_range)50+ 0.706290 4.8001e-01
## factor(fre_country_region)北方 -1.258120 2.0835e-01
## factor(fre_country_region)南方 -1.247197 2.1233e-01
## factor(fre_city_level)二线城市 -0.998664 3.1796e-01
## factor(fre_city_level)三线城市 -0.441056 6.5917e-01
## factor(fre_city_level)四线城市 -1.188233 2.3474e-01
## factor(fre_city_level)五线城市 -1.320361 1.8672e-01
## factor(fre_city_level)新一线城市 0.366181 7.1423e-01
## factor(fre_city_level)一线城市 -1.344316 1.7885e-01
## factor(year)2023 -0.339023 7.3459e-01
## factor(year)2024 -0.493203 6.2187e-01
## factor(month)2 1.406936 1.5945e-01
## factor(month)3 0.812977 4.1623e-01
## factor(month)4 1.120924 2.6232e-01
## factor(month)5 0.303214 7.6173e-01
## factor(month)6 -0.931616 3.5154e-01
## factor(month)7 1.482327 1.3826e-01
## factor(month)8 1.059561 2.8935e-01
## factor(month)9 1.636417 1.0175e-01
## factor(month)10 0.965688 3.3420e-01
## factor(month)11 0.874436 3.8188e-01
## factor(month)12 0.166417 8.6783e-01
## factor(day)2 -1.462540 1.4360e-01
## factor(day)3 -0.586964 5.5723e-01
## factor(day)4 -0.791688 4.2854e-01
## factor(day)5 1.406058 1.5971e-01
## factor(day)6 -1.247491 2.1222e-01
## factor(day)7 -0.030526 9.7565e-01
## factor(day)8 0.596125 5.5109e-01
## factor(day)9 -0.663495 5.0701e-01
## factor(day)10 -0.849278 3.9573e-01
## factor(day)11 -0.357172 7.2096e-01
## factor(day)12 0.773155 4.3943e-01
## factor(day)13 1.370580 1.7051e-01
## factor(day)14 0.291820 7.7042e-01
## factor(day)15 -0.993460 3.2049e-01
## factor(day)16 0.323473 7.4634e-01
## factor(day)17 -0.968187 3.3295e-01
## factor(day)18 -1.042066 2.9738e-01
## factor(day)19 0.988668 3.2283e-01
## factor(day)20 0.092709 9.2614e-01
## factor(day)21 0.036178 9.7114e-01
## factor(day)22 0.200706 8.4093e-01
## factor(day)23 0.767603 4.4272e-01
## factor(day)24 -1.305118 1.9185e-01
## factor(day)25 -0.148836 8.8168e-01
## factor(day)26 -0.964059 3.3502e-01
## factor(day)27 0.454888 6.4919e-01
## factor(day)28 0.583913 5.5928e-01
## factor(day)29 -0.573749 5.6614e-01
## factor(day)30 0.222840 8.2366e-01
## factor(day)31 -0.355570 7.2216e-01
## factor(fans_piar_cat)Small vs Big:is_cooperative 0.779406 4.3574e-01
## factor(fans_piar_cat)Big vs Small:is_cooperative 0.482648 6.2935e-01
## factor(fans_piar_cat)Big vs Big:is_cooperative 1.500416 1.3351e-01
## ... 1 variable was removed because of collinearity (factor(age_range)UNKNOWN)
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## RMSE: 0.012361 Adj. R2: 2.277e-4
# Plot: fit the net-attracted-fans interaction specification again for the
# plotting section. The formula matches the fit immediately above it in the
# file (log of the outcome shifted by 126, fan-size pair category crossed with
# is_cooperative, same controls and clustering).
model <- feols(
  log(net_attract_fans + 126) ~
    factor(fans_piar_cat) * is_cooperative +
    factor(gender) + factor(age_range) +
    factor(fre_country_region) + factor(fre_city_level) +
    # Opponent-side controls, currently disabled:
    # factor(other_gender) + factor(other_age_range) +
    # factor(other_fre_country_region) + factor(other_fre_city_level) +
    factor(year) + factor(month) + factor(day),
  data = data,
  vcov = ~author_id # standard errors clustered by author_id
)
## The variable 'factor(age_range)UNKNOWN' has been removed because of collinearity (see $collin.var).
summary(model)
## OLS estimation, Dep. Var.: log(net_attract_fans + 126)
## Observations: 187,126
## Standard-errors: Clustered (author_id)
## Estimate Std. Error
## (Intercept) 4.836787129 0.00030783
## factor(fans_piar_cat)Small vs Big 0.000064332 0.00003705
## factor(fans_piar_cat)Big vs Small -0.000145602 0.00003721
## factor(fans_piar_cat)Big vs Big -0.000953758 0.00057946
## is_cooperative -0.000004298 0.00000790
## factor(gender)M -0.000146977 0.00011438
## factor(gender)U -0.000498071 0.00028493
## factor(age_range)12-17 0.000019303 0.00003094
## factor(age_range)18-23 0.000035342 0.00005083
## factor(age_range)24-30 -0.000175559 0.00014196
## factor(age_range)31-40 0.000076848 0.00008668
## factor(age_range)41-49 0.000027623 0.00007550
## factor(age_range)50+ 0.000047684 0.00006751
## factor(fre_country_region)北方 -0.000343349 0.00027291
## factor(fre_country_region)南方 -0.000334491 0.00026819
## factor(fre_city_level)二线城市 -0.000203281 0.00020355
## factor(fre_city_level)三线城市 -0.000021180 0.00004802
## factor(fre_city_level)四线城市 -0.000058940 0.00004960
## factor(fre_city_level)五线城市 -0.000058706 0.00004446
## factor(fre_city_level)新一线城市 0.000021832 0.00005962
## factor(fre_city_level)一线城市 -0.000095514 0.00007105
## factor(year)2023 -0.000012890 0.00003802
## factor(year)2024 -0.000043611 0.00008842
## factor(month)2 0.000065801 0.00004677
## factor(month)3 0.000041882 0.00005152
## factor(month)4 0.000062326 0.00005560
## factor(month)5 0.000030578 0.00010085
## factor(month)6 -0.000315039 0.00033816
## factor(month)7 0.000090432 0.00006101
## factor(month)8 0.000060186 0.00005680
## factor(month)9 0.000114449 0.00006994
## factor(month)10 0.000069764 0.00007224
## factor(month)11 0.000066086 0.00007558
## factor(month)12 0.000013517 0.00008123
## factor(day)2 -0.000111483 0.00007623
## factor(day)3 -0.000025019 0.00004262
## factor(day)4 -0.000030722 0.00003881
## factor(day)5 0.000145418 0.00010342
## factor(day)6 -0.000053112 0.00004258
## factor(day)7 -0.000000858 0.00002811
## factor(day)8 0.000020911 0.00003508
## factor(day)9 -0.000019909 0.00003001
## factor(day)10 -0.000024261 0.00002857
## factor(day)11 -0.000011340 0.00003175
## factor(day)12 0.000023958 0.00003099
## factor(day)13 0.000034812 0.00002540
## factor(day)14 0.000009669 0.00003313
## factor(day)15 -0.000802282 0.00080756
## factor(day)16 0.000008048 0.00002488
## factor(day)17 -0.000254121 0.00026247
## factor(day)18 -0.000132837 0.00012747
## factor(day)19 0.000034682 0.00003508
## factor(day)20 0.000004351 0.00004693
## factor(day)21 0.000001111 0.00003070
## factor(day)22 0.000005915 0.00002947
## factor(day)23 0.000020573 0.00002680
## factor(day)24 -0.000057254 0.00004387
## factor(day)25 -0.000004302 0.00002891
## factor(day)26 -0.000032536 0.00003375
## factor(day)27 0.000016009 0.00003519
## factor(day)28 0.000022928 0.00003927
## factor(day)29 -0.000084480 0.00014724
## factor(day)30 0.000006632 0.00002976
## factor(day)31 -0.000016571 0.00004660
## factor(fans_piar_cat)Small vs Big:is_cooperative 0.000045225 0.00005803
## factor(fans_piar_cat)Big vs Small:is_cooperative 0.000022285 0.00004617
## factor(fans_piar_cat)Big vs Big:is_cooperative 0.000883245 0.00058867
## t value Pr(>|t|)
## (Intercept) 15712.667850 < 2.2e-16 ***
## factor(fans_piar_cat)Small vs Big 1.736207 8.2529e-02 .
## factor(fans_piar_cat)Big vs Small -3.912504 9.1382e-05 ***
## factor(fans_piar_cat)Big vs Big -1.645947 9.9777e-02 .
## is_cooperative -0.544047 5.8641e-01
## factor(gender)M -1.285034 1.9878e-01
## factor(gender)U -1.748067 8.0454e-02 .
## factor(age_range)12-17 0.623793 5.3276e-01
## factor(age_range)18-23 0.695315 4.8686e-01
## factor(age_range)24-30 -1.236721 2.1619e-01
## factor(age_range)31-40 0.886568 3.7531e-01
## factor(age_range)41-49 0.365851 7.1448e-01
## factor(age_range)50+ 0.706290 4.8001e-01
## factor(fre_country_region)北方 -1.258120 2.0835e-01
## factor(fre_country_region)南方 -1.247197 2.1233e-01
## factor(fre_city_level)二线城市 -0.998664 3.1796e-01
## factor(fre_city_level)三线城市 -0.441056 6.5917e-01
## factor(fre_city_level)四线城市 -1.188233 2.3474e-01
## factor(fre_city_level)五线城市 -1.320361 1.8672e-01
## factor(fre_city_level)新一线城市 0.366181 7.1423e-01
## factor(fre_city_level)一线城市 -1.344316 1.7885e-01
## factor(year)2023 -0.339023 7.3459e-01
## factor(year)2024 -0.493203 6.2187e-01
## factor(month)2 1.406936 1.5945e-01
## factor(month)3 0.812977 4.1623e-01
## factor(month)4 1.120924 2.6232e-01
## factor(month)5 0.303214 7.6173e-01
## factor(month)6 -0.931616 3.5154e-01
## factor(month)7 1.482327 1.3826e-01
## factor(month)8 1.059561 2.8935e-01
## factor(month)9 1.636417 1.0175e-01
## factor(month)10 0.965688 3.3420e-01
## factor(month)11 0.874436 3.8188e-01
## factor(month)12 0.166417 8.6783e-01
## factor(day)2 -1.462540 1.4360e-01
## factor(day)3 -0.586964 5.5723e-01
## factor(day)4 -0.791688 4.2854e-01
## factor(day)5 1.406058 1.5971e-01
## factor(day)6 -1.247491 2.1222e-01
## factor(day)7 -0.030526 9.7565e-01
## factor(day)8 0.596125 5.5109e-01
## factor(day)9 -0.663495 5.0701e-01
## factor(day)10 -0.849278 3.9573e-01
## factor(day)11 -0.357172 7.2096e-01
## factor(day)12 0.773155 4.3943e-01
## factor(day)13 1.370580 1.7051e-01
## factor(day)14 0.291820 7.7042e-01
## factor(day)15 -0.993460 3.2049e-01
## factor(day)16 0.323473 7.4634e-01
## factor(day)17 -0.968187 3.3295e-01
## factor(day)18 -1.042066 2.9738e-01
## factor(day)19 0.988668 3.2283e-01
## factor(day)20 0.092709 9.2614e-01
## factor(day)21 0.036178 9.7114e-01
## factor(day)22 0.200706 8.4093e-01
## factor(day)23 0.767603 4.4272e-01
## factor(day)24 -1.305118 1.9185e-01
## factor(day)25 -0.148836 8.8168e-01
## factor(day)26 -0.964059 3.3502e-01
## factor(day)27 0.454888 6.4919e-01
## factor(day)28 0.583913 5.5928e-01
## factor(day)29 -0.573749 5.6614e-01
## factor(day)30 0.222840 8.2366e-01
## factor(day)31 -0.355570 7.2216e-01
## factor(fans_piar_cat)Small vs Big:is_cooperative 0.779406 4.3574e-01
## factor(fans_piar_cat)Big vs Small:is_cooperative 0.482648 6.2935e-01
## factor(fans_piar_cat)Big vs Big:is_cooperative 1.500416 1.3351e-01
## ... 1 variable was removed because of collinearity (factor(age_range)UNKNOWN)
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## RMSE: 0.012361 Adj. R2: 2.277e-4
# Plot predictions for the fan-size x cooperation interaction model summarised above.
# NOTE(review): plot_predictions_with_ci() is defined elsewhere in this file;
# presumably it draws group-level predictions with confidence intervals - confirm there.
plot_predictions_with_ci(model, data)
## `summarise()` has grouped output by 'fans_piar_cat'. You can override using the
## `.groups` argument.
# Plot: main-effect model of the fan-size pairing.
# Outcome is log(net_attract_fans + 126); the shift keeps the log argument
# positive (the smallest observed net_attract_fans is -125).
# Controls: streamer gender, age range, region, city tier, plus year / month /
# day-of-month dummies. Standard errors are clustered by author_id.
# Opponent-side controls are currently commented out:
#   + factor(other_gender) + factor(other_age_range)
#   + factor(other_fre_country_region) + factor(other_fre_city_level)
model <- feols(
  log(net_attract_fans + 126) ~
    factor(fans_piar_cat) +
    factor(gender) +
    factor(age_range) +
    factor(fre_country_region) +
    factor(fre_city_level) +
    factor(year) +
    factor(month) +
    factor(day),
  data = data,
  vcov = ~author_id
)
## The variable 'factor(age_range)UNKNOWN' has been removed because of collinearity (see $collin.var).
# Print the coefficient table; standard errors are clustered by author_id
# (set through vcov in the feols() call).
summary(model)
## OLS estimation, Dep. Var.: log(net_attract_fans + 126)
## Observations: 187,126
## Standard-errors: Clustered (author_id)
## Estimate Std. Error t value
## (Intercept) 4.83677386 0.000304 15907.849925
## factor(fans_piar_cat)Small vs Big 0.00007893 0.000030 2.664195
## factor(fans_piar_cat)Big vs Small -0.00014005 0.000030 -4.710224
## factor(fans_piar_cat)Big vs Big -0.00058675 0.000351 -1.674045
## factor(gender)M -0.00015260 0.000118 -1.297611
## factor(gender)U -0.00048786 0.000281 -1.738129
## factor(age_range)12-17 0.00002033 0.000032 0.639144
## factor(age_range)18-23 0.00003606 0.000051 0.705980
## factor(age_range)24-30 -0.00017480 0.000142 -1.229841
## factor(age_range)31-40 0.00007904 0.000087 0.905513
## factor(age_range)41-49 0.00002898 0.000075 0.383896
## factor(age_range)50+ 0.00004623 0.000066 0.699584
## factor(fre_country_region)北方 -0.00033169 0.000270 -1.227460
## factor(fre_country_region)南方 -0.00032357 0.000267 -1.211530
## factor(fre_city_level)二线城市 -0.00019795 0.000200 -0.990980
## factor(fre_city_level)三线城市 -0.00002015 0.000047 -0.431241
## factor(fre_city_level)四线城市 -0.00005709 0.000048 -1.184916
## factor(fre_city_level)五线城市 -0.00005629 0.000043 -1.314413
## factor(fre_city_level)新一线城市 0.00002533 0.000059 0.429797
## factor(fre_city_level)一线城市 -0.00009585 0.000070 -1.368413
## factor(year)2023 -0.00001058 0.000038 -0.276676
## factor(year)2024 -0.00004187 0.000087 -0.481668
## factor(month)2 0.00006565 0.000047 1.403696
## factor(month)3 0.00004239 0.000052 0.822295
## factor(month)4 0.00006120 0.000056 1.100578
## factor(month)5 0.00002593 0.000101 0.255918
## factor(month)6 -0.00031684 0.000339 -0.934483
## factor(month)7 0.00008637 0.000061 1.421682
## factor(month)8 0.00005654 0.000057 0.997205
## factor(month)9 0.00010992 0.000070 1.561812
## factor(month)10 0.00006692 0.000073 0.921703
## factor(month)11 0.00006516 0.000076 0.861797
## factor(month)12 0.00001390 0.000081 0.171555
## factor(day)2 -0.00010922 0.000076 -1.435254
## factor(day)3 -0.00002582 0.000043 -0.604907
## factor(day)4 -0.00002906 0.000039 -0.747029
## factor(day)5 0.00014554 0.000104 1.405872
## factor(day)6 -0.00005338 0.000043 -1.255411
## factor(day)7 0.00000238 0.000028 0.085032
## factor(day)8 0.00002294 0.000035 0.652994
## factor(day)9 -0.00002038 0.000030 -0.681608
## factor(day)10 -0.00002261 0.000029 -0.790730
## factor(day)11 -0.00000953 0.000032 -0.300276
## factor(day)12 0.00002400 0.000031 0.774587
## factor(day)13 0.00003668 0.000025 1.444168
## factor(day)14 0.00001131 0.000033 0.342317
## factor(day)15 -0.00080381 0.000808 -0.994216
## factor(day)16 0.00000997 0.000025 0.404570
## factor(day)17 -0.00025415 0.000263 -0.967956
## factor(day)18 -0.00013134 0.000128 -1.029707
## factor(day)19 0.00003678 0.000035 1.049649
## factor(day)20 0.00000494 0.000047 0.105474
## factor(day)21 0.00000298 0.000031 0.096649
## factor(day)22 0.00000721 0.000029 0.244769
## factor(day)23 0.00002104 0.000027 0.785063
## factor(day)24 -0.00005397 0.000044 -1.231904
## factor(day)25 -0.00000150 0.000029 -0.052259
## factor(day)26 -0.00003065 0.000034 -0.910605
## factor(day)27 0.00001745 0.000035 0.497809
## factor(day)28 0.00002235 0.000039 0.570565
## factor(day)29 -0.00008051 0.000147 -0.548022
## factor(day)30 0.00001013 0.000030 0.340187
## factor(day)31 -0.00001656 0.000047 -0.355918
## Pr(>|t|)
## (Intercept) < 2.2e-16 ***
## factor(fans_piar_cat)Small vs Big 7.7181e-03 **
## factor(fans_piar_cat)Big vs Small 2.4766e-06 ***
## factor(fans_piar_cat)Big vs Big 9.4124e-02 .
## factor(gender)M 1.9442e-01
## factor(gender)U 8.2190e-02 .
## factor(age_range)12-17 5.2273e-01
## factor(age_range)18-23 4.8020e-01
## factor(age_range)24-30 2.1876e-01
## factor(age_range)31-40 3.6520e-01
## factor(age_range)41-49 7.0106e-01
## factor(age_range)50+ 4.8419e-01
## factor(fre_country_region)北方 2.1965e-01
## factor(fre_country_region)南方 2.2569e-01
## factor(fre_city_level)二线城市 3.2170e-01
## factor(fre_city_level)三线城市 6.6629e-01
## factor(fre_city_level)四线城市 2.3605e-01
## factor(fre_city_level)五线城市 1.8871e-01
## factor(fre_city_level)新一线城市 6.6734e-01
## factor(fre_city_level)一线城市 1.7118e-01
## factor(year)2023 7.8203e-01
## factor(year)2024 6.3004e-01
## factor(month)2 1.6041e-01
## factor(month)3 4.1091e-01
## factor(month)4 2.7108e-01
## factor(month)5 7.9801e-01
## factor(month)6 3.5006e-01
## factor(month)7 1.5512e-01
## factor(month)8 3.1867e-01
## factor(month)9 1.1833e-01
## factor(month)10 3.5669e-01
## factor(month)11 3.8880e-01
## factor(month)12 8.6379e-01
## factor(day)2 1.5122e-01
## factor(day)3 5.4524e-01
## factor(day)4 4.5505e-01
## factor(day)5 1.5976e-01
## factor(day)6 2.0933e-01
## factor(day)7 9.3224e-01
## factor(day)8 5.1376e-01
## factor(day)9 4.9549e-01
## factor(day)10 4.2910e-01
## factor(day)11 7.6397e-01
## factor(day)12 4.3858e-01
## factor(day)13 1.4869e-01
## factor(day)14 7.3211e-01
## factor(day)15 3.2012e-01
## factor(day)16 6.8579e-01
## factor(day)17 3.3307e-01
## factor(day)18 3.0315e-01
## factor(day)19 2.9388e-01
## factor(day)20 9.1600e-01
## factor(day)21 9.2301e-01
## factor(day)22 8.0664e-01
## factor(day)23 4.3242e-01
## factor(day)24 2.1799e-01
## factor(day)25 9.5832e-01
## factor(day)26 3.6250e-01
## factor(day)27 6.1862e-01
## factor(day)28 5.6830e-01
## factor(day)29 5.8368e-01
## factor(day)30 7.3372e-01
## factor(day)31 7.2190e-01
## ... 1 variable was removed because of collinearity (factor(age_range)UNKNOWN)
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## RMSE: 0.012362 Adj. R2: 1.462e-4
# Plot predictions from the fan-size main-effect model fitted above.
# NOTE(review): plot_predictions_fans() is defined elsewhere in this file;
# presumably it plots predictions per fans_piar_cat level - confirm there.
plot_predictions_fans(model, data)
# Plot: main-effect model of cooperation (is_cooperative = 1 when both
# streamers share the same live_operation_tag).
# Outcome is log(net_attract_fans + 126); the shift keeps the log argument
# positive (the smallest observed net_attract_fans is -125).
# Controls: streamer gender, age range, region, city tier, plus year / month /
# day-of-month dummies. Standard errors are clustered by author_id.
# Opponent-side controls are currently commented out:
#   + factor(other_gender) + factor(other_age_range)
#   + factor(other_fre_country_region) + factor(other_fre_city_level)
model <- feols(
  log(net_attract_fans + 126) ~
    is_cooperative +
    factor(gender) +
    factor(age_range) +
    factor(fre_country_region) +
    factor(fre_city_level) +
    factor(year) +
    factor(month) +
    factor(day),
  data = data,
  vcov = ~author_id
)
## The variable 'factor(age_range)UNKNOWN' has been removed because of collinearity (see $collin.var).
# Print the coefficient table; standard errors are clustered by author_id
# (set through vcov in the feols() call).
summary(model)
## OLS estimation, Dep. Var.: log(net_attract_fans + 126)
## Observations: 187,126
## Standard-errors: Clustered (author_id)
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 4.836730004 0.000293 16533.628359 < 2.2e-16
## is_cooperative 0.000067457 0.000050 1.339831 0.180302
## factor(gender)M -0.000086516 0.000082 -1.054719 0.291555
## factor(gender)U -0.000458895 0.000276 -1.661820 0.096551
## factor(age_range)12-17 0.000005579 0.000025 0.222043 0.824281
## factor(age_range)18-23 0.000005764 0.000036 0.158300 0.874220
## factor(age_range)24-30 -0.000252351 0.000180 -1.402768 0.160688
## factor(age_range)31-40 0.000002102 0.000052 0.040343 0.967820
## factor(age_range)41-49 -0.000011965 0.000063 -0.189060 0.850046
## factor(age_range)50+ 0.000032834 0.000060 0.543344 0.586894
## factor(fre_country_region)北方 -0.000304926 0.000267 -1.141816 0.253532
## factor(fre_country_region)南方 -0.000278103 0.000268 -1.038023 0.299261
## factor(fre_city_level)二线城市 -0.000235457 0.000219 -1.077392 0.281307
## factor(fre_city_level)三线城市 -0.000052218 0.000041 -1.272604 0.203161
## factor(fre_city_level)四线城市 -0.000076222 0.000046 -1.648460 0.099260
## factor(fre_city_level)五线城市 -0.000060191 0.000039 -1.532315 0.125447
## factor(fre_city_level)新一线城市 -0.000012239 0.000057 -0.213710 0.830774
## factor(fre_city_level)一线城市 -0.000131864 0.000070 -1.878276 0.060345
## factor(year)2023 -0.000036577 0.000038 -0.952214 0.340990
## factor(year)2024 -0.000074306 0.000104 -0.714914 0.474663
## factor(month)2 0.000068640 0.000047 1.467066 0.142360
## factor(month)3 0.000033915 0.000051 0.664349 0.506468
## factor(month)4 0.000055263 0.000056 0.992424 0.320992
## factor(month)5 0.000018413 0.000103 0.179054 0.857896
## factor(month)6 -0.000331208 0.000346 -0.956985 0.338576
## factor(month)7 0.000074303 0.000061 1.216321 0.223864
## factor(month)8 0.000043783 0.000057 0.766389 0.443446
## factor(month)9 0.000086946 0.000074 1.176491 0.239400
## factor(month)10 0.000045514 0.000077 0.594630 0.552092
## factor(month)11 0.000043209 0.000079 0.544541 0.586070
## factor(month)12 -0.000004344 0.000084 -0.051419 0.958992
## factor(day)2 -0.000106565 0.000076 -1.402253 0.160842
## factor(day)3 -0.000022840 0.000043 -0.536868 0.591360
## factor(day)4 -0.000031574 0.000039 -0.816498 0.414217
## factor(day)5 0.000141624 0.000104 1.367760 0.171389
## factor(day)6 -0.000059157 0.000043 -1.388253 0.165062
## factor(day)7 -0.000001403 0.000028 -0.050301 0.959883
## factor(day)8 0.000013671 0.000035 0.394973 0.692863
## factor(day)9 -0.000023943 0.000030 -0.807826 0.419192
## factor(day)10 -0.000024964 0.000028 -0.880103 0.378805
## factor(day)11 -0.000014723 0.000032 -0.464387 0.642371
## factor(day)12 0.000019870 0.000031 0.644974 0.518945
## factor(day)13 0.000032225 0.000025 1.283670 0.199259
## factor(day)14 0.000004407 0.000033 0.132775 0.894371
## factor(day)15 -0.000807414 0.000811 -0.996069 0.319218
## factor(day)16 0.000006524 0.000025 0.264448 0.791435
## factor(day)17 -0.000258567 0.000263 -0.981670 0.326264
## factor(day)18 -0.000133190 0.000128 -1.044235 0.296378
## factor(day)19 0.000028585 0.000035 0.821423 0.411407
## factor(day)20 0.000000613 0.000047 0.013127 0.989527
## factor(day)21 -0.000006950 0.000030 -0.229472 0.818502
## factor(day)22 0.000004020 0.000029 0.138126 0.890141
## factor(day)23 0.000015033 0.000026 0.569530 0.568997
## factor(day)24 -0.000060325 0.000044 -1.372260 0.169984
## factor(day)25 -0.000007154 0.000029 -0.248070 0.804081
## factor(day)26 -0.000033160 0.000034 -0.986930 0.323678
## factor(day)27 0.000011816 0.000035 0.335572 0.737194
## factor(day)28 0.000020410 0.000039 0.521669 0.601902
## factor(day)29 -0.000082120 0.000147 -0.558717 0.576356
## factor(day)30 0.000008498 0.000030 0.287156 0.773993
## factor(day)31 -0.000017542 0.000047 -0.375157 0.707544
##
## (Intercept) ***
## is_cooperative
## factor(gender)M
## factor(gender)U .
## factor(age_range)12-17
## factor(age_range)18-23
## factor(age_range)24-30
## factor(age_range)31-40
## factor(age_range)41-49
## factor(age_range)50+
## factor(fre_country_region)北方
## factor(fre_country_region)南方
## factor(fre_city_level)二线城市
## factor(fre_city_level)三线城市
## factor(fre_city_level)四线城市 .
## factor(fre_city_level)五线城市
## factor(fre_city_level)新一线城市
## factor(fre_city_level)一线城市 .
## factor(year)2023
## factor(year)2024
## factor(month)2
## factor(month)3
## factor(month)4
## factor(month)5
## factor(month)6
## factor(month)7
## factor(month)8
## factor(month)9
## factor(month)10
## factor(month)11
## factor(month)12
## factor(day)2
## factor(day)3
## factor(day)4
## factor(day)5
## factor(day)6
## factor(day)7
## factor(day)8
## factor(day)9
## factor(day)10
## factor(day)11
## factor(day)12
## factor(day)13
## factor(day)14
## factor(day)15
## factor(day)16
## factor(day)17
## factor(day)18
## factor(day)19
## factor(day)20
## factor(day)21
## factor(day)22
## factor(day)23
## factor(day)24
## factor(day)25
## factor(day)26
## factor(day)27
## factor(day)28
## factor(day)29
## factor(day)30
## factor(day)31
## ... 1 variable was removed because of collinearity (factor(age_range)UNKNOWN)
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## RMSE: 0.012363 Adj. R2: 4.961e-7
# Plot predictions from the cooperation main-effect model fitted above.
# NOTE(review): plot_predictions_cate() is defined elsewhere in this file;
# presumably it plots predictions per is_cooperative level - confirm there.
plot_predictions_cate(model, data)
### (5) plot continue cutoff
# Formula handed to the cutoff plot: shifted-log net attracted fans regressed
# on the fan-size pairing category.
# NOTE(review): fans_piar_cat_temp is not created in this section; presumably
# plot_continue_cutoff() rebuilds it for each cutoff it evaluates - confirm in
# the helper's definition.
model_formula <- log(net_attract_fans + 126) ~ factor(fans_piar_cat_temp)

# Draw the plot from the full data set and the formula above.
plot_continue_cutoff(data, model_formula)
library(ggplot2)
library(dplyr)
library(RColorBrewer)
# Steps 1-2: Bin both streamers' pre-PK fan counts into five ordered ranges.
# cut() with right-closed intervals gives [0, 1k], (1k, 10k], (10k, 100k],
# (100k, 1M], (1M, Inf) and returns a factor whose levels are already in
# ascending order, so no separate re-levelling step is needed.
# The lowest bin includes 0: the previous case_when() started at "> 0" and fell
# through to TRUE ~ ">1M", which labelled zero-fan streamers (and any NA) as
# ">1M". Missing or negative counts now stay NA instead.
fan_breaks <- c(0, 1000, 10000, 100000, 1000000, Inf)
fan_labels <- c("0-1k", "1k-10k", "10k-100k", "100k-1M", ">1M")

data <- data %>%
  mutate(
    fans_new_range = cut(
      before_fans_count,
      breaks = fan_breaks,
      labels = fan_labels,
      right = TRUE,
      include.lowest = TRUE
    ),
    other_fans_new_range = cut(
      other_before_fans_count,
      breaks = fan_breaks,
      labels = fan_labels,
      right = TRUE,
      include.lowest = TRUE
    )
  )
# Step 3: Aggregate each (own range, opponent range) cell: number of PK pairs,
# mean of log(net_attract_fans + 126), and a 95% t-interval for that mean.
agg_data <- data %>%
  mutate(log_net_attract = log(net_attract_fans + 126)) %>%
  group_by(fans_new_range, other_fans_new_range) %>%
  summarize(
    count = n(), # number of pairs in this cell
    n_obs = sum(!is.na(log_net_attract)), # non-missing outcomes in this cell
    Y = if (n_obs > 0) mean(log_net_attract, na.rm = TRUE) else NA_real_,
    # The standard error must come from the individual observations. Taking
    # sd() of Y here would use the scalar mean just defined and always give NA.
    se = sd(log_net_attract, na.rm = TRUE) / sqrt(n_obs),
    # t-interval with n - 1 degrees of freedom; undefined for fewer than 2 obs.
    ci_lower = if (n_obs > 1) Y - qt(0.975, df = n_obs - 1) * se else NA_real_,
    ci_upper = if (n_obs > 1) Y + qt(0.975, df = n_obs - 1) * se else NA_real_,
    .groups = "drop" # return ungrouped output (also silences the grouping message)
  ) %>%
  mutate(percentage = count / sum(count) * 100) # share of all pairs, in percent

# Step 4: Heatmap of the cell means, labelled with each cell's share of pairs.
# No error-bar layer: the y axis is the categorical opponent range, so
# ymin / ymax on the outcome scale cannot be drawn on it. The interval bounds
# stay available in agg_data (ci_lower, ci_upper) for tables or labels.
ggplot(agg_data, aes(x = fans_new_range, y = other_fans_new_range, fill = Y)) +
  geom_tile() +
  geom_text(
    aes(label = paste0(round(percentage, 2), "%")),
    color = "black",
    size = 4
  ) +
  scale_fill_gradientn(
    colors = c("yellow", "orange", "darkorange", "red", "darkred"),
    # Evenly spaced colour stops over the observed range of Y.
    values = scales::rescale(c(0, 0.05, 0.1, 0.15, 0.2)),
    # limits = c(0, 0.2),
    na.value = "white"
  ) +
  labs(
    # Labels name the quantity actually mapped to fill.
    title = "Mean log(net_attract_fans + 126) by Fan Ranges",
    x = "Fans Range",
    y = "Other Fans Range",
    fill = "Mean log(net_attract_fans + 126)"
  ) +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))