# Point the working directory at the project folder so that the relative CSV
# file names used by read.csv()/write.csv() below resolve.
# NOTE(review): this absolute path is machine-specific; adjust it per machine.
setwd("C:/Users/ASUS/Desktop/快手")

数据:时间范围(period)为 20230101 至 2024010(原文日期缺一位,疑为 20240101,待确认)。筛选条件:选取前一年发起过超过 20 次 invite PK 的主播,取其在 2023 年的 invite PK 数据。

1.读取并处理数据(后续无需运行)

# dplyr supplies %>%, group_by(), filter() and n() used below; attach it here
# because no earlier library() call is visible in this script.
library(dplyr)

data <- read.csv("Within_Invited_PK.csv")

# Strip a leading "a." from every column name (presumably a table alias left
# by the export -- TODO confirm). Names without that prefix are unchanged.
names(data) <- sub("^a\\.", "", names(data))

# Keep only pk_ids that occur on exactly two rows (presumably one row per
# participating author), then drop the grouping so later steps work on a
# plain, ungrouped table.
data <- data %>%
  group_by(pk_id) %>%
  filter(n() == 2) %>%
  ungroup()

将数据整理为同时包含 author_id(本方主播)特征和 other_author_id(对方主播)特征的形式

# List the columns available after loading.
names(data)
##  [1] "author_id"            "live_id"              "p_date"              
##  [4] "is_pk_live"           "pk_id"                "pk_type"             
##  [7] "gender"               "fans_user_num"        "fans_range"          
## [10] "fans_group_fans_num"  "author_type"          "author_category_type"
## [13] "author_income_range"  "live_duration_7d"     "play_duration_7d"    
## [16] "live_operation_tag"   "live_stream_category" "age_range"           
## [19] "fre_country_region"   "fre_city_level"       "follow_user_num"     
## [22] "reg_day_cnt"          "is_big_v"             "live_type"           
## [25] "other_author_id"
# Key columns carried along with the focal author's features.
id <- c("author_id", "live_id", "pk_id", "p_date", "other_author_id")

# Author-level attributes that are extracted once for the focal author and,
# further below, once more for the opponent.
author_feature <- c(
  "gender",
  "fans_user_num", "fans_range", "fans_group_fans_num",
  "author_type", "live_operation_tag", "live_stream_category",
  "author_income_range",
  "age_range", "fre_country_region", "fre_city_level",
  "follow_user_num", "reg_day_cnt"
)

# Focal-author table: keys followed by the feature columns.
data_author <- data[c(id, author_feature)]
names(data_author)
##  [1] "author_id"            "live_id"              "pk_id"               
##  [4] "p_date"               "other_author_id"      "gender"              
##  [7] "fans_user_num"        "fans_range"           "fans_group_fans_num" 
## [10] "author_type"          "live_operation_tag"   "live_stream_category"
## [13] "author_income_range"  "age_range"            "fre_country_region"  
## [16] "fre_city_level"       "follow_user_num"      "reg_day_cnt"
# Opponent-side table: the same rows and feature columns as the focal-author
# table, but every feature is renamed with an "other_" prefix so that both
# sides can sit next to each other after the merge. The four key columns keep
# their names.
data_other_author <- data[c("author_id", "live_id", "pk_id", "p_date", author_feature)]
names(data_other_author) <- c(
  "author_id", "live_id", "pk_id", "p_date",
  paste0("other_", author_feature)
)
names(data_other_author)
##  [1] "author_id"                  "live_id"                   
##  [3] "pk_id"                      "p_date"                    
##  [5] "other_gender"               "other_fans_user_num"       
##  [7] "other_fans_range"           "other_fans_group_fans_num" 
##  [9] "other_author_type"          "other_live_operation_tag"  
## [11] "other_live_stream_category" "other_author_income_range" 
## [13] "other_age_range"            "other_fre_country_region"  
## [15] "other_fre_city_level"       "other_follow_user_num"     
## [17] "other_reg_day_cnt"
# Attach the opponent's features to each focal-author row: a row matches when
# pk_id and p_date agree and the focal row's other_author_id equals the
# opponent row's author_id. all = TRUE makes this a full outer join, so rows
# without a partner are kept with NA on the missing side.
# live_id occurs in both inputs and therefore comes back as live_id.x (focal)
# and live_id.y (opponent).
data_all <- merge(
  data_author, data_other_author,
  by.x = c("pk_id", "p_date", "other_author_id"),
  by.y = c("pk_id", "p_date", "author_id"),
  all = TRUE
)
colnames(data_all)
##  [1] "pk_id"                      "p_date"                    
##  [3] "other_author_id"            "author_id"                 
##  [5] "live_id.x"                  "gender"                    
##  [7] "fans_user_num"              "fans_range"                
##  [9] "fans_group_fans_num"        "author_type"               
## [11] "live_operation_tag"         "live_stream_category"      
## [13] "author_income_range"        "age_range"                 
## [15] "fre_country_region"         "fre_city_level"            
## [17] "follow_user_num"            "reg_day_cnt"               
## [19] "live_id.y"                  "other_gender"              
## [21] "other_fans_user_num"        "other_fans_range"          
## [23] "other_fans_group_fans_num"  "other_author_type"         
## [25] "other_live_operation_tag"   "other_live_stream_category"
## [27] "other_author_income_range"  "other_age_range"           
## [29] "other_fre_country_region"   "other_fre_city_level"      
## [31] "other_follow_user_num"      "other_reg_day_cnt"
# Give the focal author's live id its plain name back. The column is selected
# by name rather than by position, so a change in column order cannot rename
# the wrong column.
names(data_all)[names(data_all) == "live_id.x"] <- "live_id"

对每个 pk_id 组应用随机分配 0 和 1,只选取其中一个author

# Fix the RNG seed so that the same author is kept for each PK on every run.
set.seed(20230101)

# Within each pk_id, flag exactly one randomly chosen row with 1 and all other
# rows with 0. A random row position is drawn instead of sampling from c(0, 1)
# without replacement: that form fails when a pk_id has more than two rows and
# can drop a pk_id completely when it has a single row, both of which the
# outer merge above can produce.
data_all <- data_all %>%
  group_by(pk_id) %>%
  mutate(new_author_index = as.numeric(row_number() == sample.int(n(), 1))) %>%
  ungroup()

# Share of rows flagged with 1 (0.5 when every pk_id has exactly two rows).
probability_of_one <- mean(data_all$new_author_index == 1)

# Keep only the flagged row of each pk_id.
data_all <- data_all[data_all$new_author_index == 1, ]

存储处理好的数据

# Save the cleaned table; section 2 reads it back from this file.
write.csv(
  x = data_all,
  file = "Within_Invited_PK_clean.csv",
  row.names = FALSE
)

2.查看邀请PK的pair特征

# Section 1 is marked as not needing a re-run, so this section must be able to
# start on its own: attach the packages it relies on (dplyr for %>%, mutate(),
# count(), ...; ggplot2 for the plots) before anything else.
library(dplyr)
library(ggplot2)

data <- read.csv("Within_Invited_PK_clean.csv")

2.1 查看用户author_pair分布

(1) 查看分布

# Frequency of each author type on the focal side.
table(data[["author_type"]])
## 
##      大V 电商主播 秀场主播 游戏主播 
##       23     3855    43863       65
# Frequency of each author type on the opponent side.
table(data[["other_author_type"]])
## 
##      大V 电商主播 秀场主播 游戏主播 
##       33     3952    43739       82
# Encode the unordered (focal type, opponent type) combination as a number:
#   1 = 电商/电商, 2 = 游戏/游戏, 3 = 秀场/秀场,
#   4 = 电商/游戏, 5 = 电商/秀场, 6 = 游戏/秀场,
#   0 = anything else (a type outside these three, or a missing value).
data <- local({
  type_levels <- c("电商主播", "游戏主播", "秀场主播")
  # Symmetric lookup table: rows = focal type, columns = opponent type.
  code_table <- matrix(
    c(1, 4, 5,
      4, 2, 6,
      5, 6, 3),
    nrow = 3
  )
  positions <- cbind(
    match(data$author_type, type_levels),
    match(data$other_author_type, type_levels)
  )
  pair_code <- code_table[positions]
  pair_code[is.na(pair_code)] <- 0
  data$both_author_type_pair <- pair_code
  data
})

# Histogram of the pair codes, one bin per code.
ggplot(data = data, mapping = aes(x = both_author_type_pair)) +
  geom_histogram(binwidth = 1)

# Count the rows per both_author_type_pair code, most frequent code first.
frequency_table <- arrange(
  count(data, both_author_type_pair),
  desc(n)
)

# Show the result.
print(frequency_table)
##   both_author_type_pair     n
## 1                     3 41499
## 2                     5  4455
## 3                     1  1658
## 4                     6   135
## 5                     0    52
## 6                     2     5
## 7                     4     2

(2) 计算pair lift

# Co-occurrence ratio ("lift") between the focal author's type and the
# opponent's type.
#
# Args:
#   author_type_1: value compared with data$author_type.
#   author_type_2: value compared with data$other_author_type.
#   data:          data frame (or tibble) with the columns author_type and
#                  other_author_type.
#   normalize:     FALSE (default) returns A_B / (A * B), exactly as before.
#                  TRUE multiplies that ratio by nrow(data), which gives the
#                  conventional lift P(A, B) / (P(A) * P(B)) where 1 means
#                  independence.
#
# Returns:
#   A single number, or NA when one of the two types never occurs.
calculate_lift <- function(author_type_1, author_type_2, data,
                           normalize = FALSE) {
  is_type_1 <- data$author_type == author_type_1
  is_type_2 <- data$other_author_type == author_type_2

  # Count on the logical masks. Rows whose type is NA are not counted, and the
  # result does not depend on whether `[` would drop to a vector (it does not
  # for tibbles). as.numeric() keeps A * B from overflowing integer range.
  A <- as.numeric(sum(is_type_1, na.rm = TRUE))
  B <- as.numeric(sum(is_type_2, na.rm = TRUE))
  A_B <- as.numeric(sum(is_type_1 & is_type_2, na.rm = TRUE))

  # Avoid dividing by zero.
  if (A == 0 || B == 0) {
    return(NA_real_)
  }

  lift_A_B <- A_B / (A * B)
  if (normalize) {
    lift_A_B <- lift_A_B * nrow(data)
  }
  lift_A_B
}

# Author types to combine.
author_types <- c("电商主播", "秀场主播", "游戏主播")

# Build every ordered (type 1, type 2) combination in one go instead of
# growing the data frame with rbind() inside a nested loop. Type 1 varies
# slowest, which reproduces the row order of the nested loop.
lift_result_1 <- data.frame(
  Author_Type_1 = rep(author_types, each = length(author_types)),
  Author_Type_2 = rep(author_types, times = length(author_types)),
  stringsAsFactors = FALSE
)

# One calculate_lift() call per row; `data` is passed unchanged to every call.
lift_result_1$Lift_A_B <- as.numeric(mapply(
  calculate_lift,
  lift_result_1$Author_Type_1,
  lift_result_1$Author_Type_2,
  MoreArgs = list(data = data),
  USE.NAMES = FALSE
))

# Show the result, highest value first.
lift_result_1 %>% arrange(desc(Lift_A_B))
##   Author_Type_1 Author_Type_2     Lift_A_B
## 1      游戏主播      游戏主播 9.380863e-04
## 2      电商主播      电商主播 1.088286e-04
## 3      秀场主播      秀场主播 2.163069e-05
## 4      秀场主播      游戏主播 2.113009e-05
## 5      游戏主播      秀场主播 2.075247e-05
## 6      秀场主播      电商主播 1.316438e-05
## 7      电商主播      秀场主播 1.288744e-05
## 8      游戏主播      电商主播 3.892868e-06
## 9      电商主播      游戏主播 3.163456e-06
# Merge the A-B row and the B-A row of each combination.
# Sort Author_Type_1 and Author_Type_2 within every row and join them with
# "_", which yields one identifier per unordered pair.
lift_result_1 <- ungroup(
  mutate(
    rowwise(lift_result_1),
    sorted_pair = paste(sort(c(Author_Type_1, Author_Type_2)), collapse = "_")
  )
)

# Add up Lift_A_B over the rows that share the same unordered pair.
df <- summarise(
  group_by(lift_result_1, sorted_pair),
  Lift_A_B_sum = sum(Lift_A_B)
)

arrange(df, desc(Lift_A_B_sum))
## # A tibble: 6 × 2
##   sorted_pair       Lift_A_B_sum
##   <chr>                    <dbl>
## 1 游戏主播_游戏主播   0.000938  
## 2 电商主播_电商主播   0.000109  
## 3 秀场主播_游戏主播   0.0000419 
## 4 电商主播_秀场主播   0.0000261 
## 5 秀场主播_秀场主播   0.0000216 
## 6 电商主播_游戏主播   0.00000706

2.2 查看用户更细的live_operation_tag 分布

(1) 查看分布

# Frequency of each live operation tag on the focal side.
table(data[["live_operation_tag"]])
## 
##          UNKNOWN     才艺技能展示 个人爱好兴趣分享         科普教学 
##             1773             2608              233             1907 
##         媒体直播         情感互动     日常生活展示   商品售卖及推荐 
##              127             3620             4199             5544 
##         团队表演           无主题         闲聊互动             颜值 
##               20             1752            24745              993 
##         游戏直播         运动健身 
##              113              172
# Frequency of each live operation tag on the opponent side.
table(data[["other_live_operation_tag"]])
## 
##          UNKNOWN     才艺技能展示 个人爱好兴趣分享         科普教学 
##             1755             2580              225             1884 
##         媒体直播         情感互动     日常生活展示   商品售卖及推荐 
##              160             3639             4196             5575 
##         团队表演           无主题         闲聊互动             颜值 
##               24             1804            24717              961 
##         游戏直播         运动健身 
##              118              168
# Distinct live operation tags, in order of first appearance.
unique(data[["live_operation_tag"]])
##  [1] "商品售卖及推荐"   "闲聊互动"         "才艺技能展示"     "颜值"            
##  [5] "日常生活展示"     "无主题"           "UNKNOWN"          "情感互动"        
##  [9] "科普教学"         "个人爱好兴趣分享" "媒体直播"         "游戏直播"        
## [13] "运动健身"         "团队表演"
# Tags that take part in the pair coding. Their order fixes the pair ids.
# "UNKNOWN" and "无主题" are not listed, so any pair involving them is coded 0.
pair_tag_levels <- c(
  "才艺技能展示", "个人爱好兴趣分享", "科普教学", "媒体直播",
  "情感互动", "日常生活展示", "商品售卖及推荐", "团队表演",
  "闲聊互动", "颜值", "游戏直播", "运动健身"
)

# Number the unordered pairs of two different levels.
#
# With i < j the positions of the two values in `levels`, the pairs are
# numbered row by row: (1,2) = 1, (1,3) = 2, ..., (1,n) = n - 1, (2,3) = n, ...
# For the 12 tags above this gives exactly the ids 1..66 that used to be
# written out by hand (e.g. 才艺技能展示/个人爱好兴趣分享 = 1,
# 游戏直播/运动健身 = 66).
#
# Args:
#   x, y:   vectors of equal length holding the two sides of each pair.
#   levels: vector of the values that take part in the coding.
#
# Returns:
#   Numeric vector of pair ids; 0 when both sides are equal, when a side is
#   not in `levels`, or when a side is NA.
encode_unordered_pair <- function(x, y, levels) {
  n <- length(levels)
  i <- match(x, levels)
  j <- match(y, levels)
  lo <- pmin(i, j)
  hi <- pmax(i, j)
  # Pairs whose smaller index is below `lo` come first:
  # (n - 1) + (n - 2) + ... + (n - lo + 1) = (lo - 1) * (2 * n - lo) / 2
  code <- (lo - 1) * (2 * n - lo) / 2 + (hi - lo)
  code[is.na(code) | lo == hi] <- 0
  code
}

data <- data %>%
  mutate(
    both_live_operation_tag_pair = encode_unordered_pair(
      live_operation_tag, other_live_operation_tag, pair_tag_levels
    )
  )
library(ggplot2)

# Histogram of the tag pair ids: one bin per id, an x-axis tick every third
# id, blue bars with black outlines.
ggplot(data = data, mapping = aes(x = both_live_operation_tag_pair)) +
  geom_histogram(binwidth = 1, fill = "blue", color = "black") +
  scale_x_continuous(
    breaks = seq(
      from = min(data$both_live_operation_tag_pair),
      to = max(data$both_live_operation_tag_pair),
      by = 3
    )
  ) +
  labs(
    title = "Distribution of Live Operation Tag Pairs",
    x = "Live Operation Tag Pair",
    y = "Frequency"
  ) +
  theme_minimal()

# Number of rows per tag pair id.
table(data[["both_live_operation_tag_pair"]])
## 
##     0     1     2     3     4     5     6     7     8     9    10    11    12 
## 34225     7    16     1    75   242    96     2  1800    37    14    23    25 
##    13    14    15    16    18    22    23    24    25    27    28    30    31 
##     8    36    24    22   187     8    68   206   764   491   130    35     7 
##    32    33    35    39    40    42    43    44    45    46    47    48    49 
##    37    69   130   174   701  1385     8     2     1   655     6  3355    20 
##    50    51    52    53    54    55    56    57    58    61    62    63    64 
##    13    12    16  1598    41    15    20    16     1   795   146    39     2
# Count the rows per both_live_operation_tag_pair id, most frequent id first.
frequency_table <- arrange(
  count(data, both_live_operation_tag_pair),
  desc(n)
)

# Show the result.
print(frequency_table)
##    both_live_operation_tag_pair     n
## 1                             0 34225
## 2                            48  3355
## 3                             8  1800
## 4                            53  1598
## 5                            42  1385
## 6                            61   795
## 7                            25   764
## 8                            40   701
## 9                            46   655
## 10                           27   491
## 11                            5   242
## 12                           24   206
## 13                           18   187
## 14                           39   174
## 15                           62   146
## 16                           28   130
## 17                           35   130
## 18                            6    96
## 19                            4    75
## 20                           33    69
## 21                           23    68
## 22                           54    41
## 23                           63    39
## 24                            9    37
## 25                           32    37
## 26                           14    36
## 27                           30    35
## 28                           12    25
## 29                           15    24
## 30                           11    23
## 31                           16    22
## 32                           49    20
## 33                           56    20
## 34                            2    16
## 35                           52    16
## 36                           57    16
## 37                           55    15
## 38                           10    14
## 39                           50    13
## 40                           51    12
## 41                           13     8
## 42                           22     8
## 43                           43     8
## 44                            1     7
## 45                           31     7
## 46                           47     6
## 47                            7     2
## 48                           44     2
## 49                           64     2
## 50                            3     1
## 51                           45     1
## 52                           58     1

(2) 计算pair lift

# Co-occurrence ratio ("lift") between the focal author's live operation tag
# and the opponent's live operation tag.
#
# Args:
#   live_operation_tag_1: value compared with data$live_operation_tag.
#   live_operation_tag_2: value compared with data$other_live_operation_tag.
#   data:                 data frame (or tibble) with the columns
#                         live_operation_tag and other_live_operation_tag.
#   normalize:            FALSE (default) returns A_B / (A * B), exactly as
#                         before. TRUE multiplies that ratio by nrow(data),
#                         which gives the conventional lift
#                         P(A, B) / (P(A) * P(B)) where 1 means independence.
#
# Returns:
#   A single number, or NA when one of the two tags never occurs.
calculate_lift <- function(live_operation_tag_1, live_operation_tag_2, data,
                           normalize = FALSE) {
  is_tag_1 <- data$live_operation_tag == live_operation_tag_1
  is_tag_2 <- data$other_live_operation_tag == live_operation_tag_2

  # Count on the logical masks. Rows whose tag is NA are not counted, and the
  # result does not depend on whether `[` would drop to a vector (it does not
  # for tibbles). as.numeric() keeps A * B from overflowing integer range.
  A <- as.numeric(sum(is_tag_1, na.rm = TRUE))
  B <- as.numeric(sum(is_tag_2, na.rm = TRUE))
  A_B <- as.numeric(sum(is_tag_1 & is_tag_2, na.rm = TRUE))

  # Avoid dividing by zero.
  if (A == 0 || B == 0) {
    return(NA_real_)
  }

  lift_A_B <- A_B / (A * B)
  if (normalize) {
    lift_A_B <- lift_A_B * nrow(data)
  }
  lift_A_B
}

# Live operation tags to combine.
live_operation_tags <- c(
  "商品售卖及推荐", "闲聊互动", "才艺技能展示", "颜值", "日常生活展示", "情感互动", "科普教学", "个人爱好兴趣分享", "游戏直播", "媒体直播",
  "运动健身", "团队表演"
)

# Build every ordered (tag 1, tag 2) combination in one go instead of growing
# the data frame with rbind() inside a nested loop. Tag 1 varies slowest,
# which reproduces the row order of the nested loop.
lift_result_2 <- data.frame(
  Live_Operation_Tag_1 = rep(live_operation_tags, each = length(live_operation_tags)),
  Live_Operation_Tag_2 = rep(live_operation_tags, times = length(live_operation_tags)),
  stringsAsFactors = FALSE
)

# One calculate_lift() call per row; `data` is passed unchanged to every call.
lift_result_2$Lift_A_B <- as.numeric(mapply(
  calculate_lift,
  lift_result_2$Live_Operation_Tag_1,
  lift_result_2$Live_Operation_Tag_2,
  MoreArgs = list(data = data),
  USE.NAMES = FALSE
))

# Show the result, highest value first.
lift_result_2 %>% arrange(desc(Lift_A_B))
##     Live_Operation_Tag_1 Live_Operation_Tag_2     Lift_A_B
## 1               运动健身             运动健身 3.356866e-03
## 2       个人爱好兴趣分享     个人爱好兴趣分享 1.068193e-03
## 3               游戏直播             游戏直播 5.999700e-04
## 4                   颜值                 颜值 4.464131e-04
## 5               科普教学             科普教学 2.624703e-04
## 6               媒体直播             媒体直播 2.460630e-04
## 7               媒体直播     个人爱好兴趣分享 2.099738e-04
## 8           才艺技能展示         才艺技能展示 1.880023e-04
## 9               情感互动             情感互动 1.667023e-04
## 10        商品售卖及推荐       商品售卖及推荐 1.089045e-04
## 11          日常生活展示         日常生活展示 9.001644e-05
## 12        商品售卖及推荐             团队表演 7.515633e-05
## 13              运动健身             科普教学 5.554733e-05
## 14              团队表演       商品售卖及推荐 5.381166e-05
## 15      个人爱好兴趣分享             媒体直播 5.364807e-05
## 16              科普教学             运动健身 5.306265e-05
## 17        商品售卖及推荐             媒体直播 4.960317e-05
## 18                  颜值             团队表演 4.196039e-05
## 19              科普教学                 颜值 3.874220e-05
## 20        商品售卖及推荐             科普教学 3.753029e-05
## 21              团队表演         日常生活展示 3.574833e-05
## 22              媒体直播       商品售卖及推荐 3.530949e-05
## 23              科普教学       商品售卖及推荐 3.499028e-05
## 24          才艺技能展示             团队表演 3.195297e-05
## 25              媒体直播         日常生活展示 3.190140e-05
## 26                  颜值             科普教学 3.153711e-05
## 27          日常生活展示             媒体直播 2.976899e-05
## 28          日常生活展示             团队表演 2.976899e-05
## 29      个人爱好兴趣分享             科普教学 2.961465e-05
## 30              闲聊互动             闲聊互动 2.960324e-05
## 31          才艺技能展示             游戏直播 2.924509e-05
## 32              科普教学     个人爱好兴趣分享 2.796714e-05
## 33          才艺技能展示             运动健身 2.738826e-05
## 34              游戏直播             闲聊互动 2.613657e-05
## 35              闲聊互动             游戏直播 2.500077e-05
## 36              运动健身         才艺技能展示 2.478817e-05
## 37              情感互动     个人爱好兴趣分享 2.332719e-05
## 38          日常生活展示             游戏直播 2.018237e-05
## 39      个人爱好兴趣分享             情感互动 2.004984e-05
## 40              媒体直播             闲聊互动 1.879544e-05
## 41              闲聊互动             媒体直播 1.793292e-05
## 42        商品售卖及推荐             情感互动 1.779464e-05
## 43              游戏直播         才艺技能展示 1.715031e-05
## 44                  颜值             闲聊互动 1.711214e-05
## 45                  颜值             游戏直播 1.706863e-05
## 46              情感互动       商品售卖及推荐 1.694621e-05
## 47              闲聊互动     个人爱好兴趣分享 1.688332e-05
## 48              媒体直播             科普教学 1.671766e-05
## 49          日常生活展示             闲聊互动 1.625449e-05
## 50              团队表演             闲聊互动 1.618319e-05
## 51      个人爱好兴趣分享             闲聊互动 1.614847e-05
## 52              闲聊互动         日常生活展示 1.606472e-05
## 53              闲聊互动                 颜值 1.576959e-05
## 54              科普教学         日常生活展示 1.487171e-05
## 55          才艺技能展示             闲聊互动 1.420994e-05
## 56        商品售卖及推荐         日常生活展示 1.405688e-05
## 57          日常生活展示       商品售卖及推荐 1.401145e-05
## 58      个人爱好兴趣分享       商品售卖及推荐 1.385708e-05
## 59              闲聊互动         才艺技能展示 1.384666e-05
## 60              闲聊互动             团队表演 1.347073e-05
## 61              科普教学             媒体直播 1.310960e-05
## 62          日常生活展示     个人爱好兴趣分享 1.270144e-05
## 63              游戏直播       商品售卖及推荐 1.269892e-05
## 64      个人爱好兴趣分享         日常生活展示 1.227411e-05
## 65          日常生活展示         才艺技能展示 1.209221e-05
## 66        商品售卖及推荐             运动健身 1.181028e-05
## 67          日常生活展示             科普教学 1.099746e-05
## 68        商品售卖及推荐             游戏直播 1.070022e-05
## 69          才艺技能展示         日常生活展示 1.014331e-05
## 70          日常生活展示             运动健身 9.922998e-06
## 71              运动健身       商品售卖及推荐 9.385755e-06
## 72                  颜值         才艺技能展示 8.977572e-06
## 73              情感互动             媒体直播 8.632597e-06
## 74              情感互动             闲聊互动 7.812191e-06
## 75              闲聊互动             情感互动 7.618239e-06
## 76              运动健身         日常生活展示 6.927971e-06
## 77      个人爱好兴趣分享         才艺技能展示 6.654024e-06
## 78              游戏直播         日常生活展示 6.327138e-06
## 79              闲聊互动       商品售卖及推荐 5.987530e-06
## 80              情感互动         日常生活展示 5.727633e-06
## 81          日常生活展示             情感互动 5.693657e-06
## 82        商品售卖及推荐             闲聊互动 5.633760e-06
## 83          才艺技能展示                 颜值 5.585950e-06
## 84              闲聊互动             科普教学 5.362554e-06
## 85              科普教学             情感互动 5.331740e-06
## 86              科普教学             闲聊互动 5.112939e-06
## 87          才艺技能展示     个人爱好兴趣分享 5.112474e-06
## 88              运动健身             闲聊互动 4.939638e-06
## 89              情感互动             科普教学 4.545401e-06
## 90              闲聊互动             运动健身 4.329879e-06
## 91              媒体直播             情感互动 4.327571e-06
## 92          才艺技能展示             情感互动 4.109367e-06
## 93        商品售卖及推荐                 颜值 3.941601e-06
## 94              情感互动         才艺技能展示 3.854555e-06
## 95          才艺技能展示       商品售卖及推荐 3.782773e-06
## 96                  颜值       商品售卖及推荐 3.612733e-06
## 97          日常生活展示                 颜值 3.221618e-06
## 98        商品售卖及推荐     个人爱好兴趣分享 3.206670e-06
## 99        商品售卖及推荐         才艺技能展示 2.866427e-06
## 100             游戏直播             情感互动 2.431865e-06
## 101         才艺技能展示             媒体直播 2.396472e-06
## 102             情感互动             游戏直播 2.341043e-06
## 103                 颜值         日常生活展示 1.680016e-06
## 104             情感互动             运动健身 1.644304e-06
## 105         才艺技能展示             科普教学 1.628177e-06
## 106             科普教学         才艺技能展示 1.625996e-06
## 107             情感互动                 颜值 1.437269e-06
## 108                 颜值             情感互动 8.302138e-07
## 109                 颜值     个人爱好兴趣分享 0.000000e+00
## 110                 颜值             媒体直播 0.000000e+00
## 111                 颜值             运动健身 0.000000e+00
## 112             情感互动             团队表演 0.000000e+00
## 113             科普教学             游戏直播 0.000000e+00
## 114             科普教学             团队表演 0.000000e+00
## 115     个人爱好兴趣分享                 颜值 0.000000e+00
## 116     个人爱好兴趣分享             游戏直播 0.000000e+00
## 117     个人爱好兴趣分享             运动健身 0.000000e+00
## 118     个人爱好兴趣分享             团队表演 0.000000e+00
## 119             游戏直播                 颜值 0.000000e+00
## 120             游戏直播             科普教学 0.000000e+00
## 121             游戏直播     个人爱好兴趣分享 0.000000e+00
## 122             游戏直播             媒体直播 0.000000e+00
## 123             游戏直播             运动健身 0.000000e+00
## 124             游戏直播             团队表演 0.000000e+00
## 125             媒体直播         才艺技能展示 0.000000e+00
## 126             媒体直播                 颜值 0.000000e+00
## 127             媒体直播             游戏直播 0.000000e+00
## 128             媒体直播             运动健身 0.000000e+00
## 129             媒体直播             团队表演 0.000000e+00
## 130             运动健身                 颜值 0.000000e+00
## 131             运动健身             情感互动 0.000000e+00
## 132             运动健身     个人爱好兴趣分享 0.000000e+00
## 133             运动健身             游戏直播 0.000000e+00
## 134             运动健身             媒体直播 0.000000e+00
## 135             运动健身             团队表演 0.000000e+00
## 136             团队表演         才艺技能展示 0.000000e+00
## 137             团队表演                 颜值 0.000000e+00
## 138             团队表演             情感互动 0.000000e+00
## 139             团队表演             科普教学 0.000000e+00
## 140             团队表演     个人爱好兴趣分享 0.000000e+00
## 141             团队表演             游戏直播 0.000000e+00
## 142             团队表演             媒体直播 0.000000e+00
## 143             团队表演             运动健身 0.000000e+00
## 144             团队表演             团队表演 0.000000e+00
# Collapse the directed rows (A, B) and (B, A) into one undirected pair.
# The key is the two tags sorted and joined with "_", so both directions of
# the same pair end up with the same sorted_pair value.
lift_result_2 <- dplyr::as_tibble(mutate(
  lift_result_2,
  sorted_pair = vapply(
    seq_along(Live_Operation_Tag_1),
    function(i) {
      paste(
        sort(c(Live_Operation_Tag_1[i], Live_Operation_Tag_2[i])),
        collapse = "_"
      )
    },
    character(1)
  )
))

# Add up Lift_A_B over all rows that share a key.
# NOTE(review): a same-tag pair contributes one row to its sum while a
# mixed pair contributes two - confirm this is the intended comparison.
df <- summarise(
  group_by(lift_result_2, sorted_pair),
  Lift_A_B_sum = sum(Lift_A_B)
)

arrange(df, desc(Lift_A_B_sum))
## # A tibble: 78 × 2
##    sorted_pair                       Lift_A_B_sum
##    <chr>                                    <dbl>
##  1 运动健身_运动健身                     0.00336 
##  2 个人爱好兴趣分享_个人爱好兴趣分享     0.00107 
##  3 游戏直播_游戏直播                     0.000600
##  4 颜值_颜值                             0.000446
##  5 个人爱好兴趣分享_媒体直播             0.000264
##  6 科普教学_科普教学                     0.000262
##  7 媒体直播_媒体直播                     0.000246
##  8 才艺技能展示_才艺技能展示             0.000188
##  9 情感互动_情感互动                     0.000167
## 10 商品售卖及推荐_团队表演               0.000129
## # ℹ 68 more rows

2.3.查看邀请PK的pair fans 特征

(1) 查看分布

# Row counts per follower-count bucket for the author side.
table(data[["fans_range"]])
## 
##      0-100     100-1k     1000w+ 100w-1000w   10w-100w      1k-1w     1w-10w 
##         91       1816         23        787       7811      15048      22230
# Row counts per follower-count bucket for the other-author side.
table(data[["other_fans_range"]])
## 
##      0-100     100-1k     1000w+ 100w-1000w   10w-100w      1k-1w     1w-10w 
##        109       1781         33        776       8000      15026      22081
# Encode the unordered (fans_range, other_fans_range) combination as a code
# from 1 to 21. The six buckets are ranked from smallest to largest; pairs are
# numbered by the lower rank first, then by the higher rank:
#   rank 1 paired with ranks 1..6 ->  1..6
#   rank 2 paired with ranks 2..6 ->  7..11
#   rank 3 paired with ranks 3..6 -> 12..15
#   rank 4 paired with ranks 4..6 -> 16..18
#   rank 5 paired with ranks 5..6 -> 19..20
#   rank 6 paired with rank  6    -> 21
# Any value outside the six listed buckets (or a missing value) on either
# side yields NA.
data <- data %>%
  mutate(fans_pair = {
    bucket_order <- c(
      "0-100", "100-1k", "1k-1w", "1w-10w", "10w-100w", "100w-1000w"
    )
    own_rank <- match(fans_range, bucket_order)
    other_rank <- match(other_fans_range, bucket_order)
    low <- pmin(own_rank, other_rank)
    high <- pmax(own_rank, other_rank)
    # Codes used up by all lower "low" ranks, plus the offset within this one.
    (low - 1) * 6 - (low - 1) * (low - 2) / 2 + (high - low + 1)
  })
library(ggplot2)

# Histogram of the fans_pair codes; with binwidth 1 each integer code gets its
# own bar. Bars are filled blue with a black outline.
ggplot(data = data, mapping = aes(x = fans_pair)) +
  geom_histogram(binwidth = 1, fill = "blue", colour = "black") +
  xlab("fans_pair") +
  ylab("Frequency") +
  ggtitle("Distribution of fans_pair") +
  theme_minimal()
## Warning: Removed 52 rows containing non-finite outside the scale range
## (`stat_bin()`).

(2) 计算 pair lift

#' Co-occurrence ratio for one ordered pair of follower-count buckets.
#'
#' Counts the rows where `fans_range` equals `fans_range_1` (A), the rows where
#' `other_fans_range` equals `fans_range_2` (B), and the rows where both hold
#' (A_B), then returns `A_B / (A * B)`.
#'
#' Note: this is the conventional lift divided by the number of rows. The
#' missing factor is the same for every pair computed on the same `data`, so
#' rankings between pairs are unaffected.
#'
#' @param fans_range_1 Bucket label matched against `data$fans_range`.
#' @param fans_range_2 Bucket label matched against `data$other_fans_range`.
#' @param data A data.frame or tibble with columns `fans_range` and
#'   `other_fans_range`.
#' @return A single numeric value, or `NA` when either bucket has no rows.
calculate_lift <- function(fans_range_1, fans_range_2, data) {
  # Count with logical masks instead of length(data[rows, "pk_id"]): on a
  # tibble that expression is the number of columns (always 1), and on a
  # data.frame rows whose comparison is NA are selected and counted too.
  in_first <- data[["fans_range"]] == fans_range_1
  in_second <- data[["other_fans_range"]] == fans_range_2

  # Number of rows in the first bucket (A) and in the second bucket (B).
  A <- sum(in_first, na.rm = TRUE)
  B <- sum(in_second, na.rm = TRUE)

  # Number of rows where both buckets occur together.
  A_B <- sum(in_first & in_second, na.rm = TRUE)

  # Guard against division by zero.
  if (A == 0 || B == 0) {
    return(NA_real_)
  }

  # Multiply as doubles so the product of two large counts cannot overflow
  # R's 32-bit integers.
  A_B / (as.numeric(A) * as.numeric(B))
}

# Follower-count buckets to cross with each other.
fans_ranges <- c(
  "0-100", "100-1k", "1k-1w", "1w-10w", "10w-100w", "100w-1000w"
)

# One row per ordered (Fans_Range_1, Fans_Range_2) combination, with the first
# bucket varying slowest and the second fastest.
lift_result_3 <- data.frame(
  Fans_Range_1 = rep(fans_ranges, each = length(fans_ranges)),
  Fans_Range_2 = rep(fans_ranges, times = length(fans_ranges))
)

# Evaluate calculate_lift() for every row of the grid.
lift_result_3$Lift_A_B <- vapply(
  seq_len(nrow(lift_result_3)),
  function(i) {
    calculate_lift(
      lift_result_3$Fans_Range_1[i],
      lift_result_3$Fans_Range_2[i],
      data
    )
  },
  numeric(1)
)

# Show the result, largest Lift_A_B first.
lift_result_3 %>%
  arrange(desc(Lift_A_B))
##    Fans_Range_1 Fans_Range_2     Lift_A_B
## 1         0-100        0-100 2.016332e-04
## 2        100-1k       100-1k 1.255296e-04
## 3        100-1k        0-100 8.588288e-05
## 4    100w-1000w   100w-1000w 8.350908e-05
## 5         0-100       100-1k 7.404162e-05
## 6      10w-100w   100w-1000w 7.110644e-05
## 7    100w-1000w     10w-100w 6.956798e-05
## 8         0-100        1k-1w 5.558131e-05
## 9      10w-100w     10w-100w 5.249008e-05
## 10        1k-1w        0-100 5.182194e-05
## 11        1k-1w       100-1k 4.645438e-05
## 12       100-1k        1k-1w 4.617547e-05
## 13        1k-1w        1k-1w 3.890120e-05
## 14       1w-10w       1w-10w 2.752716e-05
## 15       1w-10w     10w-100w 2.189046e-05
## 16     10w-100w       1w-10w 2.158578e-05
## 17        1k-1w       1w-10w 1.350689e-05
## 18       1w-10w        1k-1w 1.341504e-05
## 19   100w-1000w       1w-10w 1.312023e-05
## 20       1w-10w   100w-1000w 1.310108e-05
## 21        1k-1w   100w-1000w 4.281831e-06
## 22   100w-1000w        1k-1w 3.382532e-06
## 23       100-1k       1w-10w 3.217030e-06
## 24        1k-1w     10w-100w 3.189793e-06
## 25     10w-100w        1k-1w 3.178036e-06
## 26       1w-10w       100-1k 2.854139e-06
## 27       1w-10w        0-100 2.063498e-06
## 28   100w-1000w       100-1k 7.134464e-07
## 29       100-1k   100w-1000w 7.096144e-07
## 30        0-100       1w-10w 4.976682e-07
## 31     10w-100w       100-1k 2.875342e-07
## 32       100-1k     10w-100w 2.064978e-07
## 33        0-100     10w-100w 0.000000e+00
## 34        0-100   100w-1000w 0.000000e+00
## 35     10w-100w        0-100 0.000000e+00
## 36   100w-1000w        0-100 0.000000e+00
# Collapse the directed rows (A, B) and (B, A) into one undirected pair.
# The key is Fans_Range_1 and Fans_Range_2 sorted and joined with "_", so both
# directions of the same pair end up with the same sorted_pair value.
lift_result_3 <- dplyr::as_tibble(mutate(
  lift_result_3,
  sorted_pair = vapply(
    seq_along(Fans_Range_1),
    function(i) {
      paste(sort(c(Fans_Range_1[i], Fans_Range_2[i])), collapse = "_")
    },
    character(1)
  )
))

# Add up Lift_A_B over all rows that share a key.
# NOTE(review): a same-bucket pair contributes one row to its sum while a
# mixed pair contributes two - confirm this is the intended comparison.
df <- summarise(
  group_by(lift_result_3, sorted_pair),
  Lift_A_B_sum = sum(Lift_A_B)
)

arrange(df, desc(Lift_A_B_sum))
## # A tibble: 21 × 2
##    sorted_pair           Lift_A_B_sum
##    <chr>                        <dbl>
##  1 0-100_0-100              0.000202 
##  2 0-100_100-1k             0.000160 
##  3 100w-1000w_10w-100w      0.000141 
##  4 100-1k_100-1k            0.000126 
##  5 0-100_1k-1w              0.000107 
##  6 100-1k_1k-1w             0.0000926
##  7 100w-1000w_100w-1000w    0.0000835
##  8 10w-100w_10w-100w        0.0000525
##  9 10w-100w_1w-10w          0.0000435
## 10 1k-1w_1k-1w              0.0000389
## # ℹ 11 more rows