# dplyr supplies the pipe (%>%) and the verbs (group_by, filter, mutate,
# count, arrange, summarise) used throughout this script.
library(dplyr)

# NOTE(review): hard-coded, machine-specific working directory; the relative
# CSV paths below are resolved against it.
setwd("C:/Users/ASUS/Desktop/快手")

# Data: period 20230101 to 2024010 (sic -- the end date looks truncated,
# presumably 20240101; confirm). Condition: streamers who had more than 20
# invite PKs in the previous year; their invite-PK data for 2023.
data <- read.csv("Within_Invited_PK.csv")

# Rename columns by stripping a leading "a." from every column name.
# NOTE(review): the original comment said the prefix being removed is "data.",
# but the pattern below only matches "a." -- confirm which one is intended.
names(data) <- sub("^a\\.", "", names(data))

# Keep only PKs that appear on exactly two rows (one row per author).
data <- data %>%
  group_by(pk_id) %>%
  filter(n() == 2) %>%
  ungroup()

# Everything below runs on the pre-cleaned export, which replaces the data
# frame built above.
data <- read.csv("Within_Invited_PK_clean.csv")
# Frequency of each value in the live_operation_tag column.
table(data[["live_operation_tag"]])
##
## UNKNOWN 才艺技能展示 个人爱好兴趣分享 科普教学
## 1773 2608 233 1907
## 媒体直播 情感互动 日常生活展示 商品售卖及推荐
## 127 3620 4199 5544
## 团队表演 无主题 闲聊互动 颜值
## 20 1752 24745 993
## 游戏直播 运动健身
## 113 172
# Frequency of each value in the other_live_operation_tag column.
table(data[["other_live_operation_tag"]])
##
## UNKNOWN 才艺技能展示 个人爱好兴趣分享 科普教学
## 1755 2580 225 1884
## 媒体直播 情感互动 日常生活展示 商品售卖及推荐
## 160 3639 4196 5575
## 团队表演 无主题 闲聊互动 颜值
## 24 1804 24717 961
## 游戏直播 运动健身
## 118 168
# Distinct live_operation_tag values, in order of first appearance.
unique(data[["live_operation_tag"]])
## [1] "商品售卖及推荐" "闲聊互动" "才艺技能展示" "颜值"
## [5] "日常生活展示" "无主题" "UNKNOWN" "情感互动"
## [9] "科普教学" "个人爱好兴趣分享" "媒体直播" "游戏直播"
## [13] "运动健身" "团队表演"
# Encode each row's unordered (live_operation_tag, other_live_operation_tag)
# pair as a numeric code in 1..66.
#
# The 12 tags in `tag_order` are numbered by position; the code of a pair of
# two *different* tags is its rank when all pairs (lo, hi) with lo < hi are
# enumerated as (1,2), (1,3), ..., (1,12), (2,3), ..., (11,12).
# Every other row gets code 0: both tags equal, or at least one tag that is
# not in `tag_order` (e.g. "UNKNOWN", "无主题") or is NA.
data <- data %>%
  mutate(both_live_operation_tag_pair = (function(own_tag, other_tag) {
    # The position in this vector fixes the numbering of the pair codes.
    tag_order <- c(
      "才艺技能展示", "个人爱好兴趣分享", "科普教学", "媒体直播",
      "情感互动", "日常生活展示", "商品售卖及推荐", "团队表演",
      "闲聊互动", "颜值", "游戏直播", "运动健身"
    )
    n_tags <- length(tag_order)

    own_idx <- match(own_tag, tag_order)
    other_idx <- match(other_tag, tag_order)

    # Order does not matter, so work with (smaller index, larger index).
    lo <- pmin(own_idx, other_idx)
    hi <- pmax(own_idx, other_idx)

    # Pairs starting with an index below `lo` come first, then the offset of
    # `hi` within the run of pairs that start with `lo`.
    pair_code <- (lo - 1) * n_tags - lo * (lo - 1) / 2 + (hi - lo)

    # Unlisted / missing tags (NA index) and same-tag pairs fall back to 0.
    pair_code[is.na(pair_code) | lo == hi] <- 0
    pair_code
  })(live_operation_tag, other_live_operation_tag))
library(ggplot2)
# Histogram of the tag-pair codes: one bin per code, blue bars with black
# borders, an x-axis tick every 3 codes from the smallest to the largest code.
ggplot(data = data, mapping = aes(x = both_live_operation_tag_pair)) +
  geom_histogram(binwidth = 1, fill = "blue", color = "black") +
  scale_x_continuous(
    breaks = with(
      data,
      seq(
        from = min(both_live_operation_tag_pair),
        to = max(both_live_operation_tag_pair),
        by = 3
      )
    )
  ) +
  labs(
    x = "Live Operation Tag Pair",
    y = "Frequency",
    title = "Distribution of Live Operation Tag Pairs"
  ) +
  theme_minimal()
# Row count for each tag-pair code.
table(data[["both_live_operation_tag_pair"]])
##
## 0 1 2 3 4 5 6 7 8 9 10 11 12
## 34225 7 16 1 75 242 96 2 1800 37 14 23 25
## 13 14 15 16 18 22 23 24 25 27 28 30 31
## 8 36 24 22 187 8 68 206 764 491 130 35 7
## 32 33 35 39 40 42 43 44 45 46 47 48 49
## 37 69 130 174 701 1385 8 2 1 655 6 3355 20
## 50 51 52 53 54 55 56 57 58 61 62 63 64
## 13 12 16 1598 41 15 20 16 1 795 146 39 2
# Count the rows for each tag-pair code, most frequent code first.
frequency_table <- count(data, both_live_operation_tag_pair, sort = TRUE)

# Show the sorted counts.
print(frequency_table)
## both_live_operation_tag_pair n
## 1 0 34225
## 2 48 3355
## 3 8 1800
## 4 53 1598
## 5 42 1385
## 6 61 795
## 7 25 764
## 8 40 701
## 9 46 655
## 10 27 491
## 11 5 242
## 12 24 206
## 13 18 187
## 14 39 174
## 15 62 146
## 16 28 130
## 17 35 130
## 18 6 96
## 19 4 75
## 20 33 69
## 21 23 68
## 22 54 41
## 23 63 39
## 24 9 37
## 25 32 37
## 26 14 36
## 27 30 35
## 28 12 25
## 29 15 24
## 30 11 23
## 31 16 22
## 32 49 20
## 33 56 20
## 34 2 16
## 35 52 16
## 36 57 16
## 37 55 15
## 38 10 14
## 39 50 13
## 40 51 12
## 41 13 8
## 42 22 8
## 43 43 8
## 44 1 7
## 45 31 7
## 46 47 6
## 47 7 2
## 48 44 2
## 49 64 2
## 50 3 1
## 51 45 1
## 52 58 1
# Co-occurrence score for one (own tag, other tag) combination.
#
# Arguments:
#   live_operation_tag_1  value matched against data$live_operation_tag
#   live_operation_tag_2  value matched against data$other_live_operation_tag
#   data                  data frame (or tibble) holding both columns
#   normalize             FALSE (default) returns A_B / (A * B), i.e. the
#                         textbook lift divided by the number of rows;
#                         TRUE multiplies by nrow(data) to give the usual lift
#                         P(A and B) / (P(A) * P(B)), where 1 means independence.
#
# Returns a single number, or NA when either tag never occurs.
#
# Rows are counted with sum() on the logical masks rather than by subsetting:
# rows whose tag is NA are not counted as matches, and the count is correct
# for tibbles too (subsetting a tibble keeps a one-column table, whose
# length() is 1).
calculate_lift <- function(live_operation_tag_1, live_operation_tag_2, data,
                           normalize = FALSE) {
  is_a <- data[["live_operation_tag"]] == live_operation_tag_1
  is_b <- data[["other_live_operation_tag"]] == live_operation_tag_2

  # A: rows whose own tag is tag 1; B: rows whose other tag is tag 2.
  A <- sum(is_a, na.rm = TRUE)
  B <- sum(is_b, na.rm = TRUE)
  # Rows where both conditions hold.
  A_B <- sum(is_a & is_b, na.rm = TRUE)

  # Avoid dividing by zero when a tag is absent.
  if (A == 0 || B == 0) {
    return(NA)
  }

  # as.numeric() keeps the product from overflowing integer range.
  lift_A_B <- A_B / (as.numeric(A) * B)
  if (normalize) {
    lift_A_B <- lift_A_B * nrow(data)
  }
  lift_A_B
}
# Tags to cross with each other ("UNKNOWN" and "无主题" are not in the list).
live_operation_tags <- c(
  "商品售卖及推荐", "闲聊互动", "才艺技能展示", "颜值", "日常生活展示",
  "情感互动", "科普教学", "个人爱好兴趣分享", "游戏直播", "媒体直播",
  "运动健身", "团队表演"
)

# One row per ordered (tag 1, tag 2) combination, built in a single step
# instead of rbind()-ing inside a nested loop. Tag 1 varies slowest, which is
# the row order the nested loop (outer = tag 1, inner = tag 2) produced.
lift_result_2 <- data.frame(
  Live_Operation_Tag_1 = rep(
    live_operation_tags,
    each = length(live_operation_tags)
  ),
  Live_Operation_Tag_2 = rep(
    live_operation_tags,
    times = length(live_operation_tags)
  ),
  stringsAsFactors = FALSE
)

# Score every combination; as.numeric() turns the NA returned for absent tags
# into a numeric NA so the column is always numeric.
lift_result_2$Lift_A_B <- vapply(
  seq_len(nrow(lift_result_2)),
  function(i) {
    as.numeric(calculate_lift(
      lift_result_2$Live_Operation_Tag_1[i],
      lift_result_2$Live_Operation_Tag_2[i],
      data
    ))
  },
  numeric(1)
)

# Show the combinations from highest to lowest score.
lift_result_2 %>% arrange(desc(Lift_A_B))
## Live_Operation_Tag_1 Live_Operation_Tag_2 Lift_A_B
## 1 运动健身 运动健身 3.356866e-03
## 2 个人爱好兴趣分享 个人爱好兴趣分享 1.068193e-03
## 3 游戏直播 游戏直播 5.999700e-04
## 4 颜值 颜值 4.464131e-04
## 5 科普教学 科普教学 2.624703e-04
## 6 媒体直播 媒体直播 2.460630e-04
## 7 媒体直播 个人爱好兴趣分享 2.099738e-04
## 8 才艺技能展示 才艺技能展示 1.880023e-04
## 9 情感互动 情感互动 1.667023e-04
## 10 商品售卖及推荐 商品售卖及推荐 1.089045e-04
## 11 日常生活展示 日常生活展示 9.001644e-05
## 12 商品售卖及推荐 团队表演 7.515633e-05
## 13 运动健身 科普教学 5.554733e-05
## 14 团队表演 商品售卖及推荐 5.381166e-05
## 15 个人爱好兴趣分享 媒体直播 5.364807e-05
## 16 科普教学 运动健身 5.306265e-05
## 17 商品售卖及推荐 媒体直播 4.960317e-05
## 18 颜值 团队表演 4.196039e-05
## 19 科普教学 颜值 3.874220e-05
## 20 商品售卖及推荐 科普教学 3.753029e-05
## 21 团队表演 日常生活展示 3.574833e-05
## 22 媒体直播 商品售卖及推荐 3.530949e-05
## 23 科普教学 商品售卖及推荐 3.499028e-05
## 24 才艺技能展示 团队表演 3.195297e-05
## 25 媒体直播 日常生活展示 3.190140e-05
## 26 颜值 科普教学 3.153711e-05
## 27 日常生活展示 媒体直播 2.976899e-05
## 28 日常生活展示 团队表演 2.976899e-05
## 29 个人爱好兴趣分享 科普教学 2.961465e-05
## 30 闲聊互动 闲聊互动 2.960324e-05
## 31 才艺技能展示 游戏直播 2.924509e-05
## 32 科普教学 个人爱好兴趣分享 2.796714e-05
## 33 才艺技能展示 运动健身 2.738826e-05
## 34 游戏直播 闲聊互动 2.613657e-05
## 35 闲聊互动 游戏直播 2.500077e-05
## 36 运动健身 才艺技能展示 2.478817e-05
## 37 情感互动 个人爱好兴趣分享 2.332719e-05
## 38 日常生活展示 游戏直播 2.018237e-05
## 39 个人爱好兴趣分享 情感互动 2.004984e-05
## 40 媒体直播 闲聊互动 1.879544e-05
## 41 闲聊互动 媒体直播 1.793292e-05
## 42 商品售卖及推荐 情感互动 1.779464e-05
## 43 游戏直播 才艺技能展示 1.715031e-05
## 44 颜值 闲聊互动 1.711214e-05
## 45 颜值 游戏直播 1.706863e-05
## 46 情感互动 商品售卖及推荐 1.694621e-05
## 47 闲聊互动 个人爱好兴趣分享 1.688332e-05
## 48 媒体直播 科普教学 1.671766e-05
## 49 日常生活展示 闲聊互动 1.625449e-05
## 50 团队表演 闲聊互动 1.618319e-05
## 51 个人爱好兴趣分享 闲聊互动 1.614847e-05
## 52 闲聊互动 日常生活展示 1.606472e-05
## 53 闲聊互动 颜值 1.576959e-05
## 54 科普教学 日常生活展示 1.487171e-05
## 55 才艺技能展示 闲聊互动 1.420994e-05
## 56 商品售卖及推荐 日常生活展示 1.405688e-05
## 57 日常生活展示 商品售卖及推荐 1.401145e-05
## 58 个人爱好兴趣分享 商品售卖及推荐 1.385708e-05
## 59 闲聊互动 才艺技能展示 1.384666e-05
## 60 闲聊互动 团队表演 1.347073e-05
## 61 科普教学 媒体直播 1.310960e-05
## 62 日常生活展示 个人爱好兴趣分享 1.270144e-05
## 63 游戏直播 商品售卖及推荐 1.269892e-05
## 64 个人爱好兴趣分享 日常生活展示 1.227411e-05
## 65 日常生活展示 才艺技能展示 1.209221e-05
## 66 商品售卖及推荐 运动健身 1.181028e-05
## 67 日常生活展示 科普教学 1.099746e-05
## 68 商品售卖及推荐 游戏直播 1.070022e-05
## 69 才艺技能展示 日常生活展示 1.014331e-05
## 70 日常生活展示 运动健身 9.922998e-06
## 71 运动健身 商品售卖及推荐 9.385755e-06
## 72 颜值 才艺技能展示 8.977572e-06
## 73 情感互动 媒体直播 8.632597e-06
## 74 情感互动 闲聊互动 7.812191e-06
## 75 闲聊互动 情感互动 7.618239e-06
## 76 运动健身 日常生活展示 6.927971e-06
## 77 个人爱好兴趣分享 才艺技能展示 6.654024e-06
## 78 游戏直播 日常生活展示 6.327138e-06
## 79 闲聊互动 商品售卖及推荐 5.987530e-06
## 80 情感互动 日常生活展示 5.727633e-06
## 81 日常生活展示 情感互动 5.693657e-06
## 82 商品售卖及推荐 闲聊互动 5.633760e-06
## 83 才艺技能展示 颜值 5.585950e-06
## 84 闲聊互动 科普教学 5.362554e-06
## 85 科普教学 情感互动 5.331740e-06
## 86 科普教学 闲聊互动 5.112939e-06
## 87 才艺技能展示 个人爱好兴趣分享 5.112474e-06
## 88 运动健身 闲聊互动 4.939638e-06
## 89 情感互动 科普教学 4.545401e-06
## 90 闲聊互动 运动健身 4.329879e-06
## 91 媒体直播 情感互动 4.327571e-06
## 92 才艺技能展示 情感互动 4.109367e-06
## 93 商品售卖及推荐 颜值 3.941601e-06
## 94 情感互动 才艺技能展示 3.854555e-06
## 95 才艺技能展示 商品售卖及推荐 3.782773e-06
## 96 颜值 商品售卖及推荐 3.612733e-06
## 97 日常生活展示 颜值 3.221618e-06
## 98 商品售卖及推荐 个人爱好兴趣分享 3.206670e-06
## 99 商品售卖及推荐 才艺技能展示 2.866427e-06
## 100 游戏直播 情感互动 2.431865e-06
## 101 才艺技能展示 媒体直播 2.396472e-06
## 102 情感互动 游戏直播 2.341043e-06
## 103 颜值 日常生活展示 1.680016e-06
## 104 情感互动 运动健身 1.644304e-06
## 105 才艺技能展示 科普教学 1.628177e-06
## 106 科普教学 才艺技能展示 1.625996e-06
## 107 情感互动 颜值 1.437269e-06
## 108 颜值 情感互动 8.302138e-07
## 109 颜值 个人爱好兴趣分享 0.000000e+00
## 110 颜值 媒体直播 0.000000e+00
## 111 颜值 运动健身 0.000000e+00
## 112 情感互动 团队表演 0.000000e+00
## 113 科普教学 游戏直播 0.000000e+00
## 114 科普教学 团队表演 0.000000e+00
## 115 个人爱好兴趣分享 颜值 0.000000e+00
## 116 个人爱好兴趣分享 游戏直播 0.000000e+00
## 117 个人爱好兴趣分享 运动健身 0.000000e+00
## 118 个人爱好兴趣分享 团队表演 0.000000e+00
## 119 游戏直播 颜值 0.000000e+00
## 120 游戏直播 科普教学 0.000000e+00
## 121 游戏直播 个人爱好兴趣分享 0.000000e+00
## 122 游戏直播 媒体直播 0.000000e+00
## 123 游戏直播 运动健身 0.000000e+00
## 124 游戏直播 团队表演 0.000000e+00
## 125 媒体直播 才艺技能展示 0.000000e+00
## 126 媒体直播 颜值 0.000000e+00
## 127 媒体直播 游戏直播 0.000000e+00
## 128 媒体直播 运动健身 0.000000e+00
## 129 媒体直播 团队表演 0.000000e+00
## 130 运动健身 颜值 0.000000e+00
## 131 运动健身 情感互动 0.000000e+00
## 132 运动健身 个人爱好兴趣分享 0.000000e+00
## 133 运动健身 游戏直播 0.000000e+00
## 134 运动健身 媒体直播 0.000000e+00
## 135 运动健身 团队表演 0.000000e+00
## 136 团队表演 才艺技能展示 0.000000e+00
## 137 团队表演 颜值 0.000000e+00
## 138 团队表演 情感互动 0.000000e+00
## 139 团队表演 科普教学 0.000000e+00
## 140 团队表演 个人爱好兴趣分享 0.000000e+00
## 141 团队表演 游戏直播 0.000000e+00
## 142 团队表演 媒体直播 0.000000e+00
## 143 团队表演 运动健身 0.000000e+00
## 144 团队表演 团队表演 0.000000e+00
# Merge the (A, B) and (B, A) rows.
# Build an order-independent key per row by sorting the two tag names and
# joining them with "_"; the result is kept as a tibble.
lift_result_2 <- lift_result_2 %>%
  as_tibble() %>%
  mutate(sorted_pair = mapply(
    function(tag_a, tag_b) paste(sort(c(tag_a, tag_b)), collapse = "_"),
    Live_Operation_Tag_1,
    Live_Operation_Tag_2,
    USE.NAMES = FALSE
  ))

# Add up Lift_A_B within each key. Same-tag keys have a single row, while
# keys made of two different tags add the (A, B) and (B, A) rows together.
df <- summarise(
  group_by(lift_result_2, sorted_pair),
  Lift_A_B_sum = sum(Lift_A_B)
)
arrange(df, desc(Lift_A_B_sum))
## # A tibble: 78 × 2
## sorted_pair Lift_A_B_sum
## <chr> <dbl>
## 1 运动健身_运动健身 0.00336
## 2 个人爱好兴趣分享_个人爱好兴趣分享 0.00107
## 3 游戏直播_游戏直播 0.000600
## 4 颜值_颜值 0.000446
## 5 个人爱好兴趣分享_媒体直播 0.000264
## 6 科普教学_科普教学 0.000262
## 7 媒体直播_媒体直播 0.000246
## 8 才艺技能展示_才艺技能展示 0.000188
## 9 情感互动_情感互动 0.000167
## 10 商品售卖及推荐_团队表演 0.000129
## # ℹ 68 more rows
# Frequency of each value in the fans_range column.
table(data[["fans_range"]])
##
## 0-100 100-1k 1000w+ 100w-1000w 10w-100w 1k-1w 1w-10w
## 91 1816 23 787 7811 15048 22230
# Frequency of each value in the other_fans_range column.
table(data[["other_fans_range"]])
##
## 0-100 100-1k 1000w+ 100w-1000w 10w-100w 1k-1w 1w-10w
## 109 1781 33 776 8000 15026 22081
# Encode each row's unordered (fans_range, other_fans_range) pair as a numeric
# code in 1..21.
#
# The six ranges in `range_order` are numbered by position; the code of a pair
# is its rank when all pairs (lo, hi) with lo <= hi are enumerated as
# (1,1), (1,2), ..., (1,6), (2,2), (2,3), ..., (6,6).
# Rows where either side is not one of the six listed ranges (e.g. "1000w+")
# or is NA get NA.
data <- data %>%
  mutate(fans_pair = (function(own_range, other_range) {
    # The position in this vector fixes the numbering of the pair codes.
    range_order <- c(
      "0-100", "100-1k", "1k-1w", "1w-10w", "10w-100w", "100w-1000w"
    )
    n_ranges <- length(range_order)

    own_idx <- match(own_range, range_order)
    other_idx <- match(other_range, range_order)

    # Order does not matter, so work with (smaller index, larger index).
    lo <- pmin(own_idx, other_idx)
    hi <- pmax(own_idx, other_idx)

    # Pairs starting with an index below `lo` come first (same-range pairs
    # included), then the 1-based offset of `hi` within the run for `lo`.
    (lo - 1) * (n_ranges + 1) - lo * (lo - 1) / 2 + (hi - lo + 1)
  })(fans_range, other_fans_range))
library(ggplot2)
# Histogram of the fans_pair codes: one bin per code, blue bars with black
# borders, minimal theme.
ggplot(data = data, mapping = aes(x = fans_pair)) +
  geom_histogram(binwidth = 1, fill = "blue", color = "black") +
  labs(
    x = "fans_pair",
    y = "Frequency",
    title = "Distribution of fans_pair"
  ) +
  theme_minimal()
## Warning: Removed 52 rows containing non-finite outside the scale range
## (`stat_bin()`).
### (2) Compute the pair lift
# Co-occurrence score for one (own fans range, other fans range) combination.
#
# Arguments:
#   fans_range_1  value matched against data$fans_range
#   fans_range_2  value matched against data$other_fans_range
#   data          data frame (or tibble) holding both columns
#   normalize     FALSE (default) returns A_B / (A * B), i.e. the textbook
#                 lift divided by the number of rows; TRUE multiplies by
#                 nrow(data) to give the usual lift
#                 P(A and B) / (P(A) * P(B)), where 1 means independence.
#
# Returns a single number, or NA when either range never occurs.
#
# Rows are counted with sum() on the logical masks rather than by subsetting:
# rows whose range is NA are not counted as matches, and the count is correct
# for tibbles too (subsetting a tibble keeps a one-column table, whose
# length() is 1).
calculate_lift <- function(fans_range_1, fans_range_2, data,
                           normalize = FALSE) {
  is_a <- data[["fans_range"]] == fans_range_1
  is_b <- data[["other_fans_range"]] == fans_range_2

  # A: rows whose own range is range 1; B: rows whose other range is range 2.
  A <- sum(is_a, na.rm = TRUE)
  B <- sum(is_b, na.rm = TRUE)
  # Rows where both conditions hold.
  A_B <- sum(is_a & is_b, na.rm = TRUE)

  # Avoid dividing by zero when a range is absent.
  if (A == 0 || B == 0) {
    return(NA)
  }

  # as.numeric() keeps the product from overflowing integer range.
  lift_A_B <- A_B / (as.numeric(A) * B)
  if (normalize) {
    lift_A_B <- lift_A_B * nrow(data)
  }
  lift_A_B
}
# Fan-count ranges to cross with each other ("1000w+" is not in the list).
fans_ranges <- c(
  "0-100", "100-1k", "1k-1w", "1w-10w", "10w-100w", "100w-1000w"
)

# One row per ordered (range 1, range 2) combination, built in a single step
# instead of rbind()-ing inside a nested loop. Range 1 varies slowest, which
# is the row order the nested loop (outer = range 1, inner = range 2) produced.
lift_result_3 <- data.frame(
  Fans_Range_1 = rep(fans_ranges, each = length(fans_ranges)),
  Fans_Range_2 = rep(fans_ranges, times = length(fans_ranges)),
  stringsAsFactors = FALSE
)

# Score every combination; as.numeric() turns the NA returned for absent
# ranges into a numeric NA so the column is always numeric.
lift_result_3$Lift_A_B <- vapply(
  seq_len(nrow(lift_result_3)),
  function(i) {
    as.numeric(calculate_lift(
      lift_result_3$Fans_Range_1[i],
      lift_result_3$Fans_Range_2[i],
      data
    ))
  },
  numeric(1)
)

# Show the combinations from highest to lowest score.
lift_result_3 %>% arrange(desc(Lift_A_B))
## Fans_Range_1 Fans_Range_2 Lift_A_B
## 1 0-100 0-100 2.016332e-04
## 2 100-1k 100-1k 1.255296e-04
## 3 100-1k 0-100 8.588288e-05
## 4 100w-1000w 100w-1000w 8.350908e-05
## 5 0-100 100-1k 7.404162e-05
## 6 10w-100w 100w-1000w 7.110644e-05
## 7 100w-1000w 10w-100w 6.956798e-05
## 8 0-100 1k-1w 5.558131e-05
## 9 10w-100w 10w-100w 5.249008e-05
## 10 1k-1w 0-100 5.182194e-05
## 11 1k-1w 100-1k 4.645438e-05
## 12 100-1k 1k-1w 4.617547e-05
## 13 1k-1w 1k-1w 3.890120e-05
## 14 1w-10w 1w-10w 2.752716e-05
## 15 1w-10w 10w-100w 2.189046e-05
## 16 10w-100w 1w-10w 2.158578e-05
## 17 1k-1w 1w-10w 1.350689e-05
## 18 1w-10w 1k-1w 1.341504e-05
## 19 100w-1000w 1w-10w 1.312023e-05
## 20 1w-10w 100w-1000w 1.310108e-05
## 21 1k-1w 100w-1000w 4.281831e-06
## 22 100w-1000w 1k-1w 3.382532e-06
## 23 100-1k 1w-10w 3.217030e-06
## 24 1k-1w 10w-100w 3.189793e-06
## 25 10w-100w 1k-1w 3.178036e-06
## 26 1w-10w 100-1k 2.854139e-06
## 27 1w-10w 0-100 2.063498e-06
## 28 100w-1000w 100-1k 7.134464e-07
## 29 100-1k 100w-1000w 7.096144e-07
## 30 0-100 1w-10w 4.976682e-07
## 31 10w-100w 100-1k 2.875342e-07
## 32 100-1k 10w-100w 2.064978e-07
## 33 0-100 10w-100w 0.000000e+00
## 34 0-100 100w-1000w 0.000000e+00
## 35 10w-100w 0-100 0.000000e+00
## 36 100w-1000w 0-100 0.000000e+00
# Merge the (A, B) and (B, A) rows.
# Build an order-independent key per row by sorting Fans_Range_1 and
# Fans_Range_2 and joining them with "_"; the result is kept as a tibble.
lift_result_3 <- lift_result_3 %>%
  as_tibble() %>%
  mutate(sorted_pair = mapply(
    function(range_a, range_b) paste(sort(c(range_a, range_b)), collapse = "_"),
    Fans_Range_1,
    Fans_Range_2,
    USE.NAMES = FALSE
  ))

# Add up Lift_A_B within each key. Same-range keys have a single row, while
# keys made of two different ranges add the (A, B) and (B, A) rows together.
df <- summarise(
  group_by(lift_result_3, sorted_pair),
  Lift_A_B_sum = sum(Lift_A_B)
)
arrange(df, desc(Lift_A_B_sum))
## # A tibble: 21 × 2
## sorted_pair Lift_A_B_sum
## <chr> <dbl>
## 1 0-100_0-100 0.000202
## 2 0-100_100-1k 0.000160
## 3 100w-1000w_10w-100w 0.000141
## 4 100-1k_100-1k 0.000126
## 5 0-100_1k-1w 0.000107
## 6 100-1k_1k-1w 0.0000926
## 7 100w-1000w_100w-1000w 0.0000835
## 8 10w-100w_10w-100w 0.0000525
## 9 10w-100w_1w-10w 0.0000435
## 10 1k-1w_1k-1w 0.0000389
## # ℹ 11 more rows