# Session setup ----
# NOTE(review): clearing the global environment is kept for parity with the
# original script, but it wipes every object in an interactive session;
# consider running the script in a fresh R session instead.
rm(list = ls())

# Fixed seed so that any random steps are reproducible between runs.
set.seed(1234)

# Report the working directory; the relative data path used when loading the
# input file is resolved against it.
getwd()
# Recorded output from the original run:
## [1] "D:/Projects/Live-PK/0.DataCleaning/0.Code"
library("tidyverse")
library("fixest")
library("arrow")
library(DoubleML)
library(mlr3)
library(mlr3learners)
library(data.table)
library(ggplot2)
library(ranger)
library(xgboost)
library(plotly)
library(dplyr)
# Suppress messages during fitting: raise the threshold of the "mlr3" logger
# to "warn" so that lower-severity log output is not printed.
lgr::get_logger("mlr3")$set_threshold("warn")
# Load the synthetic dataset and add fan-count columns for the two sides
# (A = fans_user_num, B = other_fans_user_num), both raw and log-transformed.
df <- mutate(
  read_parquet("../../0.DataCleaning/1.Input/synthetic_data2.parquet"),
  A_fan_count = fans_user_num,
  B_fan_count = other_fans_user_num,
  log_A_fan_count = log(fans_user_num),
  log_B_fan_count = log(other_fans_user_num)
)

# Disabled preprocessing, kept for reference. In the original pipeline these
# steps sat between read_parquet() and the mutate() above:
#
#   mutate(
#     p_date  = as_date(p_date),                                  # Convert to Date format
#     day     = format(p_date, "%Y-%m-%d"),                       # "YYYY-MM-DD"
#     month   = format(p_date, "%Y-%m"),                          # "YYYY-MM"
#     year    = format(p_date, "%Y"),                             # "YYYY"
#     quarter = paste0(year(p_date), "-Q", quarter(p_date))       # "YYYY-QN"
#   ) %>%
#   mutate(
#     reference_date = as_date("2022-12-31"),
#     relative_day = as.integer(
#       difftime(as_date(day), reference_date, units = "days")    # Difference in days
#     )
#   ) %>%
#   mutate(
#     gender = as.factor(gender),
#     author_type = as.factor(author_type),
#     author_income_range = as.factor(author_income_range),
#     age_range = as.factor(age_range),
#     fre_country_region = as.factor(fre_country_region),
#     fre_city_level = as.factor(fre_city_level),
#     is_big_v = as.factor(is_big_v),
#     relative_day = as.factor(relative_day)
#   ) %>%

# gc()
# Model-free ----
# Step 1: Group the data by A_fan_count and B_fan_count, and calculate the
# average money for A within each (A, B) fan-count combination.
# `.groups = "drop"` returns an ungrouped tibble. Without it the result stays
# grouped by A_fan_count, which silently affects later verbs and is treated by
# plotly as a trace-grouping variable.
agg_df <- df %>%
  group_by(A_fan_count, B_fan_count) %>%
  summarize(
    avg_money_A = mean(avg_fan_total_cost_amt, na.rm = TRUE),
    .groups = "drop"
  )
# Step 2: Create the 3D surface plot
# A plotly surface trace needs `z` as a numeric matrix whose rows follow `y`
# and whose columns follow `x`; long-format columns of equal length are not a
# valid surface. The aggregate is therefore reshaped into a B x A grid first.
# Grid cells with no observed (A, B) combination stay NA and render as gaps.
# NOTE(review): the grid has one row/column per distinct fan count; if the
# real data has many distinct counts, consider binning before plotting.
fig <- local({
  # Rows with missing or non-positive fan counts have no finite log
  # coordinate, so they are left out of the surface.
  surface_df <- dplyr::filter(
    dplyr::ungroup(agg_df),
    A_fan_count > 0,
    B_fan_count > 0
  )

  a_levels <- sort(unique(surface_df$A_fan_count))
  b_levels <- sort(unique(surface_df$B_fan_count))

  # Fill the grid by (row = B level, column = A level) matrix indexing.
  z_grid <- matrix(
    NA_real_,
    nrow = length(b_levels),
    ncol = length(a_levels)
  )
  cell_index <- cbind(
    match(surface_df$B_fan_count, b_levels),
    match(surface_df$A_fan_count, a_levels)
  )
  z_grid[cell_index] <- surface_df$avg_money_A

  plot_ly(
    x = log(a_levels),
    y = log(b_levels),
    z = z_grid,
    type = "surface",
    colorscale = "Viridis"
  )
})
# Step 3: Attach a title and axis labels to the 3D scene
fig <- layout(
  fig,
  title = "3D Surface Plot of Avg Money for A",
  scene = list(
    xaxis = list(title = "Log(A's Fan Count)"),
    yaxis = list(title = "Log(B's Fan Count)"),
    zaxis = list(title = "Avg Money for A")
  )
)

# Render the figure (auto-printed at top level)
fig