library(ggplot2)
library(dplyr)
library(readr)
library(Amelia)
library(DMwR)
library(mice)
library(randomForest)
library(VIM)
library(psych)
library(corrplot)
library(sjPlot)
library(ggplot2)
library(polycor)
library(mlbench)
library(caret)
library(MASS)
library(boot)
library(e1071)
library(caTools)
library(car)
library(caret)
library(kableExtra)
library(sjlabelled)
library(sjmisc)
library(sjstats)
library(ggeffects)
library(margins)
library(vip)
library(data.table)
# Load the raw export. The file is semicolon-delimited; surrounding
# whitespace is trimmed and doubled quotes are not treated as escapes.
df_yandex <- read_delim(
  "df_yandex.csv",
  delim = ";",
  escape_double = FALSE,
  trim_ws = TRUE
)

# The rest of the script works on `df`. Without this assignment the name
# `df` resolves to the stats::df() function and `df$Client` fails with
# "object of type 'closure' is not subsettable".
df <- df_yandex

# Categorical columns are converted to factors.
factor_cols <- c(
  "Client", "Category", "Device", "Place", "BannerType", "TargetingType"
)
df[factor_cols] <- lapply(df[factor_cols], as.factor)

# Cost text -> numeric: every "." is removed first, then "," is turned
# into "." before the conversion.
# NOTE(review): this presumes the column is read as text with "." as a
# thousands separator and "," as the decimal mark - confirm against the CSV.
df$Cost_rub_wo_NDS <- as.numeric(
  gsub(",", ".", gsub("\\.", "", df$Cost_rub_wo_NDS))
)

# Keep only the consumer-lending category.
df1 <- df %>%
  dplyr::filter(Category == "ПОТРЕБИТЕЛЬСКОЕ КРЕДИТОВАНИЕ")

# Month holds dd.mm.YYYY text; dots are swapped for slashes, then parsed.
df1$Month <- gsub("\\.", "/", df1$Month)
df1$date <- as.Date(df1$Month, format = "%d/%m/%Y")

# Restrict to 2020-01-01 (inclusive) .. 2020-09-01 (exclusive) and drop the
# text Month column, which `date` replaces.
df2 <- df1 %>%
  dplyr::filter(
    date >= as.Date("2020-01-01") & date < as.Date("2020-09-01")
  ) %>%
  dplyr::select(-Month)

# Overview of the filtered data.
# NOTE(review): the recorded `##` output that follows looks like the result
# of this call - confirm.
summary(df2)
## Client client_id Category
## Гамма :193 Min. :234679 ПОТРЕБИТЕЛЬСКОЕ КРЕДИТОВАНИЕ:1161
## Псилон :172 1st Qu.:285755 ВКЛАДЫ : 0
## Бета :152 Median :357953 ДЕБЕТОВЫЕ КАРТЫ : 0
## Зета :129 Mean :416259 ИНВЕСТИЦИИ : 0
## Лямбда :128 3rd Qu.:555392 ИПОТЕЧНОЕ КРЕДИТОВАНИЕ : 0
## Сигма :126 Max. :855060 КАСКО : 0
## (Other):261 (Other) : 0
## Device Place BannerType QueryType (search only)
## Desktop:581 Networks:494 cpc_video : 4 Length:1161
## Mobile :580 Search :667 cpm_banner : 95 Class :character
## dynamic : 0 Mode :character
## image_ad :146
## mcbanner :169
## performance: 1
## text :746
## TargetingType Shows Clicks Cost_rub_wo_NDS
## Autotargeting: 79 Min. : 0 Min. : 0 Min. : 0
## Phrase :838 1st Qu.: 0 1st Qu.: 0 1st Qu.: 0
## Retargeting :244 Median : 14533 Median : 333 Median : 12068
## Mean : 1480906 Mean : 12648 Mean : 371213
## 3rd Qu.: 177331 3rd Qu.: 7859 3rd Qu.: 339873
## Max. :243282742 Max. :567033 Max. :8847957
##
## Сonversions date
## Min. : 0.0 Min. :2020-01-01
## 1st Qu.: 0.0 1st Qu.:2020-03-01
## Median : 19.0 Median :2020-05-01
## Mean : 943.2 Mean :2020-04-21
## 3rd Qu.: 507.0 3rd Qu.:2020-07-01
## Max. :20487.0 Max. :2020-08-01
##
# Variable Importance ----
# Full linear model: conversions explained by device, placement, banner
# type, targeting type, impressions (Shows) and clicks.
library(stats)

# Short alias for the conversions column, used in the model formula.
# NOTE(review): the source column name appears to start with a non-ASCII
# look-alike of "C" - confirm before retyping it by hand.
df2$conv <- df2$Сonversions

model1 <- lm(
  formula = conv ~ Device + Place + BannerType + TargetingType +
    Shows + Clicks,
  data = df2
)
summary(model1)
##
## Call:
## lm(formula = conv ~ Device + Place + BannerType + TargetingType +
## Shows + Clicks, data = df2)
##
## Residuals:
## Min 1Q Median 3Q Max
## -15036.9 -531.2 -35.1 138.7 14046.2
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -1.252e+03 9.044e+02 -1.384 0.167
## DeviceMobile 1.703e+02 1.044e+02 1.630 0.103
## PlaceSearch 1.431e+03 1.598e+02 8.953 < 2e-16 ***
## BannerTypecpm_banner 4.757e+01 8.955e+02 0.053 0.958
## BannerTypeimage_ad 8.850e+01 8.870e+02 0.100 0.921
## BannerTypemcbanner -1.222e+03 8.962e+02 -1.364 0.173
## BannerTypeperformance 2.606e+02 1.959e+03 0.133 0.894
## BannerTypetext -1.240e+01 8.826e+02 -0.014 0.989
## TargetingTypePhrase 9.914e+02 2.131e+02 4.653 3.65e-06 ***
## TargetingTypeRetargeting 1.127e+03 2.635e+02 4.275 2.07e-05 ***
## Shows -3.776e-05 5.436e-06 -6.946 6.29e-12 ***
## Clicks 4.446e-02 1.681e-03 26.452 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1750 on 1149 degrees of freedom
## Multiple R-squared: 0.4941, Adjusted R-squared: 0.4893
## F-statistic: 102 on 11 and 1149 DF, p-value: < 2.2e-16
# AIC-based stepwise selection starting from the full model; trace = 0
# keeps the per-step log from being printed.
model2 <- MASS::stepAIC(object = model1, trace = 0)
summary(model2)
##
## Call:
## lm(formula = conv ~ Device + Place + BannerType + TargetingType +
## Shows + Clicks, data = df2)
##
## Residuals:
## Min 1Q Median 3Q Max
## -15036.9 -531.2 -35.1 138.7 14046.2
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -1.252e+03 9.044e+02 -1.384 0.167
## DeviceMobile 1.703e+02 1.044e+02 1.630 0.103
## PlaceSearch 1.431e+03 1.598e+02 8.953 < 2e-16 ***
## BannerTypecpm_banner 4.757e+01 8.955e+02 0.053 0.958
## BannerTypeimage_ad 8.850e+01 8.870e+02 0.100 0.921
## BannerTypemcbanner -1.222e+03 8.962e+02 -1.364 0.173
## BannerTypeperformance 2.606e+02 1.959e+03 0.133 0.894
## BannerTypetext -1.240e+01 8.826e+02 -0.014 0.989
## TargetingTypePhrase 9.914e+02 2.131e+02 4.653 3.65e-06 ***
## TargetingTypeRetargeting 1.127e+03 2.635e+02 4.275 2.07e-05 ***
## Shows -3.776e-05 5.436e-06 -6.946 6.29e-12 ***
## Clicks 4.446e-02 1.681e-03 26.452 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1750 on 1149 degrees of freedom
## Multiple R-squared: 0.4941, Adjusted R-squared: 0.4893
## F-statistic: 102 on 11 and 1149 DF, p-value: < 2.2e-16

# Comparison ----
# Conversions ----
# Mean number of conversions per client.
data3 <- df2 %>%
  dplyr::select(Client, conv) %>%
  dplyr::group_by(Client) %>%
  dplyr::summarise(mean_conv = mean(conv))
data3

# Fill flag for the bars: only the 7th row of the summary is marked "red",
# every other row is NA. The vector is derived from nrow(data3) so its
# length always matches the plotted data; a fixed-length vector makes
# ggplot2 fail as soon as the number of clients changes.
area.color <- ifelse(seq_len(nrow(data3)) == 7L, "red", NA_character_)

# Horizontal bar chart with the rounded mean printed in the middle of
# each bar.
ggplot(data = data3, aes(x = Client, y = mean_conv, fill = area.color)) +
  geom_col(show.legend = FALSE) +
  coord_flip() +
  theme_minimal() +
  ggtitle("Среднее количество конверсий") +
  xlab("Клиент") +
  ylab("Среднее количество конверсий") +
  geom_text(
    aes(label = round(mean_conv)),
    colour = "black",
    size = 4,
    position = position_stack(vjust = 0.5)
  )

# Payments ----
# Short alias for the cost column (rubles, per the column name).
df2$cost <- df2$Cost_rub_wo_NDS

# Mean cost per client.
data7 <- df2 %>%
  dplyr::select(Client, cost) %>%
  dplyr::group_by(Client) %>%
  dplyr::summarise(mean_cost = mean(cost))
data7

# Fill flag for the bars: only the 7th row of the summary is marked "red",
# every other row is NA. The vector is derived from nrow(data7) so its
# length always matches the plotted data; a fixed-length vector makes
# ggplot2 fail as soon as the number of clients changes.
area.color <- ifelse(seq_len(nrow(data7)) == 7L, "red", NA_character_)

# Horizontal bar chart with the rounded mean printed in the middle of
# each bar.
ggplot(data = data7, aes(x = Client, y = mean_cost, fill = area.color)) +
  geom_col(show.legend = FALSE) +
  coord_flip() +
  theme_minimal() +
  ggtitle("Среднее количество затрат") +
  xlab("Клиент") +
  ylab("Среднее количество затрат") +
  geom_text(
    aes(label = round(mean_cost)),
    colour = "black",
    size = 4,
    position = position_stack(vjust = 0.5)
  )

# Clicks ----
# Mean number of clicks per client.
data1 <- df2 %>%
  dplyr::select(Client, Clicks) %>%
  dplyr::group_by(Client) %>%
  dplyr::summarise(mean_clicks = mean(Clicks))
data1

# Fill flag for the bars: only the 7th row of the summary is marked "red",
# every other row is NA. The vector is derived from nrow(data1) so its
# length always matches the plotted data; a fixed-length vector makes
# ggplot2 fail as soon as the number of clients changes.
area.color <- ifelse(seq_len(nrow(data1)) == 7L, "red", NA_character_)

# Horizontal bar chart with the rounded mean printed in the middle of
# each bar.
ggplot(data = data1, aes(x = Client, y = mean_clicks, fill = area.color)) +
  geom_col(show.legend = FALSE) +
  coord_flip() +
  theme_minimal() +
  ggtitle("Среднее количество кликов") +
  xlab("Клиент") +
  ylab("Среднее количество кликов") +
  geom_text(
    aes(label = round(mean_clicks)),
    colour = "black",
    size = 4,
    position = position_stack(vjust = 0.5)
  )

# Shows ----
# Mean number of impressions (Shows) per client.
data2 <- df2 %>%
  dplyr::select(Client, Shows) %>%
  dplyr::group_by(Client) %>%
  dplyr::summarise(mean_shows = mean(Shows))
data2

# A very large scipen penalty makes R prefer fixed notation over
# scientific notation from this point on (global option, not restored).
options(scipen = 9999999)

# Fill flag for the bars: only the 7th row of the summary is marked "red",
# every other row is NA. The vector is derived from nrow(data2) so its
# length always matches the plotted data; a fixed-length vector makes
# ggplot2 fail as soon as the number of clients changes.
area.color <- ifelse(seq_len(nrow(data2)) == 7L, "red", NA_character_)

# Horizontal bar chart with the rounded mean printed in the middle of
# each bar.
ggplot(data = data2, aes(x = Client, y = mean_shows, fill = area.color)) +
  geom_col(show.legend = FALSE) +
  coord_flip() +
  theme_minimal() +
  ggtitle("Среднее количество показов") +
  xlab("Клиент") +
  ylab("Среднее количество показов") +
  geom_text(
    aes(label = round(mean_shows)),
    colour = "black",
    size = 4,
    position = position_stack(vjust = 0.5)
  )

# Describing situation ----
# Targeting ----
# Per-client composition of targeting types. position = "fill" scales
# every bar to the same height, so the bars show shares of rows.
data4 <- dplyr::select(df2, Client, TargetingType)
ggplot(data4) +
  geom_bar(
    mapping = aes(x = Client, fill = TargetingType),
    position = "fill"
  ) +
  ggtitle("Типы таргета") +
  xlab("Клиент") +
  ylab("Количество") +
  theme_minimal()

# Place ----
# Per-client composition of placements. position = "fill" scales every
# bar to the same height, so the bars show shares of rows.
data5 <- dplyr::select(df2, Client, Place)
ggplot(data5) +
  geom_bar(
    mapping = aes(x = Client, fill = Place),
    position = "fill"
  ) +
  ggtitle("Место размещения") +
  xlab("Клиент") +
  ylab("Количество") +
  theme_minimal()

# Device ----
# Per-client composition of devices. position = "fill" scales every bar
# to the same height, so the bars show shares of rows.
data6 <- dplyr::select(df2, Client, Device)
ggplot(data6) +
  geom_bar(
    mapping = aes(x = Client, fill = Device),
    position = "fill"
  ) +
  ggtitle("Девайсы") +
  xlab("Клиент") +
  ylab("Количество") +
  theme_minimal()
