library(ggplot2)
library(dplyr)
library(readr)
library(Amelia)
library(DMwR)
library(mice)
library(randomForest)
library(VIM)
library(psych)
library(corrplot)
library(sjPlot)
library(ggplot2)
library(polycor)
library(mlbench)
library(caret)
library(MASS)
library(boot)
library(e1071)
library(caTools)
library(car)
library(caret)
library(kableExtra)
library(sjlabelled)
library(sjmisc) 
library(sjstats) 
library(ggeffects)  
library(margins)
library(vip)
library(data.table)
# Load the semicolon-delimited campaign export.
df_yandex <- read_delim(
  "df_yandex.csv",
  delim = ";",
  escape_double = FALSE,
  trim_ws = TRUE
)

# Work on a copy and turn the categorical columns into factors.
df <- df_yandex %>%
  dplyr::mutate(
    Client = as.factor(Client),
    Category = as.factor(Category),
    Device = as.factor(Device),
    Place = as.factor(Place),
    BannerType = as.factor(BannerType),
    TargetingType = as.factor(TargetingType)
  )

# Cost is cleaned in two steps before conversion to numeric:
# every "." is removed first, then "," is replaced by ".".
df$Cost_rub_wo_NDS <- as.numeric(
  gsub(",", ".", gsub("\\.", "", df$Cost_rub_wo_NDS))
)

# Keep a single product category and derive a Date column from Month
# (dots are swapped for slashes so the value parses as day/month/year).
df1 <- df %>%
  dplyr::filter(Category == "ПОТРЕБИТЕЛЬСКОЕ КРЕДИТОВАНИЕ") %>%
  dplyr::mutate(
    Month = gsub("\\.", "/", Month),
    date = as.Date(Month, format = "%d/%m/%Y")
  )

# Analysis window: 2020-01-01 (inclusive) up to 2020-09-01 (exclusive).
# The textual Month column is dropped once `date` exists.
df2 <- df1 %>%
  dplyr::filter(
    date >= as.Date("2020-01-01"),
    date < as.Date("2020-09-01")
  ) %>%
  dplyr::select(-Month)

summary(df2)
##      Client      client_id                              Category   
##  Гамма  :193   Min.   :234679   ПОТРЕБИТЕЛЬСКОЕ КРЕДИТОВАНИЕ:1161  
##  Псилон :172   1st Qu.:285755   ВКЛАДЫ                      :   0  
##  Бета   :152   Median :357953   ДЕБЕТОВЫЕ КАРТЫ             :   0  
##  Зета   :129   Mean   :416259   ИНВЕСТИЦИИ                  :   0  
##  Лямбда :128   3rd Qu.:555392   ИПОТЕЧНОЕ КРЕДИТОВАНИЕ      :   0  
##  Сигма  :126   Max.   :855060   КАСКО                       :   0  
##  (Other):261                    (Other)                     :   0  
##      Device         Place           BannerType  QueryType (search only)
##  Desktop:581   Networks:494   cpc_video  :  4   Length:1161            
##  Mobile :580   Search  :667   cpm_banner : 95   Class :character       
##                               dynamic    :  0   Mode  :character       
##                               image_ad   :146                          
##                               mcbanner   :169                          
##                               performance:  1                          
##                               text       :746                          
##        TargetingType     Shows               Clicks       Cost_rub_wo_NDS  
##  Autotargeting: 79   Min.   :        0   Min.   :     0   Min.   :      0  
##  Phrase       :838   1st Qu.:        0   1st Qu.:     0   1st Qu.:      0  
##  Retargeting  :244   Median :    14533   Median :   333   Median :  12068  
##                      Mean   :  1480906   Mean   : 12648   Mean   : 371213  
##                      3rd Qu.:   177331   3rd Qu.:  7859   3rd Qu.: 339873  
##                      Max.   :243282742   Max.   :567033   Max.   :8847957  
##                                                                            
##   Сonversions           date           
##  Min.   :    0.0   Min.   :2020-01-01  
##  1st Qu.:    0.0   1st Qu.:2020-03-01  
##  Median :   19.0   Median :2020-05-01  
##  Mean   :  943.2   Mean   :2020-04-21  
##  3rd Qu.:  507.0   3rd Qu.:2020-07-01  
##  Max.   :20487.0   Max.   :2020-08-01  
## 

Variable Importance

library(stats)

# Expose the conversions column under a plain ASCII name for the model formula.
# NOTE(review): the first character of the source column name appears to be a
# Cyrillic letter rather than a Latin "C" - keep it exactly as in the data.
df2[["conv"]] <- df2[["Сonversions"]]

# Linear model of conversions on placement attributes and traffic volume.
model1 <- lm(
  conv ~ Device + Place + BannerType + TargetingType + Shows + Clicks,
  data = df2
)
summary(model1)
## 
## Call:
## lm(formula = conv ~ Device + Place + BannerType + TargetingType + 
##     Shows + Clicks, data = df2)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -15036.9   -531.2    -35.1    138.7  14046.2 
## 
## Coefficients:
##                            Estimate Std. Error t value Pr(>|t|)    
## (Intercept)              -1.252e+03  9.044e+02  -1.384    0.167    
## DeviceMobile              1.703e+02  1.044e+02   1.630    0.103    
## PlaceSearch               1.431e+03  1.598e+02   8.953  < 2e-16 ***
## BannerTypecpm_banner      4.757e+01  8.955e+02   0.053    0.958    
## BannerTypeimage_ad        8.850e+01  8.870e+02   0.100    0.921    
## BannerTypemcbanner       -1.222e+03  8.962e+02  -1.364    0.173    
## BannerTypeperformance     2.606e+02  1.959e+03   0.133    0.894    
## BannerTypetext           -1.240e+01  8.826e+02  -0.014    0.989    
## TargetingTypePhrase       9.914e+02  2.131e+02   4.653 3.65e-06 ***
## TargetingTypeRetargeting  1.127e+03  2.635e+02   4.275 2.07e-05 ***
## Shows                    -3.776e-05  5.436e-06  -6.946 6.29e-12 ***
## Clicks                    4.446e-02  1.681e-03  26.452  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1750 on 1149 degrees of freedom
## Multiple R-squared:  0.4941, Adjusted R-squared:  0.4893 
## F-statistic:   102 on 11 and 1149 DF,  p-value: < 2.2e-16
# Stepwise selection by AIC starting from model1; trace = 0 silences the
# per-step log.
model2 <- MASS::stepAIC(model1, trace = 0)

summary(model2)
## 
## Call:
## lm(formula = conv ~ Device + Place + BannerType + TargetingType + 
##     Shows + Clicks, data = df2)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -15036.9   -531.2    -35.1    138.7  14046.2 
## 
## Coefficients:
##                            Estimate Std. Error t value Pr(>|t|)    
## (Intercept)              -1.252e+03  9.044e+02  -1.384    0.167    
## DeviceMobile              1.703e+02  1.044e+02   1.630    0.103    
## PlaceSearch               1.431e+03  1.598e+02   8.953  < 2e-16 ***
## BannerTypecpm_banner      4.757e+01  8.955e+02   0.053    0.958    
## BannerTypeimage_ad        8.850e+01  8.870e+02   0.100    0.921    
## BannerTypemcbanner       -1.222e+03  8.962e+02  -1.364    0.173    
## BannerTypeperformance     2.606e+02  1.959e+03   0.133    0.894    
## BannerTypetext           -1.240e+01  8.826e+02  -0.014    0.989    
## TargetingTypePhrase       9.914e+02  2.131e+02   4.653 3.65e-06 ***
## TargetingTypeRetargeting  1.127e+03  2.635e+02   4.275 2.07e-05 ***
## Shows                    -3.776e-05  5.436e-06  -6.946 6.29e-12 ***
## Clicks                    4.446e-02  1.681e-03  26.452  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1750 on 1149 degrees of freedom
## Multiple R-squared:  0.4941, Adjusted R-squared:  0.4893 
## F-statistic:   102 on 11 and 1149 DF,  p-value: < 2.2e-16
# Variable-importance plot for the model kept by the stepwise search.
vip::vip(model2)

Comparison

Conversions

# Mean number of conversions per client.
data3 <- df2 %>%
  dplyr::select(Client, conv) %>%
  dplyr::group_by(Client) %>%
  dplyr::summarise(mean_conv = mean(conv))
data3

# Fill values by row position: only the 7th bar is highlighted.
# NOTE(review): this vector is hard-coded to 9 entries, so it assumes data3
# has exactly 9 rows - confirm if the client list changes.
area.color <- c(NA, NA, NA, NA, NA, NA, "red", NA, NA)

# Horizontal bar chart with the rounded mean printed in the middle of each bar.
ggplot(data3, aes(x = Client, y = mean_conv, fill = area.color)) +
  geom_col(show.legend = FALSE) +
  geom_text(
    aes(label = round(mean_conv)),
    colour = "black",
    size = 4,
    position = position_stack(vjust = 0.5)
  ) +
  coord_flip() +
  theme_minimal() +
  labs(
    title = "Среднее количество конверсий",
    x = "Клиент",
    y = "Среднее количество конверсий"
  )

Payments

# Short alias for the cost column used in the aggregation below.
df2[["cost"]] <- df2[["Cost_rub_wo_NDS"]]

# Mean cost per client.
data7 <- df2 %>%
  dplyr::select(Client, cost) %>%
  dplyr::group_by(Client) %>%
  dplyr::summarise(mean_cost = mean(cost))
data7

# Fill values by row position: only the 7th bar is highlighted.
# NOTE(review): this vector is hard-coded to 9 entries, so it assumes data7
# has exactly 9 rows - confirm if the client list changes.
area.color <- c(NA, NA, NA, NA, NA, NA, "red", NA, NA)

# Horizontal bar chart with the rounded mean printed in the middle of each bar.
ggplot(data7, aes(x = Client, y = mean_cost, fill = area.color)) +
  geom_col(show.legend = FALSE) +
  geom_text(
    aes(label = round(mean_cost)),
    colour = "black",
    size = 4,
    position = position_stack(vjust = 0.5)
  ) +
  coord_flip() +
  theme_minimal() +
  labs(
    title = "Среднее количество затрат",
    x = "Клиент",
    y = "Среднее количество затрат"
  )

Clicks

# Mean number of clicks per client.
data1 <- df2 %>%
  dplyr::select(Client, Clicks) %>%
  dplyr::group_by(Client) %>%
  dplyr::summarise(mean_clicks = mean(Clicks))
data1

# Fill values by row position: only the 7th bar is highlighted.
# NOTE(review): this vector is hard-coded to 9 entries, so it assumes data1
# has exactly 9 rows - confirm if the client list changes.
area.color <- c(NA, NA, NA, NA, NA, NA, "red", NA, NA)

# Horizontal bar chart with the rounded mean printed in the middle of each bar.
ggplot(data1, aes(x = Client, y = mean_clicks, fill = area.color)) +
  geom_col(show.legend = FALSE) +
  geom_text(
    aes(label = round(mean_clicks)),
    colour = "black",
    size = 4,
    position = position_stack(vjust = 0.5)
  ) +
  coord_flip() +
  theme_minimal() +
  labs(
    title = "Среднее количество кликов",
    x = "Клиент",
    y = "Среднее количество кликов"
  )

Shows

# Mean number of impressions (Shows) per client.
data2 <- df2 %>%
  dplyr::select(Client, Shows) %>%
  dplyr::group_by(Client) %>%
  dplyr::summarise(mean_shows = mean(Shows))
data2

# Strongly penalise scientific notation from here on (session-wide option).
options(scipen = 9999999)

# Fill values by row position: only the 7th bar is highlighted.
# NOTE(review): this vector is hard-coded to 9 entries, so it assumes data2
# has exactly 9 rows - confirm if the client list changes.
area.color <- c(NA, NA, NA, NA, NA, NA, "red", NA, NA)

# Horizontal bar chart with the rounded mean printed in the middle of each bar.
ggplot(data2, aes(x = Client, y = mean_shows, fill = area.color)) +
  geom_col(show.legend = FALSE) +
  geom_text(
    aes(label = round(mean_shows)),
    colour = "black",
    size = 4,
    position = position_stack(vjust = 0.5)
  ) +
  coord_flip() +
  theme_minimal() +
  labs(
    title = "Среднее количество показов",
    x = "Клиент",
    y = "Среднее количество показов"
  )

Describing the situation

Targeting

# One row per record, keeping only the client and its targeting type.
data4 <- df2 %>%
  dplyr::select(Client, TargetingType)

# Stacked bars normalised to equal height: targeting-type mix per client.
# NOTE(review): position = "fill" draws within-client shares, while the y
# label reads "Количество" (count) - confirm the intended label.
ggplot(data4) +
  geom_bar(aes(x = Client, fill = TargetingType), position = "fill") +
  labs(title = "Типы таргета", x = "Клиент", y = "Количество") +
  theme_minimal()

Place

# One row per record, keeping only the client and the placement.
data5 <- df2 %>%
  dplyr::select(Client, Place)

# Stacked bars normalised to equal height: placement mix per client.
# NOTE(review): position = "fill" draws within-client shares, while the y
# label reads "Количество" (count) - confirm the intended label.
ggplot(data5) +
  geom_bar(aes(x = Client, fill = Place), position = "fill") +
  labs(title = "Место размещения", x = "Клиент", y = "Количество") +
  theme_minimal()

Device

# One row per record, keeping only the client and the device type.
data6 <- df2 %>%
  dplyr::select(Client, Device)

# Stacked bars normalised to equal height: device mix per client.
# NOTE(review): position = "fill" draws within-client shares, while the y
# label reads "Количество" (count) - confirm the intended label.
ggplot(data6) +
  geom_bar(aes(x = Client, fill = Device), position = "fill") +
  labs(title = "Девайсы", x = "Клиент", y = "Количество") +
  theme_minimal()