#Bases

library(ggplot2)
library(dplyr)
## 
## Adjuntando el paquete: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(readxl)
library(stringr)
library(openxlsx)
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ forcats   1.0.0     ✔ readr     2.1.5
## ✔ lubridate 1.9.3     ✔ tibble    3.2.1
## ✔ purrr     1.0.2     ✔ tidyr     1.3.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(treemapify)
VentasUS = read.xlsx("SP_Endeavor_Base1 Papas_VF_CLEAN.xlsx")
VentasMex = read.xlsx("SP_Endeavor_Base2Papas_VF_CLEAN.xlsx")
WageMex = read.xlsx("WagesMexClean.xlsx")
WageUS = read.xlsx("WagesUSClean.xlsx")
OrdMonth = c("January", "February", "March", "April", "May", "June", "July", "August", "September", "October", "November", "December")
OrdMonthEs = c("Enero", "Febrero", "Marzo", "Abril", "Mayo", "Junio", "Julio", "Agosto", "Septiembre", "Octubre", "Noviembre", "Diciembre")
apply(is.na(VentasMex), 2, sum)>=1
##     Month    Region  CrujiCos  PicanCos SaladoCos    BBQCos  SalsaCos 
##     FALSE     FALSE      TRUE     FALSE      TRUE      TRUE     FALSE
Media_CC= mean(VentasMex$CrujiCos,na.rm=T)
VentasMex$CrujiCos[is.na(VentasMex$CrujiCos)] = Media_CC

Media_SC= mean(VentasMex$SaladoCos,na.rm=T)
VentasMex$SaladoCos[is.na(VentasMex$SaladoCos)] = Media_SC

Media_BC= mean(VentasMex$BBQCos,na.rm=T)
VentasMex$BBQCos[is.na(VentasMex$BBQCos)] = Media_BC

#The product that sells the most

VentasMex$RM = paste(VentasMex$Month, VentasMex$Region)
meses_en_df = VentasMex$meses
numeros_meses = match(meses_en_df, VentasMex$Month)
borrar= c("Region", "Month")
#Mex
vm_transpose= VentasMex[ , !(names(VentasMex) %in% borrar)]
dM_transpose = data.frame(t(vm_transpose[-6]))
colnames(dM_transpose) = vm_transpose[, 6]
dM_transpose$Sumative= rowSums((dM_transpose[, 1:36]))
dM_transpose$Sabores =rownames(dM_transpose)

#US
VentasUS$RM =paste(VentasUS$Month, VentasUS$Region)
vu_transpose= VentasUS[ , !(names(VentasUS) %in% borrar)]
dU_transpose = data.frame(t(vu_transpose[-6]))
colnames(dU_transpose) = vu_transpose[, 6]
dU_transpose$Sumative= rowSums((dU_transpose[, 1:36]))
dU_transpose$Sabores =rownames(dU_transpose)

##US

ggplot2::ggplot(data = dU_transpose, mapping = aes(x= Sabores,y = Sumative))+
geom_col(color ="blue", fill ="red") +  theme_classic() + 
  labs(title = "US Sales by Product",
       x = "Products",
       y = "Sales")

##MEX

ggplot2::ggplot(data = dM_transpose, mapping = aes(x= Sabores,y = Sumative))+
geom_col(color ="red", fill ="green") +  theme_classic() +
  labs(title = "Mex Sales by Product",
       x = "Products",
       y = "Sales")

#Mensual Sakes

VentasMexTotales= VentasMex

VentasMexTotales$MesesNum = match(VentasMex$Month,OrdMonthEs)

VentasMexTotales$VentasTotal = VentasMex$CrujiCos + VentasMex$PicanCos + VentasMex$SaladoCos + VentasMex$BBQCos + VentasMex$SalsaCos

ventas_por_mesMex = VentasMexTotales %>%
  group_by(MesesNum) %>%
  summarise(Ventas = sum(VentasTotal))

VentasUSTotales= VentasUS

VentasUSTotales$MesesNum = match(VentasUS$Month,OrdMonth)

  
VentasUSTotales$VentasTotal = VentasUS$CrunchyCo + VentasUS$SpicyCo + VentasUS$SaltCo + VentasUS$BBQCo + VentasUS$SalsaCo

ventas_por_mesUS = VentasUSTotales %>%
  group_by(MesesNum) %>%
  summarise(Ventas = sum(VentasTotal))

ventas_por_mesUS$Variable = "US"
ventas_por_mesMex$Variable = "MEX"

datos_combinados = rbind(ventas_por_mesUS, ventas_por_mesMex)

##MEX AND US

ggplot2::ggplot(data = datos_combinados, mapping = aes(x= MesesNum,y = Ventas, color = Variable))+
geom_line() +  theme_classic() + scale_x_continuous(limits = c(1, 12), breaks = seq(1, 12, by = 1))

###US

ggplot2::ggplot(data = ventas_por_mesUS, mapping = aes(x= MesesNum,y = Ventas))+
geom_line(color ="blue") + geom_point(color ="red") +  theme_classic()+ scale_x_continuous(limits = c(1, 12), breaks = seq(1, 12, by = 1))

###MEX

ggplot2::ggplot(data = ventas_por_mesMex, mapping = aes(x= MesesNum,y = Ventas))+
geom_line(color ="green") + geom_point(color ="red") + theme_classic() + scale_x_continuous(limits = c(1, 12), breaks = seq(1, 12, by = 1))

#The months where each products sells the most

ventas_producto_mesUS = VentasUSTotales %>%
  group_by(MesesNum) %>%
  reframe(Venta = c(sum(CrunchyCo), sum(SaltCo), sum(BBQCo), sum(SalsaCo), sum(SpicyCo)))
num_filas = (12*5)
Marca = c("CrunchyCo", "SaltCo", "BBQCo","SalsaCo", "SpicyCo")
ventas_producto_mesUS$Sabores =  rep(Marca, length.out = num_filas)

ventas_producto_mesMEX = VentasMexTotales %>%
  group_by(MesesNum) %>%
  reframe(Venta =c(sum(CrujiCos),sum(SaladoCos), sum(BBQCos), sum(SalsaCos), sum(PicanCos)))
Marca = c("CrujiCos", "SaladoCos", "BBQCos","SalsaCos", "PicanCos")
ventas_producto_mesMEX$Sabores =  rep(Marca, length.out = num_filas)

0.1 US

ggplot(ventas_producto_mesUS, aes(x = factor(MesesNum), y = Venta, fill = Sabores)) +
  geom_bar(stat = "identity") +
  facet_wrap(~Sabores) +
  labs(
    title = "Mensual Sales by Flavor",
    x = "Month",
    y = "Sales"
  )

##MEX

ggplot(ventas_producto_mesMEX, aes(x = factor(MesesNum), y = Venta, fill = Sabores)) +
  geom_bar(stat = "identity") +
  facet_wrap(~Sabores) +
  labs(
    title = "Mensual Sales by Flavor",
    x = "Month",
    y = "Sales")

#Patterns in wages

WageUS_R = WageUS %>%
  group_by(Wage) %>%
  slice(rep(1:n(), Workers))

WUpromedio = mean(WageUS_R$Wage)
WUsd = sd(WageUS_R$Wage)
WUA1sd = WUpromedio + WUsd
WUS1sd = WUpromedio - WUsd
WUA2sd = WUpromedio + (2*WUsd)
WUS2sd = WUpromedio - (2*WUsd)
WUA3sd = WUpromedio + (3*WUsd)
WUS3sd = WUpromedio - (3*WUsd)

WageMEX_R = WageMex %>%
  group_by(Wage) %>%
  slice(rep(1:n(), Workers))

WMpromedio = mean(WageMEX_R$Wage)
WMsd = sd(WageMEX_R$Wage)
WMA1sd = WMpromedio + WMsd
WMS1sd = WMpromedio - WMsd
WMA2sd = WMpromedio + (2*WMsd)
WMS2sd = WMpromedio - (2*WMsd)
WMA3sd = WMpromedio + (3*WMsd)
WMS3sd = WMpromedio - (3*WMsd)

##US

ggplot2::ggplot(WageUS_R, aes(x = Wage)) +
  geom_histogram(binwidth = 500, fill = "blue", color = "yellow") +
  geom_vline(xintercept = WUpromedio, linetype = "dashed", color = "red", size = 1) +
  geom_vline(xintercept = WUA1sd, linetype = "dashed", color = "red", size = 1) +
  geom_vline(xintercept = WUS1sd, linetype = "dashed", color = "red", size = 1)+   geom_vline(xintercept = WUA2sd, linetype = "dashed", color = "red", size = 1) +
  geom_vline(xintercept = WUS2sd, linetype = "dashed", color = "red", size = 1)+
  theme_classic()+
  
  labs(title = "Histogram US WAGE",
       x = "Wage",
       y = "Frequency")
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.

##MEX

ggplot2::ggplot(WageMEX_R, aes(x = Wage)) +
  geom_histogram(binwidth = 500, fill = "green", color = "yellow") +
  geom_vline(xintercept = WMpromedio, linetype = "dashed", color = "red", size = 1) +
  geom_vline(xintercept = WMA1sd, linetype = "dashed", color = "red", size = 1) +
  geom_vline(xintercept = WMS1sd, linetype = "dashed", color = "red", size = 1)+   geom_vline(xintercept = WMA2sd, linetype = "dashed", color = "red", size = 1) +
  geom_vline(xintercept = WMS2sd, linetype = "dashed", color = "red", size = 1)+ theme_classic()+
  
  labs(title = "Histogram MEX WAGE",
       x = "Wage",
       y = "Frequency")

#Patterns in products

ventas_producto_mesUS = VentasUSTotales %>%
  group_by(MesesNum) %>%
  reframe(Venta = c(sum(CrunchyCo), sum(SaltCo), sum(BBQCo), sum(SalsaCo), sum(SpicyCo)))
num_filas = (12*5)
Marca = c("CrunchyCo", "SaltCo", "BBQCo","SalsaCo", "SpicyCo")
ventas_producto_mesUS$Sabores =  rep(Marca, length.out = num_filas)

ventas_producto_mesMEX = VentasMexTotales %>%
  group_by(MesesNum) %>%
  reframe(Venta =c(sum(CrujiCos),sum(SaladoCos), sum(BBQCos), sum(SalsaCos), sum(PicanCos)))
Marca = c("CrujiCos", "SaladoCos", "BBQCos","SalsaCos", "PicanCos")
ventas_producto_mesMEX$Sabores =  rep(Marca, length.out = num_filas)

0.2 US

ggplot2::ggplot(data = ventas_producto_mesUS, mapping = aes(x= MesesNum,y = Venta, color = Sabores))+
geom_line() +  theme_classic() + scale_x_continuous(limits = c(1, 12), breaks = seq(1, 12, by = 1))

##MEX

ggplot2::ggplot(data = ventas_producto_mesMEX, mapping = aes(x= MesesNum,y = Venta, color = Sabores))+
geom_line() +  theme_classic() + scale_x_continuous(limits = c(1, 12), breaks = seq(1, 12, by = 1)) 

#Information about the regions

ventas_regionUS = VentasUSTotales %>%
  group_by(Region) %>%
  summarise(Venta = sum(VentasTotal))
ventas_regionMEX = VentasMexTotales %>%
  group_by(Region) %>%
  summarise(Venta = sum(VentasTotal))

##US

ggplot2::ggplot(data = ventas_regionUS, mapping = aes(area=Venta, fill= Region))+
geom_treemap() +  theme_classic() + 
  labs(title = "US Sales by Region",
       x = "Region",
       y = "Sales")

##MEX

ggplot2::ggplot(data = ventas_regionMEX, mapping = aes(area=Venta, fill= Region))+
geom_treemap() +  theme_classic() + 
  labs(title = "MEX Sales by Region",
       x = "Region",
       y = "Sales")