Este documento proporciona un análisis de los datos financieros de la empresa Nala.
Índice
Cargar bibliotecas necesarias
Leer los datos de gastos y ventas de NALA 2020-2024
Combinar los datos
Análisis financiero
4.1 Ventas totales vs. gastos
Análisis de las ventas
5.1 Gráficas: Ventas a lo largo del tiempo
5.2 Tendencias de ventas por plataforma
5.3 Modelos lineales y resultados
Estado de Resultados
6.1 Cálculo de Utilidades
6.2 Cálculo de márgenes y porcentajes
1. Cargar bibliotecas
Code
library(tidyverse) # For data manipulation and visualization; includes readr, dplyr, ggplot2library(janitor) # For cleaning data and making tabular outputs cleanerlibrary(scales) # For scaling graphical outputslibrary(RColorBrewer) # For using color palettes in visualizationslibrary(plotly) # For interactive web-based graphicslibrary(stringr) # For string manipulation (also part of tidyverse but listed for clarity)library(knitr) # For dynamic report generation in Rlibrary(kableExtra) # For enhancing 'kable' tables in R Markdownlibrary(broom) # For converting statistical analysis objects into tidy data frameslibrary(DT) # For creating interactive tables in R
2. Cargar los datos
2.1 Leer datos de gastos
Code
# Step 1: Read and clean the raw data, and rename it to raw_expense_dataexpenses_file_path <-"/Users/javier.patron/Downloads/pagos Nala - Expenses Data Analisis (8).csv"raw_expense_data <-read_csv(expenses_file_path) %>%clean_names() %>%# Set the first row as column names and remove the first row { setNames(.[-1,], as.character(.[1,])) } %>%clean_names() %>%select_if(~!all(is.na(.)))# Step 2: Create three separate data frames for each set of columnsdf1 <- raw_expense_data %>%select(clasificacion, fecha, empresa, cantidad, producto)df2 <- raw_expense_data %>%select(clasificacion = clasificacion_2, fecha = fecha_2, empresa = empresa_2, cantidad = cantidad_2, producto = producto_2)df3 <- raw_expense_data %>%select(clasificacion = clasificacion_3, fecha = fecha_3, empresa = empresa_3, cantidad = cantidad_3, producto = producto_3)df4 <- raw_expense_data %>%select(clasificacion = clasificacion_4, fecha = fecha_4, empresa = empresa_4, cantidad = cantidad_4, producto = producto_4)# Step 3: Combine all three data frames and name it as expenses_dataexpenses_data <-bind_rows(df1, df2, df3, df4) %>%# Remove rows where all columns are NA (if any)filter(!if_all(everything(), is.na))# Clean the dataexpenses_data <- expenses_data %>%mutate(fecha = lubridate::dmy(fecha), año = lubridate::year(fecha),mes = lubridate::month(fecha, label =TRUE),quarter = lubridate::quarter(fecha),source =case_when(str_detect(clasificacion, "Gastos") ~"Gastos",str_detect(clasificacion, "Costos") ~"Costos",TRUE~"other"# This handles any other cases, if they exist )) %>%mutate(cantidad =gsub("\\$", "", cantidad), cantidad =gsub(",", "", cantidad), cantidad =as.numeric(cantidad)) %>%select("source", "clasificacion", "fecha", "año", "mes", "quarter", "empresa", "cantidad", "producto") %>%filter(!str_detect(año, "2025"))
2.2 Leer datos de ventas
Code
# Read the CSV filesales_file_path <-"/Users/javier.patron/Downloads/pagos Nala - Sales Data Analisis (2).csv"sales_data <-read_csv(sales_file_path) %>%clean_names()# Clean and Organize the datasales_data_long <- sales_data %>%pivot_longer(cols = jan:dec, names_to ="month", values_to ="cantidad") %>%rename(año = ano, tipo_venta = tipo) %>%mutate(month =match(tolower(month), tolower(month.abb)), # Convert month abbreviation to numbermes =month(month, label =TRUE, abbr =TRUE), # Convert to lubridate month formatsource ="Ventas",fecha =make_date(year = año, month =month(month), day =days_in_month(month(month, label =FALSE))), # Add last day of the monthclasificacion = tipo_venta,quarter =quarter(month),cantidad =as.numeric(gsub("[$,]", "", cantidad))) %>%select("source", "clasificacion", "fecha", "año", "mes", "quarter" , "cantidad")#sales_data_long <- sales_data_long %>% # arrange(fecha) %>% # slice(1:169)
3. Combinar los datos
Code
combined_data <-full_join( expenses_data %>%select(-empresa, -producto), sales_data_long) %>%mutate(clasificacion =case_when( clasificacion =="Deposito"~"Ventas en Deposito", clasificacion =="Efectivo"~"Ventas en Efectivo", clasificacion =="Pagina"~"Ventas en Pagina",TRUE~ clasificacion))combined_data <-full_join( expenses_data %>%select(-empresa, -producto), sales_data_long) %>%mutate(clasificacion =case_when( clasificacion =="Gastos Admin"~"Gastos de Admin", clasificacion =="Gastos Ventas"~"Gastos de Ventas", clasificacion =="Costos Ventas"~"Costos de Ventas", clasificacion =="Gastos Financieros"~"Gastos Financieros", clasificacion =="Deposito"~"Ventas en Deposito", clasificacion =="Efectivo"~"Ventas en Efectivo", clasificacion =="Pagina"~"Ventas en Pagina",TRUE~ clasificacion))
3.1 Tabla de Gastos
3.2 Tabla de Ventas
Code
# Create an interactive table for expenses_data with smaller fontdatatable(sales_data, options =list(autoWidth =TRUE, searching =TRUE), caption ='Expenses Data') %>%formatStyle(columns =names(sales_data),fontSize ='10px')
4. Análisis Financiero
4.1 Ventas Totales vs. Gastos
Code
# Summarize total sales and expenses across the entire datasettotal_summary <- combined_data %>%group_by(source) %>%summarize(total_amount =sum(as.numeric(cantidad), na.rm =TRUE))# Define a custom color palette similar to Set1 or Set2custom_palette <-c("#E41A1C", "#377EB8", "#4DAF4A")# Create the ggplot objectp <-ggplot(total_summary, aes(x = source, y = total_amount, fill = source)) +geom_bar(stat ="identity") +scale_fill_manual(values = custom_palette) +# Apply the custom palettelabs(title ="Total Sales vs. Expenses (Overall History)",y ="Total Amount (MXN)",x ="") +scale_y_continuous(labels = scales::dollar_format(prefix ="$")) +theme_minimal()# Convert the ggplot2 plot to an interactive plotly graphggplotly(p)
Code
# Define custom colors based on classificationcustom_colors <-c("Ventas en Deposito"="#6EBF74", # Green"Ventas en Efectivo"="#499F54", # Green"Ventas en Pagina"="#28703B", # Green"Costos de Ventas"="#FFDBA9", # Pink"Gastos de Admin"="#FF9F40", # Coral"Gastos de Ventas"="#FF8347", # Tomato"Gastos Financieros"="#FF5100")# Explicitly categorize each 'clasificacion' into 'sales' or 'expenses'combined_data <- combined_data %>%mutate(source_grouped =case_when( clasificacion =="Ventas en Deposito"~"sales", clasificacion =="Ventas en Efectivo"~"sales", clasificacion =="Ventas en Pagina"~"sales", clasificacion =="Costos de Ventas"~"expenses", clasificacion =="Gastos de Admin"~"expenses", clasificacion =="Gastos de Ventas"~"expenses", clasificacion =="Gastos Financieros"~"expenses",TRUE~"other"# This is just a fallback case ))# Summarize total sales and combined expenses across the entire datasettotal_summary <- combined_data %>%group_by(source_grouped, clasificacion) %>%summarize(total_amount =sum(as.numeric(cantidad), na.rm =TRUE)) %>%mutate(total_sum =sum(total_amount))total_sums <- total_summary %>%group_by(source_grouped) %>%summarize(total_sum =sum(total_amount))# Bar graph of total sales vs. stacked combined expenses (Overall History) with custom colorsggplot(total_summary, aes(x = source_grouped,y = total_amount,fill = clasificacion)) +geom_bar(stat ="identity",position ="stack") +labs(title ="Total Sales vs. Expenses (Overall History)",y ="Total Amount (MXN)",x ="") +scale_fill_manual(values = custom_colors) +scale_y_continuous(labels =dollar_format(prefix ="$",big.mark =",")) +theme_minimal() +geom_text(aes(label = scales::dollar(total_amount, prefix ="$", big.mark =",")), position =position_stack(vjust =0.5), size =3, color ="white") +# Add specific text for each total sum from the total_sums tableannotate("text", x =1, y = total_sums$total_sum[1], label = scales::dollar(total_sums$total_sum[1],prefix ="$",big.mark =","), vjust =-0.5,size =2.5,fontface ="bold",color ="#FF7347") +annotate("text",x =2,y = total_sums$total_sum[2], label = scales::dollar(total_sums$total_sum[2],prefix ="$",big.mark =","), vjust =-0.2,size =2.5,fontface ="bold",color ="darkgreen")
Code
# Summarize total sales and expenses per yearyearly_summary <- combined_data %>%group_by(año, source_grouped, clasificacion) %>%summarize(total_amount =sum(as.numeric(cantidad), na.rm =TRUE)) %>%ungroup()# Bar graph of total sales vs. expenses per year with facets and annotationsggplot(yearly_summary, aes(x = source_grouped, y = total_amount, fill = clasificacion)) +geom_bar(stat ="identity", position ="stack") +labs(title ="Total Sales vs. Expenses per Year", y ="Total Amount (MXN)", x ="") +scale_fill_manual(values = custom_colors) +scale_y_continuous(labels =dollar_format(prefix ="$", big.mark =",")) +theme_minimal() +facet_wrap(~año)
Code
# Assuming combined_data has a date column named "fecha"month_filter <-13# POR REVISAR CON REGINA# Filter data for the last XX months#filtered_data <- combined_data %>%# mutate(fecha = as.Date(fecha, format = "%d/%m/%Y")) %>% # Ensure "fecha" is in Date format# filter(fecha >= as.Date(Sys.Date() %m-% months(month_filter))) # Filter last XXX months# Ensure combined_data has a properly formatted "fecha" columnfiltered_data <- combined_data %>%mutate(fecha =as.Date(fecha, format ="%d/%m/%Y")) %>%# Convert to Date formatfilter(year(fecha) *12+month(fecha) >=year(Sys.Date()) *12+month(Sys.Date()) - month_filter)# Summarize total sales and expenses per year and other groupingsfiltered_summary <- filtered_data %>%mutate(año =year(fecha)) %>%# Extract year from fechagroup_by(source_grouped, clasificacion) %>%summarize(total_amount =sum(as.numeric(cantidad), na.rm =TRUE)) %>%ungroup()filtered_sums <- filtered_summary %>%group_by(source_grouped) %>%summarize(total_sum =sum(total_amount))# Assuming filtered_data has already been created and summarized as 'yearly_summary'ggplot(filtered_summary, aes(x = source_grouped, y = total_amount, fill = clasificacion)) +geom_bar(stat ="identity", position ="stack") +labs(title =paste0("Total Sales vs. Expenses (Last ", month_filter," Months)"), y ="Total Amount (MXN)", x ="") +scale_fill_manual(values = custom_colors) +scale_y_continuous(labels = scales::dollar_format(prefix ="$", big.mark =",")) +theme_minimal() +# Adding text labels inside the barsgeom_text(aes(label = scales::dollar(total_amount, prefix ="$", big.mark =",")), position =position_stack(vjust =0.5), size =2.5, color ="white") +# Annotate specific sums on the chart (e.g., for each category)annotate("text", x =1, y = filtered_sums$total_sum[1], label = scales::dollar(filtered_sums$total_sum[1], prefix ="$", big.mark =","), vjust =-0.5, size =3, fontface ="bold", color ="#FF7347") +annotate("text", x =2, y = filtered_sums$total_sum[2], label = scales::dollar(filtered_sums$total_sum[2], prefix ="$", big.mark =","), vjust =-0.2, size =3, fontface ="bold", color ="darkgreen")
# Create a filtered monthly summary using the already-filtered data (filtered_data)filtered_monthly_summary <- filtered_data %>%rename(month = mes) %>%# Assuming 'mes' exists as the month column in the datagroup_by(source_grouped, año, month) %>%summarise(total_amount =sum(as.numeric(cantidad), na.rm =TRUE)) %>%# Add month number and abbreviationmutate(month_num =match(tolower(month), tolower(month.abb)),month_abbr =month(month_num, label =TRUE, abbr =TRUE),fecha =make_date(year = año, month = month_num, day =days_in_month(month_num)))# Box plot graph using the filtered monthly summaryggplot(filtered_monthly_summary, aes(x = total_amount, y = source_grouped, fill = source_grouped)) +geom_boxplot() +scale_x_continuous(labels = scales::dollar_format(prefix ="$", big.mark =",")) +labs(title =paste0("Distribution of Monthly Amounts by Source Group (Last ", month_filter," Months)"),x ="Total Amount",y ="Source Group",fill ="Source Group") +theme_minimal() +theme(axis.text.x =element_text(angle =45, hjust =1))
Code
# Box plot graph with vertical orientation x<-ggplot(filtered_monthly_summary, aes(x = source_grouped, y = total_amount, fill = source_grouped)) +geom_boxplot() +scale_y_continuous(labels = scales::dollar_format(prefix ="$", big.mark =",")) +labs(title =paste0("Distribution of Monthly Amounts by Source Group (Last ", month_filter, " Months)"),x ="Source Group",y ="Total Amount",fill ="Source Group") +theme_minimal() +theme(axis.text.x =element_text(angle =45, hjust =1)) # Optional: Rotate x-axis labels for readability
Caja (IQR): La caja representa el rango intercuartílico. Contiene el 50% central de los datos.
Parte inferior de la caja (Q1): Este es el primer cuartil, o el percentil 25. Esto significa que el 25% de los datos están por debajo de este valor.
Parte superior de la caja (Q3): Este es el tercer cuartil, o el percentil 75. Esto significa que el 75% de los datos están por debajo de este valor, y el 25% están por encima.
Línea dentro de la caja: Esta línea representa la mediana.
Code
# Create a filtered monthly summary using the already-filtered data (filtered_data)filtered_monthly_summary <- filtered_data %>%rename(month = mes) %>%# Assuming 'mes' exists as the month column in the datagroup_by(source_grouped, source, año, month) %>%summarise(total_amount =sum(as.numeric(cantidad), na.rm =TRUE)) %>%# Add month number and abbreviationmutate(month_num =match(tolower(month), tolower(month.abb)),month_abbr =month(month_num, label =TRUE, abbr =TRUE),fecha =make_date(year = año, month = month_num, day =days_in_month(month_num)))# Improved boxplot using the existing monthly_summary dataframeggplot(filtered_monthly_summary, aes(x = total_amount, y = source, fill = source)) +geom_boxplot(outlier.colour ="red", outlier.shape =16, outlier.size =2) +scale_x_continuous(labels =dollar_format(prefix ="$", big.mark =",")) +labs(title ="Distribution of Expenses by Classification",subtitle =paste0("(Last ", month_filter," Months)"),x ="Total Monthly Sum (MXN)",y ="Classification",fill ="Classification") +theme_minimal(base_size =14) +theme(plot.title =element_text(face ="bold"),legend.position ="none") +scale_fill_brewer(palette ="Set2")
Code
# Group and summarize monthly_summarymonthly_grouped_summary <- monthly_summary %>%group_by(source_grouped, año, month_num, month_abbr, fecha) %>%summarise(total_amount =sum(total_amount, na.rm =TRUE), .groups ="drop") %>%arrange(año, month_num, source_grouped)t <- monthly_grouped_summary %>%ggplot(aes(x = fecha, y = total_amount, fill = source_grouped, color = source_grouped)) +geom_area(alpha =0.8, position ="identity") +# Prevent stacking of areasgeom_line(size =0.2) +# Ensure the lines are visiblescale_y_continuous(labels = scales::dollar_format(prefix ="$", big.mark =",")) +scale_fill_manual(values =c("expenses"="#FC9272", "sales"="#9ECBEA")) +# Match area colorsscale_color_manual(values =c("expenses"="#DE2D26", "sales"="#3182BD")) +# Match line colorslabs(title ="Monthly Amounts Over Time by Source Group",x ="Date",y ="Total Amount (MXN)",fill ="Source Group",color ="Source Group" ) +theme_minimal()# Use ggplotly with custom tooltipsggplotly(tooltip =c("x", "y", "color"))
Code
# Bar graph comparing sales and expenses per month, faceted by yearggplot(monthly_grouped_summary, aes(x = month_abbr, y = total_amount, fill = source_grouped)) +geom_bar(stat ="identity", position ="dodge", alpha =0.85) +scale_y_continuous(labels =dollar_format(prefix ="$", big.mark =",")) +labs(title ="Monthly Comparison of Sales and Expenses Faceted by Year",x ="Month",y ="Total Amount",fill ="Category") +facet_wrap(~year(fecha), ncol =1) +theme_minimal() +theme(axis.text.x =element_text(angle =45, hjust =1))
Monthly Expenses and Sales with Expenses Per Dollar
año
month_num
month_abbr
fecha
expenses
sales
expenses_per_dollar
2021
1
Jan
2021-01-31
$10,092
$16,138
0.63
2021
2
Feb
2021-02-28
$17,366
$15,599
1.11
2021
3
Mar
2021-03-31
$33,747
$65,745
0.51
2021
4
Apr
2021-04-30
NA
$16,094
NA
2021
5
May
2021-05-31
$25,636
$13,846
1.85
2021
6
Jun
2021-06-30
$9,940
$17,782
0.56
2021
7
Jul
2021-07-31
$11,078
$13,330
0.83
2021
8
Aug
2021-08-31
$15,627
$14,657
1.07
2021
9
Sep
2021-09-30
$26,127
$30,215
0.86
2021
10
Oct
2021-10-31
$20,620
$23,204
0.89
2021
11
Nov
2021-11-30
$17,931
$20,345
0.88
2021
12
Dec
2021-12-31
$17,456
$27,468
0.64
2022
1
Jan
2022-01-31
$43,699
$25,735
1.70
2022
2
Feb
2022-02-28
$35,667
$38,230
0.93
2022
3
Mar
2022-03-31
$74,804
$45,141
1.66
2022
4
Apr
2022-04-30
$48,851
$35,789
1.36
2022
5
May
2022-05-31
$52,711
$99,972
0.53
2022
6
Jun
2022-06-30
$104,539
$70,673
1.48
2022
7
Jul
2022-07-31
$65,317
$49,731
1.31
2022
8
Aug
2022-08-31
$41,297
$60,794
0.68
2022
9
Sep
2022-09-30
$80,735
$84,970
0.95
2022
10
Oct
2022-10-31
$91,353
$83,774
1.09
2022
11
Nov
2022-11-30
$74,580
$75,113
0.99
2022
12
Dec
2022-12-31
$58,496
$68,834
0.85
2023
1
Jan
2023-01-31
$125,896
$99,104
1.27
2023
2
Feb
2023-02-28
$70,993
$100,890
0.70
2023
3
Mar
2023-03-31
$80,916
$75,015
1.08
2023
4
Apr
2023-04-30
$85,522
$99,376
0.86
2023
5
May
2023-05-31
$128,135
$143,716
0.89
2023
6
Jun
2023-06-30
$119,896
$117,458
1.02
2023
7
Jul
2023-07-31
$75,074
$99,082
0.76
2023
8
Aug
2023-08-31
$131,614
$94,163
1.40
2023
9
Sep
2023-09-30
$99,544
$127,758
0.78
2023
10
Oct
2023-10-31
$110,080
$113,776
0.97
2023
11
Nov
2023-11-30
$115,573
$136,382
0.85
2023
12
Dec
2023-12-31
$115,372
$96,979
1.19
2024
1
Jan
2024-01-31
$129,854
$139,020
0.93
2024
2
Feb
2024-02-28
$83,361
$103,985
0.80
2024
3
Mar
2024-03-31
$107,710
$86,405
1.25
2024
4
Apr
2024-04-30
$87,312
$155,039
0.56
2024
5
May
2024-05-31
$105,009
$138,622
0.76
2024
6
Jun
2024-06-30
$175,354
$145,424
1.21
2024
7
Jul
2024-07-31
$101,181
$143,531
0.70
2024
8
Aug
2024-08-31
$147,002
$134,550
1.09
2024
9
Sep
2024-09-30
$233,820
$136,194
1.72
2024
10
Oct
2024-10-31
$81,552
$160,062
0.51
2024
11
Nov
2024-11-30
$208,344
$171,318
1.22
2024
12
Dec
2024-12-31
$243,364
$112,737
2.16
Dollar Expense Graph Ratio
Code
plot_data <- monthly_expenses_sales %>%mutate(adjusted_expenses_per_dollar = expenses_per_dollar -1, # Subtract 1bar_color =ifelse(adjusted_expenses_per_dollar >0, "darkred", "darkgreen"), # Assign colorsmonth_label =paste(month_abbr, año) # Create a combined label for months ) %>%arrange(fecha) %>%# Sort by fecha (chronological order)mutate(month_label =factor(month_label, levels =unique(month_label))) # Ensure chronological orderunique_years <- plot_data %>%distinct(año) %>%arrange(año)year_start_positions <-which(levels(fct_rev(plot_data$month_label)) %in%paste("Jan", unique_years$año))offset_positions <- year_start_positions +0.5# Shift the lines slightly upward# Step 2: Create the bar graph with reversed y-axisggplot(plot_data, aes(x = adjusted_expenses_per_dollar,y =fct_rev(month_label),fill = bar_color)) +geom_bar(stat ="identity",show.legend =FALSE,alpha =0.8) +# Horizontal barsgeom_vline(xintercept =0,linetype ="solid",color ="gray20",size =1) +geom_hline(yintercept = offset_positions,,linetype ="dotted",alpha =0.5,color ="gray10",size =0.7) +geom_text(aes(label =round(adjusted_expenses_per_dollar, 2),hjust =ifelse(adjusted_expenses_per_dollar >0, -0.2, 1.2)),color ="black",size =2.8) +# Add labels to the barsscale_fill_identity() +#scale_y_discrete(breaks = plot_data$month_label[seq(1, nrow(plot_data), by = 3)]) +scale_x_continuous(labels =dollar_format(prefix ="$", big.mark =",")) +labs(title ="Expense Per Dollar Sold",x ="Net per dolar sold",y ="Month",caption ="Bars to the left (green) indicate profits; bars to the right (red) indicate losses." ) +theme_minimal() +theme(plot.title =element_text(hjust =0.3, size =14), # Center and style titleaxis.title.x =element_text(hjust =0.3, size =12), # Center x-axis titleaxis.text.y =element_text(size =7), # Adjust size of month labelsaxis.title.y =element_text(size =12),plot.caption =element_text(hjust =0))
Product Graph
Code
# Data processing and filteringexpenses_details <- expenses_data %>%rename(month = mes) %>%group_by(clasificacion, año, month, producto) %>%summarise(total_amount =sum(cantidad)) %>%mutate(month_num =match(tolower(month), tolower(month.abb)),mes =month(month_num, label =TRUE, abbr =TRUE),fecha =make_date(year = año, month = month_num, day =days_in_month(month_num)) )# Create the ggplotexpense_plot <- expenses_details %>%ggplot(aes(x = fecha, y = total_amount, color = producto)) +geom_line() +scale_y_continuous(labels =dollar_format(prefix ="$", big.mark =",")) +labs(title ="Monthly Expenses by Product Category",subtitle ="Comparison of key expense categories over time",x ="Date",y ="Total Amount (MXN)",color ="Product Category") +theme_minimal(base_size =14) +theme(plot.title =element_text(face ="bold", hjust =0.5),plot.subtitle =element_text(face ="italic", hjust =0.5),axis.text.x =element_text(angle =45, hjust =1),legend.position ="bottom" )# Convert the ggplot2 plot to an interactive plotly graphinteractive_plot <-ggplotly(expense_plot)# Display the interactive plotinteractive_plot
5. Análisis de las ventas
5.1 Gráficas: Ventas a lo largo del tiempo
Code
# Calculate total sales and add necessary columnstotal_sales <- sales_data_long %>%group_by(fecha) %>%summarize(cantidad =sum(cantidad, na.rm =TRUE)) %>%mutate(clasificacion ="Total",source ="Ventas", año =year(fecha), mes =month(fecha, label =TRUE, abbr =TRUE), quarter =quarter(fecha))# Combine the total sales with the filtered datasales_data_plot <-bind_rows(sales_data_long, total_sales) %>%filter(!(año ==2020& mes %in%c("Jan", "Feb", "Mar", "Apr", "May", "Jun", "Jul", "Aug"))) %>%filter(!(año ==2024& mes %in%c("Sep", "Oct", "Nov", "Dec"))) # Choose a color palettepalette <-brewer.pal(n =4, name ="Set1") p <-ggplot(sales_data_plot, aes(x = fecha, y = cantidad, color = clasificacion, group = clasificacion)) +geom_line(data = . %>%filter(clasificacion =="Total"), size =0.8) +geom_line(data = . %>%filter(clasificacion !="Total"), size =0.5, alpha =0.5) +scale_color_manual(values = palette) +# Apply the chosen color palettelabs(title ="Sales Over Time",x =" ",y ="Sales (MXN)",color ="Sales Type") +scale_y_continuous(labels = scales::dollar_format(prefix ="$")) +theme_minimal() # Convert the ggplot2 plot to an interactive plotly graphggplotly(p)
5.2 Tendencias de ventas por plataforma
Code
sales_data_plot <- sales_data_plot %>%group_by(clasificacion) %>%arrange(fecha) %>%mutate(month_count =row_number()) %>%ungroup()# Create linear models for each 'clasificacion'models <- sales_data_plot %>%group_by(clasificacion) %>%do(model =lm(cantidad ~as.numeric(month_count), data = .))# Extract model coefficients and create equation stringsmodels <- models %>%mutate(slope =round(coef(model)[2], 2),intercept =round(coef(model)[1], 2),equation =paste0("y = ", intercept, " + ", slope, " * x"),y_position =seq(from =max(sales_data_plot$cantidad), to =max(sales_data_plot$cantidad) *2000, # Spread out the y positionslength.out =n()),x_position =as.Date("2021-01-01") # Set all x positions to the same date )# Choose a color palettepalette <-brewer.pal(n =4, name ="Set1") # Create ggplotp <-ggplot(sales_data_plot, aes(x = fecha, y = cantidad, color = clasificacion)) +geom_line(data = . %>%filter(clasificacion =="Total"), size =0.8) +geom_line(data = . %>%filter(clasificacion !="Total"), size =0.5, alpha =0.5) +geom_smooth(method ="lm", se = F, size =0.5) +scale_color_manual(values = palette) +geom_text(data = models,aes(x = x_position, y = y_position, label = equation, color = clasificacion), hjust =0, vjust =0, size =3.5, inherit.aes =FALSE) +scale_y_continuous(labels = scales::dollar_format(prefix ="$")) +labs(title ="Sales Trends by Platform", x ="Date", y ="Sales Quantity") +theme_minimal()# Convert to interactive plotly plotggplotly(p)
5.3 Modelos lineales y resultados
Code
# Create a sequential count column for each classificationsales_data_plot <- sales_data_plot %>%group_by(clasificacion) %>%arrange(fecha) %>%mutate(month_count =row_number()) %>%ungroup()# Create the linear model using the new count columnmodel_table_filtered <- sales_data_plot %>%group_by(clasificacion) %>%do(tidy(lm(cantidad ~ month_count, data = .))) %>%filter(term =="month_count") %>%mutate(significance =case_when( p.value <0.001~"***", p.value <0.01~"**", p.value <0.05~"*", p.value <0.1~".",TRUE~"" ) ) %>%select(Classification = clasificacion,Estimate = estimate,`Std. Error`= std.error,`t-value`= statistic,`p-value`= p.value,Significance = significance )# Print the table with stylingkable(model_table_filtered, format ="html", caption ="Modelos lineales por clasificación") %>%kable_styling(bootstrap_options =c("striped", "hover", "condensed"), full_width = F)
Modelos lineales por clasificación
Classification
Estimate
Std. Error
t-value
p-value
Significance
Deposito
1767.54556
211.01173
8.376528
0.000000
***
Efectivo
-63.14348
22.70274
-2.781315
0.008072
**
Pagina
1525.73976
128.69518
11.855454
0.000000
***
Total
3230.14184
226.65003
14.251672
0.000000
***
Resúmen de Estadística de Ventas
Depósito: Aumento estadísticamente significativo de xxxxxx MXN por mes (p < 0.001).
Efectivo: Disminución estadísticamente significativa de xxxxxx MXN por mes (p < 0.001).
Página: Aumento estadísticamente significativo de xxxxxx MXN por mes (p < 0.001).
Total: Aumento estadísticamente significativo de XXXXXX.XX MXN por mes (p < 0.001).
5.4 Modelos futuros de venta.
Code
# Step 1: Create a new column with year and quarter combinedcombined_data <- combined_data %>%mutate(year_quarter =paste(año, quarter, sep ="-Q"))# Step 2: Summarize data by year_quarter and sourcequarterly_data <- combined_data %>%group_by(source_grouped, source, year_quarter) %>%summarize(total_amount =sum(cantidad, na.rm =TRUE)) %>%ungroup() %>%filter(year_quarter !="2024-Q3")
Creating the model
Code
# Step 3: Split year_quarter into separate columns for modelingquarterly_data <- quarterly_data %>%separate(year_quarter, into =c("año", "quarter"), sep ="-Q") %>%mutate(año =as.numeric(año), quarter =as.numeric(quarter))# Step 4: Create a model for each source typemodels <- quarterly_data %>%group_by(source) %>%do(model =lm(total_amount ~ año + quarter, data = .))# Step 5: Generate projections for the next 5 years (20 quarters)future_years <-rep(seq(max(quarterly_data$año) +1, by =1, length.out =6), each =4)future_quarters <-rep(1:4, 6)projected_data <- models %>%rowwise() %>%do(data.frame(año = future_years, quarter = future_quarters,total_amount =predict(.$model, newdata =data.frame(año = future_years, quarter = future_quarters)),source = .$source)) %>%mutate(year_quarter =paste(año, quarter, sep ="-Q"))# Combine real and projected dataquarterly_data <- quarterly_data %>%mutate(year_quarter =paste(año, quarter, sep ="-Q"), type ="Real")projected_data <- projected_data %>%mutate(type ="Projected",source_grouped =case_when( source =="Costos"~"expenses", source =="Gastos"~"expenses", source =="Ventas"~"sales"))# Reorder columns in `projected_data` to match `quarterly_data`projected_data <- projected_data[, c("source_grouped","source","total_amount", "año", "quarter","type" , "year_quarter")]plot_data <-bind_rows(quarterly_data, projected_data)
Code
x <-ggplot(plot_data, aes(x = year_quarter, # Use 'año' for the x-axis (year)y = total_amount, color = source, linetype = type, group =interaction(source, type))) +# Ensure the grouping works across yearsgeom_line(size =0.5) +geom_point(size =0.5) +scale_y_continuous(labels = scales::dollar_format()) +labs(title ="Real and Projected Quarterly Data by Source",x ="Year", # Use "Year" for the x-axis labely ="Total Amount",color ="Source",linetype ="Data Type") +theme_minimal() +theme(axis.text.x =element_text(angle =50,color ="gray80",hjust =1, size =10)) +# Adjust label size for readabilityscale_x_discrete(breaks =function(x) x[seq(1, length(x), by =1)]) # Show fewer labels (every 4th)# Display the plotx
\(\beta_0\) is the intercept (the starting value of total_amount when both año and quarter are zero).
\(\beta_1\) is the coefficient for año, representing the expected change in total_amount for each one-unit increase in año, holding quarter constant.
\(\beta_2\) is the coefficient for quarter, representing the expected change in total_amount for each one-unit increase in quarter, holding año constant.
\(\epsilon\) represents the error term, which accounts for the variability in total_amount that cannot be explained by año and quarter.
Code
# Group Costos and Gastos into 'Expenses', and leave Ventas as 'Sales'plot_data_grouped <- plot_data %>%mutate(source_grouped =case_when( source %in%c("Costos", "Gastos") ~"Expenses", source =="Ventas"~"Sales" )) %>%group_by(year_quarter, source_grouped, type) %>%summarize(total_amount =sum(total_amount, na.rm =TRUE)) %>%ungroup()x <-ggplot(plot_data_grouped, aes(x = year_quarter, y = total_amount, color = source_grouped, # Use 'source_grouped' for colorlinetype = type, group =interaction(source_grouped, type))) +# Ensure the grouping worksgeom_line(size =0.5) +geom_point(size =0.5) +scale_y_continuous(labels = scales::dollar_format()) +labs(title ="Real and Projected Quarterly Data (Expenses vs. Sales)",x ="Year",y ="Total Amount",color ="Category",linetype ="Data Type") +theme_minimal() +theme(axis.text.x =element_text(angle =50, color ="gray80",hjust =1, size =10)) +scale_x_discrete(breaks =function(x) x[seq(1, length(x), by =1)])# Display the plotx
6. Estado de Resultados
La tabla de estado de resultados general muestra un resumen financiero de Nala. Muestra las ventas, costos, y la utilidad neta. Esta tabla ayuda para ver como los ingresos y gastos afectan las ganancias finales de la empresa.
6.1 Cálculo de Utilidades
Code
# Step 1: Process and calculate the financial metricsestado_resultados <- combined_data %>%group_by(año, clasificacion) %>%summarize(total_cantidad =sum(cantidad, na.rm =TRUE)) %>%pivot_wider(names_from = clasificacion, values_from = total_cantidad, values_fill =0) %>%mutate(`Ventas en Deposito`=`Ventas en Deposito`,`Ventas en Efectivo`=`Ventas en Efectivo`,`Ventas en Pagina`=`Ventas en Pagina`,Ventas =`Ventas en Deposito`+`Ventas en Efectivo`+`Ventas en Pagina`,Costo_de_Ventas =`Costos de Ventas`,Utilidad_Bruta = Ventas - Costo_de_Ventas,Gastos_de_ventas =`Gastos de Ventas`,Gastos_administrativos =`Gastos de Admin`,Utilidad_de_Operacion = Utilidad_Bruta - Gastos_de_ventas - Gastos_administrativos,Gastos_financieros =`Gastos Financieros`,Otros_gastos =0, Utilidad_Periodo = Utilidad_de_Operacion - Gastos_financieros,Utilidad_Acumulada =0,Utilidad_Neta = Utilidad_de_Operacion)# Step 2: Calculate Utilidad Acumulada and Utilidad Netafor (i in2:nrow(estado_resultados)) { estado_resultados$Utilidad_Acumulada[i] <- estado_resultados$Utilidad_Neta[i -1] estado_resultados$Utilidad_Neta[i] <- estado_resultados$Utilidad_Periodo[i] + estado_resultados$Utilidad_Acumulada[i]}# Step 3: Format numbers as currencyestado_resultados <- estado_resultados %>%mutate(across(where(is.numeric), ~paste0("$ ", formatC(round(.), format ="f", big.mark =",", digits =0))))# Step 4: Transpose the table and reorder the rowsdesired_order <-c("Ventas en Deposito","Ventas en Efectivo","Ventas en Pagina","Ventas", "Costo_de_Ventas", "Utilidad_Bruta", "Gastos_de_ventas", "Gastos_administrativos", "Utilidad_de_Operacion", "Gastos_financieros", "Otros_gastos", "Utilidad_Periodo", "Utilidad_Acumulada", "Utilidad_Neta")estado_resultados <- estado_resultados %>%pivot_longer(-año, names_to ="Metrica", values_to ="Value") %>%mutate(Metrica =factor(Metrica, levels = desired_order)) %>%arrange(Metrica) %>%pivot_wider(names_from = año, values_from = Value) %>%na.omit() %>%mutate(across(where(is.numeric), scales::dollar_format(prefix ="$")))
La tabla de márgenes presenta los márgenes financieros como porcentaje de las ventas. Estos márgenes ayudan a evaluar la eficiencia y rentabilidad de Nala, mostrando cómo los costos y gastos impactan las ganancias.
Code
# Convert all monetary values to numericestado_resultados <- estado_resultados %>%mutate(across(`2021`:`2024`, ~as.numeric(str_remove_all(.x, "\\$|,"))))# Now, calculate the percentages using the numeric valuesventas <- estado_resultados %>%filter(Metrica =="Ventas") %>%select(-Metrica)# Function to calculate percentages for each row based on 'Ventas'calculate_percentages <-function(row, ventas) {if (row["Metrica"] =="Ventas") {# Set Ventas percentage to 100 for display purposes percentages <-rep(100, length(row) -1) } else {# Calculate percentages percentages <-as.numeric(row[-1]) / ventas *100 percentages <-round(percentages, 1) # Round to one decimal }# Return the row with percentages formatted with a percentage sign percentages <-paste0(percentages, "%")return(c(row[1], percentages))}# Apply the function to each rowpercentage_results <-t(apply(estado_resultados, 1, calculate_percentages, ventas =as.numeric(ventas)))# Convert the results back to a dataframe and update column namespercentage_results_df <-as.data.frame(percentage_results)colnames(percentage_results_df) <-colnames(estado_resultados)# Print the percentage results dataframe with kablekable(percentage_results_df, format ="html", caption ="Margen de Resultados") %>%kable_styling(bootstrap_options =c("striped", "hover", "condensed"), full_width = F) %>%row_spec(c(6, 9, 12), background ="lightyellow", bold =TRUE)
Margen de Resultados
Metrica
2021
2022
2023
2024
Ventas en Deposito
41%
65.5%
56%
50%
Ventas en Efectivo
18.4%
6.3%
2.4%
2.2%
Ventas en Pagina
40.6%
28.2%
41.6%
47.8%
Ventas
100%
100%
100%
100%
Costo_de_Ventas
64.4%
58.2%
56.3%
50.9%
Utilidad_Bruta
35.6%
41.8%
43.7%
49.1%
Gastos_de_ventas
3.7%
4.7%
8%
10.1%
Gastos_administrativos
6.8%
41.7%
32.3%
38%
Utilidad_de_Operacion
25.1%
-4.5%
3.5%
1%
Gastos_financieros
0%
0%
0%
5.7%
Otros_gastos
0%
0%
0%
0%
Utilidad_Periodo
25.1%
-4.5%
3.5%
-4.7%
Utilidad_Acumulada
0%
9.3%
2.7%
5%
Utilidad_Neta
25.1%
4.8%
6.2%
0.2%
6.3 Crecimiento de Nala porcentual
Code
# Calculate year-over-year growth for all relevant metricsventas_growth <- estado_resultados %>%filter(Metrica =="Ventas") %>%mutate(Growth_2022 = scales::percent((`2022`-`2021`) /`2021`),Growth_2023 = scales::percent((`2023`-`2022`) /`2022`),Growth_2024 = scales::percent((`2024`-`2023`) /`2023`) )# Cost of Goods Sold (Costo_de_Ventas)cost_growth <- estado_resultados %>%filter(Metrica =="Costo_de_Ventas") %>%mutate(Growth_2022 = scales::percent((`2022`-`2021`) /`2021`),Growth_2023 = scales::percent((`2023`-`2022`) /`2022`),Growth_2024 = scales::percent((`2024`-`2023`) /`2023`) )# Expenses (Gastos_de_ventas and Gastos_administrativos)expenses_growth <- estado_resultados %>%filter(Metrica %in%c("Gastos_de_ventas", "Gastos_administrativos")) %>%mutate(Growth_2022 = scales::percent((`2022`-`2021`) /`2021`),Growth_2023 = scales::percent((`2023`-`2022`) /`2022`),Growth_2024 = scales::percent((`2024`-`2023`) /`2023`) )# Utility (Utilidad_Bruta, Utilidad_de_Operacion, Utilidad_Neta)utility_growth <- estado_resultados %>%filter(Metrica %in%c("Utilidad_Bruta", "Utilidad_de_Operacion", "Utilidad_Neta")) %>%mutate(Growth_2022 = scales::percent((`2022`-`2021`) /`2021`),Growth_2023 = scales::percent((`2023`-`2022`) /`2022`),Growth_2024 = scales::percent((`2024`-`2023`) /`2023`) )# Combine all into a single data framecombined_growth <-bind_rows(ventas_growth, cost_growth, expenses_growth, utility_growth) %>%select(Metrica, Growth_2022, Growth_2023, Growth_2024)# Display the combined growth tablecombined_growth %>%kable(format ="html", caption ="Year-over-Year Growth for Costs, Expenses, and Utility Metrics (as Percentage)", align ='c') %>%kable_styling(bootstrap_options =c("hover", "condensed"), full_width = F)
Year-over-Year Growth for Costs, Expenses, and Utility Metrics (as Percentage)
Metrica
Growth_2022
Growth_2023
Growth_2024
Ventas
169%
76%
25%
Costo_de_Ventas
143%
71%
13%
Gastos_de_ventas
241%
203%
57%
Gastos_administrativos
1 545%
37%
47%
Utilidad_Bruta
217%
84%
40%
Utilidad_de_Operacion
-148%
-235%
-65%
Utilidad_Neta
-48%
127%
-96%
7.0 Next Steps
Organizar una session en enero 2025, o diciembre 2024
Empezar el Doc para temas de impacto
Empezar a organizar y pensar en emisiones
Source Code
---title: "Financial Data Analysis of Nala"author: "Javier Patrón"date: "`r Sys.Date()`"format: html: toc: true toc_depth: 2 code-fold: true code-tools: trueexecute: warning: false message: false---# Análisis Financiero de NalaEste documento proporciona un análisis de los datos financieros de la empresa Nala.## Índice1. **Cargar bibliotecas necesarias**2. **Leer los datos de gastos y ventas de NALA 2020-2024**3. **Combinar los datos**4. **Análisis financiero** 4.1 Ventas totales vs. gastos<!-- -->5. **Análisis de las ventas** 5.1 Gráficas: Ventas a lo largo del tiempo 5.2 Tendencias de ventas por plataforma 5.3 Modelos lineales y resultados6. **Estado de Resultados** 6.1 Cálculo de Utilidades 6.2 Cálculo de márgenes y porcentajes## 1. Cargar bibliotecas```{r}library(tidyverse) # For data manipulation and visualization; includes readr, dplyr, ggplot2library(janitor) # For cleaning data and making tabular outputs cleanerlibrary(scales) # For scaling graphical outputslibrary(RColorBrewer) # For using color palettes in visualizationslibrary(plotly) # For interactive web-based graphicslibrary(stringr) # For string manipulation (also part of tidyverse but listed for clarity)library(knitr) # For dynamic report generation in Rlibrary(kableExtra) # For enhancing 'kable' tables in R Markdownlibrary(broom) # For converting statistical analysis objects into tidy data frameslibrary(DT) # For creating interactive tables in R```## 2. Cargar los datos#### 2.1 Leer datos de gastos```{r}# Step 1: Read and clean the raw data, and rename it to raw_expense_dataexpenses_file_path <-"/Users/javier.patron/Downloads/pagos Nala - Expenses Data Analisis (8).csv"raw_expense_data <-read_csv(expenses_file_path) %>%clean_names() %>%# Set the first row as column names and remove the first row { setNames(.[-1,], as.character(.[1,])) } %>%clean_names() %>%select_if(~!all(is.na(.)))# Step 2: Create three separate data frames for each set of columnsdf1 <- raw_expense_data %>%select(clasificacion, fecha, empresa, cantidad, producto)df2 <- raw_expense_data %>%select(clasificacion = clasificacion_2, fecha = fecha_2, empresa = empresa_2, cantidad = cantidad_2, producto = producto_2)df3 <- raw_expense_data %>%select(clasificacion = clasificacion_3, fecha = fecha_3, empresa = empresa_3, cantidad = cantidad_3, producto = producto_3)df4 <- raw_expense_data %>%select(clasificacion = clasificacion_4, fecha = fecha_4, empresa = empresa_4, cantidad = cantidad_4, producto = producto_4)# Step 3: Combine all three data frames and name it as expenses_dataexpenses_data <-bind_rows(df1, df2, df3, df4) %>%# Remove rows where all columns are NA (if any)filter(!if_all(everything(), is.na))# Clean the dataexpenses_data <- expenses_data %>%mutate(fecha = lubridate::dmy(fecha), año = lubridate::year(fecha),mes = lubridate::month(fecha, label =TRUE),quarter = lubridate::quarter(fecha),source =case_when(str_detect(clasificacion, "Gastos") ~"Gastos",str_detect(clasificacion, "Costos") ~"Costos",TRUE~"other"# This handles any other cases, if they exist )) %>%mutate(cantidad =gsub("\\$", "", cantidad), cantidad =gsub(",", "", cantidad), cantidad =as.numeric(cantidad)) %>%select("source", "clasificacion", "fecha", "año", "mes", "quarter", "empresa", "cantidad", "producto") %>%filter(!str_detect(año, "2025"))```#### 2.2 Leer datos de ventas```{r}# Read the CSV filesales_file_path <-"/Users/javier.patron/Downloads/pagos Nala - Sales Data Analisis (2).csv"sales_data <-read_csv(sales_file_path) %>%clean_names()# Clean and Organize the datasales_data_long <- sales_data %>%pivot_longer(cols = jan:dec, names_to ="month", values_to ="cantidad") %>%rename(año = ano, tipo_venta = tipo) %>%mutate(month =match(tolower(month), tolower(month.abb)), # Convert month abbreviation to numbermes =month(month, label =TRUE, abbr =TRUE), # Convert to lubridate month formatsource ="Ventas",fecha =make_date(year = año, month =month(month), day =days_in_month(month(month, label =FALSE))), # Add last day of the monthclasificacion = tipo_venta,quarter =quarter(month),cantidad =as.numeric(gsub("[$,]", "", cantidad))) %>%select("source", "clasificacion", "fecha", "año", "mes", "quarter" , "cantidad")#sales_data_long <- sales_data_long %>% # arrange(fecha) %>% # slice(1:169)```## 3. Combinar los datos```{r}combined_data <-full_join( expenses_data %>%select(-empresa, -producto), sales_data_long) %>%mutate(clasificacion =case_when( clasificacion =="Deposito"~"Ventas en Deposito", clasificacion =="Efectivo"~"Ventas en Efectivo", clasificacion =="Pagina"~"Ventas en Pagina",TRUE~ clasificacion))combined_data <-full_join( expenses_data %>%select(-empresa, -producto), sales_data_long) %>%mutate(clasificacion =case_when( clasificacion =="Gastos Admin"~"Gastos de Admin", clasificacion =="Gastos Ventas"~"Gastos de Ventas", clasificacion =="Costos Ventas"~"Costos de Ventas", clasificacion =="Gastos Financieros"~"Gastos Financieros", clasificacion =="Deposito"~"Ventas en Deposito", clasificacion =="Efectivo"~"Ventas en Efectivo", clasificacion =="Pagina"~"Ventas en Pagina",TRUE~ clasificacion))```#### 3.1 Tabla de Gastos```{r, echo=FALSE, collapse = T}# Create an interactive table for expenses_data with smaller fontdatatable(expenses_data, options = list(autoWidth = TRUE, searching = TRUE), caption = 'Expenses Data') %>% formatStyle( columns = names(expenses_data), fontSize = '10px')```#### 3.2 Tabla de Ventas```{r}# Create an interactive table for expenses_data with smaller fontdatatable(sales_data, options =list(autoWidth =TRUE, searching =TRUE), caption ='Expenses Data') %>%formatStyle(columns =names(sales_data),fontSize ='10px') ```## 4. Análisis Financiero### 4.1 Ventas Totales vs. Gastos```{r}# Summarize total sales and expenses across the entire datasettotal_summary <- combined_data %>%group_by(source) %>%summarize(total_amount =sum(as.numeric(cantidad), na.rm =TRUE))# Define a custom color palette similar to Set1 or Set2custom_palette <-c("#E41A1C", "#377EB8", "#4DAF4A")# Create the ggplot objectp <-ggplot(total_summary, aes(x = source, y = total_amount, fill = source)) +geom_bar(stat ="identity") +scale_fill_manual(values = custom_palette) +# Apply the custom palettelabs(title ="Total Sales vs. Expenses (Overall History)",y ="Total Amount (MXN)",x ="") +scale_y_continuous(labels = scales::dollar_format(prefix ="$")) +theme_minimal()# Convert the ggplot2 plot to an interactive plotly graphggplotly(p)``````{r}# Define custom colors based on classificationcustom_colors <-c("Ventas en Deposito"="#6EBF74", # Green"Ventas en Efectivo"="#499F54", # Green"Ventas en Pagina"="#28703B", # Green"Costos de Ventas"="#FFDBA9", # Pink"Gastos de Admin"="#FF9F40", # Coral"Gastos de Ventas"="#FF8347", # Tomato"Gastos Financieros"="#FF5100")# Explicitly categorize each 'clasificacion' into 'sales' or 'expenses'combined_data <- combined_data %>%mutate(source_grouped =case_when( clasificacion =="Ventas en Deposito"~"sales", clasificacion =="Ventas en Efectivo"~"sales", clasificacion =="Ventas en Pagina"~"sales", clasificacion =="Costos de Ventas"~"expenses", clasificacion =="Gastos de Admin"~"expenses", clasificacion =="Gastos de Ventas"~"expenses", clasificacion =="Gastos Financieros"~"expenses",TRUE~"other"# This is just a fallback case ))# Summarize total sales and combined expenses across the entire datasettotal_summary <- combined_data %>%group_by(source_grouped, clasificacion) %>%summarize(total_amount =sum(as.numeric(cantidad), na.rm =TRUE)) %>%mutate(total_sum =sum(total_amount))total_sums <- total_summary %>%group_by(source_grouped) %>%summarize(total_sum =sum(total_amount))# Bar graph of total sales vs. stacked combined expenses (Overall History) with custom colorsggplot(total_summary, aes(x = source_grouped,y = total_amount,fill = clasificacion)) +geom_bar(stat ="identity",position ="stack") +labs(title ="Total Sales vs. Expenses (Overall History)",y ="Total Amount (MXN)",x ="") +scale_fill_manual(values = custom_colors) +scale_y_continuous(labels =dollar_format(prefix ="$",big.mark =",")) +theme_minimal() +geom_text(aes(label = scales::dollar(total_amount, prefix ="$", big.mark =",")), position =position_stack(vjust =0.5), size =3, color ="white") +# Add specific text for each total sum from the total_sums tableannotate("text", x =1, y = total_sums$total_sum[1], label = scales::dollar(total_sums$total_sum[1],prefix ="$",big.mark =","), vjust =-0.5,size =2.5,fontface ="bold",color ="#FF7347") +annotate("text",x =2,y = total_sums$total_sum[2], label = scales::dollar(total_sums$total_sum[2],prefix ="$",big.mark =","), vjust =-0.2,size =2.5,fontface ="bold",color ="darkgreen")``````{r}# Summarize total sales and expenses per yearyearly_summary <- combined_data %>%group_by(año, source_grouped, clasificacion) %>%summarize(total_amount =sum(as.numeric(cantidad), na.rm =TRUE)) %>%ungroup()# Bar graph of total sales vs. expenses per year with facets and annotationsggplot(yearly_summary, aes(x = source_grouped, y = total_amount, fill = clasificacion)) +geom_bar(stat ="identity", position ="stack") +labs(title ="Total Sales vs. Expenses per Year", y ="Total Amount (MXN)", x ="") +scale_fill_manual(values = custom_colors) +scale_y_continuous(labels =dollar_format(prefix ="$", big.mark =",")) +theme_minimal() +facet_wrap(~año) ``````{r}# Assuming combined_data has a date column named "fecha"month_filter <-13# POR REVISAR CON REGINA# Filter data for the last XX months#filtered_data <- combined_data %>%# mutate(fecha = as.Date(fecha, format = "%d/%m/%Y")) %>% # Ensure "fecha" is in Date format# filter(fecha >= as.Date(Sys.Date() %m-% months(month_filter))) # Filter last XXX months# Ensure combined_data has a properly formatted "fecha" columnfiltered_data <- combined_data %>%mutate(fecha =as.Date(fecha, format ="%d/%m/%Y")) %>%# Convert to Date formatfilter(year(fecha) *12+month(fecha) >=year(Sys.Date()) *12+month(Sys.Date()) - month_filter)# Summarize total sales and expenses per year and other groupingsfiltered_summary <- filtered_data %>%mutate(año =year(fecha)) %>%# Extract year from fechagroup_by(source_grouped, clasificacion) %>%summarize(total_amount =sum(as.numeric(cantidad), na.rm =TRUE)) %>%ungroup()filtered_sums <- filtered_summary %>%group_by(source_grouped) %>%summarize(total_sum =sum(total_amount))# Assuming filtered_data has already been created and summarized as 'yearly_summary'ggplot(filtered_summary, aes(x = source_grouped, y = total_amount, fill = clasificacion)) +geom_bar(stat ="identity", position ="stack") +labs(title =paste0("Total Sales vs. Expenses (Last ", month_filter," Months)"), y ="Total Amount (MXN)", x ="") +scale_fill_manual(values = custom_colors) +scale_y_continuous(labels = scales::dollar_format(prefix ="$", big.mark =",")) +theme_minimal() +# Adding text labels inside the barsgeom_text(aes(label = scales::dollar(total_amount, prefix ="$", big.mark =",")), position =position_stack(vjust =0.5), size =2.5, color ="white") +# Annotate specific sums on the chart (e.g., for each category)annotate("text", x =1, y = filtered_sums$total_sum[1], label = scales::dollar(filtered_sums$total_sum[1], prefix ="$", big.mark =","), vjust =-0.5, size =3, fontface ="bold", color ="#FF7347") +annotate("text", x =2, y = filtered_sums$total_sum[2], label = scales::dollar(filtered_sums$total_sum[2], prefix ="$", big.mark =","), vjust =-0.2, size =3, fontface ="bold", color ="darkgreen")```### 4.2 Análisis de Gastos#### Box Plots```{r}# Cleaned dataframe manipulationmonthly_summary <- combined_data %>%rename(month = mes) %>%group_by(source_grouped, source, año, month) %>%summarise(total_amount =sum(cantidad)) %>%mutate(month_num =match(tolower(month), tolower(month.abb)),month_abbr =month(month_num, label =TRUE, abbr =TRUE), fecha =make_date(year = año, month = month_num, day =days_in_month(month_num))) %>%arrange(año, month_num, source_grouped)``````{r}# Create a filtered monthly summary using the already-filtered data (filtered_data)filtered_monthly_summary <- filtered_data %>%rename(month = mes) %>%# Assuming 'mes' exists as the month column in the datagroup_by(source_grouped, año, month) %>%summarise(total_amount =sum(as.numeric(cantidad), na.rm =TRUE)) %>%# Add month number and abbreviationmutate(month_num =match(tolower(month), tolower(month.abb)),month_abbr =month(month_num, label =TRUE, abbr =TRUE),fecha =make_date(year = año, month = month_num, day =days_in_month(month_num)))# Box plot graph using the filtered monthly summaryggplot(filtered_monthly_summary, aes(x = total_amount, y = source_grouped, fill = source_grouped)) +geom_boxplot() +scale_x_continuous(labels = scales::dollar_format(prefix ="$", big.mark =",")) +labs(title =paste0("Distribution of Monthly Amounts by Source Group (Last ", month_filter," Months)"),x ="Total Amount",y ="Source Group",fill ="Source Group") +theme_minimal() +theme(axis.text.x =element_text(angle =45, hjust =1)) # Box plot graph with vertical orientation x<-ggplot(filtered_monthly_summary, aes(x = source_grouped, y = total_amount, fill = source_grouped)) +geom_boxplot() +scale_y_continuous(labels = scales::dollar_format(prefix ="$", big.mark =",")) +labs(title =paste0("Distribution of Monthly Amounts by Source Group (Last ", month_filter, " Months)"),x ="Source Group",y ="Total Amount",fill ="Source Group") +theme_minimal() +theme(axis.text.x =element_text(angle =45, hjust =1)) # Optional: Rotate x-axis labels for readability```**Caja (IQR): La caja representa el rango intercuartílico. Contiene el 50% central de los datos.**Parte inferior de la caja (Q1): Este es el primer cuartil, o el percentil 25. Esto significa que el 25% de los datos están por debajo de este valor.Parte superior de la caja (Q3): Este es el tercer cuartil, o el percentil 75. Esto significa que el 75% de los datos están por debajo de este valor, y el 25% están por encima.Línea dentro de la caja: Esta línea representa la mediana.```{r}# Create a filtered monthly summary using the already-filtered data (filtered_data)filtered_monthly_summary <- filtered_data %>%rename(month = mes) %>%# Assuming 'mes' exists as the month column in the datagroup_by(source_grouped, source, año, month) %>%summarise(total_amount =sum(as.numeric(cantidad), na.rm =TRUE)) %>%# Add month number and abbreviationmutate(month_num =match(tolower(month), tolower(month.abb)),month_abbr =month(month_num, label =TRUE, abbr =TRUE),fecha =make_date(year = año, month = month_num, day =days_in_month(month_num)))# Improved boxplot using the existing monthly_summary dataframeggplot(filtered_monthly_summary, aes(x = total_amount, y = source, fill = source)) +geom_boxplot(outlier.colour ="red", outlier.shape =16, outlier.size =2) +scale_x_continuous(labels =dollar_format(prefix ="$", big.mark =",")) +labs(title ="Distribution of Expenses by Classification",subtitle =paste0("(Last ", month_filter," Months)"),x ="Total Monthly Sum (MXN)",y ="Classification",fill ="Classification") +theme_minimal(base_size =14) +theme(plot.title =element_text(face ="bold"),legend.position ="none") +scale_fill_brewer(palette ="Set2")``````{r}# Group and summarize monthly_summarymonthly_grouped_summary <- monthly_summary %>%group_by(source_grouped, año, month_num, month_abbr, fecha) %>%summarise(total_amount =sum(total_amount, na.rm =TRUE), .groups ="drop") %>%arrange(año, month_num, source_grouped)t <- monthly_grouped_summary %>%ggplot(aes(x = fecha, y = total_amount, fill = source_grouped, color = source_grouped)) +geom_area(alpha =0.8, position ="identity") +# Prevent stacking of areasgeom_line(size =0.2) +# Ensure the lines are visiblescale_y_continuous(labels = scales::dollar_format(prefix ="$", big.mark =",")) +scale_fill_manual(values =c("expenses"="#FC9272", "sales"="#9ECBEA")) +# Match area colorsscale_color_manual(values =c("expenses"="#DE2D26", "sales"="#3182BD")) +# Match line colorslabs(title ="Monthly Amounts Over Time by Source Group",x ="Date",y ="Total Amount (MXN)",fill ="Source Group",color ="Source Group" ) +theme_minimal()# Use ggplotly with custom tooltipsggplotly(tooltip =c("x", "y", "color"))``````{r}# Bar graph comparing sales and expenses per month, faceted by yearggplot(monthly_grouped_summary, aes(x = month_abbr, y = total_amount, fill = source_grouped)) +geom_bar(stat ="identity", position ="dodge", alpha =0.85) +scale_y_continuous(labels =dollar_format(prefix ="$", big.mark =",")) +labs(title ="Monthly Comparison of Sales and Expenses Faceted by Year",x ="Month",y ="Total Amount",fill ="Category") +facet_wrap(~year(fecha), ncol =1) +theme_minimal() +theme(axis.text.x =element_text(angle =45, hjust =1))```#### Tablas Resumen```{r}# Calculate the yearly totals and profityearly_summary_table <- monthly_summary %>%group_by(year =year(fecha)) %>%summarise(total_expenses =sum(total_amount[source_grouped =="expenses"]),total_sales =sum(total_amount[source_grouped =="sales"]),profit = total_sales - total_expenses) %>%mutate(total_expenses = scales::dollar(total_expenses, prefix ="$", big.mark =","),total_sales = scales::dollar(total_sales, prefix ="$", big.mark =","),profit = scales::dollar(profit, prefix ="$", big.mark =","))kable(yearly_summary_table, align ="c", caption ="Year-by-Year Financial Summary") %>%kable_styling(bootstrap_options =c("striped", "hover", "condensed"), full_width = F)``````{r}# Calculate the monthly totals for all yearsmonthly_summary_table <- monthly_summary %>%group_by(year =year(fecha), month_abbr) %>%summarise(total_expenses =sum(total_amount[source_grouped =="expenses"]),total_sales =sum(total_amount[source_grouped =="sales"]),profit = total_sales - total_expenses ) %>%arrange(year, match(month_abbr, month.abb)) %>%mutate(total_expenses = scales::dollar(total_expenses, prefix ="$", big.mark =","),total_sales = scales::dollar(total_sales, prefix ="$", big.mark =","),profit = scales::dollar(profit, prefix ="$", big.mark =","))```#### Tabla resumen de ganancias y perdidas mes con mes```{r}# Ensure that 'profit' is numericmonthly_summary_table <- monthly_summary_table %>%mutate(profit_numeric =as.numeric(gsub("[$,]", "", profit)),profit_formatted = scales::comma(profit_numeric), # Format with commasdate =as.Date(paste(year, month_abbr, "01", sep ="-"), format ="%Y-%b-%d"))# Create the ggplot objectp <-ggplot(monthly_summary_table, aes(x = date, y = profit_numeric, fill = profit_numeric >0, text =paste("Date: ", format(date, "%b %Y"), "<br>Profit: $", profit_formatted))) +# Use formatted profitgeom_bar(stat ="identity") +scale_y_continuous(labels =dollar_format(prefix ="$", big.mark =",")) +scale_x_date(date_labels ="%Y", date_breaks ="1 year", minor_breaks =NULL) +scale_fill_manual(values =c("TRUE"="darkgreen", "FALSE"="coral")) +labs(title ="Monthly Profits Over Time (2020-2024)",x ="Year",y ="Profit ($)" ) +theme_minimal() +theme(axis.text.x =element_text(angle =0, hjust =1, vjust =0.5),legend.position ="none")# Convert the ggplot object to an interactive plotly objectp_interactive <-ggplotly(p, tooltip ="text")# Display the interactive plotp_interactive``````{r}## SUPER IMPORTANT FACTORS# Function to remove dollar signs and commas, and convert to numericclean_numeric <-function(x) {as.numeric(gsub("[\\$,]", "", x))}yearly_summary_table <- yearly_summary_table %>%mutate(total_expenses =clean_numeric(total_expenses),total_sales =clean_numeric(total_sales),profit =clean_numeric(profit))yearly_summary_table <- yearly_summary_table %>%mutate(year =as.character(year))yearly_summary_table <- yearly_summary_table %>%mutate(expenses_per_dollar =ifelse(total_sales ==0, NA, total_expenses / total_sales))summary_row <- yearly_summary_table %>%summarise(year ="All", total_expenses =sum(total_expenses, na.rm =TRUE),total_sales =sum(total_sales, na.rm =TRUE),profit =sum(profit, na.rm =TRUE)) %>%mutate(expenses_per_dollar =ifelse(total_sales ==0, NA, total_expenses / total_sales))# Step 5: Add the summary row to the original data frameyearly_summary_table_with_total <- yearly_summary_table %>%bind_rows(summary_row)# Format the numeric columns with $ and commasformatted_table <- yearly_summary_table_with_total %>%mutate(total_expenses =dollar(total_expenses),total_sales =dollar(total_sales),profit =dollar(profit))# Simple formatted table with kableformatted_table %>%kable(format ="html", caption ="Yearly Summary Table with Total", align ="c") %>%kable_styling(bootstrap_options =c("striped", "hover", "condensed"), full_width = F)``````{r}# Step 1: Calculate expenses_per_dollar by pivoting and performing divisionmonthly_expenses_sales <- monthly_grouped_summary %>%pivot_wider(names_from = source_grouped, values_from = total_amount) %>%mutate(expenses_per_dollar =ifelse(sales ==0, NA, expenses / sales))monthly_expenses_sales <- monthly_expenses_sales %>%arrange(año, month_num)formatted_monthly_expenses_sales <- monthly_expenses_sales %>%mutate(expenses =dollar(expenses),sales =dollar(sales),expenses_per_dollar =round(expenses_per_dollar, 2))formatted_monthly_expenses_sales %>%kable(format ="html", caption ="Monthly Expenses and Sales with Expenses Per Dollar", align ="c") %>%kable_styling(bootstrap_options =c("striped", "hover", "condensed"), full_width = F)```## Dollar Expense Graph Ratio```{r}plot_data <- monthly_expenses_sales %>%mutate(adjusted_expenses_per_dollar = expenses_per_dollar -1, # Subtract 1bar_color =ifelse(adjusted_expenses_per_dollar >0, "darkred", "darkgreen"), # Assign colorsmonth_label =paste(month_abbr, año) # Create a combined label for months ) %>%arrange(fecha) %>%# Sort by fecha (chronological order)mutate(month_label =factor(month_label, levels =unique(month_label))) # Ensure chronological orderunique_years <- plot_data %>%distinct(año) %>%arrange(año)year_start_positions <-which(levels(fct_rev(plot_data$month_label)) %in%paste("Jan", unique_years$año))offset_positions <- year_start_positions +0.5# Shift the lines slightly upward# Step 2: Create the bar graph with reversed y-axisggplot(plot_data, aes(x = adjusted_expenses_per_dollar,y =fct_rev(month_label),fill = bar_color)) +geom_bar(stat ="identity",show.legend =FALSE,alpha =0.8) +# Horizontal barsgeom_vline(xintercept =0,linetype ="solid",color ="gray20",size =1) +geom_hline(yintercept = offset_positions,,linetype ="dotted",alpha =0.5,color ="gray10",size =0.7) +geom_text(aes(label =round(adjusted_expenses_per_dollar, 2),hjust =ifelse(adjusted_expenses_per_dollar >0, -0.2, 1.2)),color ="black",size =2.8) +# Add labels to the barsscale_fill_identity() +#scale_y_discrete(breaks = plot_data$month_label[seq(1, nrow(plot_data), by = 3)]) +scale_x_continuous(labels =dollar_format(prefix ="$", big.mark =",")) +labs(title ="Expense Per Dollar Sold",x ="Net per dolar sold",y ="Month",caption ="Bars to the left (green) indicate profits; bars to the right (red) indicate losses." ) +theme_minimal() +theme(plot.title =element_text(hjust =0.3, size =14), # Center and style titleaxis.title.x =element_text(hjust =0.3, size =12), # Center x-axis titleaxis.text.y =element_text(size =7), # Adjust size of month labelsaxis.title.y =element_text(size =12),plot.caption =element_text(hjust =0))```## Product Graph```{r}# Data processing and filteringexpenses_details <- expenses_data %>%rename(month = mes) %>%group_by(clasificacion, año, month, producto) %>%summarise(total_amount =sum(cantidad)) %>%mutate(month_num =match(tolower(month), tolower(month.abb)),mes =month(month_num, label =TRUE, abbr =TRUE),fecha =make_date(year = año, month = month_num, day =days_in_month(month_num)) )# Create the ggplotexpense_plot <- expenses_details %>%ggplot(aes(x = fecha, y = total_amount, color = producto)) +geom_line() +scale_y_continuous(labels =dollar_format(prefix ="$", big.mark =",")) +labs(title ="Monthly Expenses by Product Category",subtitle ="Comparison of key expense categories over time",x ="Date",y ="Total Amount (MXN)",color ="Product Category") +theme_minimal(base_size =14) +theme(plot.title =element_text(face ="bold", hjust =0.5),plot.subtitle =element_text(face ="italic", hjust =0.5),axis.text.x =element_text(angle =45, hjust =1),legend.position ="bottom" )# Convert the ggplot2 plot to an interactive plotly graphinteractive_plot <-ggplotly(expense_plot)# Display the interactive plotinteractive_plot```## 5. Análisis de las ventas5.1 Gráficas: Ventas a lo largo del tiempo```{r}# Calculate total sales and add necessary columnstotal_sales <- sales_data_long %>%group_by(fecha) %>%summarize(cantidad =sum(cantidad, na.rm =TRUE)) %>%mutate(clasificacion ="Total",source ="Ventas", año =year(fecha), mes =month(fecha, label =TRUE, abbr =TRUE), quarter =quarter(fecha))# Combine the total sales with the filtered datasales_data_plot <-bind_rows(sales_data_long, total_sales) %>%filter(!(año ==2020& mes %in%c("Jan", "Feb", "Mar", "Apr", "May", "Jun", "Jul", "Aug"))) %>%filter(!(año ==2024& mes %in%c("Sep", "Oct", "Nov", "Dec"))) # Choose a color palettepalette <-brewer.pal(n =4, name ="Set1") p <-ggplot(sales_data_plot, aes(x = fecha, y = cantidad, color = clasificacion, group = clasificacion)) +geom_line(data = . %>%filter(clasificacion =="Total"), size =0.8) +geom_line(data = . %>%filter(clasificacion !="Total"), size =0.5, alpha =0.5) +scale_color_manual(values = palette) +# Apply the chosen color palettelabs(title ="Sales Over Time",x =" ",y ="Sales (MXN)",color ="Sales Type") +scale_y_continuous(labels = scales::dollar_format(prefix ="$")) +theme_minimal() # Convert the ggplot2 plot to an interactive plotly graphggplotly(p)```### 5.2 Tendencias de ventas por plataforma```{r}sales_data_plot <- sales_data_plot %>%group_by(clasificacion) %>%arrange(fecha) %>%mutate(month_count =row_number()) %>%ungroup()# Create linear models for each 'clasificacion'models <- sales_data_plot %>%group_by(clasificacion) %>%do(model =lm(cantidad ~as.numeric(month_count), data = .))# Extract model coefficients and create equation stringsmodels <- models %>%mutate(slope =round(coef(model)[2], 2),intercept =round(coef(model)[1], 2),equation =paste0("y = ", intercept, " + ", slope, " * x"),y_position =seq(from =max(sales_data_plot$cantidad), to =max(sales_data_plot$cantidad) *2000, # Spread out the y positionslength.out =n()),x_position =as.Date("2021-01-01") # Set all x positions to the same date )# Choose a color palettepalette <-brewer.pal(n =4, name ="Set1") # Create ggplotp <-ggplot(sales_data_plot, aes(x = fecha, y = cantidad, color = clasificacion)) +geom_line(data = . %>%filter(clasificacion =="Total"), size =0.8) +geom_line(data = . %>%filter(clasificacion !="Total"), size =0.5, alpha =0.5) +geom_smooth(method ="lm", se = F, size =0.5) +scale_color_manual(values = palette) +geom_text(data = models,aes(x = x_position, y = y_position, label = equation, color = clasificacion), hjust =0, vjust =0, size =3.5, inherit.aes =FALSE) +scale_y_continuous(labels = scales::dollar_format(prefix ="$")) +labs(title ="Sales Trends by Platform", x ="Date", y ="Sales Quantity") +theme_minimal()# Convert to interactive plotly plotggplotly(p)```### 5.3 Modelos lineales y resultados```{r}# Create a sequential count column for each classificationsales_data_plot <- sales_data_plot %>%group_by(clasificacion) %>%arrange(fecha) %>%mutate(month_count =row_number()) %>%ungroup()# Create the linear model using the new count columnmodel_table_filtered <- sales_data_plot %>%group_by(clasificacion) %>%do(tidy(lm(cantidad ~ month_count, data = .))) %>%filter(term =="month_count") %>%mutate(significance =case_when( p.value <0.001~"***", p.value <0.01~"**", p.value <0.05~"*", p.value <0.1~".",TRUE~"" ) ) %>%select(Classification = clasificacion,Estimate = estimate,`Std. Error`= std.error,`t-value`= statistic,`p-value`= p.value,Significance = significance )# Print the table with stylingkable(model_table_filtered, format ="html", caption ="Modelos lineales por clasificación") %>%kable_styling(bootstrap_options =c("striped", "hover", "condensed"), full_width = F)```### Resúmen de Estadística de Ventas- **Depósito**: Aumento estadísticamente significativo de xxxxxx MXN por mes (p \< 0.001).- **Efectivo**: Disminución estadísticamente significativa de xxxxxx MXN por mes (p \< 0.001).- **Página**: Aumento estadísticamente significativo de xxxxxx MXN por mes (p \< 0.001).**Total: Aumento estadísticamente significativo de XXXXXX.XX MXN por mes (p \< 0.001).**### 5.4 Modelos futuros de venta.```{r}# Step 1: Create a new column with year and quarter combinedcombined_data <- combined_data %>%mutate(year_quarter =paste(año, quarter, sep ="-Q"))# Step 2: Summarize data by year_quarter and sourcequarterly_data <- combined_data %>%group_by(source_grouped, source, year_quarter) %>%summarize(total_amount =sum(cantidad, na.rm =TRUE)) %>%ungroup() %>%filter(year_quarter !="2024-Q3")```Creating the model```{r}# Step 3: Split year_quarter into separate columns for modelingquarterly_data <- quarterly_data %>%separate(year_quarter, into =c("año", "quarter"), sep ="-Q") %>%mutate(año =as.numeric(año), quarter =as.numeric(quarter))# Step 4: Create a model for each source typemodels <- quarterly_data %>%group_by(source) %>%do(model =lm(total_amount ~ año + quarter, data = .))# Step 5: Generate projections for the next 5 years (20 quarters)future_years <-rep(seq(max(quarterly_data$año) +1, by =1, length.out =6), each =4)future_quarters <-rep(1:4, 6)projected_data <- models %>%rowwise() %>%do(data.frame(año = future_years, quarter = future_quarters,total_amount =predict(.$model, newdata =data.frame(año = future_years, quarter = future_quarters)),source = .$source)) %>%mutate(year_quarter =paste(año, quarter, sep ="-Q"))# Combine real and projected dataquarterly_data <- quarterly_data %>%mutate(year_quarter =paste(año, quarter, sep ="-Q"), type ="Real")projected_data <- projected_data %>%mutate(type ="Projected",source_grouped =case_when( source =="Costos"~"expenses", source =="Gastos"~"expenses", source =="Ventas"~"sales"))# Reorder columns in `projected_data` to match `quarterly_data`projected_data <- projected_data[, c("source_grouped","source","total_amount", "año", "quarter","type" , "year_quarter")]plot_data <-bind_rows(quarterly_data, projected_data)``````{r}x <-ggplot(plot_data, aes(x = year_quarter, # Use 'año' for the x-axis (year)y = total_amount, color = source, linetype = type, group =interaction(source, type))) +# Ensure the grouping works across yearsgeom_line(size =0.5) +geom_point(size =0.5) +scale_y_continuous(labels = scales::dollar_format()) +labs(title ="Real and Projected Quarterly Data by Source",x ="Year", # Use "Year" for the x-axis labely ="Total Amount",color ="Source",linetype ="Data Type") +theme_minimal() +theme(axis.text.x =element_text(angle =50,color ="gray80",hjust =1, size =10)) +# Adjust label size for readabilityscale_x_discrete(breaks =function(x) x[seq(1, length(x), by =1)]) # Show fewer labels (every 4th)# Display the plotx```$$\text{total_amount} = \beta_0 + \beta_1 \times \text{año} + \beta_2 \times \text{quarter} + \epsilon$$Here:- $\beta_0$ is the intercept (the starting value of `total_amount` when both `año` and `quarter` are zero).- $\beta_1$ is the coefficient for `año`, representing the expected change in `total_amount` for each one-unit increase in `año`, holding `quarter` constant.- $\beta_2$ is the coefficient for `quarter`, representing the expected change in `total_amount` for each one-unit increase in `quarter`, holding `año` constant.- $\epsilon$ represents the error term, which accounts for the variability in `total_amount` that cannot be explained by `año` and `quarter`.```{r}# Group Costos and Gastos into 'Expenses', and leave Ventas as 'Sales'plot_data_grouped <- plot_data %>%mutate(source_grouped =case_when( source %in%c("Costos", "Gastos") ~"Expenses", source =="Ventas"~"Sales" )) %>%group_by(year_quarter, source_grouped, type) %>%summarize(total_amount =sum(total_amount, na.rm =TRUE)) %>%ungroup()x <-ggplot(plot_data_grouped, aes(x = year_quarter, y = total_amount, color = source_grouped, # Use 'source_grouped' for colorlinetype = type, group =interaction(source_grouped, type))) +# Ensure the grouping worksgeom_line(size =0.5) +geom_point(size =0.5) +scale_y_continuous(labels = scales::dollar_format()) +labs(title ="Real and Projected Quarterly Data (Expenses vs. Sales)",x ="Year",y ="Total Amount",color ="Category",linetype ="Data Type") +theme_minimal() +theme(axis.text.x =element_text(angle =50, color ="gray80",hjust =1, size =10)) +scale_x_discrete(breaks =function(x) x[seq(1, length(x), by =1)])# Display the plotx```## 6. Estado de ResultadosLa tabla de estado de resultados general muestra un resumen financiero de Nala. Muestra las ventas, costos, y la utilidad neta. Esta tabla ayuda para ver como los ingresos y gastos afectan las ganancias finales de la empresa.#### 6.1 Cálculo de Utilidades```{r}# Step 1: Process and calculate the financial metricsestado_resultados <- combined_data %>%group_by(año, clasificacion) %>%summarize(total_cantidad =sum(cantidad, na.rm =TRUE)) %>%pivot_wider(names_from = clasificacion, values_from = total_cantidad, values_fill =0) %>%mutate(`Ventas en Deposito`=`Ventas en Deposito`,`Ventas en Efectivo`=`Ventas en Efectivo`,`Ventas en Pagina`=`Ventas en Pagina`,Ventas =`Ventas en Deposito`+`Ventas en Efectivo`+`Ventas en Pagina`,Costo_de_Ventas =`Costos de Ventas`,Utilidad_Bruta = Ventas - Costo_de_Ventas,Gastos_de_ventas =`Gastos de Ventas`,Gastos_administrativos =`Gastos de Admin`,Utilidad_de_Operacion = Utilidad_Bruta - Gastos_de_ventas - Gastos_administrativos,Gastos_financieros =`Gastos Financieros`,Otros_gastos =0, Utilidad_Periodo = Utilidad_de_Operacion - Gastos_financieros,Utilidad_Acumulada =0,Utilidad_Neta = Utilidad_de_Operacion)# Step 2: Calculate Utilidad Acumulada and Utilidad Netafor (i in2:nrow(estado_resultados)) { estado_resultados$Utilidad_Acumulada[i] <- estado_resultados$Utilidad_Neta[i -1] estado_resultados$Utilidad_Neta[i] <- estado_resultados$Utilidad_Periodo[i] + estado_resultados$Utilidad_Acumulada[i]}# Step 3: Format numbers as currencyestado_resultados <- estado_resultados %>%mutate(across(where(is.numeric), ~paste0("$ ", formatC(round(.), format ="f", big.mark =",", digits =0))))# Step 4: Transpose the table and reorder the rowsdesired_order <-c("Ventas en Deposito","Ventas en Efectivo","Ventas en Pagina","Ventas", "Costo_de_Ventas", "Utilidad_Bruta", "Gastos_de_ventas", "Gastos_administrativos", "Utilidad_de_Operacion", "Gastos_financieros", "Otros_gastos", "Utilidad_Periodo", "Utilidad_Acumulada", "Utilidad_Neta")estado_resultados <- estado_resultados %>%pivot_longer(-año, names_to ="Metrica", values_to ="Value") %>%mutate(Metrica =factor(Metrica, levels = desired_order)) %>%arrange(Metrica) %>%pivot_wider(names_from = año, values_from = Value) %>%na.omit() %>%mutate(across(where(is.numeric), scales::dollar_format(prefix ="$")))``````{r}estado_resultados %>%kable(format ="html", caption ="Estado de Resultados") %>%kable_styling(bootstrap_options =c("striped", "hover", "condensed"), full_width = F) %>%row_spec(c(4), bold =TRUE) %>%row_spec(c(4), background ="#E6F9E6", bold =TRUE) %>%row_spec(c(6, 9, 12), background ="lightyellow", bold =TRUE)```#### 6.2 Cálculo de márgenes y porcentajesLa tabla de márgenes presenta los márgenes financieros como porcentaje de las ventas. Estos márgenes ayudan a evaluar la eficiencia y rentabilidad de Nala, mostrando cómo los costos y gastos impactan las ganancias.```{r}# Convert all monetary values to numericestado_resultados <- estado_resultados %>%mutate(across(`2021`:`2024`, ~as.numeric(str_remove_all(.x, "\\$|,"))))# Now, calculate the percentages using the numeric valuesventas <- estado_resultados %>%filter(Metrica =="Ventas") %>%select(-Metrica)# Function to calculate percentages for each row based on 'Ventas'calculate_percentages <-function(row, ventas) {if (row["Metrica"] =="Ventas") {# Set Ventas percentage to 100 for display purposes percentages <-rep(100, length(row) -1) } else {# Calculate percentages percentages <-as.numeric(row[-1]) / ventas *100 percentages <-round(percentages, 1) # Round to one decimal }# Return the row with percentages formatted with a percentage sign percentages <-paste0(percentages, "%")return(c(row[1], percentages))}# Apply the function to each rowpercentage_results <-t(apply(estado_resultados, 1, calculate_percentages, ventas =as.numeric(ventas)))# Convert the results back to a dataframe and update column namespercentage_results_df <-as.data.frame(percentage_results)colnames(percentage_results_df) <-colnames(estado_resultados)# Print the percentage results dataframe with kablekable(percentage_results_df, format ="html", caption ="Margen de Resultados") %>%kable_styling(bootstrap_options =c("striped", "hover", "condensed"), full_width = F) %>%row_spec(c(6, 9, 12), background ="lightyellow", bold =TRUE)```#### 6.3 Crecimiento de Nala porcentual```{r}# Calculate year-over-year growth for all relevant metricsventas_growth <- estado_resultados %>%filter(Metrica =="Ventas") %>%mutate(Growth_2022 = scales::percent((`2022`-`2021`) /`2021`),Growth_2023 = scales::percent((`2023`-`2022`) /`2022`),Growth_2024 = scales::percent((`2024`-`2023`) /`2023`) )# Cost of Goods Sold (Costo_de_Ventas)cost_growth <- estado_resultados %>%filter(Metrica =="Costo_de_Ventas") %>%mutate(Growth_2022 = scales::percent((`2022`-`2021`) /`2021`),Growth_2023 = scales::percent((`2023`-`2022`) /`2022`),Growth_2024 = scales::percent((`2024`-`2023`) /`2023`) )# Expenses (Gastos_de_ventas and Gastos_administrativos)expenses_growth <- estado_resultados %>%filter(Metrica %in%c("Gastos_de_ventas", "Gastos_administrativos")) %>%mutate(Growth_2022 = scales::percent((`2022`-`2021`) /`2021`),Growth_2023 = scales::percent((`2023`-`2022`) /`2022`),Growth_2024 = scales::percent((`2024`-`2023`) /`2023`) )# Utility (Utilidad_Bruta, Utilidad_de_Operacion, Utilidad_Neta)utility_growth <- estado_resultados %>%filter(Metrica %in%c("Utilidad_Bruta", "Utilidad_de_Operacion", "Utilidad_Neta")) %>%mutate(Growth_2022 = scales::percent((`2022`-`2021`) /`2021`),Growth_2023 = scales::percent((`2023`-`2022`) /`2022`),Growth_2024 = scales::percent((`2024`-`2023`) /`2023`) )# Combine all into a single data framecombined_growth <-bind_rows(ventas_growth, cost_growth, expenses_growth, utility_growth) %>%select(Metrica, Growth_2022, Growth_2023, Growth_2024)# Display the combined growth tablecombined_growth %>%kable(format ="html", caption ="Year-over-Year Growth for Costs, Expenses, and Utility Metrics (as Percentage)", align ='c') %>%kable_styling(bootstrap_options =c("hover", "condensed"), full_width = F)```#### 7.0 Next Steps1. Organizar una session en enero 2025, o diciembre 20242. Empezar el Doc para temas de impacto3. Empezar a organizar y pensar en emisiones