Este documento proporciona un análisis de los datos financieros de la empresa Nala.
Índice
Cargar bibliotecas necesarias
Leer los datos de gastos y ventas de NALA 2020-2024
Combinar los datos
Análisis financiero
4.1-4.3 Ventas totales vs. gastos
4.4 Análisis de variación mensual
4.5 Razón de Gastos
Análisis de las ventas
5.1 Gráficas: Ventas a lo largo del tiempo
5.2 Tendencias de ventas por plataforma
5.3 Modelos lineales y resultados
Estado de Resultados
6.1 Cálculo de utilidades
6.2 Cálculo de márgenes y porcentajes
1. Cargar Bibliotecas
Code
library(tidyverse) # For data manipulation and visualization; includes readr, dplyr, ggplot2library(janitor) # For cleaning data and making tabular outputs cleanerlibrary(scales) # For scaling graphical outputslibrary(RColorBrewer) # For using color palettes in visualizationslibrary(plotly) # For interactive web-based graphicslibrary(stringr) # For string manipulation (also part of tidyverse but listed for clarity)library(knitr) # For dynamic report generation in Rlibrary(kableExtra) # For enhancing 'kable' tables in R Markdownlibrary(broom) # For converting statistical analysis objects into tidy data frameslibrary(DT) # For creating interactive tables in R
2. Leer Datos
2.1 Leer datos de gastos
Code
# Step 1: Read and clean the raw data, and rename it to raw_expense_dataexpenses_file_path <-"/Users/javier.patron/Downloads/pagos Nala - Expenses Data Analisis (8).csv"raw_expense_data <-read_csv(expenses_file_path) %>%clean_names() %>%# Set the first row as column names and remove the first row { setNames(.[-1,], as.character(.[1,])) } %>%clean_names() %>%select_if(~!all(is.na(.)))# Step 2: Create three separate data frames for each set of columnsdf1 <- raw_expense_data %>%select(clasificacion, fecha, empresa, cantidad, producto)df2 <- raw_expense_data %>%select(clasificacion = clasificacion_2, fecha = fecha_2, empresa = empresa_2, cantidad = cantidad_2, producto = producto_2)df3 <- raw_expense_data %>%select(clasificacion = clasificacion_3, fecha = fecha_3, empresa = empresa_3, cantidad = cantidad_3, producto = producto_3)df4 <- raw_expense_data %>%select(clasificacion = clasificacion_4, fecha = fecha_4, empresa = empresa_4, cantidad = cantidad_4, producto = producto_4)# Step 3: Combine all three data frames and name it as expenses_dataexpenses_data <-bind_rows(df1, df2, df3, df4) %>%# Remove rows where all columns are NA (if any)filter(!if_all(everything(), is.na))# Clean the dataexpenses_data <- expenses_data %>%mutate(fecha = lubridate::dmy(fecha), año = lubridate::year(fecha),mes = lubridate::month(fecha, label =TRUE),quarter = lubridate::quarter(fecha),source =case_when(str_detect(clasificacion, "Gastos") ~"Gastos",str_detect(clasificacion, "Costos") ~"Costos",TRUE~"other"# This handles any other cases, if they exist )) %>%mutate(cantidad =gsub("\\$", "", cantidad), cantidad =gsub(",", "", cantidad), cantidad =as.numeric(cantidad)) %>%select("source", "clasificacion", "fecha", "año", "mes", "quarter", "empresa", "cantidad", "producto") %>%filter(!str_detect(año, "2025"))
2.2 Leer datos de ventas
Code
# Read the CSV filesales_file_path <-"/Users/javier.patron/Downloads/pagos Nala - Sales Data Analisis (2).csv"sales_data <-read_csv(sales_file_path) %>%clean_names()# Clean and Organize the datasales_data_long <- sales_data %>%pivot_longer(cols = jan:dec, names_to ="month", values_to ="cantidad") %>%rename(año = ano, tipo_venta = tipo) %>%mutate(month =match(tolower(month), tolower(month.abb)), # Convert month abbreviation to numbermes =month(month, label =TRUE, abbr =TRUE), # Convert to lubridate month formatsource ="Ventas",fecha =make_date(year = año, month =month(month), day =days_in_month(month(month, label =FALSE))), # Add last day of the monthclasificacion = tipo_venta,quarter =quarter(month),cantidad =as.numeric(gsub("[$,]", "", cantidad))) %>%select("source", "clasificacion", "fecha", "año", "mes", "quarter" , "cantidad")#sales_data_long <- sales_data_long %>% # arrange(fecha) %>% # slice(1:169)
3. Combinar los datos
La tabla 2 y 3 están ocultas por temas internos de Nala.
Code
combined_data <-full_join( expenses_data %>%select(-empresa, -producto), sales_data_long) %>%mutate(clasificacion =case_when( clasificacion =="Deposito"~"Ventas en Deposito", clasificacion =="Efectivo"~"Ventas en Efectivo", clasificacion =="Pagina"~"Ventas en Pagina",TRUE~ clasificacion))combined_data <-full_join( expenses_data %>%select(-empresa, -producto), sales_data_long) %>%mutate(clasificacion =case_when( clasificacion =="Gastos Admin"~"Gastos de Admin", clasificacion =="Gastos Ventas"~"Gastos de Ventas", clasificacion =="Costos Ventas"~"Costos de Ventas", clasificacion =="Gastos Financieros"~"Gastos Financieros", clasificacion =="Deposito"~"Ventas en Deposito", clasificacion =="Efectivo"~"Ventas en Efectivo", clasificacion =="Pagina"~"Ventas en Pagina",TRUE~ clasificacion))
Tabla 1: Gastos
Tabla 2: Ventas
4. Análisis Histórico
4.1 Gráfico 1: Ventas totales vs. gastos
Code
# Define custom colors based on classificationcustom_colors <-c("Ventas en Deposito"="#6EBF74", # Green"Ventas en Efectivo"="#499F54", # Green"Ventas en Pagina"="#28703B", # Green"Costos de Ventas"="#FFDBA9", # Pink"Gastos de Admin"="#FF9F40", # Coral"Gastos de Ventas"="#FF8347", # Tomato"Gastos Financieros"="#FF5100")# Explicitly categorize each 'clasificacion' into 'sales' or 'expenses'combined_data <- combined_data %>%mutate(source_grouped =case_when( clasificacion =="Ventas en Deposito"~"sales", clasificacion =="Ventas en Efectivo"~"sales", clasificacion =="Ventas en Pagina"~"sales", clasificacion =="Costos de Ventas"~"expenses", clasificacion =="Gastos de Admin"~"expenses", clasificacion =="Gastos de Ventas"~"expenses", clasificacion =="Gastos Financieros"~"expenses",TRUE~"other"# This is just a fallback case ))# Summarize total sales and combined expenses across the entire datasettotal_summary <- combined_data %>%group_by(source_grouped, clasificacion) %>%summarize(total_amount =sum(as.numeric(cantidad), na.rm =TRUE)) %>%mutate(total_sum =sum(total_amount))total_sums <- total_summary %>%group_by(source_grouped) %>%summarize(total_sum =sum(total_amount))# Bar graph of total sales vs. stacked combined expenses (Overall History) with custom colorsggplot(total_summary, aes(x = source_grouped,y = total_amount,fill = clasificacion)) +geom_bar(stat ="identity",position ="stack") +labs(title ="Graph 1: Total Sales vs. Expenses (Overall History)",y ="Total Amount (MXN)",x ="") +scale_fill_manual(values = custom_colors) +scale_y_continuous(labels =dollar_format(prefix ="$",big.mark =",")) +theme_minimal() +geom_text(aes(label = scales::dollar(total_amount, prefix ="$", big.mark =",")), position =position_stack(vjust =0.5), size =3, color ="white") +# Add specific text for each total sum from the total_sums tableannotate("text", x =1, y = total_sums$total_sum[1], label = scales::dollar(total_sums$total_sum[1],prefix ="$",big.mark =","), vjust =-0.5,size =2.5,fontface ="bold",color ="#FF7347") +annotate("text",x =2,y = total_sums$total_sum[2], label = scales::dollar(total_sums$total_sum[2],prefix ="$",big.mark =","), vjust =-0.2,size =2.5,fontface ="bold",color ="darkgreen")
4.2 Gráfico 2: Ventas totales vs. gastos por año
Code
# Summarize total sales and expenses per yearyearly_summary <- combined_data %>%group_by(año, source_grouped, clasificacion) %>%summarize(total_amount =sum(as.numeric(cantidad), na.rm =TRUE)) %>%ungroup()# Bar graph of total sales vs. expenses per year with facets and annotationsggplot(yearly_summary, aes(x = source_grouped, y = total_amount, fill = clasificacion)) +geom_bar(stat ="identity", position ="stack") +labs(title ="Graph 2: Total Sales vs. Expenses per Year", y ="Total Amount (MXN)", x ="") +scale_fill_manual(values = custom_colors) +scale_y_continuous(labels =dollar_format(prefix ="$", big.mark =",")) +theme_minimal() +facet_wrap(~año)
4.3 Gráfico 3: Ventas totales vs. gastos por un tiempo determinado
Code
# Assuming combined_data has a date column named "fecha"month_filter <-13# POR REVISAR CON REGINA# Filter data for the last XX months#filtered_data <- combined_data %>%# mutate(fecha = as.Date(fecha, format = "%d/%m/%Y")) %>% # Ensure "fecha" is in Date format# filter(fecha >= as.Date(Sys.Date() %m-% months(month_filter))) # Filter last XXX months# Ensure combined_data has a properly formatted "fecha" columnfiltered_data <- combined_data %>%mutate(fecha =as.Date(fecha, format ="%d/%m/%Y")) %>%# Convert to Date formatfilter(year(fecha) *12+month(fecha) >=year(Sys.Date()) *12+month(Sys.Date()) - month_filter)# Summarize total sales and expenses per year and other groupingsfiltered_summary <- filtered_data %>%mutate(año =year(fecha)) %>%# Extract year from fechagroup_by(source_grouped, clasificacion) %>%summarize(total_amount =sum(as.numeric(cantidad), na.rm =TRUE)) %>%ungroup()filtered_sums <- filtered_summary %>%group_by(source_grouped) %>%summarize(total_sum =sum(total_amount))# Assuming filtered_data has already been created and summarized as 'yearly_summary'ggplot(filtered_summary, aes(x = source_grouped, y = total_amount, fill = clasificacion)) +geom_bar(stat ="identity", position ="stack") +labs(title =paste0("Graph 3: Total Sales vs. Expenses (Last ", month_filter," Months)"), y ="Total Amount (MXN)", x ="") +scale_fill_manual(values = custom_colors) +scale_y_continuous(labels = scales::dollar_format(prefix ="$", big.mark =",")) +theme_minimal() +# Adding text labels inside the barsgeom_text(aes(label = scales::dollar(total_amount, prefix ="$", big.mark =",")), position =position_stack(vjust =0.5), size =2.5, color ="white") +# Annotate specific sums on the chart (e.g., for each category)annotate("text", x =1, y = filtered_sums$total_sum[1], label = scales::dollar(filtered_sums$total_sum[1], prefix ="$", big.mark =","), vjust =-0.5, size =3, fontface ="bold", color ="#FF7347") +annotate("text", x =2, y = filtered_sums$total_sum[2], label = scales::dollar(filtered_sums$total_sum[2], prefix ="$", big.mark =","), vjust =-0.2, size =3, fontface ="bold", color ="darkgreen")
# Create a filtered monthly summary using the already-filtered data (filtered_data)filtered_monthly_summary <- filtered_data %>%rename(month = mes) %>%# Assuming 'mes' exists as the month column in the datagroup_by(source_grouped, source, año, month) %>%summarise(total_amount =sum(as.numeric(cantidad), na.rm =TRUE)) %>%# Add month number and abbreviationmutate(month_num =match(tolower(month), tolower(month.abb)),month_abbr =month(month_num, label =TRUE, abbr =TRUE),fecha =make_date(year = año, month = month_num, day =days_in_month(month_num)))# Create the interactive boxplot with Plotlyp <-plot_ly(data = filtered_monthly_summary,x =~total_amount,y =~source, type ="box",color =~source, boxpoints ="outliers", # Show only outliersjitter =0.3, pointpos =-1.8) %>%layout(title =list(text =paste0("Graph 4: Distribution of Expenses by Classification<br><sup>(Last ", month_filter, " Months)</sup>"),font =list(size =18, family ="Arial", color ="black")),xaxis =list(title ="Total Monthly Sum (MXN)", tickformat ="$,.0f"),yaxis =list(title ="Classification"),showlegend =FALSE)# Display the interactive plotp
Caja (IQR): La caja representa el rango intercuartílico. Contiene el 50% central de los datos.
Parte izquierda de la caja (Q1): Este es el primer cuartil, o el percentil 25. Esto significa que el 25% de los datos están sobre la linea de la izauierda (entre la caja y la última linea representando el valor mínimo).
Parte derecha de la caja (Q3): Este es el tercer cuartil, o el percentil 75. Esto significa que el 75% de los datos están sobre la linea del lado derecgo (entre la caja y la última linea vertical representandp el valor máximo).
La línea dentro de la caja representa la mediana.
Gráfico 5: Ventas y gastos a lo largo del tiempo
Code
# Group and summarize monthly_summarymonthly_grouped_summary <- monthly_summary %>%group_by(source_grouped, año, month_num, month_abbr, fecha) %>%summarise(total_amount =sum(total_amount, na.rm =TRUE), .groups ="drop") %>%arrange(año, month_num, source_grouped)t <- monthly_grouped_summary %>%ggplot(aes(x = fecha, y = total_amount, fill = source_grouped, color = source_grouped)) +geom_area(alpha =0.8, position ="identity") +# Prevent stacking of areasgeom_line(size =0.2) +# Ensure the lines are visiblescale_y_continuous(labels = scales::dollar_format(prefix ="$", big.mark =",")) +scale_fill_manual(values =c("expenses"="#FC9272", "sales"="#9ECBEA")) +# Match area colorsscale_color_manual(values =c("expenses"="#DE2D26", "sales"="#3182BD")) +# Match line colorslabs(title ="Graph 5: Monthly Amounts Over Time by Source Group",x ="Date",y ="Total Amount (MXN)",fill ="Source Group",color ="Source Group" ) +theme_minimal()# Use ggplotly with custom tooltipsggplotly(tooltip =c("x", "y", "color"))
Gráfico 6: Comparación Mensual
Code
# Bar graph comparing sales and expenses per month, faceted by yearggplot(monthly_grouped_summary, aes(x = month_abbr, y = total_amount, fill = source_grouped)) +geom_bar(stat ="identity", position ="dodge", alpha =0.85) +scale_y_continuous(labels =dollar_format(prefix ="$", big.mark =",")) +labs(title ="Graph 6: Monthly Comparison of Sales and Expenses Faceted by Year",x ="Month",y ="Total Amount",fill ="Category") +facet_wrap(~year(fecha), ncol =1) +theme_minimal() +theme(axis.text.x =element_text(angle =45, hjust =1))
plot_data <- monthly_expenses_sales %>%mutate(adjusted_expenses_per_dollar = expenses_per_dollar -1, # Subtract 1bar_color =ifelse(adjusted_expenses_per_dollar >0, "darkred", "darkgreen"), # Assign colorsmonth_label =paste(month_abbr, año) # Create a combined label for months ) %>%arrange(fecha) %>%# Sort by fecha (chronological order)mutate(month_label =factor(month_label, levels =unique(month_label))) # Ensure chronological orderunique_years <- plot_data %>%distinct(año) %>%arrange(año)year_start_positions <-which(levels(fct_rev(plot_data$month_label)) %in%paste("Jan", unique_years$año))offset_positions <- year_start_positions +0.5# Shift the lines slightly upward# Step 2: Create the bar graph with reversed y-axisggplot(plot_data, aes(x = adjusted_expenses_per_dollar,y =fct_rev(month_label),fill = bar_color)) +geom_bar(stat ="identity",show.legend =FALSE,alpha =0.8) +# Horizontal barsgeom_vline(xintercept =0,linetype ="solid",color ="gray20",size =1) +geom_hline(yintercept = offset_positions,,linetype ="dotted",alpha =0.5,color ="gray10",size =0.7) +geom_text(aes(label =round(adjusted_expenses_per_dollar, 2),hjust =ifelse(adjusted_expenses_per_dollar >0, -0.2, 1.2)),color ="black",size =2.8) +# Add labels to the barsscale_fill_identity() +#scale_y_discrete(breaks = plot_data$month_label[seq(1, nrow(plot_data), by = 3)]) +scale_x_continuous(labels =dollar_format(prefix ="$", big.mark =",")) +labs(title ="Graph 8: Expense Per Dollar Sold",x ="Net per dolar sold",y ="Month",caption ="Bars to the left (green) indicate profits; bars to the right (red) indicate losses." ) +theme_minimal() +theme(plot.title =element_text(hjust =0.3, size =14), # Center and style titleaxis.title.x =element_text(hjust =0.3, size =12), # Center x-axis titleaxis.text.y =element_text(size =7), # Adjust size of month labelsaxis.title.y =element_text(size =12),plot.caption =element_text(hjust =0))
4.6 Gráfico 9: Gastos por producto a lo largo del tiempo
Code
# Data processing and filteringexpenses_details <- expenses_data %>%rename(month = mes) %>%group_by(clasificacion, año, month, producto) %>%summarise(total_amount =sum(cantidad), .groups ="drop") %>%mutate(month_num =match(tolower(month), tolower(month.abb)),mes =month(month_num, label =TRUE, abbr =TRUE),fecha =make_date(year = año, month = month_num, day =days_in_month(month_num)))# Select a color paletten_colors <-length(unique(expenses_details$producto)) color_palette <-colorRampPalette(brewer.pal(9, "Set3"))(n_colors) # Create the ggplotexpense_plot <-ggplot(expenses_details, aes(x = fecha, y = total_amount, color = producto, group = producto)) +geom_line() +scale_color_manual(values = color_palette) +scale_y_continuous(labels =dollar_format(prefix ="$", big.mark =",")) +# Keep it dynamiclabs(title ="Graph 9: Monthly Expenses by Product Category",subtitle ="Comparison of key expense categories over time",x ="Date",y ="Total Amount (MXN)",color ="Product Category") +theme_minimal() +theme(plot.title =element_text(face ="bold",hjust =0.5),plot.subtitle =element_text(face ="italic",hjust =0.5),axis.text.x =element_text(angle =45,hjust =1),legend.position ="bottom" )# Convert to interactive Plotly plot with fully dynamic y-axisinteractive_plot <-ggplotly(expense_plot) %>%layout(autosize =TRUE,yaxis =list(autorange =TRUE, # **Dynamic y-axis scaling**tickformat ="$,.0f", # Ensures currency formattingshowgrid =TRUE, # Keeps gridlines visibleticks ="outside", # Ensures tick marks are visibletickmode ="auto"# Automatically adjusts tick spacing ) )# Display the interactive plotinteractive_plot
5. Análisis de Ventas
Code
# Calculate total sales and add necessary columnstotal_sales <- sales_data_long %>%group_by(fecha) %>%summarize(cantidad =sum(cantidad, na.rm =TRUE)) %>%mutate(clasificacion ="Total",source ="Ventas", año =year(fecha), mes =month(fecha, label =TRUE, abbr =TRUE), quarter =quarter(fecha))# Combine the total sales with the filtered datasales_data_plot <-bind_rows(sales_data_long, total_sales) %>%filter(!(año ==2020& mes %in%c("Jan", "Feb", "Mar", "Apr", "May", "Jun", "Jul", "Aug"))) %>%filter(!(año ==2024& mes %in%c("Sep", "Oct", "Nov", "Dec"))) # Choose a color palettepalette <-brewer.pal(n =4, name ="Set1")
5.1 Gráfico 10: Tendencias de ventas por plataforma
Code
sales_data_plot <- sales_data_plot %>%group_by(clasificacion) %>%arrange(fecha) %>%mutate(month_count =row_number()) %>%ungroup()# Create linear models for each 'clasificacion'models <- sales_data_plot %>%group_by(clasificacion) %>%do(model =lm(cantidad ~as.numeric(month_count), data = .))# Extract model coefficients and create equation stringsmodels <- models %>%mutate(slope =round(coef(model)[2], 2),intercept =round(coef(model)[1], 2),equation =paste0("y = ", intercept, " + ", slope, " * x"),y_position =seq(from =max(sales_data_plot$cantidad), to =max(sales_data_plot$cantidad) *2000, # Spread out the y positionslength.out =n()),x_position =as.Date("2021-01-01") # Set all x positions to the same date )# Choose a color palettepalette <-brewer.pal(n =4, name ="Set1") # Create ggplotp <-ggplot(sales_data_plot, aes(x = fecha, y = cantidad, color = clasificacion)) +geom_line(data = . %>%filter(clasificacion =="Total"), size =0.8) +geom_line(data = . %>%filter(clasificacion !="Total"), size =0.5, alpha =0.5) +geom_smooth(method ="lm", se = F, size =0.5) +scale_color_manual(values = palette) +geom_text(data = models,aes(x = x_position, y = y_position, label = equation, color = clasificacion), hjust =0, vjust =0, size =3.5, inherit.aes =FALSE) +scale_y_continuous(labels = scales::dollar_format(prefix ="$")) +labs(title ="Graph 10: Sales Trends by Platform",x ="Date",y ="Sales Quantity") +theme_minimal()# Convert to interactive plotly plotggplotly(p)
5.2 Tabla 4: Modelos lineales y resultados
Code
# Create a sequential count column for each classificationsales_data_plot <- sales_data_plot %>%group_by(clasificacion) %>%arrange(fecha) %>%mutate(month_count =row_number()) %>%ungroup()# Create the linear model using the new count columnmodel_table_filtered <- sales_data_plot %>%group_by(clasificacion) %>%do(tidy(lm(cantidad ~ month_count, data = .))) %>%filter(term =="month_count") %>%mutate(significance =case_when( p.value <0.001~"***", p.value <0.01~"**", p.value <0.05~"*", p.value <0.1~".",TRUE~"" ) ) %>%select(Classification = clasificacion,Estimate = estimate,`Std. Error`= std.error,`t-value`= statistic,`p-value`= p.value,Significance = significance )# Print the table with stylingkable(model_table_filtered, format ="html", caption ="Table 4: Linea Models per Sales Clasification") %>%kable_styling(bootstrap_options =c("striped", "hover", "condensed"), full_width = F)
Table 4: Linea Models per Sales Clasification
Classification
Estimate
Std. Error
t-value
p-value
Significance
Deposito
1767.54556
211.01173
8.376528
0.000000
***
Efectivo
-63.14348
22.70274
-2.781315
0.008072
**
Pagina
1525.73976
128.69518
11.855454
0.000000
***
Total
3230.14184
226.65003
14.251672
0.000000
***
Resúmen de Estadística de Ventas
Depósito: Aumento estadísticamente significativo de $1,768 MXN por mes (p < 0.001).
Efectivo: Disminución estadísticamente significativa de -$63 MXN por mes (p < 0.001).
Página: Aumento estadísticamente significativo de $1,526 MXN por mes (p < 0.001).
Total: Aumento estadísticamente significativo de $3,230.MXN por mes (p < 0.001).
6. Estado de Resultados
La tabla de estado de resultados general muestra un resumen financiero de Nala. Muestra las ventas, costos, y la utilidad neta.
Code
# Step 1: Process and calculate the financial metricsestado_resultados <- combined_data %>%group_by(año, clasificacion) %>%summarize(total_cantidad =sum(cantidad, na.rm =TRUE)) %>%pivot_wider(names_from = clasificacion, values_from = total_cantidad, values_fill =0) %>%mutate(`Ventas en Deposito`=`Ventas en Deposito`,`Ventas en Efectivo`=`Ventas en Efectivo`,`Ventas en Pagina`=`Ventas en Pagina`,Ventas =`Ventas en Deposito`+`Ventas en Efectivo`+`Ventas en Pagina`,`Costo de Ventas`=`Costos de Ventas`,`Utilidad Bruta`= Ventas -`Costo de Ventas`,`Gastos de Ventas`=`Gastos de Ventas`,`Gastos Administrativos`=`Gastos de Admin`,`Utilidad de Operacion`=`Utilidad Bruta`-`Gastos de Ventas`-`Gastos Administrativos`,`Gastos Financieros`=`Gastos Financieros`,`Otros Gastos `=0, `Utilidad Periodo`=`Utilidad de Operacion`-`Gastos Financieros`,`Utilidad Acumulada`=0,`Utilidad Neta`=`Utilidad de Operacion`)# Step 2: Calculate Utilidad Acumulada and Utilidad Netafor (i in2:nrow(estado_resultados)) { estado_resultados$Utilidad_Acumulada[i] <- estado_resultados$Utilidad_Neta[i -1] estado_resultados$Utilidad_Neta[i] <- estado_resultados$Utilidad_Periodo[i] + estado_resultados$Utilidad_Acumulada[i]}# Step 3: Format numbers as currencyestado_resultados <- estado_resultados %>%mutate(across(where(is.numeric), ~paste0("$ ", formatC(round(.), format ="f", big.mark =",", digits =0))))# Step 4: Transpose the table and reorder the rowsdesired_order <-c("Ventas en Deposito","Ventas en Efectivo","Ventas en Pagina","Ventas", "Costo de Ventas", "Utilidad Bruta", "Gastos de Ventas", "Gastos Administrativos", "Utilidad de Operacion", "Gastos Financieros", "Otros Gastos", "Utilidad Periodo", "Utilidad Acumulada", "Utilidad Neta")estado_resultados <- estado_resultados %>%pivot_longer(-año, names_to ="Metrica", values_to ="Value") %>%mutate(Metrica =factor(Metrica, levels = desired_order)) %>%arrange(Metrica) %>%pivot_wider(names_from = año, values_from = Value) %>%na.omit() %>%mutate(across(where(is.numeric), scales::dollar_format(prefix ="$")))
La tabla de márgenes presenta los márgenes financieros como porcentaje de las ventas. Estos márgenes ayudan a evaluar la eficiencia y rentabilidad de Nala, mostrando cómo los costos y gastos impactan las ganancias.
Code
# Convert all monetary values to numericestado_resultados <- estado_resultados %>%mutate(across(`2021`:`2024`, ~as.numeric(str_remove_all(.x, "\\$|,"))))# Now, calculate the percentages using the numeric valuesventas <- estado_resultados %>%filter(Metrica =="Ventas") %>%select(-Metrica)# Function to calculate percentages for each row based on 'Ventas'calculate_percentages <-function(row, ventas) {if (row["Metrica"] =="Ventas") {# Set Ventas percentage to 100 for display purposes percentages <-rep(100, length(row) -1) } else {# Calculate percentages percentages <-as.numeric(row[-1]) / ventas *100 percentages <-round(percentages, 1) # Round to one decimal }# Return the row with percentages formatted with a percentage sign percentages <-paste0(percentages, "%")return(c(row[1], percentages))}# Apply the function to each rowpercentage_results <-t(apply(estado_resultados, 1, calculate_percentages, ventas =as.numeric(ventas)))# Convert the results back to a dataframe and update column namespercentage_results_df <-as.data.frame(percentage_results)colnames(percentage_results_df) <-colnames(estado_resultados)# Print the percentage results dataframe with kablekable(percentage_results_df, format ="html", caption ="Table 6: Balance Sheet (%)") %>%kable_styling(bootstrap_options =c("striped", "hover", "condensed"), full_width = F) %>%row_spec(c(4), background ="#E6F9E6", bold =TRUE) %>%row_spec(c(6, 9, 12), background ="lightyellow", bold =TRUE)
Table 6: Balance Sheet (%)
Metrica
2021
2022
2023
2024
Ventas en Deposito
41%
65.5%
56%
50%
Ventas en Efectivo
18.4%
6.3%
2.4%
2.2%
Ventas en Pagina
40.6%
28.2%
41.6%
47.8%
Ventas
100%
100%
100%
100%
Costo de Ventas
64.4%
58.2%
56.3%
50.9%
Utilidad Bruta
35.6%
41.8%
43.7%
49.1%
Gastos de Ventas
3.7%
4.7%
8%
10.1%
Gastos Administrativos
6.8%
41.7%
32.3%
38%
Utilidad de Operacion
25.1%
-4.5%
3.5%
1%
Gastos Financieros
0%
0%
0%
5.7%
Utilidad Periodo
25.1%
-4.5%
3.5%
-4.7%
Utilidad Acumulada
0%
0%
0%
0%
Utilidad Neta
25.1%
-4.5%
3.5%
1%
6.3 Tabla 7: Crecimiento de Nala porcentual
Code
# Calculate year-over-year growth for all relevant metricsventas_growth <- estado_resultados %>%filter(Metrica =="Ventas") %>%mutate(Growth_2022 = scales::percent((`2022`-`2021`) /`2021`),Growth_2023 = scales::percent((`2023`-`2022`) /`2022`),Growth_2024 = scales::percent((`2024`-`2023`) /`2023`) )# Cost of Goods Sold (Costo_de_Ventas)cost_growth <- estado_resultados %>%filter(Metrica =="Costo de Ventas") %>%mutate(Growth_2022 = scales::percent((`2022`-`2021`) /`2021`),Growth_2023 = scales::percent((`2023`-`2022`) /`2022`),Growth_2024 = scales::percent((`2024`-`2023`) /`2023`) )# Expenses (Gastos_de_ventas and Gastos_administrativos)expenses_growth <- estado_resultados %>%filter(Metrica %in%c("Gastos de Ventas", "Gastos Administrativos")) %>%mutate(Growth_2022 = scales::percent((`2022`-`2021`) /`2021`),Growth_2023 = scales::percent((`2023`-`2022`) /`2022`),Growth_2024 = scales::percent((`2024`-`2023`) /`2023`) )# Utility (Utilidad_Bruta, Utilidad_de_Operacion, Utilidad_Neta)utility_growth <- estado_resultados %>%filter(Metrica %in%c("Utilidad Bruta", "Utilidad de Operacion", "Utilidad Neta")) %>%mutate(Growth_2022 = scales::percent((`2022`-`2021`) /`2021`),Growth_2023 = scales::percent((`2023`-`2022`) /`2022`),Growth_2024 = scales::percent((`2024`-`2023`) /`2023`) )# Combine all into a single data framecombined_growth <-bind_rows(ventas_growth, cost_growth, expenses_growth, utility_growth) %>%select(Metrica, Growth_2022, Growth_2023, Growth_2024)# Display the combined growth tablecombined_growth %>%kable(format ="html", caption ="Table 7: Year-over-Year Growth (%)", align ='c') %>%kable_styling(bootstrap_options =c("hover", "condensed"), full_width = F) %>%row_spec(c(1), background ="#E6F9E6", bold =TRUE) %>%row_spec(c(5, 7), background ="lightyellow", bold =TRUE)
Table 7: Year-over-Year Growth (%)
Metrica
Growth_2022
Growth_2023
Growth_2024
Ventas
169%
76%
25%
Costo de Ventas
143%
71%
13%
Gastos de Ventas
241%
203%
57%
Gastos Administrativos
1 545%
37%
47%
Utilidad Bruta
217%
84%
40%
Utilidad de Operacion
-148%
-235%
-65%
Utilidad Neta
-148%
-235%
-65%
Source Code
---title: "Financial Data Analysis of Nala"author: "Javier Patrón"date: "`r Sys.Date()`"format: html: toc: true toc_depth: 2 code-fold: true code-tools: trueexecute: warning: false message: false---[{fig-align="center"}](https://www.nala.mx/)Este documento proporciona un análisis de los datos financieros de la empresa Nala.## Índice1. **Cargar bibliotecas necesarias**2. **Leer los datos de gastos y ventas de NALA 2020-2024**3. **Combinar los datos**4. **Análisis financiero** 4.1-4.3 Ventas totales vs. gastos 4.4 Análisis de variación mensual 4.5 Razón de Gastos<!-- -->5. **Análisis de las ventas** 5.1 Gráficas: Ventas a lo largo del tiempo 5.2 Tendencias de ventas por plataforma 5.3 Modelos lineales y resultados6. **Estado de Resultados** 6.1 Cálculo de utilidades 6.2 Cálculo de márgenes y porcentajes## 1. Cargar Bibliotecas```{r}library(tidyverse) # For data manipulation and visualization; includes readr, dplyr, ggplot2library(janitor) # For cleaning data and making tabular outputs cleanerlibrary(scales) # For scaling graphical outputslibrary(RColorBrewer) # For using color palettes in visualizationslibrary(plotly) # For interactive web-based graphicslibrary(stringr) # For string manipulation (also part of tidyverse but listed for clarity)library(knitr) # For dynamic report generation in Rlibrary(kableExtra) # For enhancing 'kable' tables in R Markdownlibrary(broom) # For converting statistical analysis objects into tidy data frameslibrary(DT) # For creating interactive tables in R```## 2. Leer Datos#### 2.1 Leer datos de gastos```{r}# Step 1: Read and clean the raw data, and rename it to raw_expense_dataexpenses_file_path <-"/Users/javier.patron/Downloads/pagos Nala - Expenses Data Analisis (8).csv"raw_expense_data <-read_csv(expenses_file_path) %>%clean_names() %>%# Set the first row as column names and remove the first row { setNames(.[-1,], as.character(.[1,])) } %>%clean_names() %>%select_if(~!all(is.na(.)))# Step 2: Create three separate data frames for each set of columnsdf1 <- raw_expense_data %>%select(clasificacion, fecha, empresa, cantidad, producto)df2 <- raw_expense_data %>%select(clasificacion = clasificacion_2, fecha = fecha_2, empresa = empresa_2, cantidad = cantidad_2, producto = producto_2)df3 <- raw_expense_data %>%select(clasificacion = clasificacion_3, fecha = fecha_3, empresa = empresa_3, cantidad = cantidad_3, producto = producto_3)df4 <- raw_expense_data %>%select(clasificacion = clasificacion_4, fecha = fecha_4, empresa = empresa_4, cantidad = cantidad_4, producto = producto_4)# Step 3: Combine all three data frames and name it as expenses_dataexpenses_data <-bind_rows(df1, df2, df3, df4) %>%# Remove rows where all columns are NA (if any)filter(!if_all(everything(), is.na))# Clean the dataexpenses_data <- expenses_data %>%mutate(fecha = lubridate::dmy(fecha), año = lubridate::year(fecha),mes = lubridate::month(fecha, label =TRUE),quarter = lubridate::quarter(fecha),source =case_when(str_detect(clasificacion, "Gastos") ~"Gastos",str_detect(clasificacion, "Costos") ~"Costos",TRUE~"other"# This handles any other cases, if they exist )) %>%mutate(cantidad =gsub("\\$", "", cantidad), cantidad =gsub(",", "", cantidad), cantidad =as.numeric(cantidad)) %>%select("source", "clasificacion", "fecha", "año", "mes", "quarter", "empresa", "cantidad", "producto") %>%filter(!str_detect(año, "2025"))```#### 2.2 Leer datos de ventas```{r}# Read the CSV filesales_file_path <-"/Users/javier.patron/Downloads/pagos Nala - Sales Data Analisis (2).csv"sales_data <-read_csv(sales_file_path) %>%clean_names()# Clean and Organize the datasales_data_long <- sales_data %>%pivot_longer(cols = jan:dec, names_to ="month", values_to ="cantidad") %>%rename(año = ano, tipo_venta = tipo) %>%mutate(month =match(tolower(month), tolower(month.abb)), # Convert month abbreviation to numbermes =month(month, label =TRUE, abbr =TRUE), # Convert to lubridate month formatsource ="Ventas",fecha =make_date(year = año, month =month(month), day =days_in_month(month(month, label =FALSE))), # Add last day of the monthclasificacion = tipo_venta,quarter =quarter(month),cantidad =as.numeric(gsub("[$,]", "", cantidad))) %>%select("source", "clasificacion", "fecha", "año", "mes", "quarter" , "cantidad")#sales_data_long <- sales_data_long %>% # arrange(fecha) %>% # slice(1:169)```## 3. Combinar los datos*La tabla 2 y 3 están ocultas por temas internos de Nala.*```{r}combined_data <-full_join( expenses_data %>%select(-empresa, -producto), sales_data_long) %>%mutate(clasificacion =case_when( clasificacion =="Deposito"~"Ventas en Deposito", clasificacion =="Efectivo"~"Ventas en Efectivo", clasificacion =="Pagina"~"Ventas en Pagina",TRUE~ clasificacion))combined_data <-full_join( expenses_data %>%select(-empresa, -producto), sales_data_long) %>%mutate(clasificacion =case_when( clasificacion =="Gastos Admin"~"Gastos de Admin", clasificacion =="Gastos Ventas"~"Gastos de Ventas", clasificacion =="Costos Ventas"~"Costos de Ventas", clasificacion =="Gastos Financieros"~"Gastos Financieros", clasificacion =="Deposito"~"Ventas en Deposito", clasificacion =="Efectivo"~"Ventas en Efectivo", clasificacion =="Pagina"~"Ventas en Pagina",TRUE~ clasificacion))```#### Tabla 1: Gastos```{r, include = FALSE, collapse = T}# Create an interactive table for expenses_data with smaller fontdatatable(expenses_data, options = list(autoWidth = TRUE, searching = TRUE), caption = 'Expenses Data') %>% formatStyle( columns = names(expenses_data), fontSize = '10px')```#### Tabla 2: Ventas```{r, include = FALSE}# Create an interactive table for expenses_data with smaller fontdatatable(sales_data, options = list(autoWidth = TRUE, searching = TRUE), caption = 'Expenses Data') %>% formatStyle( columns = names(sales_data), fontSize = '10px') ```## 4. Análisis Histórico### 4.1 Gráfico 1: Ventas totales vs. gastos```{r}# Define custom colors based on classificationcustom_colors <-c("Ventas en Deposito"="#6EBF74", # Green"Ventas en Efectivo"="#499F54", # Green"Ventas en Pagina"="#28703B", # Green"Costos de Ventas"="#FFDBA9", # Pink"Gastos de Admin"="#FF9F40", # Coral"Gastos de Ventas"="#FF8347", # Tomato"Gastos Financieros"="#FF5100")# Explicitly categorize each 'clasificacion' into 'sales' or 'expenses'combined_data <- combined_data %>%mutate(source_grouped =case_when( clasificacion =="Ventas en Deposito"~"sales", clasificacion =="Ventas en Efectivo"~"sales", clasificacion =="Ventas en Pagina"~"sales", clasificacion =="Costos de Ventas"~"expenses", clasificacion =="Gastos de Admin"~"expenses", clasificacion =="Gastos de Ventas"~"expenses", clasificacion =="Gastos Financieros"~"expenses",TRUE~"other"# This is just a fallback case ))# Summarize total sales and combined expenses across the entire datasettotal_summary <- combined_data %>%group_by(source_grouped, clasificacion) %>%summarize(total_amount =sum(as.numeric(cantidad), na.rm =TRUE)) %>%mutate(total_sum =sum(total_amount))total_sums <- total_summary %>%group_by(source_grouped) %>%summarize(total_sum =sum(total_amount))# Bar graph of total sales vs. stacked combined expenses (Overall History) with custom colorsggplot(total_summary, aes(x = source_grouped,y = total_amount,fill = clasificacion)) +geom_bar(stat ="identity",position ="stack") +labs(title ="Graph 1: Total Sales vs. Expenses (Overall History)",y ="Total Amount (MXN)",x ="") +scale_fill_manual(values = custom_colors) +scale_y_continuous(labels =dollar_format(prefix ="$",big.mark =",")) +theme_minimal() +geom_text(aes(label = scales::dollar(total_amount, prefix ="$", big.mark =",")), position =position_stack(vjust =0.5), size =3, color ="white") +# Add specific text for each total sum from the total_sums tableannotate("text", x =1, y = total_sums$total_sum[1], label = scales::dollar(total_sums$total_sum[1],prefix ="$",big.mark =","), vjust =-0.5,size =2.5,fontface ="bold",color ="#FF7347") +annotate("text",x =2,y = total_sums$total_sum[2], label = scales::dollar(total_sums$total_sum[2],prefix ="$",big.mark =","), vjust =-0.2,size =2.5,fontface ="bold",color ="darkgreen")```### 4.2 Gráfico 2: Ventas totales vs. gastos por año```{r}# Summarize total sales and expenses per yearyearly_summary <- combined_data %>%group_by(año, source_grouped, clasificacion) %>%summarize(total_amount =sum(as.numeric(cantidad), na.rm =TRUE)) %>%ungroup()# Bar graph of total sales vs. expenses per year with facets and annotationsggplot(yearly_summary, aes(x = source_grouped, y = total_amount, fill = clasificacion)) +geom_bar(stat ="identity", position ="stack") +labs(title ="Graph 2: Total Sales vs. Expenses per Year", y ="Total Amount (MXN)", x ="") +scale_fill_manual(values = custom_colors) +scale_y_continuous(labels =dollar_format(prefix ="$", big.mark =",")) +theme_minimal() +facet_wrap(~año) ```### 4.3 Gráfico 3: Ventas totales vs. gastos por un tiempo determinado```{r}# Assuming combined_data has a date column named "fecha"month_filter <-13# POR REVISAR CON REGINA# Filter data for the last XX months#filtered_data <- combined_data %>%# mutate(fecha = as.Date(fecha, format = "%d/%m/%Y")) %>% # Ensure "fecha" is in Date format# filter(fecha >= as.Date(Sys.Date() %m-% months(month_filter))) # Filter last XXX months# Ensure combined_data has a properly formatted "fecha" columnfiltered_data <- combined_data %>%mutate(fecha =as.Date(fecha, format ="%d/%m/%Y")) %>%# Convert to Date formatfilter(year(fecha) *12+month(fecha) >=year(Sys.Date()) *12+month(Sys.Date()) - month_filter)# Summarize total sales and expenses per year and other groupingsfiltered_summary <- filtered_data %>%mutate(año =year(fecha)) %>%# Extract year from fechagroup_by(source_grouped, clasificacion) %>%summarize(total_amount =sum(as.numeric(cantidad), na.rm =TRUE)) %>%ungroup()filtered_sums <- filtered_summary %>%group_by(source_grouped) %>%summarize(total_sum =sum(total_amount))# Assuming filtered_data has already been created and summarized as 'yearly_summary'ggplot(filtered_summary, aes(x = source_grouped, y = total_amount, fill = clasificacion)) +geom_bar(stat ="identity", position ="stack") +labs(title =paste0("Graph 3: Total Sales vs. Expenses (Last ", month_filter," Months)"), y ="Total Amount (MXN)", x ="") +scale_fill_manual(values = custom_colors) +scale_y_continuous(labels = scales::dollar_format(prefix ="$", big.mark =",")) +theme_minimal() +# Adding text labels inside the barsgeom_text(aes(label = scales::dollar(total_amount, prefix ="$", big.mark =",")), position =position_stack(vjust =0.5), size =2.5, color ="white") +# Annotate specific sums on the chart (e.g., for each category)annotate("text", x =1, y = filtered_sums$total_sum[1], label = scales::dollar(filtered_sums$total_sum[1], prefix ="$", big.mark =","), vjust =-0.5, size =3, fontface ="bold", color ="#FF7347") +annotate("text", x =2, y = filtered_sums$total_sum[2], label = scales::dollar(filtered_sums$total_sum[2], prefix ="$", big.mark =","), vjust =-0.2, size =3, fontface ="bold", color ="darkgreen")```### 4.4 Análisis de variación```{r}# Cleaned dataframe manipulationmonthly_summary <- combined_data %>%rename(month = mes) %>%group_by(source_grouped, source, año, month) %>%summarise(total_amount =sum(cantidad)) %>%mutate(month_num =match(tolower(month), tolower(month.abb)),month_abbr =month(month_num, label =TRUE, abbr =TRUE), fecha =make_date(year = año, month = month_num, day =days_in_month(month_num))) %>%arrange(año, month_num, source_grouped)```#### Gráfico 4: Box Plot```{r}# Create a filtered monthly summary using the already-filtered data (filtered_data)filtered_monthly_summary <- filtered_data %>%rename(month = mes) %>%# Assuming 'mes' exists as the month column in the datagroup_by(source_grouped, source, año, month) %>%summarise(total_amount =sum(as.numeric(cantidad), na.rm =TRUE)) %>%# Add month number and abbreviationmutate(month_num =match(tolower(month), tolower(month.abb)),month_abbr =month(month_num, label =TRUE, abbr =TRUE),fecha =make_date(year = año, month = month_num, day =days_in_month(month_num)))# Create the interactive boxplot with Plotlyp <-plot_ly(data = filtered_monthly_summary,x =~total_amount,y =~source, type ="box",color =~source, boxpoints ="outliers", # Show only outliersjitter =0.3, pointpos =-1.8) %>%layout(title =list(text =paste0("Graph 4: Distribution of Expenses by Classification<br><sup>(Last ", month_filter, " Months)</sup>"),font =list(size =18, family ="Arial", color ="black")),xaxis =list(title ="Total Monthly Sum (MXN)", tickformat ="$,.0f"),yaxis =list(title ="Classification"),showlegend =FALSE)# Display the interactive plotp```**Caja (IQR): La caja representa el rango intercuartílico. Contiene el 50% central de los datos.**Parte izquierda de la caja (Q1): Este es el primer cuartil, o el percentil 25. Esto significa que el 25% de los datos están sobre la linea de la izauierda (entre la caja y la última linea representando el valor mínimo).Parte derecha de la caja (Q3): Este es el tercer cuartil, o el percentil 75. Esto significa que el 75% de los datos están sobre la linea del lado derecgo (entre la caja y la última linea vertical representandp el valor máximo).La línea dentro de la caja representa la mediana.#### Gráfico 5: Ventas y gastos a lo largo del tiempo```{r}# Group and summarize monthly_summarymonthly_grouped_summary <- monthly_summary %>%group_by(source_grouped, año, month_num, month_abbr, fecha) %>%summarise(total_amount =sum(total_amount, na.rm =TRUE), .groups ="drop") %>%arrange(año, month_num, source_grouped)t <- monthly_grouped_summary %>%ggplot(aes(x = fecha, y = total_amount, fill = source_grouped, color = source_grouped)) +geom_area(alpha =0.8, position ="identity") +# Prevent stacking of areasgeom_line(size =0.2) +# Ensure the lines are visiblescale_y_continuous(labels = scales::dollar_format(prefix ="$", big.mark =",")) +scale_fill_manual(values =c("expenses"="#FC9272", "sales"="#9ECBEA")) +# Match area colorsscale_color_manual(values =c("expenses"="#DE2D26", "sales"="#3182BD")) +# Match line colorslabs(title ="Graph 5: Monthly Amounts Over Time by Source Group",x ="Date",y ="Total Amount (MXN)",fill ="Source Group",color ="Source Group" ) +theme_minimal()# Use ggplotly with custom tooltipsggplotly(tooltip =c("x", "y", "color"))```#### Gráfico 6: Comparación Mensual```{r}# Bar graph comparing sales and expenses per month, faceted by yearggplot(monthly_grouped_summary, aes(x = month_abbr, y = total_amount, fill = source_grouped)) +geom_bar(stat ="identity", position ="dodge", alpha =0.85) +scale_y_continuous(labels =dollar_format(prefix ="$", big.mark =",")) +labs(title ="Graph 6: Monthly Comparison of Sales and Expenses Faceted by Year",x ="Month",y ="Total Amount",fill ="Category") +facet_wrap(~year(fecha), ncol =1) +theme_minimal() +theme(axis.text.x =element_text(angle =45, hjust =1))``````{r}# Calculate the yearly totals and profityearly_summary_table <- monthly_summary %>%group_by(year =year(fecha)) %>%summarise(total_expenses =sum(total_amount[source_grouped =="expenses"]),total_sales =sum(total_amount[source_grouped =="sales"]),profit = total_sales - total_expenses) %>%mutate(total_expenses = scales::dollar(total_expenses, prefix ="$", big.mark =","),total_sales = scales::dollar(total_sales, prefix ="$", big.mark =","),profit = scales::dollar(profit, prefix ="$", big.mark =","))``````{r}# Calculate the monthly totals for all yearsmonthly_summary_table <- monthly_summary %>%group_by(year =year(fecha), month_abbr) %>%summarise(total_expenses =sum(total_amount[source_grouped =="expenses"]),total_sales =sum(total_amount[source_grouped =="sales"]),profit = total_sales - total_expenses ) %>%arrange(year, match(month_abbr, month.abb)) %>%mutate(total_expenses = scales::dollar(total_expenses, prefix ="$", big.mark =","),total_sales = scales::dollar(total_sales, prefix ="$", big.mark =","),profit = scales::dollar(profit, prefix ="$", big.mark =","))```#### Gráfico 7: Ganancia o perdida mensual```{r}# Ensure that 'profit' is numericmonthly_summary_table <- monthly_summary_table %>%mutate(profit_numeric =as.numeric(gsub("[$,]", "", profit)),profit_formatted = scales::comma(profit_numeric), # Format with commasdate =as.Date(paste(year, month_abbr, "01", sep ="-"), format ="%Y-%b-%d"))# Create the ggplot objectp <-ggplot(monthly_summary_table, aes(x = date, y = profit_numeric, fill = profit_numeric >0, text =paste("Date: ", format(date, "%b %Y"), "<br>Profit: $", profit_formatted))) +# Use formatted profitgeom_bar(stat ="identity") +scale_y_continuous(labels =dollar_format(prefix ="$", big.mark =",")) +scale_x_date(date_labels ="%Y", date_breaks ="1 year", minor_breaks =NULL) +scale_fill_manual(values =c("TRUE"="darkgreen", "FALSE"="coral")) +labs(title ="Graph 7: Monthly Profits Over Time (2020-2024)",x ="Year",y ="Profit ($)" ) +theme_minimal() +theme(axis.text.x =element_text(angle =0, hjust =1, vjust =0.5),legend.position ="none")# Convert the ggplot object to an interactive plotly objectp_interactive <-ggplotly(p, tooltip ="text")# Display the interactive plotp_interactive```### 4.5 Razón de gastos sobre ventas#### Tabla 3: Cuadro resumen anual```{r}## SUPER IMPORTANT FACTORS# Function to remove dollar signs and commas, and convert to numericclean_numeric <-function(x) {as.numeric(gsub("[\\$,]", "", x))}yearly_summary_table <- yearly_summary_table %>%mutate(total_expenses =clean_numeric(total_expenses),total_sales =clean_numeric(total_sales),profit =clean_numeric(profit))yearly_summary_table <- yearly_summary_table %>%mutate(year =as.character(year))yearly_summary_table <- yearly_summary_table %>%mutate(expenses_per_dollar =ifelse(total_sales ==0, NA, total_expenses / total_sales))summary_row <- yearly_summary_table %>%summarise(year ="All", total_expenses =sum(total_expenses, na.rm =TRUE),total_sales =sum(total_sales, na.rm =TRUE),profit =sum(profit, na.rm =TRUE)) %>%mutate(expenses_per_dollar =ifelse(total_sales ==0, NA, total_expenses / total_sales))# Step 5: Add the summary row to the original data frameyearly_summary_table_with_total <- yearly_summary_table %>%bind_rows(summary_row)# Format the numeric columns with $ and commasformatted_table <- yearly_summary_table_with_total %>%mutate(total_expenses =dollar(total_expenses),total_sales =dollar(total_sales),profit =dollar(profit))# Simple formatted table with kableformatted_table %>%kable(format ="html", caption ="Table 3: Yearly Summary Table with Total", align ="c") %>%kable_styling(bootstrap_options =c("striped", "hover", "condensed"), full_width = F)``````{r}# Step 1: Calculate expenses_per_dollar by pivoting and performing divisionmonthly_expenses_sales <- monthly_grouped_summary %>%pivot_wider(names_from = source_grouped, values_from = total_amount) %>%mutate(expenses_per_dollar =ifelse(sales ==0, NA, expenses / sales))monthly_expenses_sales <- monthly_expenses_sales %>%arrange(año, month_num)```#### Gráfico 8: Ratio de gasto por "dolar" vendido```{r}plot_data <- monthly_expenses_sales %>%mutate(adjusted_expenses_per_dollar = expenses_per_dollar -1, # Subtract 1bar_color =ifelse(adjusted_expenses_per_dollar >0, "darkred", "darkgreen"), # Assign colorsmonth_label =paste(month_abbr, año) # Create a combined label for months ) %>%arrange(fecha) %>%# Sort by fecha (chronological order)mutate(month_label =factor(month_label, levels =unique(month_label))) # Ensure chronological orderunique_years <- plot_data %>%distinct(año) %>%arrange(año)year_start_positions <-which(levels(fct_rev(plot_data$month_label)) %in%paste("Jan", unique_years$año))offset_positions <- year_start_positions +0.5# Shift the lines slightly upward# Step 2: Create the bar graph with reversed y-axisggplot(plot_data, aes(x = adjusted_expenses_per_dollar,y =fct_rev(month_label),fill = bar_color)) +geom_bar(stat ="identity",show.legend =FALSE,alpha =0.8) +# Horizontal barsgeom_vline(xintercept =0,linetype ="solid",color ="gray20",size =1) +geom_hline(yintercept = offset_positions,,linetype ="dotted",alpha =0.5,color ="gray10",size =0.7) +geom_text(aes(label =round(adjusted_expenses_per_dollar, 2),hjust =ifelse(adjusted_expenses_per_dollar >0, -0.2, 1.2)),color ="black",size =2.8) +# Add labels to the barsscale_fill_identity() +#scale_y_discrete(breaks = plot_data$month_label[seq(1, nrow(plot_data), by = 3)]) +scale_x_continuous(labels =dollar_format(prefix ="$", big.mark =",")) +labs(title ="Graph 8: Expense Per Dollar Sold",x ="Net per dolar sold",y ="Month",caption ="Bars to the left (green) indicate profits; bars to the right (red) indicate losses." ) +theme_minimal() +theme(plot.title =element_text(hjust =0.3, size =14), # Center and style titleaxis.title.x =element_text(hjust =0.3, size =12), # Center x-axis titleaxis.text.y =element_text(size =7), # Adjust size of month labelsaxis.title.y =element_text(size =12),plot.caption =element_text(hjust =0))```### 4.6 Gráfico 9: Gastos por producto a lo largo del tiempo```{r}# Data processing and filteringexpenses_details <- expenses_data %>%rename(month = mes) %>%group_by(clasificacion, año, month, producto) %>%summarise(total_amount =sum(cantidad), .groups ="drop") %>%mutate(month_num =match(tolower(month), tolower(month.abb)),mes =month(month_num, label =TRUE, abbr =TRUE),fecha =make_date(year = año, month = month_num, day =days_in_month(month_num)))# Select a color paletten_colors <-length(unique(expenses_details$producto)) color_palette <-colorRampPalette(brewer.pal(9, "Set3"))(n_colors) # Create the ggplotexpense_plot <-ggplot(expenses_details, aes(x = fecha, y = total_amount, color = producto, group = producto)) +geom_line() +scale_color_manual(values = color_palette) +scale_y_continuous(labels =dollar_format(prefix ="$", big.mark =",")) +# Keep it dynamiclabs(title ="Graph 9: Monthly Expenses by Product Category",subtitle ="Comparison of key expense categories over time",x ="Date",y ="Total Amount (MXN)",color ="Product Category") +theme_minimal() +theme(plot.title =element_text(face ="bold",hjust =0.5),plot.subtitle =element_text(face ="italic",hjust =0.5),axis.text.x =element_text(angle =45,hjust =1),legend.position ="bottom" )# Convert to interactive Plotly plot with fully dynamic y-axisinteractive_plot <-ggplotly(expense_plot) %>%layout(autosize =TRUE,yaxis =list(autorange =TRUE, # **Dynamic y-axis scaling**tickformat ="$,.0f", # Ensures currency formattingshowgrid =TRUE, # Keeps gridlines visibleticks ="outside", # Ensures tick marks are visibletickmode ="auto"# Automatically adjusts tick spacing ) )# Display the interactive plotinteractive_plot```## 5. Análisis de Ventas```{r}# Calculate total sales and add necessary columnstotal_sales <- sales_data_long %>%group_by(fecha) %>%summarize(cantidad =sum(cantidad, na.rm =TRUE)) %>%mutate(clasificacion ="Total",source ="Ventas", año =year(fecha), mes =month(fecha, label =TRUE, abbr =TRUE), quarter =quarter(fecha))# Combine the total sales with the filtered datasales_data_plot <-bind_rows(sales_data_long, total_sales) %>%filter(!(año ==2020& mes %in%c("Jan", "Feb", "Mar", "Apr", "May", "Jun", "Jul", "Aug"))) %>%filter(!(año ==2024& mes %in%c("Sep", "Oct", "Nov", "Dec"))) # Choose a color palettepalette <-brewer.pal(n =4, name ="Set1") ```#### 5.1 Gráfico 10: Tendencias de ventas por plataforma```{r}sales_data_plot <- sales_data_plot %>%group_by(clasificacion) %>%arrange(fecha) %>%mutate(month_count =row_number()) %>%ungroup()# Create linear models for each 'clasificacion'models <- sales_data_plot %>%group_by(clasificacion) %>%do(model =lm(cantidad ~as.numeric(month_count), data = .))# Extract model coefficients and create equation stringsmodels <- models %>%mutate(slope =round(coef(model)[2], 2),intercept =round(coef(model)[1], 2),equation =paste0("y = ", intercept, " + ", slope, " * x"),y_position =seq(from =max(sales_data_plot$cantidad), to =max(sales_data_plot$cantidad) *2000, # Spread out the y positionslength.out =n()),x_position =as.Date("2021-01-01") # Set all x positions to the same date )# Choose a color palettepalette <-brewer.pal(n =4, name ="Set1") # Create ggplotp <-ggplot(sales_data_plot, aes(x = fecha, y = cantidad, color = clasificacion)) +geom_line(data = . %>%filter(clasificacion =="Total"), size =0.8) +geom_line(data = . %>%filter(clasificacion !="Total"), size =0.5, alpha =0.5) +geom_smooth(method ="lm", se = F, size =0.5) +scale_color_manual(values = palette) +geom_text(data = models,aes(x = x_position, y = y_position, label = equation, color = clasificacion), hjust =0, vjust =0, size =3.5, inherit.aes =FALSE) +scale_y_continuous(labels = scales::dollar_format(prefix ="$")) +labs(title ="Graph 10: Sales Trends by Platform",x ="Date",y ="Sales Quantity") +theme_minimal()# Convert to interactive plotly plotggplotly(p)```#### 5.2 Tabla 4: Modelos lineales y resultados```{r}# Create a sequential count column for each classificationsales_data_plot <- sales_data_plot %>%group_by(clasificacion) %>%arrange(fecha) %>%mutate(month_count =row_number()) %>%ungroup()# Create the linear model using the new count columnmodel_table_filtered <- sales_data_plot %>%group_by(clasificacion) %>%do(tidy(lm(cantidad ~ month_count, data = .))) %>%filter(term =="month_count") %>%mutate(significance =case_when( p.value <0.001~"***", p.value <0.01~"**", p.value <0.05~"*", p.value <0.1~".",TRUE~"" ) ) %>%select(Classification = clasificacion,Estimate = estimate,`Std. Error`= std.error,`t-value`= statistic,`p-value`= p.value,Significance = significance )# Print the table with stylingkable(model_table_filtered, format ="html", caption ="Table 4: Linea Models per Sales Clasification") %>%kable_styling(bootstrap_options =c("striped", "hover", "condensed"), full_width = F)```**Resúmen de Estadística de Ventas**- **Depósito**: Aumento estadísticamente significativo de \$1,768 MXN por mes (p \< 0.001).- **Efectivo**: Disminución estadísticamente significativa de -\$63 MXN por mes (p \< 0.001).- **Página**: Aumento estadísticamente significativo de \$1,526 MXN por mes (p \< 0.001).**Total: Aumento estadísticamente significativo de \$3,230.MXN por mes (p \< 0.001).**## 6. Estado de ResultadosLa tabla de estado de resultados general muestra un resumen financiero de Nala. Muestra las ventas, costos, y la utilidad neta.```{r}# Step 1: Process and calculate the financial metricsestado_resultados <- combined_data %>%group_by(año, clasificacion) %>%summarize(total_cantidad =sum(cantidad, na.rm =TRUE)) %>%pivot_wider(names_from = clasificacion, values_from = total_cantidad, values_fill =0) %>%mutate(`Ventas en Deposito`=`Ventas en Deposito`,`Ventas en Efectivo`=`Ventas en Efectivo`,`Ventas en Pagina`=`Ventas en Pagina`,Ventas =`Ventas en Deposito`+`Ventas en Efectivo`+`Ventas en Pagina`,`Costo de Ventas`=`Costos de Ventas`,`Utilidad Bruta`= Ventas -`Costo de Ventas`,`Gastos de Ventas`=`Gastos de Ventas`,`Gastos Administrativos`=`Gastos de Admin`,`Utilidad de Operacion`=`Utilidad Bruta`-`Gastos de Ventas`-`Gastos Administrativos`,`Gastos Financieros`=`Gastos Financieros`,`Otros Gastos `=0, `Utilidad Periodo`=`Utilidad de Operacion`-`Gastos Financieros`,`Utilidad Acumulada`=0,`Utilidad Neta`=`Utilidad de Operacion`)# Step 2: Calculate Utilidad Acumulada and Utilidad Netafor (i in2:nrow(estado_resultados)) { estado_resultados$Utilidad_Acumulada[i] <- estado_resultados$Utilidad_Neta[i -1] estado_resultados$Utilidad_Neta[i] <- estado_resultados$Utilidad_Periodo[i] + estado_resultados$Utilidad_Acumulada[i]}# Step 3: Format numbers as currencyestado_resultados <- estado_resultados %>%mutate(across(where(is.numeric), ~paste0("$ ", formatC(round(.), format ="f", big.mark =",", digits =0))))# Step 4: Transpose the table and reorder the rowsdesired_order <-c("Ventas en Deposito","Ventas en Efectivo","Ventas en Pagina","Ventas", "Costo de Ventas", "Utilidad Bruta", "Gastos de Ventas", "Gastos Administrativos", "Utilidad de Operacion", "Gastos Financieros", "Otros Gastos", "Utilidad Periodo", "Utilidad Acumulada", "Utilidad Neta")estado_resultados <- estado_resultados %>%pivot_longer(-año, names_to ="Metrica", values_to ="Value") %>%mutate(Metrica =factor(Metrica, levels = desired_order)) %>%arrange(Metrica) %>%pivot_wider(names_from = año, values_from = Value) %>%na.omit() %>%mutate(across(where(is.numeric), scales::dollar_format(prefix ="$")))```#### 6.1 Tabla 5. Estado de resultados```{r}estado_resultados %>%kable(format ="html", caption ="Table 5: Yearly Balance Sheet") %>%kable_styling(bootstrap_options =c("striped", "hover", "condensed"), full_width = F) %>%row_spec(c(4), bold =TRUE) %>%row_spec(c(4), background ="#E6F9E6", bold =TRUE) %>%row_spec(c(6, 9, 12), background ="lightyellow", bold =TRUE)```#### 6.2 Tabla 6: Cálculo de márgenes y porcentajesLa tabla de márgenes presenta los márgenes financieros como porcentaje de las ventas. Estos márgenes ayudan a evaluar la eficiencia y rentabilidad de Nala, mostrando cómo los costos y gastos impactan las ganancias.```{r}# Convert all monetary values to numericestado_resultados <- estado_resultados %>%mutate(across(`2021`:`2024`, ~as.numeric(str_remove_all(.x, "\\$|,"))))# Now, calculate the percentages using the numeric valuesventas <- estado_resultados %>%filter(Metrica =="Ventas") %>%select(-Metrica)# Function to calculate percentages for each row based on 'Ventas'calculate_percentages <-function(row, ventas) {if (row["Metrica"] =="Ventas") {# Set Ventas percentage to 100 for display purposes percentages <-rep(100, length(row) -1) } else {# Calculate percentages percentages <-as.numeric(row[-1]) / ventas *100 percentages <-round(percentages, 1) # Round to one decimal }# Return the row with percentages formatted with a percentage sign percentages <-paste0(percentages, "%")return(c(row[1], percentages))}# Apply the function to each rowpercentage_results <-t(apply(estado_resultados, 1, calculate_percentages, ventas =as.numeric(ventas)))# Convert the results back to a dataframe and update column namespercentage_results_df <-as.data.frame(percentage_results)colnames(percentage_results_df) <-colnames(estado_resultados)# Print the percentage results dataframe with kablekable(percentage_results_df, format ="html", caption ="Table 6: Balance Sheet (%)") %>%kable_styling(bootstrap_options =c("striped", "hover", "condensed"), full_width = F) %>%row_spec(c(4), background ="#E6F9E6", bold =TRUE) %>%row_spec(c(6, 9, 12), background ="lightyellow", bold =TRUE)```#### 6.3 Tabla 7: Crecimiento de Nala porcentual```{r}# Calculate year-over-year growth for all relevant metricsventas_growth <- estado_resultados %>%filter(Metrica =="Ventas") %>%mutate(Growth_2022 = scales::percent((`2022`-`2021`) /`2021`),Growth_2023 = scales::percent((`2023`-`2022`) /`2022`),Growth_2024 = scales::percent((`2024`-`2023`) /`2023`) )# Cost of Goods Sold (Costo_de_Ventas)cost_growth <- estado_resultados %>%filter(Metrica =="Costo de Ventas") %>%mutate(Growth_2022 = scales::percent((`2022`-`2021`) /`2021`),Growth_2023 = scales::percent((`2023`-`2022`) /`2022`),Growth_2024 = scales::percent((`2024`-`2023`) /`2023`) )# Expenses (Gastos_de_ventas and Gastos_administrativos)expenses_growth <- estado_resultados %>%filter(Metrica %in%c("Gastos de Ventas", "Gastos Administrativos")) %>%mutate(Growth_2022 = scales::percent((`2022`-`2021`) /`2021`),Growth_2023 = scales::percent((`2023`-`2022`) /`2022`),Growth_2024 = scales::percent((`2024`-`2023`) /`2023`) )# Utility (Utilidad_Bruta, Utilidad_de_Operacion, Utilidad_Neta)utility_growth <- estado_resultados %>%filter(Metrica %in%c("Utilidad Bruta", "Utilidad de Operacion", "Utilidad Neta")) %>%mutate(Growth_2022 = scales::percent((`2022`-`2021`) /`2021`),Growth_2023 = scales::percent((`2023`-`2022`) /`2022`),Growth_2024 = scales::percent((`2024`-`2023`) /`2023`) )# Combine all into a single data framecombined_growth <-bind_rows(ventas_growth, cost_growth, expenses_growth, utility_growth) %>%select(Metrica, Growth_2022, Growth_2023, Growth_2024)# Display the combined growth tablecombined_growth %>%kable(format ="html", caption ="Table 7: Year-over-Year Growth (%)", align ='c') %>%kable_styling(bootstrap_options =c("hover", "condensed"), full_width = F) %>%row_spec(c(1), background ="#E6F9E6", bold =TRUE) %>%row_spec(c(5, 7), background ="lightyellow", bold =TRUE)```####