Financial Data Analysis of Nala

Author

Javier Patrón

Published

February 24, 2025

Este documento proporciona un análisis de los datos financieros de la empresa Nala.

Índice

  1. Cargar bibliotecas necesarias

  2. Leer los datos de gastos y ventas de NALA 2020-2024

  3. Combinar los datos

  4. Análisis financiero

    4.1-4.3 Ventas totales vs. gastos

    4.4 Análisis de variación mensual

    4.5 Razón de Gastos

  1. Análisis de las ventas

    5.1 Gráficas: Ventas a lo largo del tiempo

    5.2 Tendencias de ventas por plataforma

    5.3 Modelos lineales y resultados

  2. Estado de Resultados

    6.1 Cálculo de utilidades

    6.2 Cálculo de márgenes y porcentajes

1. Cargar Bibliotecas

Code
library(tidyverse)  # For data manipulation and visualization; includes readr, dplyr, ggplot2
library(janitor)    # For cleaning data and making tabular outputs cleaner
library(scales)     # For scaling graphical outputs
library(RColorBrewer)  # For using color palettes in visualizations
library(plotly)     # For interactive web-based graphics
library(stringr)    # For string manipulation (also part of tidyverse but listed for clarity)
library(knitr)      # For dynamic report generation in R
library(kableExtra) # For enhancing 'kable' tables in R Markdown
library(broom)      # For converting statistical analysis objects into tidy data frames
library(DT)         # For creating interactive tables in R

2. Leer Datos

2.1 Leer datos de gastos

Code
# Step 1: Read and clean the raw data, and rename it to raw_expense_data

expenses_file_path <- "/Users/javier.patron/Downloads/pagos Nala - Expenses Data Analisis (8).csv"

raw_expense_data <- read_csv(expenses_file_path) %>%
  clean_names() %>%
  # Set the first row as column names and remove the first row
  { setNames(.[-1,], as.character(.[1,])) } %>%
  clean_names() %>%
  select_if(~ !all(is.na(.)))

# Step 2: Create three separate data frames for each set of columns
df1 <- raw_expense_data %>%
  select(clasificacion, fecha, empresa, cantidad, producto)

df2 <- raw_expense_data %>%
  select(clasificacion = clasificacion_2, fecha = fecha_2, empresa = empresa_2, cantidad = cantidad_2, producto = producto_2)

df3 <- raw_expense_data %>%
  select(clasificacion = clasificacion_3, fecha = fecha_3, empresa = empresa_3, cantidad = cantidad_3, producto = producto_3)

df4 <- raw_expense_data %>%
  select(clasificacion = clasificacion_4, fecha = fecha_4, empresa = empresa_4, cantidad = cantidad_4, producto = producto_4)

# Step 3: Combine all three data frames and name it as expenses_data
expenses_data <- bind_rows(df1, df2, df3, df4) %>%
  # Remove rows where all columns are NA (if any)
  filter(!if_all(everything(), is.na))


# Clean the data
expenses_data <- expenses_data %>%
  mutate(fecha = lubridate::dmy(fecha),
         año = lubridate::year(fecha),
         mes = lubridate::month(fecha, label = TRUE),
         quarter = lubridate::quarter(fecha),
         source = case_when(
    str_detect(clasificacion, "Gastos") ~ "Gastos",
    str_detect(clasificacion, "Costos") ~ "Costos",
    TRUE ~ "other"  # This handles any other cases, if they exist
  )) %>% 
  mutate(
    cantidad = gsub("\\$", "", cantidad),  
    cantidad = gsub(",", "", cantidad),    
    cantidad = as.numeric(cantidad)) %>% 
  select("source", "clasificacion", "fecha", "año", "mes", "quarter", "empresa", "cantidad", "producto") %>% 
  filter(!str_detect(año, "2025"))

2.2 Leer datos de ventas

Code
# Read the CSV file
sales_file_path <- "/Users/javier.patron/Downloads/pagos Nala - Sales Data Analisis (2).csv"

sales_data <- read_csv(sales_file_path) %>% 
  clean_names()

# Clean and Organize the data
sales_data_long <- sales_data %>%
  pivot_longer(cols = jan:dec, names_to = "month", values_to = "cantidad") %>%
  rename(año = ano, tipo_venta = tipo) %>%
  mutate(
    month = match(tolower(month), tolower(month.abb)),  # Convert month abbreviation to number
    mes = month(month, label = TRUE, abbr = TRUE),      # Convert to lubridate month format
    source = "Ventas",
    fecha = make_date(year = año, month = month(month), day = days_in_month(month(month, label = FALSE))),  # Add last day of the month
    clasificacion = tipo_venta,
    quarter = quarter(month),
    cantidad = as.numeric(gsub("[$,]", "", cantidad))) %>% 
  select("source", "clasificacion", "fecha", "año", "mes", "quarter" , "cantidad")

#sales_data_long <- sales_data_long %>% 
#  arrange(fecha) %>% 
#  slice(1:169)

3. Combinar los datos

La tabla 2 y 3 están ocultas por temas internos de Nala.

Code
combined_data <- full_join(
  expenses_data %>% select(-empresa, -producto), 
  sales_data_long) %>%
  mutate(
    clasificacion = case_when(
      clasificacion == "Deposito" ~ "Ventas en Deposito",
      clasificacion == "Efectivo" ~ "Ventas en Efectivo",
      clasificacion == "Pagina" ~ "Ventas en Pagina",
      TRUE ~ clasificacion))


combined_data <- full_join(
  expenses_data %>% select(-empresa, -producto), 
  sales_data_long) %>%
  mutate(
    clasificacion = case_when(
      clasificacion == "Gastos Admin" ~ "Gastos de Admin",
      clasificacion == "Gastos Ventas" ~ "Gastos de Ventas",
      clasificacion == "Costos Ventas" ~ "Costos de Ventas",
      clasificacion == "Gastos Financieros" ~ "Gastos Financieros",
      clasificacion == "Deposito" ~ "Ventas en Deposito",
      clasificacion == "Efectivo" ~ "Ventas en Efectivo",
      clasificacion == "Pagina" ~ "Ventas en Pagina",
      TRUE ~ clasificacion))

Tabla 1: Gastos

Tabla 2: Ventas

4. Análisis Histórico

4.1 Gráfico 1: Ventas totales vs. gastos

Code
# Define custom colors based on classification
custom_colors <- c(
  "Ventas en Deposito" = "#6EBF74",  # Green
  "Ventas en Efectivo" = "#499F54",  # Green
  "Ventas en Pagina" = "#28703B",    # Green
  "Costos de Ventas" = "#FFDBA9",    # Pink
  "Gastos de Admin" = "#FF9F40",     # Coral
  "Gastos de Ventas" = "#FF8347",     # Tomato
  "Gastos Financieros" = "#FF5100"
)

# Explicitly categorize each 'clasificacion' into 'sales' or 'expenses'
combined_data <- combined_data %>%
  mutate(source_grouped = case_when(
    clasificacion == "Ventas en Deposito" ~ "sales",
    clasificacion == "Ventas en Efectivo" ~ "sales",
    clasificacion == "Ventas en Pagina" ~ "sales",
    clasificacion == "Costos de Ventas" ~ "expenses",
    clasificacion == "Gastos de Admin" ~ "expenses",
    clasificacion == "Gastos de Ventas" ~ "expenses",
    clasificacion == "Gastos Financieros" ~ "expenses",
    TRUE ~ "other"  # This is just a fallback case
  ))

# Summarize total sales and combined expenses across the entire dataset
total_summary <- combined_data %>%
  group_by(source_grouped, clasificacion) %>%
  summarize(total_amount = sum(as.numeric(cantidad), na.rm = TRUE)) %>% 
  mutate(total_sum = sum(total_amount))

total_sums <- total_summary %>%
  group_by(source_grouped) %>%
  summarize(total_sum = sum(total_amount))


# Bar graph of total sales vs. stacked combined expenses (Overall History) with custom colors
ggplot(total_summary, aes(x = source_grouped,
                          y = total_amount,
                          fill = clasificacion)) +
  geom_bar(stat = "identity",
           position = "stack") +
  labs(title = "Graph 1: Total Sales vs. Expenses (Overall History)",
       y = "Total Amount (MXN)",
       x = "") +
  scale_fill_manual(values = custom_colors) +
  scale_y_continuous(labels = dollar_format(prefix = "$",
                                            big.mark = ",")) +
  theme_minimal() +
  geom_text(aes(label = scales::dollar(total_amount, 
                                       prefix = "$", 
                                       big.mark = ",")), 
            position = position_stack(vjust = 0.5), 
            size = 3, 
            color = "white")  +
  # Add specific text for each total sum from the total_sums table
  annotate("text", x = 1, y = total_sums$total_sum[1], 
           label = scales::dollar(total_sums$total_sum[1],
                                  prefix = "$",
                                  big.mark = ","), 
           vjust = -0.5,
           size = 2.5,
           fontface = "bold",
           color = "#FF7347") +
  annotate("text",
           x = 2,
           y = total_sums$total_sum[2], 
           label = scales::dollar(total_sums$total_sum[2],
                                  prefix = "$",
                                  big.mark = ","), 
           vjust = -0.2,
           size = 2.5,
           fontface = "bold",
           color = "darkgreen")

4.2 Gráfico 2: Ventas totales vs. gastos por año

Code
# Summarize total sales and expenses per year
yearly_summary <- combined_data %>%
  group_by(año, source_grouped, clasificacion) %>%
  summarize(total_amount = sum(as.numeric(cantidad), na.rm = TRUE)) %>%
  ungroup()

# Bar graph of total sales vs. expenses per year with facets and annotations
ggplot(yearly_summary, aes(x = source_grouped, y = total_amount, fill = clasificacion)) +
  geom_bar(stat = "identity", position = "stack") +
  labs(title = "Graph 2: Total Sales vs. Expenses per Year", y = "Total Amount (MXN)", x = "") +
  scale_fill_manual(values = custom_colors) +
  scale_y_continuous(labels = dollar_format(prefix = "$", big.mark = ",")) +
  theme_minimal() +
  facet_wrap(~año) 

4.3 Gráfico 3: Ventas totales vs. gastos por un tiempo determinado

Code
# Assuming combined_data has a date column named "fecha"
month_filter <- 13

# POR REVISAR CON REGINA

# Filter data for the last XX months
#filtered_data <- combined_data %>%
#  mutate(fecha = as.Date(fecha, format = "%d/%m/%Y")) %>%  # Ensure "fecha" is in Date format
#  filter(fecha >= as.Date(Sys.Date() %m-% months(month_filter))) # Filter last XXX months

# Ensure combined_data has a properly formatted "fecha" column
filtered_data <- combined_data %>%
  mutate(fecha = as.Date(fecha, format = "%d/%m/%Y")) %>%  # Convert to Date format
  filter(
    year(fecha) * 12 + month(fecha) >= year(Sys.Date()) * 12 + month(Sys.Date()) - month_filter)


# Summarize total sales and expenses per year and other groupings
filtered_summary <- filtered_data %>%
  mutate(año = year(fecha)) %>%  # Extract year from fecha
  group_by(source_grouped, clasificacion) %>%
  summarize(total_amount = sum(as.numeric(cantidad), na.rm = TRUE)) %>%
  ungroup()

filtered_sums <- filtered_summary %>%
  group_by(source_grouped) %>%
  summarize(total_sum = sum(total_amount))


# Assuming filtered_data has already been created and summarized as 'yearly_summary'
ggplot(filtered_summary, aes(x = source_grouped, y = total_amount, fill = clasificacion)) +
  geom_bar(stat = "identity", position = "stack") +
  labs(title = paste0("Graph 3: Total Sales vs. Expenses (Last ", month_filter," Months)"), 
       y = "Total Amount (MXN)", 
       x = "") +
  scale_fill_manual(values = custom_colors) +
  scale_y_continuous(labels = scales::dollar_format(prefix = "$", big.mark = ",")) +
  theme_minimal() +
  
  # Adding text labels inside the bars
  geom_text(aes(label = scales::dollar(total_amount, prefix = "$", big.mark = ",")), 
            position = position_stack(vjust = 0.5), 
            size = 2.5, color = "white") +

  # Annotate specific sums on the chart (e.g., for each category)
  annotate("text", x = 1, y = filtered_sums$total_sum[1], 
           label = scales::dollar(filtered_sums$total_sum[1], prefix = "$", big.mark = ","), 
           vjust = -0.5, size = 3, fontface = "bold", color = "#FF7347") +
  
  annotate("text", x = 2, y = filtered_sums$total_sum[2], 
           label = scales::dollar(filtered_sums$total_sum[2], prefix = "$", big.mark = ","), 
           vjust = -0.2, size = 3, fontface = "bold", color = "darkgreen")

4.4 Análisis de variación

Code
# Cleaned dataframe manipulation
monthly_summary <- combined_data %>% 
  rename(month = mes) %>% 
  group_by(source_grouped, source, año, month) %>% 
  summarise(total_amount = sum(cantidad)) %>% 
  mutate(month_num = match(tolower(month), tolower(month.abb)),
         month_abbr = month(month_num, label = TRUE, abbr = TRUE),  
         fecha = make_date(year = año, 
                           month = month_num, 
                           day = days_in_month(month_num))) %>%
  arrange(año, month_num, source_grouped)

Gráfico 4: Box Plot

Code
# Create a filtered monthly summary using the already-filtered data (filtered_data)
filtered_monthly_summary <- filtered_data %>%
  rename(month = mes) %>%  # Assuming 'mes' exists as the month column in the data
  group_by(source_grouped, source, año, month) %>% 
  summarise(total_amount = sum(as.numeric(cantidad), na.rm = TRUE)) %>%
  
  # Add month number and abbreviation
  mutate(month_num = match(tolower(month), tolower(month.abb)),
         month_abbr = month(month_num, label = TRUE, abbr = TRUE),
         fecha = make_date(year = año, month = month_num, day = days_in_month(month_num)))

# Create the interactive boxplot with Plotly
p <- plot_ly(
  data = filtered_monthly_summary,
  x = ~total_amount,
  y = ~source, 
  type = "box",
  color = ~source, 
  boxpoints = "outliers",  # Show only outliers
  jitter = 0.3, 
  pointpos = -1.8) %>%
  layout(
    title = list(
      text = paste0("Graph 4: Distribution of Expenses by Classification<br><sup>(Last ", month_filter, " Months)</sup>"),
      font = list(size = 18, family = "Arial", color = "black")),
    xaxis = list(title = "Total Monthly Sum (MXN)", tickformat = "$,.0f"),
    yaxis = list(title = "Classification"),
    showlegend = FALSE)

# Display the interactive plot
p

Caja (IQR): La caja representa el rango intercuartílico. Contiene el 50% central de los datos.

Parte izquierda de la caja (Q1): Este es el primer cuartil, o el percentil 25. Esto significa que el 25% de los datos están sobre la linea de la izauierda (entre la caja y la última linea representando el valor mínimo).

Parte derecha de la caja (Q3): Este es el tercer cuartil, o el percentil 75. Esto significa que el 75% de los datos están sobre la linea del lado derecgo (entre la caja y la última linea vertical representandp el valor máximo).

La línea dentro de la caja representa la mediana.

Gráfico 5: Ventas y gastos a lo largo del tiempo

Code
# Group and summarize monthly_summary

monthly_grouped_summary <- monthly_summary %>%
  group_by(source_grouped, año, month_num, month_abbr, fecha) %>%
  summarise(total_amount = sum(total_amount, na.rm = TRUE), .groups = "drop") %>% 
  arrange(año, month_num, source_grouped)


t <- monthly_grouped_summary %>%
  ggplot(aes(x = fecha, y = total_amount, fill = source_grouped, color = source_grouped)) +
  geom_area(alpha = 0.8, position = "identity") +  # Prevent stacking of areas
  geom_line(size = 0.2) +  # Ensure the lines are visible
  scale_y_continuous(labels = scales::dollar_format(prefix = "$", big.mark = ",")) +
  scale_fill_manual(values = c("expenses" = "#FC9272", "sales" = "#9ECBEA")) +  # Match area colors
  scale_color_manual(values = c("expenses" = "#DE2D26", "sales" = "#3182BD")) +  # Match line colors
  labs(
    title = "Graph 5: Monthly Amounts Over Time by Source Group",
    x = "Date",
    y = "Total Amount (MXN)",
    fill = "Source Group",
    color = "Source Group"
  ) +
  theme_minimal()

# Use ggplotly with custom tooltips
ggplotly(tooltip = c("x", "y", "color"))

Gráfico 6: Comparación Mensual

Code
# Bar graph comparing sales and expenses per month, faceted by year
ggplot(monthly_grouped_summary, aes(x = month_abbr, y = total_amount, fill = source_grouped)) +
  geom_bar(stat = "identity", position = "dodge", alpha = 0.85) +
  scale_y_continuous(labels = dollar_format(prefix = "$", big.mark = ",")) +
  labs(title = "Graph 6: Monthly Comparison of Sales and Expenses Faceted by Year",
       x = "Month",
       y = "Total Amount",
       fill = "Category") +
  facet_wrap(~ year(fecha), ncol = 1) +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

Code
# Calculate the yearly totals and profit
yearly_summary_table <- monthly_summary %>%
  group_by(year = year(fecha)) %>%
  summarise(
    total_expenses = sum(total_amount[source_grouped == "expenses"]),
    total_sales = sum(total_amount[source_grouped == "sales"]),
    profit = total_sales - total_expenses) %>%
  mutate(
    total_expenses = scales::dollar(total_expenses, prefix = "$", big.mark = ","),
    total_sales = scales::dollar(total_sales, prefix = "$", big.mark = ","),
    profit = scales::dollar(profit, prefix = "$", big.mark = ","))
Code
# Calculate the monthly totals for all years
monthly_summary_table <- monthly_summary %>%
  group_by(year = year(fecha), month_abbr) %>%
  summarise(
    total_expenses = sum(total_amount[source_grouped == "expenses"]),
    total_sales = sum(total_amount[source_grouped == "sales"]),
    profit = total_sales - total_expenses
  ) %>%
  arrange(year, match(month_abbr, month.abb)) %>%
  mutate(
    total_expenses = scales::dollar(total_expenses, prefix = "$", big.mark = ","),
    total_sales = scales::dollar(total_sales, prefix = "$", big.mark = ","),
    profit = scales::dollar(profit, prefix = "$", big.mark = ","))

Gráfico 7: Ganancia o perdida mensual

Code
# Ensure that 'profit' is numeric
monthly_summary_table <- monthly_summary_table %>%
  mutate(profit_numeric = as.numeric(gsub("[$,]", "", profit)),
         profit_formatted = scales::comma(profit_numeric), # Format with commas
         date = as.Date(paste(year, month_abbr, "01", sep = "-"), 
                        format = "%Y-%b-%d"))

# Create the ggplot object
p <- ggplot(monthly_summary_table, aes(x = date, 
                                       y = profit_numeric, 
                                       fill = profit_numeric > 0, 
                                       text = paste("Date: ", format(date, "%b %Y"), 
                                                    "<br>Profit: $", profit_formatted))) + # Use formatted profit
  geom_bar(stat = "identity") +
  scale_y_continuous(labels = dollar_format(prefix = "$", big.mark = ",")) +
  scale_x_date(date_labels = "%Y", 
               date_breaks = "1 year", 
               minor_breaks = NULL) + 
  scale_fill_manual(values = c("TRUE" = "darkgreen", "FALSE" = "coral")) +
  labs(
    title = "Graph 7: Monthly Profits Over Time (2020-2024)",
    x = "Year",
    y = "Profit ($)"
  ) +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 0, 
                                   hjust = 1, 
                                   vjust = 0.5),
    legend.position = "none")

# Convert the ggplot object to an interactive plotly object
p_interactive <- ggplotly(p, tooltip = "text")

# Display the interactive plot
p_interactive

4.5 Razón de gastos sobre ventas

Tabla 3: Cuadro resumen anual

Code
## SUPER IMPORTANT FACTORS

# Function to remove dollar signs and commas, and convert to numeric
clean_numeric <- function(x) {
  as.numeric(gsub("[\\$,]", "", x))
}


yearly_summary_table <- yearly_summary_table %>%
  mutate(total_expenses = clean_numeric(total_expenses),
         total_sales = clean_numeric(total_sales),
         profit = clean_numeric(profit))


yearly_summary_table <- yearly_summary_table %>%
  mutate(year = as.character(year))


yearly_summary_table <- yearly_summary_table %>%
  mutate(expenses_per_dollar = ifelse(total_sales == 0, NA, total_expenses / total_sales))


summary_row <- yearly_summary_table %>%
  summarise(year = "All", 
            total_expenses = sum(total_expenses, na.rm = TRUE),
            total_sales = sum(total_sales, na.rm = TRUE),
            profit = sum(profit, na.rm = TRUE)) %>%
  mutate(expenses_per_dollar = ifelse(total_sales == 0, NA, total_expenses / total_sales))

# Step 5: Add the summary row to the original data frame
yearly_summary_table_with_total <- yearly_summary_table %>%
  bind_rows(summary_row)


# Format the numeric columns with $ and commas
formatted_table <- yearly_summary_table_with_total %>%
  mutate(
    total_expenses = dollar(total_expenses),
    total_sales = dollar(total_sales),
    profit = dollar(profit))

# Simple formatted table with kable
formatted_table %>%
  kable(format = "html", caption = "Table 3: Yearly Summary Table with Total", align = "c") %>%
  kable_styling(bootstrap_options = c("striped", "hover", "condensed"), full_width = F)
Table 3: Yearly Summary Table with Total
year total_expenses total_sales profit expenses_per_dollar
2021 $205,621 $274,423 $68,802.26 0.7492849
2022 $772,048 $738,756 -$33,291.55 1.0450649
2023 $1,258,615 $1,303,699 $45,084.12 0.9654184
2024 $1,703,863 $1,626,886 -$76,977.35 1.0473155
All $3,940,147 $3,943,764 $3,617.48 0.9990829
Code
# Step 1: Calculate expenses_per_dollar by pivoting and performing division
monthly_expenses_sales <- monthly_grouped_summary %>%
  pivot_wider(names_from = source_grouped, values_from = total_amount) %>%
  mutate(
    expenses_per_dollar = ifelse(sales == 0, NA, expenses / sales))

monthly_expenses_sales <- monthly_expenses_sales %>%
  arrange(año, month_num)

Gráfico 8: Ratio de gasto por “dolar” vendido

Code
plot_data <- monthly_expenses_sales %>%
  mutate(
    adjusted_expenses_per_dollar = expenses_per_dollar - 1,  # Subtract 1
    bar_color = ifelse(adjusted_expenses_per_dollar > 0, "darkred", "darkgreen"),  # Assign colors
    month_label = paste(month_abbr, año)  # Create a combined label for months
  ) %>%
  arrange(fecha) %>%  # Sort by fecha (chronological order)
  mutate(month_label = factor(month_label, levels = unique(month_label)))  # Ensure chronological order

unique_years <- plot_data %>%
  distinct(año) %>%
  arrange(año)

year_start_positions <- which(levels(fct_rev(plot_data$month_label)) %in% paste("Jan", unique_years$año))
offset_positions <- year_start_positions + 0.5  # Shift the lines slightly upward

# Step 2: Create the bar graph with reversed y-axis
ggplot(plot_data, aes(x = adjusted_expenses_per_dollar,
                      y = fct_rev(month_label),
                      fill = bar_color)) +
  geom_bar(stat = "identity",
           show.legend = FALSE,
           alpha = 0.8) +  # Horizontal bars
  geom_vline(xintercept = 0,
             linetype = "solid",
             color = "gray20",
             size = 1) +
  geom_hline(yintercept = offset_positions,,
             linetype = "dotted",
             alpha = 0.5,
             color = "gray10",
             size = 0.7) + 
  geom_text(aes(label = round(adjusted_expenses_per_dollar, 2),
                hjust = ifelse(adjusted_expenses_per_dollar > 0, -0.2, 1.2)),
            color = "black",
            size =2.8) +  # Add labels to the bars
  scale_fill_identity() + 
  #scale_y_discrete(breaks = plot_data$month_label[seq(1, nrow(plot_data), by = 3)]) +
  scale_x_continuous(labels = dollar_format(prefix = "$", big.mark = ",")) +
  labs(
    title = "Graph 8: Expense Per Dollar Sold",
    x = "Net per dolar sold",
    y = "Month",
    caption = "Bars to the left (green) indicate profits; bars to the right (red) indicate losses."
  ) +
  theme_minimal() +
  theme(
    plot.title = element_text(hjust = 0.3, size = 14),  # Center and style title
    axis.title.x = element_text(hjust = 0.3, size = 12),  # Center x-axis title
    axis.text.y = element_text(size = 7),  # Adjust size of month labels
    axis.title.y = element_text(size = 12),
    plot.caption = element_text(hjust = 0))

4.6 Gráfico 9: Gastos por producto a lo largo del tiempo

Code
# Data processing and filtering
expenses_details <- expenses_data %>%
  rename(month = mes) %>%
  group_by(clasificacion, año, month, producto) %>%
  summarise(total_amount = sum(cantidad), .groups = "drop") %>%
  mutate(
    month_num = match(tolower(month), tolower(month.abb)),
    mes = month(month_num, label = TRUE, abbr = TRUE),
    fecha = make_date(year = año, month = month_num, day = days_in_month(month_num)))

# Select a color palette
n_colors <- length(unique(expenses_details$producto))  
color_palette <- colorRampPalette(brewer.pal(9, "Set3"))(n_colors)  

# Create the ggplot
expense_plot <- ggplot(expenses_details, aes(x = fecha, y = total_amount, color = producto, group = producto)) +
  geom_line() +
  scale_color_manual(values = color_palette) +  
  scale_y_continuous(labels = dollar_format(prefix = "$", big.mark = ",")) +  # Keep it dynamic
  labs(
    title = "Graph 9: Monthly Expenses by Product Category",
    subtitle = "Comparison of key expense categories over time",
    x = "Date",
    y = "Total Amount (MXN)",
    color = "Product Category") +
  theme_minimal() +
  theme(
    plot.title = element_text(face = "bold",
                              hjust = 0.5),
    plot.subtitle = element_text(face = "italic",
                                 hjust = 0.5),
    axis.text.x = element_text(angle = 45,
                               hjust = 1),
    legend.position = "bottom"
  )

# Convert to interactive Plotly plot with fully dynamic y-axis
interactive_plot <- ggplotly(expense_plot) %>%
  layout(
    autosize = TRUE,
    yaxis = list(
      autorange = TRUE,  # **Dynamic y-axis scaling**
      tickformat = "$,.0f",   # Ensures currency formatting
      showgrid = TRUE,        # Keeps gridlines visible
      ticks = "outside",      # Ensures tick marks are visible
      tickmode = "auto"       # Automatically adjusts tick spacing
    )
  )

# Display the interactive plot
interactive_plot

5. Análisis de Ventas

Code
# Calculate total sales and add necessary columns
total_sales <- sales_data_long %>%
  group_by(fecha) %>%
  summarize(cantidad = sum(cantidad, na.rm = TRUE)) %>%
  mutate(
    clasificacion = "Total",
    source = "Ventas",          
    año = year(fecha),           
    mes = month(fecha, label = TRUE, abbr = TRUE),  
    quarter = quarter(fecha))

# Combine the total sales with the filtered data
sales_data_plot <- bind_rows(sales_data_long, total_sales) %>%
  filter(!(año == 2020 & mes %in% c("Jan", "Feb", "Mar", "Apr", "May", "Jun", "Jul", "Aug"))) %>% 
  filter(!(año == 2024 & mes %in% c("Sep", "Oct", "Nov", "Dec"))) 


# Choose a color palette
palette <- brewer.pal(n = 4, name = "Set1")  

5.1 Gráfico 10: Tendencias de ventas por plataforma

Code
sales_data_plot <- sales_data_plot %>%
  group_by(clasificacion) %>%
  arrange(fecha) %>%
  mutate(month_count = row_number()) %>%
  ungroup()

# Create linear models for each 'clasificacion'
models <- sales_data_plot %>%
  group_by(clasificacion) %>%
  do(model = lm(cantidad ~ as.numeric(month_count), data = .))

# Extract model coefficients and create equation strings
models <- models %>%
  mutate(
    slope = round(coef(model)[2], 2),
    intercept = round(coef(model)[1], 2),
    equation = paste0("y = ", intercept, " + ", slope, " * x"),
    y_position = seq(from = max(sales_data_plot$cantidad), 
                     to = max(sales_data_plot$cantidad) * 2000, # Spread out the y positions
                     length.out = n()),
    x_position = as.Date("2021-01-01") # Set all x positions to the same date
  )
# Choose a color palette
palette <- brewer.pal(n = 4, name = "Set1")  

# Create ggplot
p <- ggplot(sales_data_plot, aes(x = fecha, y = cantidad, color = clasificacion)) +
  geom_line(data = . %>% filter(clasificacion == "Total"), size = 0.8) + 
  geom_line(data = . %>% filter(clasificacion != "Total"), size = 0.5, alpha = 0.5) +
  geom_smooth(method = "lm", se = F, size =0.5) +
  scale_color_manual(values = palette) +
  geom_text(
    data = models,
    aes(x = x_position, 
        y = y_position, 
        label = equation, 
        color = clasificacion), 
    hjust = 0, 
    vjust = 0, 
    size = 3.5, inherit.aes = FALSE) +
  scale_y_continuous(labels = scales::dollar_format(prefix = "$")) +  
  labs(title = "Graph 10: Sales Trends by Platform",
       x = "Date",
       y = "Sales Quantity") +
  theme_minimal()

# Convert to interactive plotly plot
ggplotly(p)

5.2 Tabla 4: Modelos lineales y resultados

Code
# Create a sequential count column for each classification
sales_data_plot <- sales_data_plot %>%
  group_by(clasificacion) %>%
  arrange(fecha) %>%
  mutate(month_count = row_number()) %>%
  ungroup()

# Create the linear model using the new count column
model_table_filtered <- sales_data_plot %>%
  group_by(clasificacion) %>%
  do(tidy(lm(cantidad ~ month_count, data = .))) %>%
  filter(term == "month_count") %>%
  mutate(
    significance = case_when(
      p.value < 0.001 ~ "***",
      p.value < 0.01 ~ "**",
      p.value < 0.05 ~ "*",
      p.value < 0.1 ~ ".",
      TRUE ~ ""
    )
  ) %>%
  select(
    Classification = clasificacion,
    Estimate = estimate,
    `Std. Error` = std.error,
    `t-value` = statistic,
    `p-value` = p.value,
    Significance = significance
  )

# Print the table with styling
kable(model_table_filtered, format = "html", caption = "Table 4: Linea Models per Sales Clasification") %>%
  kable_styling(bootstrap_options = c("striped", "hover", "condensed"), full_width = F)
Table 4: Linea Models per Sales Clasification
Classification Estimate Std. Error t-value p-value Significance
Deposito 1767.54556 211.01173 8.376528 0.000000 ***
Efectivo -63.14348 22.70274 -2.781315 0.008072 **
Pagina 1525.73976 128.69518 11.855454 0.000000 ***
Total 3230.14184 226.65003 14.251672 0.000000 ***

Resúmen de Estadística de Ventas

  • Depósito: Aumento estadísticamente significativo de $1,768 MXN por mes (p < 0.001).

  • Efectivo: Disminución estadísticamente significativa de -$63 MXN por mes (p < 0.001).

  • Página: Aumento estadísticamente significativo de $1,526 MXN por mes (p < 0.001).

Total: Aumento estadísticamente significativo de $3,230.MXN por mes (p < 0.001).

6. Estado de Resultados

La tabla de estado de resultados general muestra un resumen financiero de Nala. Muestra las ventas, costos, y la utilidad neta.

Code
# Step 1: Process and calculate the financial metrics
estado_resultados <- combined_data %>%
  group_by(año, clasificacion) %>%
  summarize(total_cantidad = sum(cantidad, na.rm = TRUE)) %>%
  pivot_wider(names_from = clasificacion, values_from = total_cantidad, values_fill = 0) %>%
  mutate(
    `Ventas en Deposito` = `Ventas en Deposito`,
    `Ventas en Efectivo` = `Ventas en Efectivo`,
    `Ventas en Pagina` = `Ventas en Pagina`,
    Ventas = `Ventas en Deposito` + `Ventas en Efectivo` + `Ventas en Pagina`,
    `Costo de Ventas` = `Costos de Ventas`,
    `Utilidad Bruta` = Ventas - `Costo de Ventas`,
    `Gastos de Ventas` = `Gastos de Ventas`,
    `Gastos Administrativos` = `Gastos de Admin`,
    `Utilidad de Operacion` = `Utilidad Bruta` - `Gastos de Ventas` - `Gastos Administrativos`,
    `Gastos Financieros` = `Gastos Financieros`,
    `Otros Gastos `= 0,        
    `Utilidad Periodo` = `Utilidad de Operacion` - `Gastos Financieros`,
    `Utilidad Acumulada` = 0,
    `Utilidad Neta` = `Utilidad de Operacion`)

# Step 2: Calculate Utilidad Acumulada and Utilidad Neta
for (i in 2:nrow(estado_resultados)) {
  estado_resultados$Utilidad_Acumulada[i] <- estado_resultados$Utilidad_Neta[i - 1]
  estado_resultados$Utilidad_Neta[i] <- estado_resultados$Utilidad_Periodo[i] + estado_resultados$Utilidad_Acumulada[i]
}

# Step 3: Format numbers as currency
estado_resultados <- estado_resultados %>%
  mutate(across(where(is.numeric), ~ paste0("$ ", formatC(round(.), format = "f", big.mark = ",", digits = 0))))

# Step 4: Transpose the table and reorder the rows
desired_order <- c(
  "Ventas en Deposito",
  "Ventas en Efectivo",
  "Ventas en Pagina",
  "Ventas", 
  "Costo de Ventas", 
  "Utilidad Bruta", 
  "Gastos de Ventas", 
  "Gastos Administrativos", 
  "Utilidad de Operacion", 
  "Gastos Financieros", 
  "Otros Gastos", 
  "Utilidad Periodo", 
  "Utilidad Acumulada", 
  "Utilidad Neta")

estado_resultados <- estado_resultados %>%
  pivot_longer(-año, names_to = "Metrica", values_to = "Value") %>%
  mutate(Metrica = factor(Metrica, levels = desired_order)) %>%
  arrange(Metrica) %>%
  pivot_wider(names_from = año, values_from = Value) %>% 
  na.omit() %>%
  mutate(across(where(is.numeric), scales::dollar_format(prefix = "$")))

6.1 Tabla 5. Estado de resultados

Code
estado_resultados %>%
  kable(format = "html", caption = "Table 5: Yearly Balance Sheet") %>%
  kable_styling(bootstrap_options = c("striped", "hover", "condensed"), full_width = F) %>%
  row_spec(c(4), bold = TRUE) %>% 
  row_spec(c(4), background = "#E6F9E6", bold = TRUE) %>% 
  row_spec(c(6, 9, 12), background = "lightyellow", bold = TRUE)
Table 5: Yearly Balance Sheet
Metrica 2021 2022 2023 2024
Ventas en Deposito $ 112,554 $ 483,896 $ 730,154 $ 812,896
Ventas en Efectivo $ 50,495 $ 46,355 $ 31,318 $ 35,906
Ventas en Pagina $ 111,374 $ 208,505 $ 542,228 $ 778,084
Ventas $ 274,423 $ 738,756 $ 1,303,699 $ 1,626,886
Costo de Ventas $ 176,794 $ 429,597 $ 733,346 $ 828,552
Utilidad Bruta $ 97,629 $ 309,159 $ 570,353 $ 798,334
Gastos de Ventas $ 10,107 $ 34,463 $ 104,373 $ 164,248
Gastos Administrativos $ 18,720 $ 307,988 $ 420,896 $ 618,364
Utilidad de Operacion $ 68,802 $ -33,292 $ 45,084 $ 15,722
Gastos Financieros $ 0 $ 0 $ 0 $ 92,699
Utilidad Periodo $ 68,802 $ -33,292 $ 45,084 $ -76,977
Utilidad Acumulada $ 0 $ 0 $ 0 $ 0
Utilidad Neta $ 68,802 $ -33,292 $ 45,084 $ 15,722

6.2 Tabla 6: Cálculo de márgenes y porcentajes

La tabla de márgenes presenta los márgenes financieros como porcentaje de las ventas. Estos márgenes ayudan a evaluar la eficiencia y rentabilidad de Nala, mostrando cómo los costos y gastos impactan las ganancias.

Code
# Convert all monetary values to numeric
estado_resultados <- estado_resultados %>%
  mutate(across(`2021`:`2024`, ~ as.numeric(str_remove_all(.x, "\\$|,"))))

# Now, calculate the percentages using the numeric values
ventas <- estado_resultados %>%
  filter(Metrica == "Ventas") %>%
  select(-Metrica)

# Function to calculate percentages for each row based on 'Ventas'
calculate_percentages <- function(row, ventas) {
  if (row["Metrica"] == "Ventas") {
    # Set Ventas percentage to 100 for display purposes
    percentages <- rep(100, length(row) - 1)
  } else {
    # Calculate percentages
    percentages <- as.numeric(row[-1]) / ventas * 100
    percentages <- round(percentages, 1)  # Round to one decimal
  }
  # Return the row with percentages formatted with a percentage sign
  percentages <- paste0(percentages, "%")
  return(c(row[1], percentages))
}

# Apply the function to each row
percentage_results <- t(apply(estado_resultados, 1, calculate_percentages, ventas = as.numeric(ventas)))

# Convert the results back to a dataframe and update column names
percentage_results_df <- as.data.frame(percentage_results)
colnames(percentage_results_df) <- colnames(estado_resultados)


# Print the percentage results dataframe with kable
kable(percentage_results_df, format = "html", caption = "Table 6: Balance Sheet (%)") %>%
  kable_styling(bootstrap_options = c("striped", "hover", "condensed"), full_width = F) %>%
  row_spec(c(4), background = "#E6F9E6", bold = TRUE) %>% 
  row_spec(c(6, 9, 12), background = "lightyellow", bold = TRUE)
Table 6: Balance Sheet (%)
Metrica 2021 2022 2023 2024
Ventas en Deposito 41% 65.5% 56% 50%
Ventas en Efectivo 18.4% 6.3% 2.4% 2.2%
Ventas en Pagina 40.6% 28.2% 41.6% 47.8%
Ventas 100% 100% 100% 100%
Costo de Ventas 64.4% 58.2% 56.3% 50.9%
Utilidad Bruta 35.6% 41.8% 43.7% 49.1%
Gastos de Ventas 3.7% 4.7% 8% 10.1%
Gastos Administrativos 6.8% 41.7% 32.3% 38%
Utilidad de Operacion 25.1% -4.5% 3.5% 1%
Gastos Financieros 0% 0% 0% 5.7%
Utilidad Periodo 25.1% -4.5% 3.5% -4.7%
Utilidad Acumulada 0% 0% 0% 0%
Utilidad Neta 25.1% -4.5% 3.5% 1%

6.3 Tabla 7: Crecimiento de Nala porcentual

Code
# Calculate year-over-year growth for all relevant metrics
ventas_growth <- estado_resultados %>%
  filter(Metrica == "Ventas") %>%
  mutate(
    Growth_2022 = scales::percent((`2022` - `2021`) / `2021`),
    Growth_2023 = scales::percent((`2023` - `2022`) / `2022`),
    Growth_2024 = scales::percent((`2024` - `2023`) / `2023`)
  )


# Cost of Goods Sold (Costo_de_Ventas)
cost_growth <- estado_resultados %>%
  filter(Metrica == "Costo de Ventas") %>%
  mutate(
    Growth_2022 = scales::percent((`2022` - `2021`) / `2021`),
    Growth_2023 = scales::percent((`2023` - `2022`) / `2022`),
    Growth_2024 = scales::percent((`2024` - `2023`) / `2023`)
  )

# Expenses (Gastos_de_ventas and Gastos_administrativos)
expenses_growth <- estado_resultados %>%
  filter(Metrica %in% c("Gastos de Ventas", "Gastos Administrativos")) %>%
  mutate(
    Growth_2022 = scales::percent((`2022` - `2021`) / `2021`),
    Growth_2023 = scales::percent((`2023` - `2022`) / `2022`),
    Growth_2024 = scales::percent((`2024` - `2023`) / `2023`)
  )

# Utility (Utilidad_Bruta, Utilidad_de_Operacion, Utilidad_Neta)
utility_growth <- estado_resultados %>%
  filter(Metrica %in% c("Utilidad Bruta", "Utilidad de Operacion", "Utilidad Neta")) %>%
  mutate(
    Growth_2022 = scales::percent((`2022` - `2021`) / `2021`),
    Growth_2023 = scales::percent((`2023` - `2022`) / `2022`),
    Growth_2024 = scales::percent((`2024` - `2023`) / `2023`)
  )

# Combine all into a single data frame
combined_growth <- bind_rows(ventas_growth, cost_growth, expenses_growth, utility_growth) %>%
  select(Metrica, Growth_2022, Growth_2023, Growth_2024)

# Display the combined growth table
combined_growth %>%
  kable(format = "html", caption = "Table 7: Year-over-Year Growth (%)", align = 'c') %>%
  kable_styling(bootstrap_options = c("hover", "condensed"), full_width = F) %>%
  row_spec(c(1), background = "#E6F9E6", bold = TRUE) %>% 
  row_spec(c(5, 7), background = "lightyellow", bold = TRUE)
Table 7: Year-over-Year Growth (%)
Metrica Growth_2022 Growth_2023 Growth_2024
Ventas 169% 76% 25%
Costo de Ventas 143% 71% 13%
Gastos de Ventas 241% 203% 57%
Gastos Administrativos 1 545% 37% 47%
Utilidad Bruta 217% 84% 40%
Utilidad de Operacion -148% -235% -65%
Utilidad Neta -148% -235% -65%