Financial Data Analysis of Nala

Author

Javier Patrón

Published

September 5, 2024

Análisis Financiero de Nala

Este documento proporciona un análisis de los datos financieros de la empresa Nala.

Índice

  1. Cargar bibliotecas necesarias

  2. Leer los datos de gastos y ventas de NALA 2020-2024

  3. Combinar los datos

  4. Análisis financiero

    4.1 Ventas totales vs. gastos

  1. Análisis de las ventas

    5.1 Gráficas: Ventas a lo largo del tiempo

    5.2 Tendencias de ventas por plataforma

    5.3 Modelos lineales y resultados

  2. Estado de Resultados

    6.1 Cálculo de Utilidades

    6.2 Cálculo de márgenes y porcentajes

1. Cargar bibliotecas

Code
library(tidyverse)  # For data manipulation and visualization; includes readr, dplyr, ggplot2
library(janitor)    # For cleaning data and making tabular outputs cleaner
library(scales)     # For scaling graphical outputs
library(RColorBrewer)  # For using color palettes in visualizations
library(plotly)     # For interactive web-based graphics
library(stringr)    # For string manipulation (also part of tidyverse but listed for clarity)
library(knitr)      # For dynamic report generation in R
library(kableExtra) # For enhancing 'kable' tables in R Markdown
library(broom)      # For converting statistical analysis objects into tidy data frames
library(DT)         # For creating interactive tables in R

2. Cargar los datos

2.1 Leer datos de gastos 2020-2024

Code
# Step 1: Read and clean the raw data, and rename it to raw_expense_data
raw_expense_data <- read_csv("/Users/javier.patron/Downloads/pagos Nala - Expenses Data Analisis (2).csv") %>%
  clean_names() %>%
  # Set the first row as column names and remove the first row
  { setNames(.[-1,], as.character(.[1,])) } %>%
  clean_names() %>%
  select_if(~ !all(is.na(.)))

# Step 2: Create three separate data frames for each set of columns
df1 <- raw_expense_data %>%
  select(clasificacion, fecha, empresa, cantidad, producto)

df2 <- raw_expense_data %>%
  select(clasificacion = clasificacion_2, fecha = fecha_2, empresa = empresa_2, cantidad = cantidad_2, producto = producto_2)

df3 <- raw_expense_data %>%
  select(clasificacion = clasificacion_3, fecha = fecha_3, empresa = empresa_3, cantidad = cantidad_3, producto = producto_3)

# Step 3: Combine all three data frames and name it as expenses_data
expenses_data <- bind_rows(df1, df2, df3) %>%
  # Remove rows where all columns are NA (if any)
  filter(!if_all(everything(), is.na))


# Clean the data
expenses_data <- expenses_data %>%
  mutate(fecha = lubridate::dmy(fecha),
         año = lubridate::year(fecha),
         mes = lubridate::month(fecha, label = TRUE),
         quarter = lubridate::quarter(fecha),
         
         # Convert parentheses numbers to negative and clean other symbols
         cantidad = as.numeric(ifelse(grepl("\\(", cantidad), 
                                      paste0("-", gsub("[^0-9.]", "", cantidad)), 
                                      gsub("[^0-9.]", "", cantidad)))) %>% 
  
  mutate(source = case_when(
    str_detect(clasificacion, "Gastos") ~ "Gastos",
    str_detect(clasificacion, "Costos") ~ "Costos",
    TRUE ~ "other"  # This handles any other cases, if they exist
  )) %>% 
  select("source", "clasificacion", "fecha", "año", "mes", "quarter", "empresa", "cantidad", "producto")

2.2 Leer datos de ventas 2020-2024

Code
# Read the CSV file
sales_data <- read_csv("/Users/javier.patron/Downloads/pagos Nala - Ventas Data Analisis (1).csv") %>% 
  clean_names()

# Clean and Organize the data
sales_data_long <- sales_data %>%
  pivot_longer(cols = jan:dec, names_to = "month", values_to = "cantidad") %>%
  rename(año = ano, tipo_venta = tipo) %>%
  mutate(
    month = match(tolower(month), tolower(month.abb)),  # Convert month abbreviation to number
    mes = month(month, label = TRUE, abbr = TRUE),      # Convert to lubridate month format
    source = "Ventas",
    fecha = make_date(year = año, month = month(month), day = days_in_month(month(month, label = FALSE))),  # Add last day of the month
    clasificacion = tipo_venta,
    quarter = quarter(month),
    cantidad = as.numeric(gsub("[$,]", "", cantidad))) %>% 
  select("source", "clasificacion", "fecha", "año", "mes", "quarter" , "cantidad")

sales_data_long <- sales_data_long %>% 
  arrange(fecha) %>% 
  slice(1:168)

3. Combinar los datos

Code
combined_data <- full_join(
  expenses_data %>% select(-empresa, -producto), 
  sales_data_long) %>%
  mutate(
    clasificacion = case_when(
      clasificacion == "Gastos Admin" ~ "Gastos de Admin",
      clasificacion == "Gastos Ventas" ~ "Gastos de Ventas",
      clasificacion == "Costos Ventas" ~ "Costos de Ventas",
      clasificacion == "Deposito" ~ "Ventas en Deposito",
      clasificacion == "Efectivo" ~ "Ventas en Efectivo",
      clasificacion == "Pagina" ~ "Ventas en Pagina",
      TRUE ~ clasificacion  # Default case to handle any unexpected values
    )
  )

3.1 Tabla de Gastos

3.2 Tabla de Ventas

Code
# Create an interactive table for expenses_data with smaller font
datatable(sales_data, 
          options = list(autoWidth = TRUE, searching = TRUE), 
          caption = 'Expenses Data') %>%
  formatStyle(
    columns = names(sales_data),
    fontSize = '10px') 

4. Análisis Financiero

4.1 Ventas Totales vs. Gastos

Code
# Summarize total sales and expenses across the entire dataset
total_summary <- combined_data %>%
  group_by(source) %>%
  summarize(total_amount = sum(as.numeric(cantidad), na.rm = TRUE))

# Define a custom color palette similar to Set1 or Set2
custom_palette <- c("#E41A1C", "#377EB8", "#4DAF4A")

# Create the ggplot object
p <- ggplot(total_summary, aes(x = source, y = total_amount, fill = source)) +
  geom_bar(stat = "identity") +
  scale_fill_manual(values = custom_palette) +  # Apply the custom palette
  labs(title = "Total Sales vs. Expenses (Overall History)",
       y = "Total Amount (MXN)",
       x = "") +
  scale_y_continuous(labels = scales::dollar_format(prefix = "$")) +
  theme_minimal()

# Convert the ggplot2 plot to an interactive plotly graph
ggplotly(p)
Code
# Define custom colors based on classification
custom_colors <- c(
  "Ventas en Deposito" = "#6EBF74",  # Green
  "Ventas en Efectivo" = "#499F54",  # Green
  "Ventas en Pagina" = "#28703B",    # Green
  "Costos de Ventas" = "#FFDAB9",    # Pink
  "Gastos de Admin" = "#FF9F50",     # Coral
  "Gastos de Ventas" = "#FF7347"     # Tomato
)

# Explicitly categorize each 'clasificacion' into 'sales' or 'expenses'
combined_data <- combined_data %>%
  mutate(source_grouped = case_when(
    clasificacion == "Ventas en Deposito" ~ "sales",
    clasificacion == "Ventas en Efectivo" ~ "sales",
    clasificacion == "Ventas en Pagina" ~ "sales",
    clasificacion == "Costos de Ventas" ~ "expenses",
    clasificacion == "Gastos de Admin" ~ "expenses",
    clasificacion == "Gastos de Ventas" ~ "expenses",
    TRUE ~ "other"  # This is just a fallback case
  ))

# Summarize total sales and combined expenses across the entire dataset
total_summary <- combined_data %>%
  group_by(source_grouped, clasificacion) %>%
  summarize(total_amount = sum(as.numeric(cantidad), na.rm = TRUE)) %>% 
  mutate(total_sum = sum(total_amount))

total_sums <- total_summary %>%
  group_by(source_grouped) %>%
  summarize(total_sum = sum(total_amount))


# Bar graph of total sales vs. stacked combined expenses (Overall History) with custom colors
ggplot(total_summary, aes(x = source_grouped, y = total_amount, fill = clasificacion)) +
  geom_bar(stat = "identity", position = "stack") +
  labs(title = "Total Sales vs. Expenses (Overall History)", y = "Total Amount (MXN)", x = "") +
  scale_fill_manual(values = custom_colors) +
  scale_y_continuous(labels = dollar_format(prefix = "$", big.mark = ",")) +
  theme_minimal() +
  geom_text(aes(label = scales::dollar(total_amount, prefix = "$", big.mark = ",")), 
            position = position_stack(vjust = 0.5), 
            size = 4, color = "white")  +
  # Add specific text for each total sum from the total_sums table
  annotate("text", x = 1, y = total_sums$total_sum[1], 
           label = scales::dollar(total_sums$total_sum[1], prefix = "$", big.mark = ","), 
           vjust = -0.5, size = 5, fontface = "bold", color = "#FF7347") +
  annotate("text", x = 2, y = total_sums$total_sum[2], 
           label = scales::dollar(total_sums$total_sum[2], prefix = "$", big.mark = ","), 
           vjust = -0.2, size = 5, fontface = "bold", color = "darkgreen")

Code
# Summarize total sales and expenses per year
yearly_summary <- combined_data %>%
  group_by(año, source_grouped, clasificacion) %>%
  summarize(total_amount = sum(as.numeric(cantidad), na.rm = TRUE)) %>%
  ungroup()

# Bar graph of total sales vs. expenses per year with facets and annotations
ggplot(yearly_summary, aes(x = source_grouped, y = total_amount, fill = clasificacion)) +
  geom_bar(stat = "identity", position = "stack") +
  labs(title = "Total Sales vs. Expenses per Year", y = "Total Amount (MXN)", x = "") +
  scale_fill_manual(values = custom_colors) +
  scale_y_continuous(labels = dollar_format(prefix = "$", big.mark = ",")) +
  theme_minimal() +
  facet_wrap(~año) 

Code
# Assuming combined_data has a date column named "fecha"
month_filter <- 11

# Filter data for the last XX months
filtered_data <- combined_data %>%
  mutate(fecha = as.Date(fecha, format = "%d/%m/%Y")) %>%  # Ensure "fecha" is in Date format
  filter(fecha >= as.Date(Sys.Date() %m-% months(month_filter))) # Filter last XXX months

# Summarize total sales and expenses per year and other groupings
filtered_summary <- filtered_data %>%
  mutate(año = year(fecha)) %>%  # Extract year from fecha
  group_by(source_grouped, clasificacion) %>%
  summarize(total_amount = sum(as.numeric(cantidad), na.rm = TRUE)) %>%
  ungroup()

filtered_sums <- filtered_summary %>%
  group_by(source_grouped) %>%
  summarize(total_sum = sum(total_amount))


# Assuming filtered_data has already been created and summarized as 'yearly_summary'
ggplot(filtered_summary, aes(x = source_grouped, y = total_amount, fill = clasificacion)) +
  geom_bar(stat = "identity", position = "stack") +
  labs(title = paste0("Total Sales vs. Expenses (Last ", month_filter," Months)"), 
       y = "Total Amount (MXN)", 
       x = "") +
  scale_fill_manual(values = custom_colors) +
  scale_y_continuous(labels = scales::dollar_format(prefix = "$", big.mark = ",")) +
  theme_minimal() +
  
  # Adding text labels inside the bars
  geom_text(aes(label = scales::dollar(total_amount, prefix = "$", big.mark = ",")), 
            position = position_stack(vjust = 0.5), 
            size = 4, color = "white") +

  # Annotate specific sums on the chart (e.g., for each category)
  annotate("text", x = 1, y = filtered_sums$total_sum[1], 
           label = scales::dollar(filtered_sums$total_sum[1], prefix = "$", big.mark = ","), 
           vjust = -0.5, size = 5, fontface = "bold", color = "#FF7347") +
  
  annotate("text", x = 2, y = filtered_sums$total_sum[2], 
           label = scales::dollar(filtered_sums$total_sum[2], prefix = "$", big.mark = ","), 
           vjust = -0.2, size = 5, fontface = "bold", color = "darkgreen")

4.2 Análisis de Gastos

Box Plots

Code
# Cleaned dataframe manipulation
monthly_summary <- combined_data %>% 
  rename(month = mes) %>% 
  group_by(source_grouped, source, año, month) %>% 
  summarise(total_amount = sum(cantidad)) %>% 
  mutate(month_num = match(tolower(month), tolower(month.abb)),
         month_abbr = month(month_num, label = TRUE, abbr = TRUE),  
         fecha = make_date(year = año, 
                           month = month_num, 
                           day = days_in_month(month_num))) 
Code
# Create a filtered monthly summary using the already-filtered data (filtered_data)
filtered_monthly_summary <- filtered_data %>%
  rename(month = mes) %>%  # Assuming 'mes' exists as the month column in the data
  group_by(source_grouped, año, month) %>% 
  summarise(total_amount = sum(as.numeric(cantidad), na.rm = TRUE)) %>%
  
  # Add month number and abbreviation
  mutate(month_num = match(tolower(month), tolower(month.abb)),
         month_abbr = month(month_num, label = TRUE, abbr = TRUE),
         fecha = make_date(year = año, month = month_num, day = days_in_month(month_num)))

# Box plot graph using the filtered monthly summary
ggplot(filtered_monthly_summary, aes(x = total_amount, y = source_grouped, fill = source_grouped)) +
  geom_boxplot() +
  scale_x_continuous(labels = scales::dollar_format(prefix = "$", big.mark = ",")) +
  labs(title = paste0("Distribution of Monthly Amounts by Source Group (Last ", month_filter," Months)"),
       x = "Total Amount",
       y = "Source Group",
       fill = "Source Group") +
  theme_minimal()

Caja (IQR): La caja representa el rango intercuartílico. Contiene el 50% central de los datos.

Parte inferior de la caja (Q1): Este es el primer cuartil, o el percentil 25. Esto significa que el 25% de los datos están por debajo de este valor.

Parte superior de la caja (Q3): Este es el tercer cuartil, o el percentil 75. Esto significa que el 75% de los datos están por debajo de este valor, y el 25% están por encima.

Línea dentro de la caja: Esta línea representa la mediana.

Code
# Create a filtered monthly summary using the already-filtered data (filtered_data)
filtered_monthly_summary <- filtered_data %>%
  rename(month = mes) %>%  # Assuming 'mes' exists as the month column in the data
  group_by(source_grouped, source, año, month) %>% 
  summarise(total_amount = sum(as.numeric(cantidad), na.rm = TRUE)) %>%
  
  # Add month number and abbreviation
  mutate(month_num = match(tolower(month), tolower(month.abb)),
         month_abbr = month(month_num, label = TRUE, abbr = TRUE),
         fecha = make_date(year = año, month = month_num, day = days_in_month(month_num)))


# Improved boxplot using the existing monthly_summary dataframe
ggplot(filtered_monthly_summary, aes(x = total_amount, y = source, fill = source)) +
  geom_boxplot(outlier.colour = "red", outlier.shape = 16, outlier.size = 2) +
  scale_x_continuous(labels = dollar_format(prefix = "$", big.mark = ",")) +
  labs(title = "Distribution of Expenses by Classification",
       subtitle = paste0("(Last ", month_filter," Months)"),
       x = "Total Monthly Sum (MXN)",
       y = "Classification",
       fill = "Classification") +
  theme_minimal(base_size = 14) +
  theme(
    plot.title = element_text(face = "bold"),
    legend.position = "none"
  ) +
  scale_fill_brewer(palette = "Set2")

Code
monthly_summary %>% 
ggplot(aes(x = fecha, y = total_amount, color = source_grouped)) +
  geom_line() +
  scale_y_continuous(labels = dollar_format(prefix = "$", big.mark = ",")) +
  labs(title = "Monthly Amounts Over Time by Source Group",
       x = "Date",
       y = "Total Amount",
       color = "Source Group") +
  theme_minimal()

Code
# Bar graph comparing sales and expenses per month, faceted by year
ggplot(monthly_summary, aes(x = month_abbr, y = total_amount, fill = source_grouped)) +
  geom_bar(stat = "identity", position = "dodge") +
  scale_y_continuous(labels = dollar_format(prefix = "$", big.mark = ",")) +
  labs(title = "Monthly Comparison of Sales and Expenses Faceted by Year",
       x = "Month",
       y = "Total Amount",
       fill = "Category") +
  facet_wrap(~ year(fecha), ncol = 1) +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

Tablas Resumen

Code
# Calculate the yearly totals and profit
yearly_summary_table <- monthly_summary %>%
  group_by(year = year(fecha)) %>%
  summarise(
    total_expenses = sum(total_amount[source_grouped == "expenses"]),
    total_sales = sum(total_amount[source_grouped == "sales"]),
    profit = total_sales - total_expenses) %>%
  mutate(
    total_expenses = scales::dollar(total_expenses, prefix = "$", big.mark = ","),
    total_sales = scales::dollar(total_sales, prefix = "$", big.mark = ","),
    profit = scales::dollar(profit, prefix = "$", big.mark = ","))

kable(yearly_summary_table, align = "c", caption = "Year-by-Year Financial Summary") %>%
  kable_styling(bootstrap_options = c("striped", "hover", "condensed"), full_width = F)
Year-by-Year Financial Summary
year total_expenses total_sales profit
2020 $103,630 $55,832 -$47,798.85
2021 $256,209 $274,423 $18,214.26
2022 $772,048 $738,756 -$33,291.55
2023 $1,253,615 $1,303,699 $50,084.12
2024 $1,077,319 $1,022,979 -$54,340.24
Code
# Calculate the monthly totals for all years
monthly_summary_table <- monthly_summary %>%
  group_by(year = year(fecha), month_abbr) %>%
  summarise(
    total_expenses = sum(total_amount[source_grouped == "expenses"]),
    total_sales = sum(total_amount[source_grouped == "sales"]),
    profit = total_sales - total_expenses
  ) %>%
  arrange(year, match(month_abbr, month.abb)) %>%
  mutate(
    total_expenses = scales::dollar(total_expenses, prefix = "$", big.mark = ","),
    total_sales = scales::dollar(total_sales, prefix = "$", big.mark = ","),
    profit = scales::dollar(profit, prefix = "$", big.mark = ","))

Tabla resumen de ganancias y perdidas mes con mes

Code
# Create an interactive table for expenses_data with smaller font
datatable(monthly_summary_table, 
          options = list(autoWidth = TRUE, searching = TRUE), 
          caption = 'monthly_summary_table') %>%
  formatStyle(
    columns = names(monthly_summary_table),
    fontSize = '14px') 

Gráfica final de ganancias y perdidas por mes

Code
# Ensure that 'profit' is numeric
monthly_summary_table <- monthly_summary_table %>%
  mutate(profit_numeric = as.numeric(gsub("[$,]", "", profit)),
         date = as.Date(paste(year, month_abbr, "01", sep = "-"), 
                        format = "%Y-%b-%d"))

# Create the ggplot object
p <- ggplot(monthly_summary_table, aes(x = date, 
                                       y = profit_numeric, 
                                       fill = profit_numeric > 0, 
                                       text = paste("Date: ", format(date, "%b %Y"), "<br>Profit: $", profit_numeric))) +
  geom_bar(stat = "identity") +
  scale_y_continuous(labels = dollar_format(prefix = "$", big.mark = ",")) +
  scale_x_date(date_labels = "%Y", 
               date_breaks = "1 year", 
               minor_breaks = NULL) + 
  scale_fill_manual(values = c("TRUE" = "darkgreen", "FALSE" = "coral")) +
  labs(
    title = "Monthly Profits Over Time (2020-2024)",
    x = "Year",
    y = "Profit ($)"
  ) +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 0, 
                                   hjust = 1, 
                                   vjust = 0.5),
    legend.position = "none")

# Convert the ggplot object to an interactive plotly object
p_interactive <- ggplotly(p, tooltip = "text")

# Display the interactive plot
p_interactive
Code
# Data processing and filtering
processed_expenses <- expenses_data %>%
  rename(month = mes) %>%
  group_by(clasificacion, año, month, producto) %>%
  summarise(total_amount = sum(cantidad)) %>%
  mutate(
    month_num = match(tolower(month), tolower(month.abb)),
    mes = month(month_num, label = TRUE, abbr = TRUE),
    fecha = make_date(year = año, month = month_num, day = days_in_month(month_num))
  ) %>%
  filter(producto %in% c("Producto", "Sueldo", "Transporte y Logistica", "Marketing y Ventas", "Envases", "Servicios Profesionales"))

# Create the ggplot
expense_plot <- processed_expenses %>%
  ggplot(aes(x = fecha, y = total_amount, color = producto)) +
  geom_line() +
  scale_y_continuous(labels = dollar_format(prefix = "$", big.mark = ",")) +
  labs(
    title = "Monthly Expenses by Product Category",
    subtitle = "Comparison of key expense categories over time",
    x = "Date",
    y = "Total Amount (MXN)",
    color = "Product Category") +
  theme_minimal(base_size = 14) +
  theme(
    plot.title = element_text(face = "bold", hjust = 0.5),
    plot.subtitle = element_text(face = "italic", hjust = 0.5),
    axis.text.x = element_text(angle = 45, hjust = 1),
    legend.position = "bottom"
  )

# Convert the ggplot2 plot to an interactive plotly graph
interactive_plot <- ggplotly(expense_plot)

# Display the interactive plot
interactive_plot

5. Análisis de las ventas

5.1 Gráficas: Ventas a lo largo del tiempo

Code
# Calculate total sales and add necessary columns
total_sales <- sales_data_long %>%
  group_by(fecha) %>%
  summarize(cantidad = sum(cantidad, na.rm = TRUE)) %>%
  mutate(
    clasificacion = "Total",
    source = "Ventas",          
    año = year(fecha),           
    mes = month(fecha, label = TRUE, abbr = TRUE),  
    quarter = quarter(fecha))

# Combine the total sales with the filtered data
sales_data_plot <- bind_rows(sales_data_long, total_sales) %>%
  filter(!(año == 2020 & mes %in% c("Jan", "Feb", "Mar", "Apr", "May", "Jun", "Jul", "Aug"))) %>% 
  filter(!(año == 2024 & mes %in% c("Sep", "Oct", "Nov", "Dec"))) 


# Choose a color palette
palette <- brewer.pal(n = 4, name = "Set1")  

p <- ggplot(sales_data_plot, aes(x = fecha, y = cantidad, color = clasificacion, group = clasificacion)) +
  geom_line(data = . %>% filter(clasificacion == "Total"), size = 0.8) + 
  geom_line(data = . %>% filter(clasificacion != "Total"), size = 0.5, alpha = 0.5) +  
  scale_color_manual(values = palette) +  # Apply the chosen color palette
  labs(title = "Sales Over Time",
       x = " ",
       y = "Sales (MXN)",
       color = "Sales Type") +
  scale_y_continuous(labels = scales::dollar_format(prefix = "$")) +  
  theme_minimal() 

# Convert the ggplot2 plot to an interactive plotly graph
ggplotly(p)

5.2 Tendencias de ventas por plataforma

Code
sales_data_plot <- sales_data_plot %>%
  group_by(clasificacion) %>%
  arrange(fecha) %>%
  mutate(month_count = row_number()) %>%
  ungroup()

# Create linear models for each 'clasificacion'
models <- sales_data_plot %>%
  group_by(clasificacion) %>%
  do(model = lm(cantidad ~ as.numeric(month_count), data = .))

# Extract model coefficients and create equation strings
models <- models %>%
  mutate(
    slope = round(coef(model)[2], 2),
    intercept = round(coef(model)[1], 2),
    equation = paste0("y = ", intercept, " + ", slope, " * x"),
    y_position = seq(from = max(sales_data_plot$cantidad), 
                     to = max(sales_data_plot$cantidad) * 2000, # Spread out the y positions
                     length.out = n()),
    x_position = as.Date("2021-01-01") # Set all x positions to the same date
  )
# Choose a color palette
palette <- brewer.pal(n = 4, name = "Set1")  

# Create ggplot
p <- ggplot(sales_data_plot, aes(x = fecha, y = cantidad, color = clasificacion)) +
  geom_line(data = . %>% filter(clasificacion == "Total"), size = 0.8) + 
  geom_line(data = . %>% filter(clasificacion != "Total"), size = 0.5, alpha = 0.5) +
  geom_smooth(method = "lm", se = F, size =0.5) +
  scale_color_manual(values = palette) +
  geom_text(
    data = models,
    aes(x = x_position, 
        y = y_position, 
        label = equation, 
        color = clasificacion), 
    hjust = 0, 
    vjust = 0, 
    size = 3.5, inherit.aes = FALSE) +
  scale_y_continuous(labels = scales::dollar_format(prefix = "$")) +  
  labs(title = "Sales Trends by Platform", x = "Date", y = "Sales Quantity") +
  theme_minimal()

# Convert to interactive plotly plot
ggplotly(p)

5.3 Modelos lineales y resultados

Code
# Create a sequential count column for each classification
sales_data_plot <- sales_data_plot %>%
  group_by(clasificacion) %>%
  arrange(fecha) %>%
  mutate(month_count = row_number()) %>%
  ungroup()

# Create the linear model using the new count column
model_table_filtered <- sales_data_plot %>%
  group_by(clasificacion) %>%
  do(tidy(lm(cantidad ~ month_count, data = .))) %>%
  filter(term == "month_count") %>%
  mutate(
    significance = case_when(
      p.value < 0.001 ~ "***",
      p.value < 0.01 ~ "**",
      p.value < 0.05 ~ "*",
      p.value < 0.1 ~ ".",
      TRUE ~ ""
    )
  ) %>%
  select(
    Classification = clasificacion,
    Estimate = estimate,
    `Std. Error` = std.error,
    `t-value` = statistic,
    `p-value` = p.value,
    Significance = significance
  )

# Print the table with styling
kable(model_table_filtered, format = "html", caption = "Modelos lineales por clasificación") %>%
  kable_styling(bootstrap_options = c("striped", "hover", "condensed"), full_width = F)
Modelos lineales por clasificación
Classification Estimate Std. Error t-value p-value Significance
Deposito 1717.8205 177.68295 9.667898 0.00e+00 ***
Efectivo -103.2204 23.89972 -4.318894 8.28e-05 ***
Pagina 1408.2854 101.52694 13.871052 0.00e+00 ***
Total 3022.8856 200.93745 15.043913 0.00e+00 ***

Resúmen de Estadística de Ventas

  • Depósito: Aumento estadísticamente significativo de 1818.58 MXN por mes (p < 0.001).

  • Efectivo: Disminución estadísticamente significativa de -108.72 MXN por mes (p < 0.001).

  • Página: Aumento estadísticamente significativo de 1398.58 MXN por mes (p < 0.001).

Total: Aumento estadísticamente significativo de XXXXXX.XX MXN por mes (p < 0.001).

5.4 Modelos futuros de venta.

Code
# Step 1: Create a new column with year and quarter combined
combined_data <- combined_data %>%
  mutate(year_quarter = paste(año, quarter, sep = "-Q"))

# Step 2: Summarize data by year_quarter and source
quarterly_data <- combined_data %>%
  group_by(source_grouped, source, year_quarter) %>%
  summarize(total_amount = sum(cantidad, na.rm = TRUE)) %>%
  ungroup() %>%
  filter(year_quarter != "2024-Q3")


# Plotting the data
ggplot(quarterly_data, aes(x = year_quarter, y = total_amount, color = source, group = source)) +
  geom_line(size = 1) +
  geom_point(size = 2) +
  labs(title = "Quarterly Data by Source",
       x = "Year-Quarter",
       y = "Total Amount",
       color = "Source") +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

Creating the model

Code
# Step 3: Split year_quarter into separate columns for modeling
quarterly_data <- quarterly_data %>%
  separate(year_quarter, into = c("año", "quarter"), sep = "-Q") %>%
  mutate(año = as.numeric(año), quarter = as.numeric(quarter))

# Step 4: Create a model for each source type
models <- quarterly_data %>%
  group_by(source) %>%
  do(model = lm(total_amount ~ año + quarter, data = .))

# Step 5: Generate projections for the next 5 years (20 quarters)
future_years <- rep(seq(max(quarterly_data$año) + 1, by = 1, length.out = 6), each = 4)
future_quarters <- rep(1:4, 6)

projected_data <- models %>%
  rowwise() %>%
  do(data.frame(año = future_years, 
                quarter = future_quarters,
                total_amount = predict(.$model, 
                                       newdata = data.frame(año = future_years, 
                                                            quarter = future_quarters)),
                source = .$source)) %>%
  mutate(year_quarter = paste(año, quarter, sep = "-Q"))

# Combine real and projected data
quarterly_data <- quarterly_data %>%
  mutate(year_quarter = paste(año, quarter, sep = "-Q"), type = "Real")

projected_data <- projected_data %>%
  mutate(type = "Projected",
         source_grouped = case_when(
           source == "Costos" ~ "expenses",
           source == "Gastos" ~ "expenses",
           source == "Ventas" ~ "sales"))

# Reorder columns in `projected_data` to match `quarterly_data`
projected_data <- projected_data[, c("source_grouped","source","total_amount", "año", "quarter","type" , "year_quarter")]

plot_data <- bind_rows(quarterly_data, projected_data)
Code
x <- ggplot(plot_data, aes(x = year_quarter,  # Use 'año' for the x-axis (year)
                           y = total_amount, 
                           color = source, 
                           linetype = type, 
                           group = interaction(source, type))) +  # Ensure the grouping works across years
  geom_line(size = 0.5) +
  geom_point(size = 0.5) +
  scale_y_continuous(labels = scales::dollar_format()) +
  labs(title = "Real and Projected Quarterly Data by Source",
       x = "Year",  # Use "Year" for the x-axis label
       y = "Total Amount",
       color = "Source",
       linetype = "Data Type") +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 50,
                                   color = "gray80",
                                   hjust = 1, 
                                   size = 10)) + # Adjust label size for readability
  scale_x_discrete(breaks = function(x) x[seq(1, length(x), by = 1)])  # Show fewer labels (every 4th)


# Display the plot
x

\[ \text{total_amount} = \beta_0 + \beta_1 \times \text{año} + \beta_2 \times \text{quarter} + \epsilon \]

Here:

  • \(\beta_0\) is the intercept (the starting value of total_amount when both año and quarter are zero).

  • \(\beta_1\) is the coefficient for año, representing the expected change in total_amount for each one-unit increase in año, holding quarter constant.

  • \(\beta_2\) is the coefficient for quarter, representing the expected change in total_amount for each one-unit increase in quarter, holding año constant.

  • \(\epsilon\) represents the error term, which accounts for the variability in total_amount that cannot be explained by año and quarter.

Code
# Group Costos and Gastos into 'Expenses', and leave Ventas as 'Sales'
plot_data_grouped <- plot_data %>%
  mutate(source_grouped = case_when(
    source %in% c("Costos", "Gastos") ~ "Expenses",
    source == "Ventas" ~ "Sales"
  )) %>%
  group_by(year_quarter, source_grouped, type) %>%
  summarize(total_amount = sum(total_amount, na.rm = TRUE)) %>%
  ungroup()

x <- ggplot(plot_data_grouped, aes(x = year_quarter,  
                                   y = total_amount, 
                                   color = source_grouped,  # Use 'source_grouped' for color
                                   linetype = type, 
                                   group = interaction(source_grouped, type))) +  # Ensure the grouping works
  geom_line(size = 0.5) +
  geom_point(size = 0.5) +
  scale_y_continuous(labels = scales::dollar_format()) +
  labs(title = "Real and Projected Quarterly Data (Expenses vs. Sales)",
       x = "Year",
       y = "Total Amount",
       color = "Category",
       linetype = "Data Type") +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 50, 
                                   color = "gray80",
                                   hjust = 1, 
                                   size = 10)) +
  scale_x_discrete(breaks = function(x) x[seq(1, length(x), by = 1)])

# Display the plot
x

6. Estado de Resultados

La tabla de estado de resultados general muestra un resumen financiero de Nala. Muestra las ventas, costos, y la utilidad neta. Esta tabla ayuda para ver como los ingresos y gastos afectan las ganancias finales de la empresa.

6.1 Cálculo de Utilidades

Code
# Step 1: Process and calculate the financial metrics
estado_resultados <- combined_data %>%
  group_by(año, clasificacion) %>%
  summarize(total_cantidad = sum(cantidad, na.rm = TRUE)) %>%
  pivot_wider(names_from = clasificacion, values_from = total_cantidad, values_fill = 0) %>%
  mutate(
    `Ventas en Deposito` = `Ventas en Deposito`,
    `Ventas en Efectivo` = `Ventas en Efectivo`,
    `Ventas en Pagina` = `Ventas en Pagina`,
    Ventas = `Ventas en Deposito` + `Ventas en Efectivo` + `Ventas en Pagina`,
    Costo_de_Ventas = `Costos de Ventas`,
    Utilidad_Bruta = Ventas - Costo_de_Ventas,
    Gastos_de_ventas = `Gastos de Ventas`,
    Gastos_administrativos = `Gastos de Admin`,
    Utilidad_de_Operacion = Utilidad_Bruta - Gastos_de_ventas - Gastos_administrativos,
    Gastos_financieros = 0, 
    Otros_gastos = 0,        
    Utilidad_Periodo = Utilidad_de_Operacion,
    Utilidad_Acumulada = 0,
    Utilidad_Neta = Utilidad_de_Operacion
  )

# Step 2: Calculate Utilidad Acumulada and Utilidad Neta
for (i in 2:nrow(estado_resultados)) {
  estado_resultados$Utilidad_Acumulada[i] <- estado_resultados$Utilidad_Neta[i - 1]
  estado_resultados$Utilidad_Neta[i] <- estado_resultados$Utilidad_Periodo[i] + estado_resultados$Utilidad_Acumulada[i]
}

# Step 3: Format numbers as currency
estado_resultados <- estado_resultados %>%
  mutate(across(where(is.numeric), ~ paste0("$ ", formatC(round(.), format = "f", big.mark = ",", digits = 0))))

# Step 4: Transpose the table and reorder the rows
desired_order <- c(
  "Ventas en Deposito",
  "Ventas en Efectivo",
  "Ventas en Pagina",
  "Ventas", 
  "Costo_de_Ventas", 
  "Utilidad_Bruta", 
  "Gastos_de_ventas", 
  "Gastos_administrativos", 
  "Utilidad_de_Operacion", 
  "Gastos_financieros", 
  "Otros_gastos", 
  "Utilidad_Periodo", 
  "Utilidad_Acumulada", 
  "Utilidad_Neta")

estado_resultados <- estado_resultados %>%
  pivot_longer(-año, names_to = "Metrica", values_to = "Value") %>%
  mutate(Metrica = factor(Metrica, levels = desired_order)) %>%
  arrange(Metrica) %>%
  pivot_wider(names_from = año, values_from = Value) %>% 
  na.omit() %>%
  mutate(across(where(is.numeric), scales::dollar_format(prefix = "$")))


# Step 5: Print the estado_resultados dataframe with kable and highlight rows
estado_resultados %>%
  kable(format = "html", caption = "Estado de Resultados") %>%
  kable_styling(bootstrap_options = c("hover", "condensed"), full_width = F) %>%
  row_spec(c(1, 2, 3), background = "#F4FBF4") %>% 
  row_spec(c(4), background = "#E6F9E6", bold = TRUE) %>%
  row_spec(c(5, 7, 8, 10, 11), background = "#FDECEC") %>% 
  row_spec(c(6, 9, 12), background = "lightyellow", bold = TRUE) %>% 
  row_spec(c(13,14), background = "#E6F2FA",bold = TRUE)  
Estado de Resultados
Metrica 2020 2021 2022 2023 2024
Ventas en Deposito $ 20,092 $ 112,554 $ 483,896 $ 730,154 $ 538,846
Ventas en Efectivo $ 35,740 $ 50,495 $ 46,355 $ 31,318 $ 18,571
Ventas en Pagina $ 0 $ 111,374 $ 208,505 $ 542,228 $ 465,562
Ventas $ 55,832 $ 274,423 $ 738,756 $ 1,303,699 $ 1,022,979
Costo_de_Ventas $ 64,637 $ 227,382 $ 429,597 $ 733,346 $ 554,001
Utilidad_Bruta $ -8,806 $ 47,041 $ 309,159 $ 570,353 $ 468,978
Gastos_de_ventas $ 20,747 $ 10,107 $ 34,463 $ 104,373 $ 106,150
Gastos_administrativos $ 18,246 $ 18,720 $ 307,988 $ 415,896 $ 417,168
Utilidad_de_Operacion $ -47,799 $ 18,214 $ -33,292 $ 50,084 $ -54,340
Gastos_financieros $ 0 $ 0 $ 0 $ 0 $ 0
Otros_gastos $ 0 $ 0 $ 0 $ 0 $ 0
Utilidad_Periodo $ -47,799 $ 18,214 $ -33,292 $ 50,084 $ -54,340
Utilidad_Acumulada $ 0 $ -47,799 $ -29,585 $ -62,876 $ -12,792
Utilidad_Neta $ -47,799 $ -29,585 $ -62,876 $ -12,792 $ -67,132
Code
estado_resultados %>%
  kable(format = "html", caption = "Estado de Resultados") %>%
  kable_styling(bootstrap_options = c("striped", "hover", "condensed"), full_width = F) %>%
  row_spec(c(4), bold = TRUE) %>% 
  row_spec(c(4), background = "#E6F9E6", bold = TRUE) %>% 
  row_spec(c(6, 9, 12), background = "lightyellow", bold = TRUE)
Estado de Resultados
Metrica 2020 2021 2022 2023 2024
Ventas en Deposito $ 20,092 $ 112,554 $ 483,896 $ 730,154 $ 538,846
Ventas en Efectivo $ 35,740 $ 50,495 $ 46,355 $ 31,318 $ 18,571
Ventas en Pagina $ 0 $ 111,374 $ 208,505 $ 542,228 $ 465,562
Ventas $ 55,832 $ 274,423 $ 738,756 $ 1,303,699 $ 1,022,979
Costo_de_Ventas $ 64,637 $ 227,382 $ 429,597 $ 733,346 $ 554,001
Utilidad_Bruta $ -8,806 $ 47,041 $ 309,159 $ 570,353 $ 468,978
Gastos_de_ventas $ 20,747 $ 10,107 $ 34,463 $ 104,373 $ 106,150
Gastos_administrativos $ 18,246 $ 18,720 $ 307,988 $ 415,896 $ 417,168
Utilidad_de_Operacion $ -47,799 $ 18,214 $ -33,292 $ 50,084 $ -54,340
Gastos_financieros $ 0 $ 0 $ 0 $ 0 $ 0
Otros_gastos $ 0 $ 0 $ 0 $ 0 $ 0
Utilidad_Periodo $ -47,799 $ 18,214 $ -33,292 $ 50,084 $ -54,340
Utilidad_Acumulada $ 0 $ -47,799 $ -29,585 $ -62,876 $ -12,792
Utilidad_Neta $ -47,799 $ -29,585 $ -62,876 $ -12,792 $ -67,132

6.2 Cálculo de márgenes y porcentajes

La tabla de márgenes presenta los márgenes financieros como porcentaje de las ventas. Estos márgenes ayudan a evaluar la eficiencia y rentabilidad de Nala, mostrando cómo los costos y gastos impactan las ganancias.

Code
# Convert all monetary values to numeric
estado_resultados <- estado_resultados %>%
  mutate(across(`2020`:`2024`, ~ as.numeric(str_remove_all(.x, "\\$|,"))))

# Now, calculate the percentages using the numeric values
ventas <- estado_resultados %>%
  filter(Metrica == "Ventas") %>%
  select(-Metrica)

# Function to calculate percentages for each row based on 'Ventas'
calculate_percentages <- function(row, ventas) {
  if (row["Metrica"] == "Ventas") {
    # Set Ventas percentage to 100 for display purposes
    percentages <- rep(100, length(row) - 1)
  } else {
    # Calculate percentages
    percentages <- as.numeric(row[-1]) / ventas * 100
    percentages <- round(percentages, 1)  # Round to one decimal
  }
  # Return the row with percentages formatted with a percentage sign
  percentages <- paste0(percentages, "%")
  return(c(row[1], percentages))
}

# Apply the function to each row
percentage_results <- t(apply(estado_resultados, 1, calculate_percentages, ventas = as.numeric(ventas)))

# Convert the results back to a dataframe and update column names
percentage_results_df <- as.data.frame(percentage_results)
colnames(percentage_results_df) <- colnames(estado_resultados)


# Print the percentage results dataframe with kable
kable(percentage_results_df, format = "html", caption = "Margen de Resultados") %>%
  kable_styling(bootstrap_options = c("striped", "hover", "condensed"), full_width = F) %>%
  row_spec(c(6, 9, 12), background = "lightyellow", bold = TRUE)
Margen de Resultados
Metrica 2020 2021 2022 2023 2024
Ventas en Deposito 36% 41% 65.5% 56% 52.7%
Ventas en Efectivo 64% 18.4% 6.3% 2.4% 1.8%
Ventas en Pagina 0% 40.6% 28.2% 41.6% 45.5%
Ventas 100% 100% 100% 100% 100%
Costo_de_Ventas 115.8% 82.9% 58.2% 56.3% 54.2%
Utilidad_Bruta -15.8% 17.1% 41.8% 43.7% 45.8%
Gastos_de_ventas 37.2% 3.7% 4.7% 8% 10.4%
Gastos_administrativos 32.7% 6.8% 41.7% 31.9% 40.8%
Utilidad_de_Operacion -85.6% 6.6% -4.5% 3.8% -5.3%
Gastos_financieros 0% 0% 0% 0% 0%
Otros_gastos 0% 0% 0% 0% 0%
Utilidad_Periodo -85.6% 6.6% -4.5% 3.8% -5.3%
Utilidad_Acumulada 0% -17.4% -4% -4.8% -1.3%
Utilidad_Neta -85.6% -10.8% -8.5% -1% -6.6%

6.3 Crecimiento de Nala porcentual

Code
# Calculate year-over-year growth for all relevant metrics
ventas_growth <- estado_resultados %>%
  filter(Metrica == "Ventas") %>%
  mutate(
    Growth_2021 = scales::percent((`2021` - `2020`) / `2020`),
    Growth_2022 = scales::percent((`2022` - `2021`) / `2021`),
    Growth_2023 = scales::percent((`2023` - `2022`) / `2022`),
    Growth_2024 = scales::percent((`2024` - `2023`) / `2023`))


# Cost of Goods Sold (Costo_de_Ventas)
cost_growth <- estado_resultados %>%
  filter(Metrica == "Costo_de_Ventas") %>%
  mutate(
    Growth_2021 = scales::percent((`2021` - `2020`) / `2020`),
    Growth_2022 = scales::percent((`2022` - `2021`) / `2021`),
    Growth_2023 = scales::percent((`2023` - `2022`) / `2022`),
    Growth_2024 = scales::percent((`2024` - `2023`) / `2023`)
  )

# Expenses (Gastos_de_ventas and Gastos_administrativos)
expenses_growth <- estado_resultados %>%
  filter(Metrica %in% c("Gastos_de_ventas", "Gastos_administrativos")) %>%
  mutate(
    Growth_2021 = scales::percent((`2021` - `2020`) / `2020`),
    Growth_2022 = scales::percent((`2022` - `2021`) / `2021`),
    Growth_2023 = scales::percent((`2023` - `2022`) / `2022`),
    Growth_2024 = scales::percent((`2024` - `2023`) / `2023`)
  )

# Utility (Utilidad_Bruta, Utilidad_de_Operacion, Utilidad_Neta)
utility_growth <- estado_resultados %>%
  filter(Metrica %in% c("Utilidad_Bruta", "Utilidad_de_Operacion", "Utilidad_Neta")) %>%
  mutate(
    Growth_2021 = scales::percent((`2021` - `2020`) / `2020`),
    Growth_2022 = scales::percent((`2022` - `2021`) / `2021`),
    Growth_2023 = scales::percent((`2023` - `2022`) / `2022`),
    Growth_2024 = scales::percent((`2024` - `2023`) / `2023`)
  )

# Combine all into a single data frame
combined_growth <- bind_rows(ventas_growth, cost_growth, expenses_growth, utility_growth) %>%
  select(Metrica, Growth_2021, Growth_2022, Growth_2023, Growth_2024)

# Display the combined growth table
combined_growth %>%
  kable(format = "html", caption = "Year-over-Year Growth for Costs, Expenses, and Utility Metrics (as Percentage)", align = 'c') %>%
  kable_styling(bootstrap_options = c("hover", "condensed"), full_width = F)
Year-over-Year Growth for Costs, Expenses, and Utility Metrics (as Percentage)
Metrica Growth_2021 Growth_2022 Growth_2023 Growth_2024
Ventas 392% 169% 76% -22%
Costo_de_Ventas 252% 89% 71% -24%
Gastos_de_ventas -51% 241% 203% 1.7%
Gastos_administrativos 3% 1 545% 35% 0.3%
Utilidad_Bruta -634% 557% 84% -18%
Utilidad_de_Operacion -138% -283% -250% -208%
Utilidad_Neta -38% 113% -80% 425%

7.0 Next Steps

  1. Cambiar “tags” o dropdown list de gastos.