Análisis de TikTok

Carga de librerías

library(dplyr)
## Warning: package 'dplyr' was built under R version 4.4.3
## 
## Adjuntando el paquete: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(ggplot2)
## Warning: package 'ggplot2' was built under R version 4.4.3
library(plotly)
## Warning: package 'plotly' was built under R version 4.4.3
## 
## Adjuntando el paquete: 'plotly'
## The following object is masked from 'package:ggplot2':
## 
##     last_plot
## The following object is masked from 'package:stats':
## 
##     filter
## The following object is masked from 'package:graphics':
## 
##     layout
library(shiny)
## Warning: package 'shiny' was built under R version 4.4.3
library(readr)
## Warning: package 'readr' was built under R version 4.4.3

Leer el archivo CSV

# Load the raw TikTok dataset from a fixed local path into a tibble.
datos <- read_csv(file = "C:/R/tiktok_dataset.csv")
## Rows: 19382 Columns: 12
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (9): claim_status, video_transcription_text, verified_status, author_ban...
## dbl (3): #, video_id, video_duration_sec
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

Limpieza de datos

# Clean the raw data: lower-case every column name, give the `#` column a
# syntactic name, convert character columns to factors and drop every row
# that contains a missing value.
datos <- datos %>%
  rename_with(tolower) %>%
  rename(registro = `#`) %>% # `#` is not a syntactic R name
  # across() + where() is the current replacement for the superseded
  # mutate_if(); the resulting columns are the same.
  mutate(across(where(is.character), as.factor)) %>%
  na.omit()

Visualización 1: Gráfico de dispersión con plotly

# Interactive scatter plot of video duration against record number. Points
# are coloured by verification status, which is also supplied as each
# point's text label.
grafico1 <- datos %>%
  plot_ly(
    x = ~registro,
    y = ~video_duration_sec,
    color = ~verified_status,
    text = ~paste("Estado de verificación:", verified_status),
    type = "scatter",
    mode = "markers"
  ) %>%
  layout(title = "Dispersión interactiva")

Visualización 2: Histograma interactivo

# Interactive histogram of video duration (nbinsx set to 20), with a title.
grafico2 <- layout(
  plot_ly(
    data = datos,
    x = ~video_duration_sec,
    type = "histogram",
    nbinsx = 20
  ),
  title = "Distribución de duración de video"
)

Visualización 3: ggplot + plotly

# Static ggplot2 scatter of video duration against record number, coloured
# by verification status. The extra `text` aesthetic carries the video id so
# it can be used as the tooltip after conversion to plotly.
gg <- ggplot(
  data = datos,
  mapping = aes(
    x = registro,
    y = video_duration_sec,
    color = verified_status,
    text = paste("Video ID:", video_id)
  )
) +
  geom_point() +
  theme_minimal()

# Convert the ggplot object to plotly, restricting the tooltip to `text`.
grafico3 <- ggplotly(p = gg, tooltip = "text")

Visualización 4: Boxplot interactivo por categorías

# Interactive box plot of video duration, one box per verification status.
grafico4 <- datos %>%
  plot_ly(
    y = ~video_duration_sec,
    color = ~verified_status,
    type = "box"
  ) %>%
  layout(title = "Distribución de duración de video por verificación")

Interfaz Shiny

# Shiny UI: a sidebar with a selector listing the numeric columns of `datos`
# and a main panel with one tab per plotly output (plot1 to plot4). The
# selector's value is read by the server as `input$variable`.
ui <- fluidPage(
  titlePanel("Dashboard Interactivo"),
  sidebarLayout(
    sidebarPanel(
      selectInput("variable", "Selecciona variable numérica:",
                  # select(where()) replaces the superseded select_if();
                  # the resulting column names are the same.
                  choices = names(select(datos, where(is.numeric))))
    ),
    mainPanel(
      tabsetPanel(
        tabPanel("Gráfico 1", plotlyOutput("plot1")),
        tabPanel("Gráfico 2", plotlyOutput("plot2")),
        tabPanel("Gráfico 3", plotlyOutput("plot3")),
        tabPanel("Gráfico 4", plotlyOutput("plot4"))
      )
    )
  )
)

# Shiny server: plot1 to plot3 render the pre-built plotly objects, while
# plot4 is rebuilt on every change of `input$variable` as a box plot of the
# selected column grouped by verification status.
server <- function(input, output) {
  output$plot1 <- renderPlotly({ grafico1 })
  output$plot2 <- renderPlotly({ grafico2 })
  output$plot3 <- renderPlotly({ grafico3 })
  output$plot4 <- renderPlotly({
    # Input values come from the client, and the selected name is turned
    # into a formula that plotly evaluates. Only proceed when it is an
    # actual column of `datos`; this also stops quietly on empty input
    # instead of raising a formula parse error.
    req(input$variable, input$variable %in% names(datos))
    # Backticks keep the formula valid for non-syntactic column names.
    plot_ly(datos, y = as.formula(paste0("~`", input$variable, "`")),
            color = ~verified_status, type = "box")
  })
}

# Build the Shiny application from the UI definition and server function.
shinyApp(ui = ui, server = server)
Shiny applications not supported in static R Markdown documents

Note that the echo = FALSE parameter was added to the code chunk to prevent printing of the R code that generated the plot.