Datos masivos Portafolio

Para insertar chunks: Ctrl+Alt+I o Command+Option+I. Se realizará un ejemplo de análisis de datos.

# Install pacman only when it is missing, so that re-running or knitting
# the document does not reinstall the package every time.
if (!requireNamespace("pacman", quietly = TRUE)) {
  install.packages("pacman")
}

## Installing package into '/cloud/lib/x86_64-pc-linux-gnu-library/4.5'
## (as 'lib' is unspecified)

# Attach pacman, then use p_load() to attach the packages used below
# (plotting, data manipulation, and fast file reading).
library(pacman)
p_load(ggplot2, dplyr, vroom)

Llamar a la base de datos

# Read the PCR exercise CSV directly from its GitHub URL; the URL is the
# first (file) argument of vroom().
Datos_masivos <- vroom(
  "https://raw.githubusercontent.com/ManuelLaraMVZ/Metabolomica_2026_1/refs/heads/main/Datos_ejercicio_PCR1.1.csv"
)

## Rows: 1001 Columns: 7
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (1): Gen
## dbl (6): C1, C2, C3, T1, T2, T3
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

# Auto-print the imported table (a bare object name at top level prints it).
Datos_masivos

## # A tibble: 1,001 × 7
##    Gen         C1    C2    C3    T1    T2    T3
##    <chr>    <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
##  1 B-actina  19.9  20.2  19.9  20.0  20.1  19.9
##  2 Gene_1    23.9  23.0  24.0  21.7  22.4  21.0
##  3 Gene_2    24.5  22.9  25.5  21.3  23.3  24.3
##  4 Gene_3    28.1  25.0  23.9  19.1  23.3  19.7
##  5 Gene_4    25.1  24.7  27.4  20.6  19.4  25.0
##  6 Gene_5    25.3  19.9  25.3  27.2  17.9  23.8
##  7 Gene_6    28.4  27.1  23.8  21.9  26.4  22.7
##  8 Gene_7    25.9  25.5  21.4  23.8  22.5  23.1
##  9 Gene_8    22.5  29.8  23.7  21.6  22.8  22.4
## 10 Gene_9    23.6  26.4  29.1  23.2  19.6  21.1
## # ℹ 991 more rows

aislar los genes de referencia de cada condición

# Isolate the reference-gene row: `==` keeps only the rows whose Gen value
# is exactly "B-actina".
Gen_ref <- Datos_masivos %>%
  filter(Gen == "B-actina")
# The analysis below subtracts Gen_ref$C1, Gen_ref$C2, ... from every gene
# of interest. That is only valid with exactly one reference row: zero rows
# would make the subtraction fail, and several rows could be silently
# recycled across genes.
stopifnot(nrow(Gen_ref) == 1)
Gen_ref

## # A tibble: 1 × 7
##   Gen         C1    C2    C3    T1    T2    T3
##   <chr>    <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 B-actina  19.9  20.2  19.9  20.0  20.1  19.9

generar base de datos con genes de interés

# Genes of interest: every row except the reference gene
# (`!=` means "not equal to").
Gen_int <- filter(Datos_masivos, Gen != "B-actina")
Gen_int

## # A tibble: 1,000 × 7
##    Gen        C1    C2    C3    T1    T2    T3
##    <chr>   <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
##  1 Gene_1   23.9  23.0  24.0  21.7  22.4  21.0
##  2 Gene_2   24.5  22.9  25.5  21.3  23.3  24.3
##  3 Gene_3   28.1  25.0  23.9  19.1  23.3  19.7
##  4 Gene_4   25.1  24.7  27.4  20.6  19.4  25.0
##  5 Gene_5   25.3  19.9  25.3  27.2  17.9  23.8
##  6 Gene_6   28.4  27.1  23.8  21.9  26.4  22.7
##  7 Gene_7   25.9  25.5  21.4  23.8  22.5  23.1
##  8 Gene_8   22.5  29.8  23.7  21.6  22.8  22.4
##  9 Gene_9   23.6  26.4  29.1  23.2  19.6  21.1
## 10 Gene_10  24.1  24.1  23.9  21.9  24.3  21.3
## # ℹ 990 more rows

análisis

# Relative expression for each gene of interest, built in four steps:
#   1. Delta Ct: subtract the reference gene's value for the same sample
#      (`$` extracts that sample's column from Gen_ref).
#   2. 2^-(delta Ct) for each of the six samples.
#   3. Average the three control replicates (Cx) and the three treatment
#      replicates (Tx).
#   4. Divide the treatment mean by the control mean (DosDDCT).
# One mutate() call is enough: each new column may use the columns defined
# before it in the same call, and the column order is unchanged.
DCT <- Gen_int %>%
  mutate(
    DC1 = C1 - Gen_ref$C1,
    DC2 = C2 - Gen_ref$C2,
    DC3 = C3 - Gen_ref$C3,
    DT1 = T1 - Gen_ref$T1,
    DT2 = T2 - Gen_ref$T2,
    DT3 = T3 - Gen_ref$T3,
    DosDCTC1 = 2^-DC1,
    DosDCTC2 = 2^-DC2,
    DosDCTC3 = 2^-DC3,
    DosDCTT1 = 2^-DT1,
    DosDCTT2 = 2^-DT2,
    DosDCTT3 = 2^-DT3,
    DosDCTCx = (DosDCTC1 + DosDCTC2 + DosDCTC3) / 3,
    DosDCTTx = (DosDCTT1 + DosDCTT2 + DosDCTT3) / 3,
    DosDDCT = DosDCTTx / DosDCTCx
  )


DCT

## # A tibble: 1,000 × 22
##    Gen        C1    C2    C3    T1    T2    T3   DC1    DC2   DC3    DT1    DT2
##    <chr>   <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>  <dbl> <dbl>  <dbl>  <dbl>
##  1 Gene_1   23.9  23.0  24.0  21.7  22.4  21.0  4.02  2.81   4.12  1.72   2.27 
##  2 Gene_2   24.5  22.9  25.5  21.3  23.3  24.3  4.68  2.72   5.61  1.37   3.18 
##  3 Gene_3   28.1  25.0  23.9  19.1  23.3  19.7  8.26  4.76   4.06 -0.876  3.22 
##  4 Gene_4   25.1  24.7  27.4  20.6  19.4  25.0  5.28  4.54   7.58  0.626 -0.689
##  5 Gene_5   25.3  19.9  25.3  27.2  17.9  23.8  5.40 -0.298  5.49  7.22  -2.17 
##  6 Gene_6   28.4  27.1  23.8  21.9  26.4  22.7  8.57  6.88   3.91  1.95   6.29 
##  7 Gene_7   25.9  25.5  21.4  23.8  22.5  23.1  6.06  5.30   1.52  3.85   2.34 
##  8 Gene_8   22.5  29.8  23.7  21.6  22.8  22.4  2.61  9.63   3.85  1.65   2.63 
##  9 Gene_9   23.6  26.4  29.1  23.2  19.6  21.1  3.77  6.17   9.23  3.24  -0.507
## 10 Gene_10  24.1  24.1  23.9  21.9  24.3  21.3  4.25  3.91   4.02  1.92   4.14 
## # ℹ 990 more rows
## # ℹ 10 more variables: DT3 <dbl>, DosDCTC1 <dbl>, DosDCTC2 <dbl>,
## #   DosDCTC3 <dbl>, DosDCTT1 <dbl>, DosDCTT2 <dbl>, DosDCTT3 <dbl>,
## #   DosDCTCx <dbl>, DosDCTTx <dbl>, DosDDCT <dbl>

aislar datos

# Keep only the gene name and its treatment/control ratio for the bar chart.
Datos_grafica <- select(DCT, Gen, DosDDCT)
Datos_grafica

## # A tibble: 1,000 × 2
##    Gen     DosDDCT
##    <chr>     <dbl>
##  1 Gene_1    3.69 
##  2 Gene_2    2.59 
##  3 Gene_3   30.6  
##  4 Gene_4   30.9  
##  5 Gene_5    3.59 
##  6 Gene_6    5.36 
##  7 Gene_7    0.955
##  8 Gene_8    2.79 
##  9 Gene_9   21.9  
## 10 Gene_10   3.82 
## # ℹ 990 more rows

grafica

# Bar chart of relative expression (DosDDCT) with one bar per gene.
# No `fill` aesthetic is mapped, so the bars use the default fill and no
# fill scale is added: a brewer scale would have nothing to act on, and
# the "Set3" palette offers only 12 colours in any case.
# NOTE(review): Gen has about 1,000 distinct values, so the x-axis labels
# will overlap; consider plotting a subset of genes.
Grafica_PCR <- ggplot(Datos_grafica, aes(x = Gen, y = DosDDCT)) +
  geom_col() +
  labs(
    title = "Expresión relativa de genes",
    subtitle = "Normalización con B-actina como referencia",
    caption = "Diseño: XXXX"
  ) +
  theme_minimal(base_size = 14) + # minimalist base theme
  theme(
    plot.background = element_rect(fill = "white", color = NA),
    panel.background = element_rect(fill = "white", color = NA)
  )

Grafica_PCR

Gráfica de regresión lineal: necesita los nombres de los genes y los promedios de 2^-deltaCT de Cx y Tx. Ctrl+Shift+M: atajo para insertar el pipe de magrittr (%>%).

# Keep the gene name plus the control and treatment means of 2^-(delta Ct)
# for the scatter plot.
Datos_regresion <- select(DCT, Gen, DosDCTCx, DosDCTTx)
Datos_regresion

## # A tibble: 1,000 × 3
##    Gen     DosDCTCx DosDCTTx
##    <chr>      <dbl>    <dbl>
##  1 Gene_1    0.0874    0.322
##  2 Gene_2    0.0704    0.182
##  3 Gene_3    0.0334    1.02 
##  4 Gene_4    0.0247    0.763
##  5 Gene_5    0.425     1.53 
##  6 Gene_6    0.0259    0.139
##  7 Gene_7    0.129     0.123
##  8 Gene_8    0.0781    0.218
##  9 Gene_9    0.0297    0.651
## 10 Gene_10   0.0603    0.231
## # ℹ 990 more rows

graficar regresión lineal

# Scatter plot of the treatment mean against the control mean of
# 2^-(delta Ct), one point per gene.
Grafica_regresion <- ggplot(
  Datos_regresion,
  aes(x = DosDCTCx, y = DosDCTTx)
) +
  # Semi-transparent points so overlapping genes remain visible.
  geom_point(color = "steelblue", size = 3, alpha = 0.7) +
  # Dashed identity line (intercept 0, slope 1). Line thickness is set with
  # `linewidth`; `size` for lines is deprecated since ggplot2 3.4.0.
  geom_abline(
    intercept = 0, slope = 1,
    color = "red", linetype = "dashed", linewidth = 1
  ) +
  # Fitted linear regression line, drawn without a confidence band.
  geom_smooth(method = "lm", se = FALSE, color = "darkgreen") +
  labs(
    title = "Comparación de DosDCTCx vs DosDCTTx",
    x = "DosDCTCx",
    y = "DosDCTTx"
  ) +
  theme_minimal(base_size = 14) +
  # Centred, bold title.
  theme(plot.title = element_text(hjust = 0.5, face = "bold"))

## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.

# Auto-print the plot object so the figure is rendered.
Grafica_regresion

## `geom_smooth()` using formula = 'y ~ x'

Hacer knit y luego republish para tener el link actualizado.

Datos masivos Portafolio

Montserrat Espinosa

2026-02-24