Untitled

# Return the sum of two values.
#   a, b: operands combined with `+`.
nombre <- function(a, b) {
  a + b
}


# Example call
nombre(8, 2)

## [1] 10

# Apply the four basic arithmetic operations to x and y.
# Returns a list with the elements Suma (x + y), Resta (x - y),
# Producto (x * y) and Cociente (x / y).
operaciones <- function(x, y) {
  list(
    Suma = x + y,
    Resta = x - y,
    Producto = x * y,
    Cociente = x / y
  )
}

# Example: run the four operations on 10 and 5 and show the list
resultados <- operaciones(10, 5)
print(resultados)

## $Suma
## [1] 15
## 
## $Resta
## [1] 5
## 
## $Producto
## [1] 50
## 
## $Cociente
## [1] 2

listas <- list()

# Build a random three-letter codon: position 1 is always "T", positions 2
# and 3 are each drawn independently from "G" / "A".
vector_a <- c("T", sample(c("G", "A"), 2, replace = TRUE))

# "T","G","G" is the one combination that gets rewritten: one of its two
# trailing letters is replaced with "A" (giving TAG or TGA).
# NOTE(review): presumably the goal is to always end up with a stop codon
# (TAA / TAG / TGA) -- confirm.
# The replacement position is drawn from 2:3. Drawing it from 1:2 could
# overwrite the fixed leading "T" and leave "A","G","G".
if (vector_a[2] == "G" && vector_a[3] == "G") {
  posicion <- sample(2:3, 1)
  vector_a[posicion] <- "A"
}
print(vector_a)

## [1] "T" "G" "A"

# Alphabet the random positions are drawn from.
Variable <- c("T", "G", "A", "C")

# The sequence starts with "A", "T", "G"; positions 4 to 102 (99 letters) are
# uniform random draws from `Variable`. A single vectorised sample() call
# replaces growing the vector one element at a time inside a loop.
vector_b <- c("A", "T", "G", sample(Variable, 99, replace = TRUE))

# Append the three letters held in vector_a (built earlier in the script).
vector_b <- c(vector_b, vector_a)
print(vector_b)

##   [1] "A" "T" "G" "C" "A" "T" "C" "G" "T" "A" "T" "T" "A" "A" "T" "G" "C" "T"
##  [19] "C" "T" "A" "A" "A" "A" "G" "C" "C" "T" "G" "A" "G" "T" "T" "A" "A" "T"
##  [37] "G" "T" "G" "A" "T" "C" "T" "T" "T" "A" "A" "T" "A" "T" "G" "T" "C" "C"
##  [55] "C" "A" "G" "A" "T" "G" "A" "G" "G" "T" "T" "G" "A" "T" "A" "A" "G" "A"
##  [73] "A" "C" "A" "C" "A" "G" "G" "A" "C" "C" "G" "C" "A" "T" "T" "G" "T" "C"
##  [91] "A" "A" "G" "C" "A" "T" "A" "A" "A" "G" "C" "A" "T" "G" "A"

x <- -3

# Print one message when x is strictly positive and another otherwise
# (zero is reported together with the negatives).
print(if (x > 0) "x es positivo" else "X es negativo o cero")

## [1] "X es negativo o cero"

numeros <- c(5, -2, 0, 8, -1)

# Label every element: start with "No positivo" everywhere, then overwrite the
# strictly positive positions through a logical mask.
resultado <- rep("No positivo", length(numeros))
resultado[numeros > 0] <- "positivo"
print(resultado)

## [1] "positivo"    "No positivo" "No positivo" "positivo"    "No positivo"

numeros <- c(1, 2, 3, 4, 5, 6)
i <- 1

# Print the leading elements of `numeros` for as long as they are <= 4.
# The bounds check runs first (and `&&` short-circuits), so if every element
# were <= 4 the loop ends at the last element instead of failing on
# `numeros[i]` being NA once `i` runs past the end of the vector.
while (i <= length(numeros) && numeros[i] <= 4) {
  print(numeros[i])
  i <- i + 1
}

## [1] 1
## [1] 2
## [1] 3
## [1] 4

# Use the CRAN cloud mirror for this session.
options(repos = c(CRAN = "https://cloud.r-project.org/"))

# Install only the CRAN packages that are not available yet. Reinstalling a
# package that is already present can fail: the log below shows "cannot remove
# prior installation of package 'seqinr'" with a permission error on its DLL.
paquetes_cran <- c("BiocManager", "seqinr", "rentrez")
paquetes_disponibles <- vapply(
  paquetes_cran, requireNamespace, logical(1),
  quietly = TRUE
)
paquetes_faltantes <- paquetes_cran[!paquetes_disponibles]
if (length(paquetes_faltantes) > 0) {
  install.packages(paquetes_faltantes)
}

## Installing packages into 'C:/Users/ugams/AppData/Local/R/win-library/4.5'
## (as 'lib' is unspecified)

## package 'BiocManager' successfully unpacked and MD5 sums checked
## package 'seqinr' successfully unpacked and MD5 sums checked

## Warning: cannot remove prior installation of package 'seqinr'

## Warning in file.copy(savedcopy, lib, recursive = TRUE): problema al copiar
## C:\Users\ugams\AppData\Local\R\win-library\4.5\00LOCK\seqinr\libs\x64\seqinr.dll
## a C:\Users\ugams\AppData\Local\R\win-library\4.5\seqinr\libs\x64\seqinr.dll:
## Permission denied

## Warning: restored 'seqinr'

## package 'rentrez' successfully unpacked and MD5 sums checked
## 
## The downloaded binary packages are in
##  C:\Users\ugams\AppData\Local\Temp\RtmpItbxwj\downloaded_packages

BiocManager::install("Biostrings")

## 'getOption("repos")' replaces Bioconductor standard repositories, see
## 'help("repositories", package = "BiocManager")' for details.
## Replacement repositories:
##     CRAN: https://cloud.r-project.org/

## Bioconductor version 3.21 (BiocManager 1.30.25), R 4.5.0 (2025-04-11 ucrt)

## Warning: package(s) not installed when version(s) same as or greater than current; use
##   `force = TRUE` to re-install: 'Biostrings'

## Installation paths not writeable, unable to update packages
##   path: C:/Program Files/R/R-4.5.0/library
##   packages:
##     lattice, mgcv

## Old packages: 'cli'

library(Biostrings)

## Cargando paquete requerido: BiocGenerics

## Cargando paquete requerido: generics

## 
## Adjuntando el paquete: 'generics'

## The following objects are masked from 'package:base':
## 
##     as.difftime, as.factor, as.ordered, intersect, is.element, setdiff,
##     setequal, union

## 
## Adjuntando el paquete: 'BiocGenerics'

## The following objects are masked from 'package:stats':
## 
##     IQR, mad, sd, var, xtabs

## The following objects are masked from 'package:base':
## 
##     anyDuplicated, aperm, append, as.data.frame, basename, cbind,
##     colnames, dirname, do.call, duplicated, eval, evalq, Filter, Find,
##     get, grep, grepl, is.unsorted, lapply, Map, mapply, match, mget,
##     order, paste, pmax, pmax.int, pmin, pmin.int, Position, rank,
##     rbind, Reduce, rownames, sapply, saveRDS, table, tapply, unique,
##     unsplit, which.max, which.min

## Cargando paquete requerido: S4Vectors

## Cargando paquete requerido: stats4

## 
## Adjuntando el paquete: 'S4Vectors'

## The following object is masked from 'package:utils':
## 
##     findMatches

## The following objects are masked from 'package:base':
## 
##     expand.grid, I, unname

## Cargando paquete requerido: IRanges

## 
## Adjuntando el paquete: 'IRanges'

## The following object is masked from 'package:grDevices':
## 
##     windows

## Cargando paquete requerido: XVector

## Cargando paquete requerido: GenomeInfoDb

## 
## Adjuntando el paquete: 'Biostrings'

## The following object is masked from 'package:base':
## 
##     strsplit

library(ggplot2)
library(seqinr)

## 
## Adjuntando el paquete: 'seqinr'

## The following object is masked from 'package:Biostrings':
## 
##     translate

library(rentrez)

# Accession to download from the NCBI "nuccore" database, requested as
# FASTA-formatted text.
ncbi_id <- "NC_045512.2"
sequence_raw <- entrez_fetch(
  db = "nuccore",
  id = ncbi_id,
  rettype = "fasta",
  retmode = "text"
)

# Split the downloaded text into lines, drop the first line (the FASTA
# header) and glue the remaining lines into one continuous string.
sequence_lines <- unlist(
  strsplit(sequence_raw, "\n")
)
sequence_only <- paste(
  sequence_lines[-1],
  collapse = ""
)

# Wrap the string in a Biostrings DNAString object.
sequence <- DNAString(sequence_only)

# Show the first 100 nucleotides.
substring(sequence, first = 1, last = 100)

## 100-letter DNAString object
## seq: ATTAAAGGTTTATACCTTCCCAGGTAACAAACCAAC...TCTAAACGAACTTTAAAATCTGTGTGGCTGTCACTC

# Number of letters in the downloaded sequence, reported on the console.
length_sequence <- length(sequence)
cat(
  "El tamaño de la secuencia es:",
  length_sequence,
  "nucleótidos."
)

## El tamaño de la secuencia es: 29903 nucleótidos.

# Count how often each base occurs. With baseOnly = TRUE the result has the
# entries A, C, G, T plus a single "other" entry for everything else.
nucleotide_composition <- alphabetFrequency(
  sequence,
  baseOnly = TRUE
)
print(nucleotide_composition)

##     A     C     G     T other 
##  8954  5492  5863  9594     0

# GC content: share of G plus C letters in the whole sequence, as a percentage.
GC_content <- (
  nucleotide_composition["G"] + nucleotide_composition["C"]
) / length_sequence * 100
cat(
  "El contenido de GC es:",
  round(GC_content, digits = 2),
  "%."
)

## El contenido de GC es: 37.97 %.

# Base-by-base complement of the sequence (not reversed).
complementary_sequence <- complement(sequence)

# Show the first 100 nucleotides of the complementary sequence.
substring(complementary_sequence, first = 1, last = 100)

## 100-letter DNAString object
## seq: TAATTTCCAAATATGGAAGGGTCCATTGTTTGGTTG...AGATTTGCTTGAAATTTTAGACACACCGACAGTGAG

# Keep only the A, T, C and G counts (in that order) and also express them as
# a percentage of the sequence length.
nucleotide_counts <- nucleotide_composition[c("A", "T", "C", "G")]
nucleotide_percent <- nucleotide_counts * 100 / length_sequence

# Both bar charts share the same palette (one colour per base) and title.
colores_bases <- c("red", "orange", "green", "cyan")
titulo_grafico <- "Composición de nucleótidos del genoma de Wuhan-Hu-1"

# Bar chart of absolute counts.
barplot(
  nucleotide_counts,
  col = colores_bases,
  main = titulo_grafico,
  ylab = "Frecuencia",
  xlab = "Nucleótido"
)

# Bar chart of percentages.
barplot(
  nucleotide_percent,
  col = colores_bases,
  main = titulo_grafico,
  ylab = "Porcentaje",
  xlab = "Nucleótido"
)

# Second accession to download from the NCBI "nuccore" database, again as
# FASTA-formatted text.
ncbi_id_2 <- "CM001378.3"

sequence_raw_2 <- entrez_fetch(
  db = "nuccore",
  id = ncbi_id_2,
  rettype = "fasta",
  retmode = "text"
)

# Split the downloaded text into lines, drop the first line (the FASTA
# header) and glue the remaining lines into one continuous string.
sequence_lines_2 <- unlist(
  strsplit(sequence_raw_2, "\n")
)
sequence_only_2 <- paste(
  sequence_lines_2[-1],
  collapse = ""
)

# Wrap the string in a Biostrings DNAString object.
sequence_2 <- DNAString(sequence_only_2)

# Show the first 100 nucleotides.
substring(sequence_2, first = 1, last = 100)

## 100-letter DNAString object
## seq: ATCAGGAGATCTAGATGCCTGGAGAGGAGTGGAGAA...AATATGTATTTCTCCTTCGAATATAAAAAAAGTAAA

# Number of letters in the second sequence, reported on the console.
length_sequence_2 <- length(sequence_2)
cat(
  "El tamaño de la secuencia es:",
  length_sequence_2,
  "nucleótidos."
)

## El tamaño de la secuencia es: 242100913 nucleótidos.

# Manejo de bases de datos

if (!require("ggplot2")) install.packages("ggplot2")
if (!require("dplyr")) install.packages("dplyr")

## Cargando paquete requerido: dplyr

## 
## Adjuntando el paquete: 'dplyr'

## The following object is masked from 'package:seqinr':
## 
##     count

## The following objects are masked from 'package:Biostrings':
## 
##     collapse, intersect, setdiff, setequal, union

## The following object is masked from 'package:GenomeInfoDb':
## 
##     intersect

## The following object is masked from 'package:XVector':
## 
##     slice

## The following objects are masked from 'package:IRanges':
## 
##     collapse, desc, intersect, setdiff, slice, union

## The following objects are masked from 'package:S4Vectors':
## 
##     first, intersect, rename, setdiff, setequal, union

## The following objects are masked from 'package:BiocGenerics':
## 
##     combine, intersect, setdiff, setequal, union

## The following object is masked from 'package:generics':
## 
##     explain

## The following objects are masked from 'package:stats':
## 
##     filter, lag

## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union

if (!require("maps")) install.packages("maps")

## Cargando paquete requerido: maps

if (!require("viridis")) install.packages("viridis")

## Cargando paquete requerido: viridis

## Cargando paquete requerido: viridisLite

## 
## Adjuntando el paquete: 'viridis'

## The following object is masked from 'package:maps':
## 
##     unemp

if (!require("seqinr")) install.packages("seqinr")
if (!require("adegenet")) install.packages("adegenet")

## Cargando paquete requerido: adegenet

## Cargando paquete requerido: ade4

## 
## Adjuntando el paquete: 'ade4'

## The following object is masked from 'package:BiocGenerics':
## 
##     score

## 
##    /// adegenet 2.1.11 is loaded ////////////
## 
##    > overview: '?adegenet'
##    > tutorials/doc/questions: 'adegenetWeb()' 
##    > bug reports/feature requests: adegenetIssues()

if (!require("ape")) install.packages("ape")

## Cargando paquete requerido: ape

## 
## Adjuntando el paquete: 'ape'

## The following object is masked from 'package:dplyr':
## 
##     where

## The following objects are masked from 'package:seqinr':
## 
##     as.alignment, consensus

## The following object is masked from 'package:Biostrings':
## 
##     complement

if (!require("BiocManager")) install.packages("BiocManager")

## Cargando paquete requerido: BiocManager

BiocManager::install("ggtree")

## 'getOption("repos")' replaces Bioconductor standard repositories, see
## 'help("repositories", package = "BiocManager")' for details.
## Replacement repositories:
##     CRAN: https://cloud.r-project.org/

## Bioconductor version 3.21 (BiocManager 1.30.25), R 4.5.0 (2025-04-11 ucrt)

## Warning: package(s) not installed when version(s) same as or greater than current; use
##   `force = TRUE` to re-install: 'ggtree'

## Installation paths not writeable, unable to update packages
##   path: C:/Program Files/R/R-4.5.0/library
##   packages:
##     lattice, mgcv

## Old packages: 'cli'

BiocManager::install("DECIPHER")

## 'getOption("repos")' replaces Bioconductor standard repositories, see
## 'help("repositories", package = "BiocManager")' for details.
## Replacement repositories:
##     CRAN: https://cloud.r-project.org/

## Bioconductor version 3.21 (BiocManager 1.30.25), R 4.5.0 (2025-04-11 ucrt)

## Warning: package(s) not installed when version(s) same as or greater than current; use
##   `force = TRUE` to re-install: 'DECIPHER'

## Installation paths not writeable, unable to update packages
##   path: C:/Program Files/R/R-4.5.0/library
##   packages:
##     lattice, mgcv

## Old packages: 'cli'

BiocManager::install("ggmsa")

## 'getOption("repos")' replaces Bioconductor standard repositories, see
## 'help("repositories", package = "BiocManager")' for details.
## Replacement repositories:
##     CRAN: https://cloud.r-project.org/

## Bioconductor version 3.21 (BiocManager 1.30.25), R 4.5.0 (2025-04-11 ucrt)

## Warning: package(s) not installed when version(s) same as or greater than current; use
##   `force = TRUE` to re-install: 'ggmsa'

## Installation paths not writeable, unable to update packages
##   path: C:/Program Files/R/R-4.5.0/library
##   packages:
##     lattice, mgcv

## Old packages: 'cli'

library(ggplot2)
library(dplyr)
library(maps)
library(viridis)
library(seqinr)
library(adegenet)
library(ape)
library(ggtree)

## ggtree v3.16.0 Learn more at https://yulab-smu.top/contribution-tree-data/
## 
## Please cite:
## 
## Guangchuang Yu, Tommy Tsan-Yuk Lam, Huachen Zhu, Yi Guan. Two methods
## for mapping and visualizing associated data on phylogeny using ggtree.
## Molecular Biology and Evolution. 2018, 35(12):3041-3043.
## doi:10.1093/molbev/msy194

## 
## Adjuntando el paquete: 'ggtree'

## The following object is masked from 'package:ape':
## 
##     rotate

## The following object is masked from 'package:Biostrings':
## 
##     collapse

## The following object is masked from 'package:IRanges':
## 
##     collapse

## The following object is masked from 'package:S4Vectors':
## 
##     expand

library(DECIPHER)
library(ggmsa)

## Registered S3 methods overwritten by 'ggalt':
##   method                  from   
##   grid.draw.absoluteGrob  ggplot2
##   grobHeight.absoluteGrob ggplot2
##   grobWidth.absoluteGrob  ggplot2
##   grobX.absoluteGrob      ggplot2
##   grobY.absoluteGrob      ggplot2

## ggmsa v1.14.0  Document: http://yulab-smu.top/ggmsa/
## 
## If you use ggmsa in published research, please cite:
## L Zhou, T Feng, S Xu, F Gao, TT Lam, Q Wang, T Wu, H Huang, L Zhan, L Li, Y Guan, Z Dai*, G Yu* ggmsa: a visual exploration tool for multiple sequence alignment and associated data. Briefings in Bioinformatics. DOI:10.1093/bib/bbac222

Parte 1: Visualización de la pandemia de COVID-19. 1. Carga y exploración de los datos: los datos provienen del repositorio abierto de la pandemia de COVID-19, procesados por DataHub:

# Download the combined COVID-19 time-series CSV and preview its first rows.
covid_19_data <- read.csv(
  url("https://raw.githubusercontent.com/datasets/covid-19/master/data/time-series-19-covid-combined.csv")
)
head(covid_19_data)

##         Date Country.Region Province.State Confirmed Recovered Deaths
## 1 2020-01-22    Afghanistan                        0         0      0
## 2 2020-01-23    Afghanistan                        0         0      0
## 3 2020-01-24    Afghanistan                        0         0      0
## 4 2020-01-25    Afghanistan                        0         0      0
## 5 2020-01-26    Afghanistan                        0         0      0
## 6 2020-01-27    Afghanistan                        0         0      0

# Number of rows and columns of the downloaded table.
dim(covid_19_data)

## [1] 231744      6

# Number of distinct values in the second column, Country.Region.
length(unique(covid_19_data[["Country.Region"]]))

## [1] 198

2. Casos en México: vamos a filtrar los datos para México y graficar la evolución de casos confirmados y decesos.

# Keep only the rows whose Country.Region is "Mexico".
mexico <- subset(covid_19_data, Country.Region == "Mexico")

# Parse the Date column (text in year-month-day form) into Date values.
mexico$Date <- as.Date(mexico$Date, format = "%Y-%m-%d")

# Confirmed cases over time: points joined by a line, each point labelled
# with its own Confirmed value.
ggplot(
  mexico,
  aes(x = Date, y = Confirmed, label = Confirmed)
) +
  geom_point() +
  geom_line() +
  geom_text(hjust = "right", size = 3) +
  labs(
    title = "Casos confirmados en México",
    x = "Fecha",
    y = "Casos Confirmados"
  )

Untitled

2025-04-03