R Markdown

This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown see http://rmarkdown.rstudio.com.

When you click the Knit button a document will be generated that includes both content as well as the output of any embedded R code chunks within the document. You can embed an R code chunk like this:

# Print summary statistics for each column of the built-in `cars` data set.
print(summary(cars))
##      speed           dist       
##  Min.   : 4.0   Min.   :  2.00  
##  1st Qu.:12.0   1st Qu.: 26.00  
##  Median :15.0   Median : 36.00  
##  Mean   :15.4   Mean   : 42.98  
##  3rd Qu.:19.0   3rd Qu.: 56.00  
##  Max.   :25.0   Max.   :120.00

#Variables cualitativas: categorías

#ordinales #nominales

#cuantitativas #continuas - resultan de un proceso de medición #discretas - resultan de un proceso de conteo

#clasificar las variables de su dataset

library(dslabs)
library(dplyr)
## 
## Adjuntando el paquete: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(viridis)
## Cargando paquete requerido: viridisLite
library(viridisLite)
library(RColorBrewer)
data("murders")

Descripcion data set

#total   cuantitativa discreta
#Descripcion

#variables cualitativas nominales: state,abb,region

#variables cuantitativas discretas: population, total

#diagrama de barras: se debe hacer un conteo

# Frequency table: number of rows of `murders` that fall in each region.
tabla_region <- table(murders[["region"]])
print(tabla_region)
## 
##     Northeast         South North Central          West 
##             9            17            12            13
# Bar chart of the region counts with hand-picked colours.
#   main        chart title
#   xlab, ylab  axis labels
#   ylim        range shown on the count axis
#   col         fill colour of each bar
barplot(
  tabla_region,
  col = c("blue", "green", "red", "yellow"),
  ylim = c(0, 20),
  xlab = "Region",
  ylab = "Frecuencia",
  main = "Grafico de Barras"
)

# Same bar chart, restyled: fills and borders both use a 4-colour viridis
# palette.
barplot(
  tabla_region,
  col = viridis(4),
  border = viridis(4),
  ylim = c(0, 20),
  xlab = "Región",
  ylab = "Frecuencia",
  main = "Gráfico de Barras"
)

# Overlay horizontal reference lines only: nx = NA suppresses the vertical
# lines, ny = NULL aligns the horizontal ones with the y-axis tick marks.
grid(
  nx = NA,
  ny = NULL,
  lwd = par("lwd"),
  lty = "dotted",
  col = "lightgray"
)

# Pie chart of the share of states per region

# Frequency of each region.
tabla_region <- table(murders[["region"]])

# Share of each region as a percentage, rounded to one decimal place.
porcentajes <- round(100 * tabla_region / sum(tabla_region), 1)

# One colour per slice, taken from the ColorBrewer "Set3" palette.
colores <- brewer.pal(length(porcentajes), "Set3")

# Draw the pie, labelling every slice with its percentage.
pie(
  porcentajes,
  col = colores,
  main = "Diagrama Circular",
  labels = paste0(porcentajes, " %")
)

# Legend mapping each colour to its region; it reuses `colores` so the
# swatches match the slices.
legend(
  x = "topright",
  legend = names(porcentajes),
  fill = colores,
  cex = 0.6
)

# Bar chart with the regions in an explicit order

# Rebuild the factor with the desired level order; table() and barplot()
# then follow that order.
region <- factor(
  murders[["region"]],
  levels = c("South", "West", "North Central", "Northeast")
)
tabla_region1 <- table(region)

barplot(
  tabla_region1,
  col = viridis(4),
  border = viridis(4),
  ylim = c(0, 20),
  xlab = "Región",
  ylab = "Frecuencia",
  main = "Gráfico de Barras"
)

# Horizontal reference lines only (no vertical lines).
grid(
  nx = NA,
  ny = NULL,
  lwd = par("lwd"),
  lty = "dotted",
  col = "lightgray"
)

#variables cuantitativas

# Frequency table of the raw population values.
tabla_population <- table(murders[["population"]])
print(tabla_population)
## 
##   563626   601723   625741   672591   710231   814180   897934   989415 
##        1        1        1        1        1        1        1        1 
##  1052567  1316470  1328361  1360301  1567582  1826341  1852994  2059179 
##        1        1        1        1        1        1        1        1 
##  2700551  2763885  2853118  2915918  2967297  3046355  3574097  3751351 
##        1        1        1        1        1        1        1        1 
##  3831074  4339367  4533372  4625364  4779736  5029196  5303925  5686986 
##        1        1        1        1        1        1        1        1 
##  5773552  5988927  6346105  6392017  6483802  6547629  6724540  8001024 
##        1        1        1        1        1        1        1        1 
##  8791894  9535483  9883640  9920000 11536504 12702379 12830632 19378102 
##        1        1        1        1        1        1        1        1 
## 19687653 25145561 37253956 
##        1        1        1
# Bar chart of the population frequency table.
# NOTE: every population value in `murders` is distinct, so each count is 1
# and all bars have the same height.
barplot(tabla_population,
        main = "Gráfico de Poblacion",
        ylab = "Frecuencia",
        # The x axis holds population values, not regions.
        xlab = "Población",
        # Fit the count axis to the data instead of a fixed 0-20 range that
        # squashed the bars to the bottom of the plot.
        ylim = c(0, max(tabla_population)),
        col = viridis(4),
        border = viridis(4))

# Horizontal reference lines only (nx = NA suppresses the vertical ones).
grid(nx = NA, ny = NULL, col = "lightgray", lty = "dotted", lwd = par("lwd"))

#histograma de population #AGREGAR LÍNEA DE DISTRIBUCIÓN

# Frequency histogram of the state populations; the number of bins is chosen
# with Sturges' rule. The call no longer ends with a dangling comma, which
# passed an extra empty argument to hist().
hist(murders$population,
     breaks = "Sturges",
     xlab = "Population in US",
     col = viridis(4))

# Density-scaled histogram of the state populations with a density curve
# drawn on top.
hist(
  murders$population,
  breaks = "Sturges",
  # Plot densities instead of counts so the curve below shares the y scale.
  probability = TRUE,
  # Eight "Set1" colours are passed for the bar fills and, identically, for
  # the bar borders.
  col = brewer.pal(8, "Set1"),
  border = brewer.pal(8, "Set1"),
  xlab = "Population in US",
  main = "Histograma de Population Densidad"
)

# Overlay the kernel density estimate of the same variable.
lines(density(murders$population), lwd = 2, col = "black")

#interpretar este grafico

# Print the summary statistics of the population column.
print(summary(murders[["population"]]))
##     Min.  1st Qu.   Median     Mean  3rd Qu.     Max. 
##   563626  1696962  4339367  6075769  6636084 37253956

#Análisis #la densidad más alta se encuentra en poblaciones menores a 5.000.000 (la mediana es 4.339.367) y la densidad se mantiene baja a partir de los 10.000.000 de población

# Density-scaled histogram of the state populations with the density curve
# drawn on top.
# The "Set1" palette provides at most 9 colours. Requesting 10 only raised a
# warning and still returned the 9-colour palette, so 9 is requested directly.
hist(murders$population,
     breaks = "Sturges",
     probability = TRUE,
     main = "Histograma de Population",
     xlab = "Population in US",
     col = brewer.pal(9, "Set1"),
     border = brewer.pal(9, "Set1")
)
lines(density(murders$population), col = "black", lwd = 2)

#agregar dos líneas verticales: una en la mediana y otra en el promedio #la mediana de la población es de aproximadamente 4,3 millones (4.339.367) #el promedio de la población es de aproximadamente 6,1 millones (6.075.769), es decir, entre 5.000.000 y 10.000.000

# Frequency histogram of the state populations with vertical reference lines
# at the median and at the mean.

# Pull both statistics out of summary(); `[[` drops the element names so the
# results are plain numbers.
summary_stats <- summary(murders$population)
promedio <- summary_stats[["Mean"]]
mediana <- summary_stats[["Median"]]

# No trailing comma after the last argument: it passed an extra empty
# argument to hist().
hist(murders$population,
     breaks = "Sturges",
     main = "Histograma de Population frecuencia",
     xlab = "Population in US",
     col = viridis(7),
     border = viridis(7),
     ylim = c(0, 50))

# Dashed vertical lines: blue for the median, yellow for the mean.
abline(v = mediana, col = "blue", lwd = 2, lty = 2)
abline(v = promedio, col = "yellow", lwd = 2, lty = 2)

# Identify the two reference lines, which are otherwise unlabelled.
legend("topright",
       legend = c("Mediana", "Promedio"),
       col = c("blue", "yellow"),
       lwd = 2,
       lty = 2)

# Print the complete `murders` data frame.
print(murders)
##                   state abb        region population total
## 1               Alabama  AL         South    4779736   135
## 2                Alaska  AK          West     710231    19
## 3               Arizona  AZ          West    6392017   232
## 4              Arkansas  AR         South    2915918    93
## 5            California  CA          West   37253956  1257
## 6              Colorado  CO          West    5029196    65
## 7           Connecticut  CT     Northeast    3574097    97
## 8              Delaware  DE         South     897934    38
## 9  District of Columbia  DC         South     601723    99
## 10              Florida  FL         South   19687653   669
## 11              Georgia  GA         South    9920000   376
## 12               Hawaii  HI          West    1360301     7
## 13                Idaho  ID          West    1567582    12
## 14             Illinois  IL North Central   12830632   364
## 15              Indiana  IN North Central    6483802   142
## 16                 Iowa  IA North Central    3046355    21
## 17               Kansas  KS North Central    2853118    63
## 18             Kentucky  KY         South    4339367   116
## 19            Louisiana  LA         South    4533372   351
## 20                Maine  ME     Northeast    1328361    11
## 21             Maryland  MD         South    5773552   293
## 22        Massachusetts  MA     Northeast    6547629   118
## 23             Michigan  MI North Central    9883640   413
## 24            Minnesota  MN North Central    5303925    53
## 25          Mississippi  MS         South    2967297   120
## 26             Missouri  MO North Central    5988927   321
## 27              Montana  MT          West     989415    12
## 28             Nebraska  NE North Central    1826341    32
## 29               Nevada  NV          West    2700551    84
## 30        New Hampshire  NH     Northeast    1316470     5
## 31           New Jersey  NJ     Northeast    8791894   246
## 32           New Mexico  NM          West    2059179    67
## 33             New York  NY     Northeast   19378102   517
## 34       North Carolina  NC         South    9535483   286
## 35         North Dakota  ND North Central     672591     4
## 36                 Ohio  OH North Central   11536504   310
## 37             Oklahoma  OK         South    3751351   111
## 38               Oregon  OR          West    3831074    36
## 39         Pennsylvania  PA     Northeast   12702379   457
## 40         Rhode Island  RI     Northeast    1052567    16
## 41       South Carolina  SC         South    4625364   207
## 42         South Dakota  SD North Central     814180     8
## 43            Tennessee  TN         South    6346105   219
## 44                Texas  TX         South   25145561   805
## 45                 Utah  UT          West    2763885    22
## 46              Vermont  VT     Northeast     625741     2
## 47             Virginia  VA         South    8001024   250
## 48           Washington  WA          West    6724540    93
## 49        West Virginia  WV         South    1852994    27
## 50            Wisconsin  WI North Central    5686986    97
## 51              Wyoming  WY          West     563626     5