This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown see http://rmarkdown.rstudio.com.
When you click the Knit button a document will be generated that includes both content as well as the output of any embedded R code chunks within the document. You can embed an R code chunk like this:
# Print summary statistics for each column of `cars` (speed and dist).
summary(cars)
## speed dist
## Min. : 4.0 Min. : 2.00
## 1st Qu.:12.0 1st Qu.: 26.00
## Median :15.0 Median : 36.00
## Mean :15.4 Mean : 42.98
## 3rd Qu.:19.0 3rd Qu.: 56.00
## Max. :25.0 Max. :120.00
#Variables Cualitativas (categorías)
#ordinales #nominales
#Variables Cuantitativas #continuas - resultado de un proceso de medición #discretas - resultado de un proceso de conteo
#clasificar las variables de su dataset
library(dslabs)
library(dplyr)
##
## Adjuntando el paquete: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(viridis)
## Cargando paquete requerido: viridisLite
library(viridisLite)
library(RColorBrewer)
# Load the `murders` data set into the workspace
# (presumably provided by the dslabs package attached above; confirm).
data("murders")
Descripción del data set
#total cuantitativa discreta
#Descripcion
#variables cualitativas nominales: state,abb,region
#variables cuantitativas discretas: population, total
# A bar chart needs counts, so tabulate how many rows fall in each region
# and print the resulting frequency table.
tabla_region <- table(murders[["region"]])
tabla_region
##
## Northeast South North Central West
## 9 17 12 13
# Bar chart of the region counts.
#   main        -> plot title
#   xlab / ylab -> axis labels
#   ylim        -> range of the count axis
#   col         -> one fill colour per bar
barplot(
  height = tabla_region,
  main = "Grafico de Barras",
  xlab = "Region",
  ylab = "Frecuencia",
  ylim = c(0, 20),
  col = c("blue", "green", "red", "yellow")
)
# Same bar chart, polished: accented labels and a viridis palette used for
# both the bar fill and the bar borders.
barplot(
  height = tabla_region,
  main = "Gráfico de Barras",
  xlab = "Región",
  ylab = "Frecuencia",
  ylim = c(0, 20),
  col = viridis(n = 4),
  border = viridis(n = 4)
)
# Horizontal reference lines only (nx = NA suppresses the vertical ones).
# Colour, line type and line width are left at grid()'s defaults, which are
# "lightgray", "dotted" and par("lwd").
grid(nx = NA, ny = NULL)
# Pie chart of the share of rows per region.
tabla_region <- table(murders$region)

# Percentage of the total in each region, rounded to one decimal place.
porcentajes <- round(100 * tabla_region / sum(tabla_region), digits = 1)

# One ColorBrewer "Set3" colour per slice.
colores <- brewer.pal(n = length(porcentajes), name = "Set3")

# Draw the pie, labelling each slice with its percentage.
pie(
  x = porcentajes,
  labels = paste(porcentajes, "%"),
  col = colores,
  main = "Diagrama Circular"
)

# The legend reuses `colores` so every key matches its slice.
legend(
  x = "topright",
  legend = names(porcentajes),
  fill = colores,
  cex = 0.6
)
# Bar chart with the regions in a chosen order: rebuild the factor with
# explicit levels before tabulating, so the bars follow that order.
region <- factor(
  x = murders$region,
  levels = c("South", "West", "North Central", "Northeast")
)
tabla_region1 <- table(region)

barplot(
  height = tabla_region1,
  main = "Gráfico de Barras",
  xlab = "Región",
  ylab = "Frecuencia",
  ylim = c(0, 20),
  col = viridis(n = 4),
  border = viridis(n = 4)
)
# Horizontal grid lines only; col, lty and lwd stay at grid()'s defaults
# ("lightgray", "dotted", par("lwd")).
grid(nx = NA, ny = NULL)
# Quantitative variables: frequency table of the population values,
# printed for inspection.
tabla_population <- table(murders[["population"]])
tabla_population
##
## 563626 601723 625741 672591 710231 814180 897934 989415
## 1 1 1 1 1 1 1 1
## 1052567 1316470 1328361 1360301 1567582 1826341 1852994 2059179
## 1 1 1 1 1 1 1 1
## 2700551 2763885 2853118 2915918 2967297 3046355 3574097 3751351
## 1 1 1 1 1 1 1 1
## 3831074 4339367 4533372 4625364 4779736 5029196 5303925 5686986
## 1 1 1 1 1 1 1 1
## 5773552 5988927 6346105 6392017 6483802 6547629 6724540 8001024
## 1 1 1 1 1 1 1 1
## 8791894 9535483 9883640 9920000 11536504 12702379 12830632 19378102
## 1 1 1 1 1 1 1 1
## 19687653 25145561 37253956
## 1 1 1
# Bar chart of the population frequency table.
# The x axis shows population values, not regions, so it is labelled
# accordingly. The y limit is taken from the table itself rather than the
# 0-20 range used for the region counts, so the bars fill the plot.
barplot(
  tabla_population,
  main = "Gráfico de Poblacion",
  ylab = "Frecuencia",
  xlab = "Población",
  ylim = c(0, max(tabla_population)),
  col = viridis(4),
  border = viridis(4)
)
# Horizontal dotted reference lines only (nx = NA suppresses vertical ones).
grid(nx = NA, ny = NULL, col = "lightgray", lty = "dotted", lwd = par("lwd"))
# Histogram of population with bins chosen by Sturges' rule.
# (The density curve is overlaid in the next histogram.)
# Fixes: the x-axis label typo ("Poplation") and a stray trailing comma that
# passed an empty argument to hist().
hist(murders$population,
  breaks = "Sturges",
  xlab = "Population in US",
  col = viridis(4)
)
# Density-scaled histogram of population with a kernel density curve on top.
# freq = FALSE puts the y axis on the density scale so the curve and the bars
# are comparable.
hist(
  x = murders$population,
  breaks = "Sturges",
  freq = FALSE,
  main = "Histograma de Population Densidad",
  xlab = "Population in US",
  # Eight "Set1" colours are supplied for the bar fill ...
  col = brewer.pal(n = 8, name = "Set1"),
  # ... and the same eight colours for the bar borders.
  border = brewer.pal(n = 8, name = "Set1")
)

# Overlay the estimated density as a thick black line.
lines(x = density(murders$population), col = "black", lwd = 2)
# Numeric summary of population, used to interpret the histogram above.
summary(murders$population)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 563626 1696962 4339367 6075769 6636084 37253956
#Análisis #la mayor densidad se concentra en las poblaciones más bajas (el mínimo es 563.626 y la mediana 4.339.367) y la densidad disminuye a medida que aumenta la población, con una cola larga hacia la derecha (máximo 37.253.956)
# Density-scaled histogram of population with a kernel density curve on top.
# "Set1" provides at most 9 colours. Asking for 10 only triggered a warning
# and returned the 9-colour palette anyway, so request 9 directly: same
# colours, no warnings.
hist(murders$population,
  breaks = "Sturges",
  probability = TRUE,
  main = "Histograma de Population",
  xlab = "Population in US",
  col = brewer.pal(9, "Set1"),
  border = brewer.pal(9, "Set1")
)
# Overlay the estimated density as a thick black line.
lines(density(murders$population), col = "black", lwd = 2)
# Frequency histogram of population with two vertical reference lines:
# one at the median and one at the mean.
# Per the summary printed earlier in this document, the median is 4,339,367
# and the mean is 6,075,769; the mean lies to the right of the median.
summary_stats <- summary(murders$population)
promedio <- summary_stats["Mean"]
mediana <- summary_stats["Median"]

# Fixes: the x-axis label typo ("Poplation") and a stray trailing comma that
# passed an empty argument to hist().
hist(murders$population,
  breaks = "Sturges",
  main = "Histograma de Population frecuencia",
  xlab = "Population in US",
  col = viridis(7),
  border = viridis(7),
  ylim = c(0, 50)
)

# Dashed vertical lines: blue marks the median, yellow marks the mean.
abline(v = mediana, col = "blue", lwd = 2, lty = 2)
abline(v = promedio, col = "yellow", lwd = 2, lty = 2)

# Legend so the two reference lines can be told apart.
legend("topright",
  legend = c("Mediana", "Promedio"),
  col = c("blue", "yellow"),
  lwd = 2,
  lty = 2
)
# Print the full `murders` data set.
murders
## state abb region population total
## 1 Alabama AL South 4779736 135
## 2 Alaska AK West 710231 19
## 3 Arizona AZ West 6392017 232
## 4 Arkansas AR South 2915918 93
## 5 California CA West 37253956 1257
## 6 Colorado CO West 5029196 65
## 7 Connecticut CT Northeast 3574097 97
## 8 Delaware DE South 897934 38
## 9 District of Columbia DC South 601723 99
## 10 Florida FL South 19687653 669
## 11 Georgia GA South 9920000 376
## 12 Hawaii HI West 1360301 7
## 13 Idaho ID West 1567582 12
## 14 Illinois IL North Central 12830632 364
## 15 Indiana IN North Central 6483802 142
## 16 Iowa IA North Central 3046355 21
## 17 Kansas KS North Central 2853118 63
## 18 Kentucky KY South 4339367 116
## 19 Louisiana LA South 4533372 351
## 20 Maine ME Northeast 1328361 11
## 21 Maryland MD South 5773552 293
## 22 Massachusetts MA Northeast 6547629 118
## 23 Michigan MI North Central 9883640 413
## 24 Minnesota MN North Central 5303925 53
## 25 Mississippi MS South 2967297 120
## 26 Missouri MO North Central 5988927 321
## 27 Montana MT West 989415 12
## 28 Nebraska NE North Central 1826341 32
## 29 Nevada NV West 2700551 84
## 30 New Hampshire NH Northeast 1316470 5
## 31 New Jersey NJ Northeast 8791894 246
## 32 New Mexico NM West 2059179 67
## 33 New York NY Northeast 19378102 517
## 34 North Carolina NC South 9535483 286
## 35 North Dakota ND North Central 672591 4
## 36 Ohio OH North Central 11536504 310
## 37 Oklahoma OK South 3751351 111
## 38 Oregon OR West 3831074 36
## 39 Pennsylvania PA Northeast 12702379 457
## 40 Rhode Island RI Northeast 1052567 16
## 41 South Carolina SC South 4625364 207
## 42 South Dakota SD North Central 814180 8
## 43 Tennessee TN South 6346105 219
## 44 Texas TX South 25145561 805
## 45 Utah UT West 2763885 22
## 46 Vermont VT Northeast 625741 2
## 47 Virginia VA South 8001024 250
## 48 Washington WA West 6724540 93
## 49 West Virginia WV South 1852994 27
## 50 Wisconsin WI North Central 5686986 97
## 51 Wyoming WY West 563626 5