Las variables categóricas se dividen en 2 tipos: ordinales y nominales.
Nominales: no tienen ningún orden. Ordinales: respetan un orden.
Las variables numéricas se expresan numéricamente y se dividen en 2 tipos: discretas y continuas.
library(dplyr)
##
## Adjuntando el paquete: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(dslabs)
library(viridisLite)
library(RColorBrewer)
library(ggplot2)
# Install reshape2 only when it is missing, and do so *before* attaching it.
# Calling install.packages() on a package that is already loaded installs
# nothing and only raises a "package is in use" warning on every run.
if (!requireNamespace("reshape2", quietly = TRUE)) {
  install.packages("reshape2")
}
library(reshape2)
Dataset
# Load the murders data set.
data("murders")
# Number of states per region. NOTE: this object is named `table`, the same
# name as the base function; the name is kept because the pie chart and its
# legend further down read this object.
table <- table(murders$region)
# Take the counts from the table itself instead of retyping them by hand, so
# the labels cannot drift away from the data.
valores <- as.vector(table)
nlevels(murders$region)
## [1] 4
table
##
## Northeast South North Central West
## 9 17 12 13
# barplot() returns the x coordinate of the midpoint of every bar. Those
# midpoints are not 1, 2, 3, ..., so they are captured and reused to centre
# each count label above its own bar.
bar_mid <- barplot(
  table,
  main = "Garfico de barras de las Regiones ",
  ylab = "Frecuencia",
  xlab = "Regiones",
  ylim = c(0, 20),
  col = "red"
)
text(x = bar_mid, y = valores + 1, labels = valores, cex = 0.8, col = "black")
# Earlier variant of the pie chart, kept for reference:
#pie(table, labels = levels(murders$region),col = viridis(10),main = "Garfico de Torta Regiones ", border = viridis(10))
# Pie chart of the regions with the share of each region in its label.
# The counts are read from `table` so that every percentage stays paired with
# the region it belongs to; the previous hand-typed vector listed the counts
# in a different order than the region levels, which attached the wrong
# percentage to Northeast and South.
valores <- as.vector(table)
porcentaje <- valores / sum(valores) * 100
etiquetas <- paste(names(table), round(porcentaje, 2), "%")
pie(
  table,
  labels = etiquetas,
  col = viridis(10),
  main = "Garfico de Torta Regiones ",
  border = viridis(10),
  radius = 0.55
)
legend("topleft", legend = names(table), cex = 0.85, fill = viridis(10))
#help("legend")
# Bar plot with the regions ordered from the most to the fewest states.
# The level order is derived from the frequencies instead of being typed by
# hand, so it stays correct if the data change.
region_counts <- sort(table(murders$region), decreasing = TRUE)
region <- factor(murders$region, levels = names(region_counts))
table1 <- table(region)
barplot(table1)
# Histogram
# Add the murder rate per 100,000 inhabitants as a new column. The data frame
# is already supplied by the pipe, so it must not be passed to mutate() a
# second time.
murders <- murders %>%
  mutate(rate = total / population * 100000)
head(murders)
## state abb region population total rate
## 1 Alabama AL South 4779736 135 2.824424
## 2 Alaska AK West 710231 19 2.675186
## 3 Arizona AZ West 6392017 232 3.629527
## 4 Arkansas AR South 2915918 93 3.189390
## 5 California CA West 37253956 1257 3.374138
## 6 Colorado CO West 5029196 65 1.292453
# The default x axis is suppressed (xaxt = "n") and replaced below by one
# with a tick at every integer from 0 to 17.
hist(
  murders$rate,
  main = "Histograma de la Tasa de Asesinatos",
  ylim = c(0, 25),
  xlim = c(0, 20),
  col = brewer.pal(5, "Set1"),
  xlab = "Tasa de Asesinatos",
  breaks = 17,
  xaxt = "n"
)
axis(1, at = seq(0, 17, by = 1), las = 1)
# Box plot of the murder rate; outliers are not drawn and the y axis is
# limited to 0-6.
boxplot(
  murders$rate,
  main = "Boxplot",
  ylab = "Tasa de asesinatos",
  ylim = c(0, 6),
  outline = FALSE,
  col = "Blue"
)
# Add the mean: a single box is centred on x = 1, so the point is drawn at
# (1, mean) and its rounded value is written next to it.
points(x = 1, y = mean(murders$rate), pch = 20, col = "black")
text(x = 1.1, y = 2.3, labels = paste(" ", round(mean(murders$rate), 2)))
summary(murders$rate)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.3196 1.2526 2.6871 2.7791 3.3861 16.4528
# Box plot of the murder rate annotated with its summary statistics.
# Outliers are not drawn and the y axis is limited to 0-6.
boxplot(
  murders$rate,
  col = "blue",
  ylab = "Tasa de asesinatos",
  outline = FALSE,
  main = "Boxplot",
  ylim = c(0, 6)
)
# Add the mean to the plot; a single box is centred on x = 1.
mean_rate <- mean(murders$rate)
points(x = 1, y = mean_rate, col = "red", pch = 20)
# Anchor the label on the mean point itself. Using the mean as the x
# coordinate would place the label outside the plotting region of a
# single-box plot, where it is clipped. pos = 2 writes it to the left of the
# point so it does not collide with the statistics listed on the right.
text(
  x = 1,
  y = mean_rate,
  labels = paste("Media:", round(mean_rate, 2)),
  pos = 2,
  col = "black"
)
# Summary statistics: min, 1st quartile, median, mean, 3rd quartile, max.
summary_stats <- summary(murders$rate)
# Write the six statistics as a column of labels, one every 0.5 units from
# y = 5 down to y = 2.5, in a single vectorised call.
stat_names <- c("Min:", "1Q:", "Mediana:", "Media:", "3Q:", "Max:")
text(
  x = 1,
  y = seq(5, 2.5, by = -0.5),
  labels = paste(stat_names, round(as.numeric(summary_stats), 2)),
  pos = 4,
  cex = 0.8
)
## VARIABLE COMPARISON # Scatter plot
# Population (in millions) against the total number of murders, coloured by
# region.
x <- murders$population / 10^6
y <- murders$total
# Points are coloured by the integer code of their region level. The legend
# must list the same codes in level order (1, 2, 3, ...); unique() on the
# column returns them in order of first appearance instead, which pairs the
# region names with the wrong colours.
region_levels <- levels(murders$region)
plot(
  x,
  y,
  main = "Total vs Population",
  col = as.integer(murders$region),
  pch = 20,
  xlab = "Population /10^6,",
  ylab = "Total"  # y holds murders$total, not the population
)
legend("bottomright", legend = region_levels, fill = seq_along(region_levels))
# Box plot of the murder rate for each region, with a horizontal line at the
# overall mean rate and that mean written at the left edge of the plot.
# Wrapped in local() so the helper variable does not leak into the workspace.
local({
  overall_mean <- mean(murders$rate)
  boxplot(
    rate ~ region,
    data = murders,
    main = "Boxplot",
    ylim = c(0, 6),
    outline = FALSE,
    col = brewer.pal(9, "Set1")
  )
  abline(h = overall_mean, lwd = 2, col = "blue")
  text(x = 0.5, y = 3, labels = paste("", round(overall_mean, 2)), col = "blue")
})
# Several plots on one device: scatter plot, histogram, ordered bar plot and
# box plot by region.
# NOTE(review): four plots are drawn into a 1 x 2 layout, so they spill onto
# a second page - confirm whether mfrow = c(2, 2) was intended. The layout is
# also left active for any plot drawn afterwards.
par(mfrow = c(1, 2))

# Scatter plot. The legend lists the level codes in level order so that its
# colours match the point colours; unique() on the column would return the
# codes in order of first appearance and mislabel the regions.
region_levels <- levels(murders$region)
plot(
  x,
  y,
  main = "Total vs Population",
  col = as.integer(murders$region),
  pch = 20,
  xlab = "Population /10^6,",
  ylab = "Total"  # y holds murders$total, not the population
)
legend("bottomright", legend = region_levels, fill = seq_along(region_levels))

# Histogram of the murder rate with a custom integer x axis.
hist(
  murders$rate,
  main = "Histograma de la Tasa de Asesinatos",
  ylim = c(0, 25),
  xlim = c(0, 20),
  col = brewer.pal(5, "Set1"),
  xlab = "Tasa de Asesinatos",
  breaks = 17,
  xaxt = "n"
)
axis(1, at = seq(0, 17, by = 1), las = 1)

# Bar plot of the regions ordered from the most to the fewest states; the
# order is derived from the frequencies rather than typed by hand.
region_counts <- sort(table(murders$region), decreasing = TRUE)
region <- factor(murders$region, levels = names(region_counts))
table1 <- table(region)
barplot(table1)

# Box plot of the rate by region with the overall mean as a reference line.
boxplot(
  rate ~ region,
  data = murders,
  col = brewer.pal(9, "Set1"),
  outline = FALSE,
  ylim = c(0, 6),
  main = "Boxplot"
)
abline(h = mean(murders$rate), col = "blue", lwd = 2)
text(
  x = 0.5,
  y = 3,
  labels = paste("", round(mean(murders$rate), 2)),
  col = "blue"
)