Variables categóricas: se dividen en dos tipos, ordinales y nominales.
Variables numéricas: se expresan numéricamente y se dividen en dos tipos, discretas y continuas.
#install.packages("dslabs")
library(dplyr)
##
## Adjuntando el paquete: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(dslabs)
library(viridisLite)
library(RColorBrewer)
# Dataset: load the `murders` data set (presumably provided by dslabs).
data("murders")
# Categories of the region variable (nominal qualitative variable).
levels(murders[["region"]])
## [1] "Northeast" "South" "North Central" "West"
# Absolute frequency of each region category.
tabla <- table(murders[["region"]])
print(tabla)
##
## Northeast South North Central West
## 9 17 12 13
# Bar chart of the region frequencies; `bp` keeps the x positions of the bars.
bp <- barplot(
  tabla,
  main = "Gráfico de barras",
  xlab = "Regiones",
  ylab = "Frecuencia",
  ylim = c(0, 20),
  col = "blue"
)
# Write each count 2 units above the top of its bar.
text(x = bp, y = tabla + 2, labels = format(tabla), cex = 1)
# help("barplot")
# Pie chart of the same frequency table. The four-colour rocket palette is
# computed once and shared by the slices, their borders and the legend.
rocket_cols <- rocket(4)
pie(
  tabla,
  labels = names(tabla),
  col = rocket_cols,
  border = rocket_cols,
  main = "Diagrama circular"
)
legend("bottomright", legend = names(tabla), fill = rocket_cols, cex = 0.75)
# Crear tabla de porcentajes y graficar mostrando el dato
# Crea tabla con los porcentajes por valor en las regiones
# Build a data frame with one row per region and its absolute frequency.
# The labels and counts are read from `tabla` rather than typed in by hand,
# so they cannot drift from the data.
region <- names(tabla)
# as.vector() drops the table attributes, leaving a plain vector of counts.
x <- as.vector(tabla)
df_region_x <- data.frame(region, x)
# df_region_x
# Convert a vector of counts into rounded percentages of its total.
#
# x: numeric vector of counts (absolute frequencies).
# Returns a numeric vector, the same length as `x`, holding each element's
# share of sum(x) as a percentage rounded to the nearest integer.
#
# The previous version multiplied by the constant 1.96 (about 100 / 51), which
# only yields percentages when the counts add up to 51. Dividing by sum(x)
# gives the same result for those counts and is correct for any other total.
# If sum(x) is 0 the result is NaN, because there is no total to share.
porc <- function(x) {
  stopifnot(is.numeric(x))
  round(x / sum(x) * 100)
}
# Compute the percentage for each region, store it as a new column of the
# data frame and display the result.
porcentaje <- porc(x)
df_region_x[["porcentaje"]] <- porcentaje
print(df_region_x)
## region x porcentaje
## 1 Northeast 9 18
## 2 South 17 33
## 3 North Central 12 24
## 4 West 13 25
# Pie chart whose slices are labelled with their percentage value.
slice_labels <- paste0(df_region_x[["porcentaje"]], "%")
pie(
  x,
  labels = slice_labels,
  main = "Diagrama circular",
  col = viridis(4),
  border = rocket(4)
)
# The legend maps the viridis fill colours back to the region names.
legend("bottomright", legend = names(tabla), fill = viridis(4), cex = 0.75)
Variables ordinales que requieren un orden específico.
# Rebuild the region factor with an explicit level order, so that tables and
# plots list the categories in that order.
region <- factor(
  murders[["region"]],
  levels = c("South", "West", "North Central", "Northeast")
)
# Count the regions in the new order and draw them as a bar chart.
tabla1 <- table(region)
barplot(tabla1)
# Histogram
# Add the murder rate per 100,000 inhabitants as a new column. The pipe
# already supplies `murders` as the data argument of mutate(), so it is not
# passed a second time (the extra copy was only unpacked over its own columns).
murders <- murders %>%
  mutate(rate = total / population * 100000)
# Histogram of the rate, with the bar counts printed on top (labels = TRUE).
# The title typo "aesinatos" is corrected to "asesinatos".
hist(
  murders$rate,
  main = "Histograma de la tasa de asesinatos en EEUU por arma de fuego",
  xlab = "Tasa de asesinatos",
  xlim = c(0, 20),
  ylim = c(0, 25),
  col = brewer.pal(9, "Set1"),
  border = brewer.pal(3, "Paired"),
  labels = TRUE
)
# Add x-axis tick marks at every integer to refine the default scale.
axis(1, at = seq(0, 25, by = 1))
# Boxplot of the murder rate. Outliers are not drawn and the y axis is
# limited to the range 0-6.
mean_rate <- mean(murders$rate)
boxplot(
  murders$rate,
  main = "Boxplot",
  ylab = "Tasa de asesinatos",
  col = "Blue",
  outline = FALSE,
  ylim = c(0, 6)
)
# Add the mean as a filled dot (a single value is plotted at x = 1).
points(mean_rate, col = "black", pch = 20)
# Print the rounded mean at a fixed position next to the box.
text(x = 1.1, y = 2.3, labels = paste(" ", round(mean_rate, 2)))
# Overlay the values reported by summary(murders$rate) as jittered red points.
stripchart(
  summary(murders$rate),
  vertical = TRUE,
  method = "jitter",
  pch = 19,
  col = "red",
  add = TRUE
)
summary(murders$rate)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.3196 1.2526 2.6871 2.7791 3.3861 16.4528
# Scatter plot: total murders against population (in millions).
x <- murders$population / 10^6
y <- murders$total
# The y axis shows `total`, so it is labelled "Total" (it was mislabelled
# "Populatio"); the x label typo is fixed as well.
plot(
  x, y,
  main = "Total vs Population",
  col = murders$region,
  pch = 20,
  xlab = "Population/10^6",
  ylab = "Total"
)
# The points are coloured by the factor's integer codes, so level i uses
# colour i. The legend must use the same 1..k sequence: unique() returns the
# regions in order of first appearance, which need not match levels().
legend(
  "bottomright",
  legend = levels(murders$region),
  fill = seq_along(levels(murders$region))
)
# Boxplot of the murder rate split by region (outliers hidden, y axis 0-6).
boxplot(
  rate ~ region,
  data = murders,
  col = brewer.pal(9, "Set1"),
  outline = FALSE,
  ylim = c(0, 6)
)
# Horizontal reference line at the overall mean rate, with its rounded value
# printed in small blue text near the left edge.
overall_mean <- mean(murders$rate)
abline(h = overall_mean, col = "blue", lwd = 2)
text(
  x = 0.5, y = 3,
  labels = paste("", round(overall_mean, 2)),
  col = "blue",
  cex = 0.5
)
# Several plots: put the scatter plot and the per-region boxplot side by side
# in a layout of 1 row and 2 columns.
par(mfrow = c(1, 2))
# Left panel: total murders against population (in millions). The y axis shows
# `total`, so it is labelled "Total" (it was mislabelled "Populatio").
plot(
  x, y,
  main = "Total vs Population",
  col = murders$region,
  pch = 20,
  xlab = "Population/10^6",
  ylab = "Total"
)
# Points are coloured by the factor's integer codes (level i uses colour i),
# so the legend uses the same 1..k sequence. unique() would order the colours
# by first appearance, which need not match levels().
legend(
  "bottomright",
  legend = levels(murders$region),
  fill = seq_along(levels(murders$region))
)
# Right panel: murder rate by region, with the overall mean as a blue line.
boxplot(
  rate ~ region,
  data = murders,
  col = brewer.pal(9, "Set1"),
  outline = FALSE,
  ylim = c(0, 6)
)
abline(h = mean(murders$rate), col = "blue", lwd = 2)
text(
  paste("", round(mean(murders$rate), 2)),
  x = 0.5, y = 3,
  col = "blue",
  cex = 0.5
)