Las variables categóricas se dividen en dos: ordinales y nominales.
Las variables cuantitativas se expresan numéricamente y se dividen en dos: discretas y continuas.
library(dplyr)
##
## Adjuntando el paquete: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(dslabs)
library(viridisLite)
library(RColorBrewer)
# Load the `murders` data set into the workspace.
data(murders)
# List the categories of `region` (a nominal qualitative variable).
levels(murders[["region"]])
## [1] "Northeast" "South" "North Central" "West"
# Count how many rows fall into each region and display the counts.
tabla <- table(murders[["region"]])
print(tabla)
##
## Northeast South North Central West
## 9 17 12 13
# Bar chart of the region frequencies stored in `tabla`.
barplot(
  tabla,
  col = "blue",
  ylim = c(0, 20),
  xlab = "Regiones",
  ylab = "Frecuencia",
  main = "Diagrama de barras"
)
# help("barplot")
# Pie chart of the same frequencies. Slices and their borders use the
# four-colour `rocket` palette; the legend repeats the region names with
# the same colours.
pie(
  tabla,
  labels = names(tabla),
  main = "Diagrama circular",
  col = rocket(4),
  border = rocket(4)
)
legend(
  "bottomright",
  legend = names(tabla),
  fill = rocket(4),
  cex = 0.75
)
Variables ordinales que requieren un orden específico.
# Rebuild `region` with an explicit level order, then tabulate it and draw
# the bars in that order.
region <- factor(
  murders$region,
  levels = c("South", "West", "North Central", "Northeast")
)
tabla1 <- table(region)
barplot(height = tabla1)
# Add `rate`: total murders per 100,000 inhabitants.
# The pipe already supplies `murders` as the data argument, so it must not
# be passed to mutate() a second time (that would splice every column of the
# data frame back into itself).
murders <- murders %>%
  mutate(rate = total / population * 100000)
# Histogram of the murder rate; `labels = TRUE` prints each bin count above
# its bar. The three "Paired" colours are recycled across the bars.
hist(
  murders$rate,
  main = "Histograma de la tasa de asesinatos en EEUU",
  xlab = "Tasa de asesinatos",
  xlim = c(0, 20),
  ylim = c(0, 25),
  col = brewer.pal(3, "Paired"),
  border = brewer.pal(3, "Paired"),
  labels = TRUE
)
# Histogram of population (Sturges binning) with the bin counts printed
# above the bars.
# The "Set1" palette has at most 9 colours: asking for 10 only raised a
# warning and returned the 9-colour palette anyway, so request 9 directly.
hist(
  murders$population,
  breaks = "Sturges",
  main = "Histogram of Population",
  xlab = "Population in US",
  col = brewer.pal(9, "Set1"),
  border = brewer.pal(9, "Set1"),
  ylim = c(0, 40),
  labels = TRUE
)
# Density-scaled histogram of population (bar areas sum to 1) with a kernel
# density estimate drawn on top.
# The "Set1" palette has at most 9 colours: asking for 10 only raised a
# warning and returned the 9-colour palette anyway, so request 9 directly.
hist(
  murders$population,
  breaks = "Sturges",
  probability = TRUE,
  main = "Histogram of Population Density",
  xlab = "Population in US",
  col = brewer.pal(9, "Set1"),
  border = brewer.pal(9, "Set1"),
  ylim = c(0, 12e-08)
)
lines(density(murders$population), col = "black", lwd = 2)
# Box plot of the murder rate; outliers are hidden and the y axis is limited
# to 0-6.
boxplot(
  murders$rate,
  main = "Boxplot",
  ylab = "Tasa de asesinatos",
  col = "Blue",
  ylim = c(0, 6),
  outline = FALSE
)
# Add the mean: a dot on the box (a single box sits at x = 1) and its
# rounded value written at a fixed position beside it.
local({
  rate_mean <- mean(murders$rate)
  points(x = 1, y = rate_mean, pch = 20, col = "black")
  text(x = 1.1, y = 2.3, labels = paste(" ", round(rate_mean, 2)))
})
summary(murders$rate)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.3196 1.2526 2.6871 2.7791 3.3861 16.4528
# Scatter plot of total murders against population (in millions), with the
# points coloured by region.
x <- murders$population / 10^6
y <- murders$total
plot(
  x, y,
  main = "Total vs Population",
  xlab = "Population/10^6",
  ylab = "Total",
  col = murders$region,
  pch = 20
)
# The points are coloured by the integer code of each region level, so the
# legend must list those same codes in level order. unique(murders$region)
# returns the regions in order of first appearance in the data, which pairs
# the level labels with the wrong colours.
legend(
  "bottomright",
  legend = levels(murders$region),
  fill = seq_along(levels(murders$region)),
  cex = 0.75
)
# Box plots of the murder rate for each region; outliers are hidden and the
# y axis is limited to 0-6.
boxplot(
  rate ~ region,
  data = murders,
  main = "Boxplot",
  ylim = c(0, 6),
  outline = FALSE,
  col = brewer.pal(9, "Set1")
)
# Horizontal reference line at the mean of `rate`, with its rounded value
# written at a fixed position near the left edge.
local({
  rate_mean <- mean(murders$rate)
  abline(h = rate_mean, lwd = 2, col = "blue")
  text(x = 0.5, y = 3, labels = paste("", round(rate_mean, 2)), col = "blue")
})
# Several plots: draw the scatter plot and the per-region box plots side by
# side. par() returns the previous settings, which are kept so the layout
# can be restored afterwards instead of leaking into later plots.
old_par <- par(mfrow = c(1, 2))

# Left panel: total murders against population, coloured by region.
plot(
  x, y,
  main = "Total vs Population",
  xlab = "Population/10^6",
  ylab = "Total",
  col = murders$region,
  pch = 20
)
# The points are coloured by the integer code of each region level, so the
# legend must list those same codes in level order. unique(murders$region)
# returns the regions in order of first appearance in the data, which pairs
# the level labels with the wrong colours.
legend(
  "bottomright",
  legend = levels(murders$region),
  fill = seq_along(levels(murders$region)),
  cex = 0.75
)

# Right panel: murder rate by region with a reference line at the mean rate.
boxplot(
  rate ~ region,
  data = murders,
  col = brewer.pal(9, "Set1"),
  outline = FALSE,
  ylim = c(0, 6),
  main = "Boxplot"
)
abline(h = mean(murders$rate), col = "blue", lwd = 2)
text(
  paste("", round(mean(murders$rate), 2)),
  x = 0.5, y = 3, col = "blue", cex = 0.5
)

# Restore the plotting layout that was active before this section.
par(old_par)