Variables cualitativas y cuantitativas

Las variables categóricas se dividen en 2: ordinales y nominales.

Nominales = no tienen ningún orden; ordinales = respetan un orden.

Variables cuantitativas

Se expresan numéricamente y se dividen en 2: discretas y continuas.

library(dplyr)
## 
## Adjuntando el paquete: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(dslabs)
library(viridisLite)
library(RColorBrewer)
library(ggplot2)
# Install reshape2 only when it is missing, and do so BEFORE attaching it:
# calling install.packages() on a package that is already loaded is skipped
# with a warning, and calling it after a failed library() is too late.
if (!requireNamespace("reshape2", quietly = TRUE)) {
  install.packages("reshape2")
}
library(reshape2)

Dataset

# Load the `murders` data set into the workspace.
data(list = "murders")

Gráficos

Diagramas de barras

# Frequency bar chart: number of states in each region.
# NOTE: the object name `table` is kept because later chunks read it; calls
# such as table(x) still resolve to the base function.
table <- table(murders$region)
# Read the counts from the table instead of retyping them by hand, so the
# labels can never disagree with the bars.
valores <- as.vector(table)
nlevels(murders$region)
## [1] 4
table
## 
##     Northeast         South North Central          West 
##             9            17            12            13
# barplot() returns the x coordinate of every bar midpoint. Those midpoints
# are not 1, 2, 3, ... so they must be used to centre the count labels.
bar_mid <- barplot(
  table,
  main = "Garfico de barras de las Regiones ",
  ylab = "Frecuencia",
  xlab = "Regiones",
  ylim = c(0, 20),
  col = "red"
)
text(x = bar_mid, y = valores + 1, labels = valores, cex = 0.8, col = "black")

Diagrama de Torta

# Pie chart of the regions; every slice is labelled with its share of states.
# Earlier version without percentages, kept for reference:
# pie(table, labels = levels(murders$region), col = viridis(10),
#     main = "Garfico de Torta Regiones ", border = viridis(10))

# Take the counts from `table` so they are in the same order as the slices.
# A hand-typed vector in a different order attaches the wrong percentage to
# the wrong region.
valores <- as.vector(table)
porcentaje <- valores / sum(valores) * 100
etiquetas <- paste(names(table), round(porcentaje, 2), "%")

# One shared palette for slices, borders and legend keys; only its first
# length(table) colours end up being used.
colores <- viridis(10)

pie(
  table,
  labels = etiquetas,
  col = colores,
  main = "Garfico de Torta Regiones ",
  border = colores,
  radius = 0.55
)
legend("topleft", legend = names(table), cex = 0.85, fill = colores)

#help("legend")

# Bar chart with the regions ordered from most to fewest states.
# The level order is derived from the observed frequencies instead of being
# typed by hand, so it stays correct if the data change.
region <- factor(
  murders$region,
  levels = names(sort(table(murders$region), decreasing = TRUE))
)
table1 <- table(region)
barplot(table1)

# Histogram of the murder rate.

# Add `rate`: murders per 100,000 inhabitants. The pipe already supplies the
# data frame as the first argument of mutate(), so it is not passed again.
murders <- murders %>%
  mutate(rate = total / population * 100000)
head(murders)
##        state abb region population total     rate
## 1    Alabama  AL  South    4779736   135 2.824424
## 2     Alaska  AK   West     710231    19 2.675186
## 3    Arizona  AZ   West    6392017   232 3.629527
## 4   Arkansas  AR  South    2915918    93 3.189390
## 5 California  CA   West   37253956  1257 3.374138
## 6   Colorado  CO   West    5029196    65 1.292453
# xaxt = "n" suppresses the default x axis so that a custom axis with one
# tick per unit can be drawn afterwards. The five Set1 colours are recycled
# across the bars.
hist(
  murders$rate,
  main = "Histograma de la Tasa de Asesinatos",
  xlab = "Tasa de Asesinatos",
  xlim = c(0, 20),
  ylim = c(0, 25),
  breaks = 17,
  col = brewer.pal(5, "Set1"),
  xaxt = "n"
)
axis(1, at = seq(0, 17, by = 1), las = 1)

# Boxplot of the murder rate; outliers are hidden and the y axis is cut at 6.
boxplot(
  murders$rate,
  main = "Boxplot",
  ylab = "Tasa de asesinatos",
  col = "Blue",
  ylim = c(0, 6),
  outline = FALSE
)

# Mark the mean on the box (a single box is drawn at x = 1) and print its
# rounded value next to it.
points(x = 1, y = mean(murders$rate), pch = 20, col = "black")
text(x = 1.1, y = 2.3, labels = paste(" ", round(mean(murders$rate), 2)))

# Five-number summary plus the mean, for comparison with the plot.
summary(murders$rate)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##  0.3196  1.2526  2.6871  2.7791  3.3861 16.4528
# Boxplot of the murder rate annotated with the mean and the summary stats.
boxplot(murders$rate, col = "blue", ylab = "Tasa de asesinatos",
        outline = FALSE, main = "Boxplot", ylim = c(0, 6))

# Add the mean to the plot. A single box is drawn at x = 1, so both the
# point and its label must use x = 1. Using the mean itself as the x
# coordinate puts the label outside the plotting region, where it is clipped.
mean_rate <- mean(murders$rate)
points(x = 1, y = mean_rate, col = "red", pch = 20)
# pos = 2 writes the label to the left of the anchor, away from the summary
# column that is written to the right (pos = 4) below.
text(x = 1, y = 2.3, labels = paste("Media:", round(mean_rate, 2)),
     pos = 2, col = "black")

# Compute the summary statistics (min, quartiles, median, mean, max).
summary_stats <- summary(murders$rate)

# Write the six statistics as a column of labels, top (y = 5) to bottom
# (y = 2.5). text() is vectorised, so one call draws all of them.
stat_names <- c("Min:", "1Q:", "Mediana:", "Media:", "3Q:", "Max:")
text(
  x = 1,
  y = seq(5, 2.5, by = -0.5),
  labels = paste(stat_names, round(as.numeric(summary_stats), 2)),
  pos = 4,
  cex = 0.8
)

## Comparing variables
# Scatter plot: total murders against population (in millions), one colour
# per region.

x <- murders$population / 10^6
y <- murders$total

# A factor passed as `col` is drawn with its integer level codes, i.e. the
# first level gets palette colour 1, the second colour 2, and so on.
plot(
  x, y,
  main = "Total vs Population",
  col = murders$region,
  pch = 20,
  xlab = "Population / 10^6",
  ylab = "Total murders"
)
# The legend labels are in level order, so the colours must be in level
# order too. unique(murders$region) is in order of first appearance in the
# data, which pairs the labels with the wrong colours.
legend(
  "bottomright",
  legend = levels(murders$region),
  fill = seq_len(nlevels(murders$region))
)

# Boxplot of the murder rate for each region. `region` in the formula is the
# column of `murders`, because variables are looked up in `data` first.
boxplot(
  rate ~ region,
  data = murders,
  main = "Boxplot",
  col = brewer.pal(9, "Set1"),
  ylim = c(0, 6),
  outline = FALSE
)
# Horizontal reference line at the overall mean rate, with its value printed
# at the left edge of the plot.
abline(h = mean(murders$rate), lwd = 2, col = "blue")
text(x = 0.5, y = 3, labels = paste("", round(mean(murders$rate), 2)),
     col = "blue")

# Several plots per page.

# par() returns the previous settings; keep them so the layout can be
# restored once the panels are drawn.
# NOTE(review): four plots are drawn on a 1 x 2 grid, so they fill two
# successive pages. Use mfrow = c(2, 2) if all four should share one page.
old_par <- par(mfrow = c(1, 2))

# Panel 1: total murders against population (in millions), by region.
plot(
  x, y,
  main = "Total vs Population",
  col = murders$region,
  pch = 20,
  xlab = "Population / 10^6",
  ylab = "Total murders"
)
# Colours in level order so that they match the level-ordered labels and the
# level codes used for the points.
legend(
  "bottomright",
  legend = levels(murders$region),
  fill = seq_len(nlevels(murders$region))
)

# Panel 2: histogram of the murder rate with a custom one-unit x axis.
hist(
  murders$rate,
  main = "Histograma de la Tasa de Asesinatos",
  xlab = "Tasa de Asesinatos",
  xlim = c(0, 20),
  ylim = c(0, 25),
  breaks = 17,
  col = brewer.pal(5, "Set1"),
  xaxt = "n"
)
axis(1, at = seq(0, 17, by = 1), las = 1)

# Panel 3: regions ordered from most to fewest states, with the order
# derived from the observed frequencies.
region <- factor(
  murders$region,
  levels = names(sort(table(murders$region), decreasing = TRUE))
)
table1 <- table(region)
barplot(table1)

# Panel 4: murder rate by region with the overall mean as a reference line.
boxplot(
  rate ~ region,
  data = murders,
  main = "Boxplot",
  col = brewer.pal(9, "Set1"),
  ylim = c(0, 6),
  outline = FALSE
)
abline(h = mean(murders$rate), col = "blue", lwd = 2)
text(x = 0.5, y = 3, labels = paste("", round(mean(murders$rate), 2)),
     col = "blue")

# Restore the layout that was active before this section.
par(old_par)