library(summarytools)
library(plotrix)
library(gmodels)
library(agricolae)

1. Variable cualitativa

Los datos corresponden a la marca de auto que poseen 50 personas.

# Car brand owned by each of the 50 surveyed people (one entry per person).
Marca <- c(
  "Chevrolet", "Ford", "Kia", "Mazda", "Renault",
  "Renault", "Chevrolet", "Chevrolet", "Mazda", "Ford",
  "Mazda", "Chevrolet", "Renault", "Chevrolet", "Ford",
  "Chevrolet", "Renault", "Ford", "Mazda", "Chevrolet",
  "Renault", "Mazda", "Chevrolet", "Ford", "Renault",
  "Chevrolet", "Renault", "Mazda", "Ford", "Kia",
  "Renault", "Mazda", "Mazda", "Chevrolet", "Kia",
  "Renault", "Chevrolet", "Chevrolet", "Mazda", "Ford",
  "Kia", "Renault", "Chevrolet", "Chevrolet", "Renault",
  "Renault", "Mazda", "Ford", "Chevrolet", "Mazda"
)

1.1 Tabla de distribución de frecuencias.

# Frequency distribution of the brands: counts and percentages per brand,
# followed by a "Total" row.
T1 <- table(Marca)
Personas <- c(T1)
Porcentaje <- round(100 * prop.table(T1), digits = 1)
Tabla <- cbind(Personas, Porcentaje)

# Column totals, bound below the per-brand rows.
t1 <- sum(Personas)
t2 <- sum(Porcentaje)
Suma <- cbind(t1, t2)
rownames(Suma) <- "Total"

Tabla2 <- rbind(Tabla, Suma)
Tabla2
##           Personas Porcentaje
## Chevrolet       15         30
## Ford             8         16
## Kia              4          8
## Mazda           11         22
## Renault         12         24
## Total           50        100

1.2 Utilizando el paquete summarytools

# Same frequency table via summarytools, rendered in R Markdown style with
# the NA row and the headings turned off.
freq(
  Marca,
  style = "rmarkdown",
  report.nas = FALSE,
  headings = FALSE
)
## setting plain.ascii to FALSE
## 
## |        &nbsp; | Freq |      % | % Cum. |
## |--------------:|-----:|-------:|-------:|
## | **Chevrolet** |   15 |  30.00 |  30.00 |
## |      **Ford** |    8 |  16.00 |  46.00 |
## |       **Kia** |    4 |   8.00 |  54.00 |
## |     **Mazda** |   11 |  22.00 |  76.00 |
## |   **Renault** |   12 |  24.00 | 100.00 |
## |     **Total** |   50 | 100.00 | 100.00 |

1.3 Gráfico de barras

# Bar chart of the brand counts. The value returned by barplot() is kept so
# that plotrix::barlabels() can place the count labels on the bars.
barpos <- barplot(
  height = T1,
  main = "Marca del auto",
  col = "lightblue"
)
barlabels(barpos, T1)

1.4 Gráfico de sectores (pie chart)

# Slice labels of the form "<brand> <percentage>%". The brand names are read
# from the table itself (instead of being retyped) so that each label always
# belongs to the slice it is drawn next to.
lab <- paste0(names(T1), " ", Porcentaje, "%")
pie(T1, labels = lab, radius = 1, col = 3:7)

2. Dos variables cualitativas (tablas de contingencia)

Se agrega la variable: forma de tenencia de la vivienda donde residen las personas.

Vivienda<-c("Propia","Arriendo","Familiar","Propia","Propia","Arriendo","Familiar","Arriendo","Arriendo","Arriendo","Familiar","Propia","Propia","Propia","Arriendo","Familiar","Propia","Propia","Arriendo","Arriendo","Familiar","Familiar","Propia","Propia","Arriendo","Arriendo","Arriendo","Familiar","Propia","Propia","Propia","Arriendo","Familiar","Familiar","Propia","Propia","Arriendo","Arriendo","Familiar","Propia","Arriendo","Familiar","Familiar","Propia","Arriendo","Propia","Arriendo","Familiar","Propia","Propia")

2.1 Tabla de contingencia

# Two-way contingency table (rows: Marca, columns: Vivienda), printed with
# row and column totals.
T2 <- table(Marca, Vivienda)
addmargins(T2)
##            Vivienda
## Marca       Arriendo Familiar Propia Sum
##   Chevrolet        5        4      6  15
##   Ford             3        1      4   8
##   Kia              1        1      2   4
##   Mazda            4        5      2  11
##   Renault          4        2      6  12
##   Sum             17       13     20  50
# Joint distribution as percentages of the grand total, with margins.
addmargins(100 * prop.table(T2))
##            Vivienda
## Marca       Arriendo Familiar Propia Sum
##   Chevrolet       10        8     12  30
##   Ford             6        2      8  16
##   Kia              2        2      4   8
##   Mazda            8       10      4  22
##   Renault          8        4     12  24
##   Sum             34       26     40 100
# Row-wise proportions (each brand's row sums to 1).
t1 <- prop.table(T2, margin = 1)
# Column-wise proportions, rounded to three decimals.
t2 <- round(prop.table(T2, margin = 2), digits = 3)
# Row proportions printed with an extra "Sum" column.
addmargins(t1, margin = 2)
##            Vivienda
## Marca        Arriendo  Familiar    Propia       Sum
##   Chevrolet 0.3333333 0.2666667 0.4000000 1.0000000
##   Ford      0.3750000 0.1250000 0.5000000 1.0000000
##   Kia       0.2500000 0.2500000 0.5000000 1.0000000
##   Mazda     0.3636364 0.4545455 0.1818182 1.0000000
##   Renault   0.3333333 0.1666667 0.5000000 1.0000000

2.2 Utilizando el paquete summarytools

# Cross-tabulation with row proportions. The vectors themselves must be
# passed: quoting the names ("Marca", "Vivienda") makes ctable() tabulate two
# one-element character vectors, which produced a meaningless 1 x 1 table.
ctable(x = Marca,
       y = Vivienda,
       prop = "r")
## (output not reproduced here: re-run this chunk to obtain the Marca x
## Vivienda table of counts with row percentages)

2.3 Utilizando el paquete gmodels

# gmodels cross table: counts plus row, column and table proportions per
# cell; the chi-square contribution of each cell is switched off.
T4 <- CrossTable(x = Marca, y = Vivienda, prop.chisq = FALSE)
## 
##  
##    Cell Contents
## |-------------------------|
## |                       N |
## |           N / Row Total |
## |           N / Col Total |
## |         N / Table Total |
## |-------------------------|
## 
##  
## Total Observations in Table:  50 
## 
##  
##              | Vivienda 
##        Marca |  Arriendo |  Familiar |    Propia | Row Total | 
## -------------|-----------|-----------|-----------|-----------|
##    Chevrolet |         5 |         4 |         6 |        15 | 
##              |     0.333 |     0.267 |     0.400 |     0.300 | 
##              |     0.294 |     0.308 |     0.300 |           | 
##              |     0.100 |     0.080 |     0.120 |           | 
## -------------|-----------|-----------|-----------|-----------|
##         Ford |         3 |         1 |         4 |         8 | 
##              |     0.375 |     0.125 |     0.500 |     0.160 | 
##              |     0.176 |     0.077 |     0.200 |           | 
##              |     0.060 |     0.020 |     0.080 |           | 
## -------------|-----------|-----------|-----------|-----------|
##          Kia |         1 |         1 |         2 |         4 | 
##              |     0.250 |     0.250 |     0.500 |     0.080 | 
##              |     0.059 |     0.077 |     0.100 |           | 
##              |     0.020 |     0.020 |     0.040 |           | 
## -------------|-----------|-----------|-----------|-----------|
##        Mazda |         4 |         5 |         2 |        11 | 
##              |     0.364 |     0.455 |     0.182 |     0.220 | 
##              |     0.235 |     0.385 |     0.100 |           | 
##              |     0.080 |     0.100 |     0.040 |           | 
## -------------|-----------|-----------|-----------|-----------|
##      Renault |         4 |         2 |         6 |        12 | 
##              |     0.333 |     0.167 |     0.500 |     0.240 | 
##              |     0.235 |     0.154 |     0.300 |           | 
##              |     0.080 |     0.040 |     0.120 |           | 
## -------------|-----------|-----------|-----------|-----------|
## Column Total |        17 |        13 |        20 |        50 | 
##              |     0.340 |     0.260 |     0.400 |           | 
## -------------|-----------|-----------|-----------|-----------|
## 
## 

2.4 Gráfico de barras compuestas

# Stacked bars: one bar per housing type, one coloured segment per brand.
bar <- barplot(T2, col = 3:7)
barlabels(bar, T2)
## Warning in rep(xpos, each = length(ypos)/length(xpos)) + rep(c(-offset, :
## longitud de objeto mayor no es múltiplo de la longitud de uno menor
# Legend with filled boxes matching the bar segments. The previous legend
# drew line samples with lty = 1:4, i.e. four line types for five brands and
# a symbol that does not appear in a bar chart. Brand names come from the
# table so they always follow the segment order.
legend("top", legend = rownames(T2), fill = 3:7)

2.5 Gráfico de barras agrupadas

# Grouped bars: within each housing type, one bar per brand.
# Five brands need five colours; with only four, the fifth brand (Renault)
# was drawn in the same colour as the first (Chevrolet).
barpos <- barplot(T2, main = "Marca vs Vivienda",
                  col = c("darkblue", "red", "green", "yellow", "orange"),
                  beside = TRUE)
barlabels(barpos, T2)
# Filled legend boxes in the same colour order as the bars.
legend("top", legend = rownames(T2),
       fill = c("darkblue", "red", "green", "yellow", "orange"))

2.6 Barras compuestas en proporciones

# Stacked bars of the column proportions held in t2. The bar positions are
# captured from this plot rather than reused from an earlier chart, so the
# labels stay aligned even if this section is run on its own.
bar <- barplot(t2, col = 1:5, ylim = c(0, 1.5))
barlabels(bar, t2)
## Warning in rep(xpos, each = length(ypos)/length(xpos)) + rep(c(-offset, :
## longitud de objeto mayor no es múltiplo de la longitud de uno menor
# Filled legend boxes (one per brand) instead of line samples with lty = 1:4.
legend("top", legend = rownames(t2), fill = 1:5)

3. Variable cuantitativa discreta

Se utilizan datos correspondientes al número de defectos encontrados en una muestra de 50 piezas elaboradas en un proceso de producción.

# Number of defects found in each of the 50 sampled pieces.
defectos <- c(
  3, 1, 2, 0, 1, 2, 1, 0, 1, 0,
  2, 1, 0, 1, 2, 5, 1, 2, 3, 2,
  1, 0, 1, 2, 1, 1, 2, 5, 4, 1,
  2, 3, 0, 0, 1, 2, 4, 3, 1, 0,
  2, 1, 4, 3, 1, 0, 1, 1, 2, 4
)

3.1 Tabla de distribución de frecuencias.

# Frequency distribution of the defect counts: absolute and percentage
# frequencies together with their cumulative versions.
T10 <- table(defectos)
num <- c(T10)
nunacu <- cumsum(num)
por <- round(100 * prop.table(T10), digits = 1)
poracu <- cumsum(por)

Tabla11 <- cbind(num, por, nunacu, poracu)
colnames(Tabla11) <- c(
  "n°piezas", "por. piezas", "num. ac. piezas", "por.ac.piezas"
)
Tabla11
##   n°piezas por. piezas num. ac. piezas por.ac.piezas
## 0        9          18               9            18
## 1       18          36              27            54
## 2       12          24              39            78
## 3        5          10              44            88
## 4        4           8              48            96
## 5        2           4              50           100
# "Total" row for the discrete frequency table. The cumulative columns have
# no meaningful total, so they are left as NA (a 0 would read as a real
# value); this matches the total row used for the grouped table in 4.1.
t4 <- sum(num)
t5 <- sum(por)
Suma <- cbind(t4, t5, NA, NA)
rownames(Suma) <- c("Total")
Tabla12 <- rbind(Tabla11, Suma)
Tabla12
##       n°piezas por. piezas num. ac. piezas por.ac.piezas
## 0            9          18               9            18
## 1           18          36              27            54
## 2           12          24              39            78
## 3            5          10              44            88
## 4            4           8              48            96
## 5            2           4              50           100
## Total       50         100              NA            NA

3.2 Gráfico de barras

# Plot of the defect-count table: number of pieces for each defect count.
plot(
  T10,
  xlab = "Número de defectos",
  ylab = "Número de piezas"
)

3.3 Gráfico escalonado

# Step plot of the empirical cumulative distribution of the defect counts.
plot(
  ecdf(defectos),
  xlab = "Número de defectos",
  ylab = "Proporción de piezas"
)

4. Variable cuantitativa continua

Se utilizan datos referentes al tiempo que se requiere para realizar una tarea por parte de 50 operarios.

# Time needed by each of the 50 operators to complete the task.
tiempo <- c(
  21.3, 25.8, 26.7, 28.6, 29.3, 24.2, 25.0, 19.7, 24.7, 26.2,
  27.1, 28.3, 26.1, 23.6, 26.4, 20.0, 22.6, 27.7, 25.8, 25.9,
  30.2, 31.4, 26.7, 22.4, 23.0, 28.3, 26.5, 29.1, 32.6, 24.6,
  26.9, 27.1, 27.2, 28.2, 25.8, 24.2, 25.5, 26.6, 27.7, 27.3,
  21.6, 22.4, 25.4, 23.1, 24.7, 26.5, 22.8, 27.7, 25.9, 27.2
)

4.1 Distribución de frecuencias

# Extremes of the data, used below to build the class limits.
min(tiempo)
## [1] 19.7
max(tiempo)
## [1] 32.6
# Class width: one sixth of the range, rounded to one decimal.
# NOTE(review): the variable name `c` masks base::c as a value; calls such as
# c(a) elsewhere in this script still reach the function. Also, round() can
# round the width down, in which case the last break would fall below
# max(tiempo) -- confirm whenever the data change.
c <- round((max(tiempo) - min(tiempo)) / 6, digits = 1)
# Class limits: start at the minimum and advance six widths.
a <- seq(from = min(tiempo), to = min(tiempo) + 6 * c, by = c)

# Left-closed classes; the last class also includes its upper limit.
T5 <- table(cut(tiempo, breaks = a, right = FALSE, include.lowest = TRUE))
T5
## 
## [19.7,21.9) [21.9,24.1) [24.1,26.3) [26.3,28.5) [28.5,30.7) [30.7,32.9] 
##           4           7          15          18           4           2
# Frequency table for the grouped times.
nu <- c(T5)                              # absolute frequency per class
porc <- round(nu / sum(nu) * 100, 1)     # percentage per class
freac <- cumsum(nu)                      # cumulative absolute frequency
# Cumulative percentage: accumulate the percentages. Accumulating the
# already-cumulative counts (cumsum(freac)) gave 4, 15, 41, 85, 133, 183,
# which is not a percentage and exceeds 100.
poracu <- cumsum(porc)
t11 <- sum(nu)
t12 <- sum(porc)
Tabla6 <- cbind(T5, porc, freac, poracu)
colnames(Tabla6) <- c("n.operar.", "Por.operar.", "n.ac.operar.", "Por.ac.operar.")
Tabla6
##             n.operar. Por.operar. n.ac.operar. Por.ac.operar.
## [19.7,21.9)         4           8            4              8
## [21.9,24.1)         7          14           11             22
## [24.1,26.3)        15          30           26             52
## [26.3,28.5)        18          36           44             88
## [28.5,30.7)         4           8           48             96
## [30.7,32.9]         2           4           50            100
# "Total" row; cumulative columns have no meaningful total, hence NA.
Suma1 <- cbind(t11, t12, NA, NA)
rownames(Suma1) <- c("Total")
Tabla7 <- rbind(Tabla6, Suma1)
Tabla7
##             n.operar. Por.operar. n.ac.operar. Por.ac.operar.
## [19.7,21.9)         4           8            4              8
## [21.9,24.1)         7          14           11             22
## [24.1,26.3)        15          30           26             52
## [26.3,28.5)        18          36           44             88
## [28.5,30.7)         4           8           48             96
## [30.7,32.9]         2           4           50            100
## Total              50         100           NA             NA

4.2 Histograma

# Histogram on the class limits in `a` (left-closed classes). The default
# x axis is suppressed and redrawn with ticks exactly at the class limits.
h1 <- hist(
  tiempo,
  breaks = a,
  right = FALSE, include.lowest = TRUE,
  main = "distribución", xlab = "Tiempo", ylab = "n° de operarios",
  col = "green", border = "red",
  xlim = c(18, 34), xaxt = "n"
)
axis(side = 1, at = a)

4.3 Polígono de frecuencias

# Coordinates of the frequency polygon: class midpoints against class
# counts, padded with one zero-frequency point on each side (18.6 and 34.0)
# so the polygon closes on the axis.
p <- h1$mids
k <- nu
p1 <- c(18.6, p, 34.0)
k1 <- c(0, k, 0)
data.frame(p1, k1)
##     p1 k1
## 1 18.6  0
## 2 20.8  4
## 3 23.0  7
## 4 25.2 15
## 5 27.4 18
## 6 29.6  4
## 7 31.8  2
## 8 34.0  0
# Frequency polygon drawn as a line; the x axis is redrawn with ticks at the
# polygon's own x coordinates.
plot(
  p1, k1,
  type = "l", lwd = 2,
  xlab = "tiempo", ylab = "número operarios",
  xaxt = "n"
)
axis(side = 1, at = p1)

4.4 Histograma y polígono de frecuencias.

# Histogram with the frequency polygon drawn on top of it.
h1 <- hist(tiempo, breaks = a, xlab = "Tiempo", ylab = "n° de operarios",
           main = "distribución", col = "green", xaxt = "n",
           include.lowest = TRUE, right = FALSE, border = "red",
           xlim = c(18, 34))
axis(1, a)
# lines() only adds to the existing plot: axis titles cannot be set from it,
# so the xlab/ylab arguments previously passed here were dropped.
lines(p1, k1, type = "l", lwd = 2)

4.5 Ojiva

# Cumulative counts for the ogive, with a leading 0 for the first class limit.
q1 <- c(0, freac)
q1
##             [19.7,21.9) [21.9,24.1) [24.1,26.3) [26.3,28.5) [28.5,30.7) 
##           0           4          11          26          44          48 
## [30.7,32.9] 
##          50
# Ogive: cumulative number of operators at each class limit, with the x axis
# redrawn at the class limits.
plot(
  a, q1,
  type = "b", pch = 16, lwd = 2,
  xlab = "Tiempo", ylab = "número ac. operarios",
  xaxt = "n"
)
axis(side = 1, at = a)

4.6 Utilizando el paquete agricolae

# Frequency table computed by agricolae from the histogram object.
ta <- table.freq(h1)
ta
##   Lower Upper Main Frequency Percentage CF CPF
## 1  19.7  21.9 20.8         4          8  4   8
## 2  21.9  24.1 23.0         7         14 11  22
## 3  24.1  26.3 25.2        15         30 26  52
## 4  26.3  28.5 27.4        18         36 44  88
## 5  28.5  30.7 29.6         4          8 48  96
## 6  30.7  32.9 31.8         2          4 50 100
# Ogive via agricolae from the histogram object; the call also prints the
# x / RCF (relative cumulative frequency) table shown below.
ogive.freq(h1)

##      x  RCF
## 1 19.7 0.00
## 2 21.9 0.08
## 3 24.1 0.22
## 4 26.3 0.52
## 5 28.5 0.88
## 6 30.7 0.96
## 7 32.9 1.00
## 8 35.1 1.00
# Histogram redrawn so that agricolae::polygon.freq() can overlay the
# frequency polygon on it.
h1 <- hist(
  tiempo,
  breaks = a,
  right = FALSE, include.lowest = TRUE,
  main = "distribución", xlab = "Tiempo", ylab = "n° de operarios",
  col = "green", border = "red",
  xlim = c(18, 34), xaxt = "n"
)
axis(side = 1, at = a)
polygon.freq(h1, lwd = 2)

5. Tallos y hojas

# Stem-and-leaf display of the times; scale = 2 yields one stem per unit in
# the output below.
stem(tiempo, scale = 2)
## 
##   The decimal point is at the |
## 
##   19 | 7
##   20 | 0
##   21 | 36
##   22 | 4468
##   23 | 016
##   24 | 22677
##   25 | 04588899
##   26 | 124556779
##   27 | 11223777
##   28 | 2336
##   29 | 13
##   30 | 2
##   31 | 4
##   32 | 6
6. Boxplot
# Horizontal box plot of the times, without notches. TRUE/FALSE are spelled
# out because T and F are ordinary variables that can be reassigned.
boxplot(tiempo, col = "red", horizontal = TRUE, notch = FALSE,
        border = "green")