Se desarrolló una encuesta con los estudiantes de la asignatura Seminarios de Construcciones, en la cual se realizaron preguntas que arrojaron las siguientes variables:

# Load the coded survey answers and list the column names of the data frame.
sandra <- read.csv(file = "datos codificados.csv")
names(sandra)
 [1] "Nombre"       "Region"       "Ocupacion"    "Estatura"     "Edad"         "Sexo"         "Peso"        
 [8] "Hermanos"     "Hijos"        "Estrato"      "Estado.Civil" "Matricula"   
#install.packages("ggplot2") #1- Instalar el paquete ggplot2
library(ggplot2) #2- Cargar el paquete en memoria

Se definieron los tipos de variables como cualitativas o cuantitativas. Las variables cualitativas son variables no numéricas; por tanto, no se puede obtener un histograma, sino un diagrama de barras.

# Region: share of respondents per region, drawn as a 3D pie and a bar chart.
library(ggplot2)
library(plotrix)
options(digits = 3)

regiones <- sandra[["Region"]]
nombres_region <- c("valle", "cauca", "quindio", "nariño", "bogota")
colores_region <- c("blue", "red", "purple", "cornflowerblue", "yellow")

# Percentage of answers per region value, rounded to three decimals.
EP <- round(100 * prop.table(table(regiones)), 3)

# Slice labels in the form "<name> <percent> %".
lbls <- paste(nombres_region, EP, "%")
pie3D(EP, labels = lbls, col = colores_region, main = "grafica regiones")

# Smaller axis text and horizontal axis labels for the bar chart below.
par(las = 1, cex.axis = 0.7, cex.lab = 0.5, cex.sub = 0.7)
bp1 <- barplot(EP, legend.text = nombres_region, main = "regiones", col = colores_region, ylim = c(0, 80), cex.main = 0.8)
# Write each percentage just above its bar.
text(bp1, EP + 2, format(EP), cex = 0.7)

# Occupation: rounded percentage per category shown as a 3D pie chart.
library(ggplot2)
library(plotrix)

ocupaciones <- sandra[["Ocupacion"]]
# Whole-number percentage of respondents for each occupation value.
EP <- round(100 * prop.table(table(ocupaciones)))
# Slice labels in the form "<name> <percent> %".
lbls <- paste(c("pregrado", "postgrado", "maestria"), EP, "%")
pie3D(EP, labels = lbls, main = "Ocupacion  en el curso de Seminario")

Para poder hallar la moda se descarga el paquete modeest; luego se carga la librería con library("modeest") y, ahora sí, con mlv() podemos calcular la moda de los datos.

# Height (Estatura): descriptive statistics.
# Lines starting with "[1]" are console output pasted after each call.
sandra<-read.csv("datos codificados.csv")
estatura<-sandra$Estatura
# Mean, median, variance and standard deviation of the heights.
mean(estatura)
[1] 168
median(estatura)
[1] 167.5
var(estatura)
[1] 108.4
sd(estatura)
[1] 10.41
# NOTE(review): estatura1 is not used again in the visible part of this file.
estatura1 <- data.frame(estatura)
library("modeest")
# Mode of the heights; the pasted output follows this call.
mlv(estatura,method="discrete")
Mode (most frequent value): 160 
Bickel's modal skewness: 0.4167 
Call: mlv.integer(x = estatura, method = "discrete") 
# Frequency histogram of heights with a fitted normal curve drawn on top.
# The curve used to reference `b`, which is only created further down the
# script, so running the file top to bottom failed or used stale data.
hist_estatura <- hist(estatura, breaks = 8, col = "cornflowerblue", main = "Estatura estudiantes de  seminario")
# dnorm() returns a density; scaling by n * bin width puts the curve on the
# count scale of this histogram instead of lying flat along the x axis.
escala <- length(estatura) * diff(hist_estatura$breaks)[1]
curve(dnorm(x, mean(estatura), sd(estatura)) * escala,
      col = "blue", lwd = 3, add = TRUE)

# Density histogram of heights with the fitted normal curve overlaid.
# The unused `label` vector (heights pasted with a "%" sign) was dropped.
b <- read.csv("datos codificados.csv")
b <- b$Estatura

# freq = FALSE puts the y axis on the density scale so dnorm() is comparable.
# FALSE is spelled out because F is an ordinary variable that can be reassigned.
hist(b, freq = FALSE,
     ylab = "Densidad",
     xlab = "Alturas (cm)", main = "Estaturas estudiantes de seminario")
curve(dnorm(x, mean(b), sd(b)),
      col = "blue", lwd = 3, add = TRUE)

# Age (Edad): histogram with the bin breaks chosen by the "scott" rule.
sandra <- read.csv(file = "datos codificados.csv")
edad <- sandra[["Edad"]]
hist(edad, col = "cornflowerblue", breaks = "scott", main = "histograma de edad")

# Age (Edad): mean, median and variance.
# Lines starting with "[1]" are console output pasted after each call.
mean(edad)
[1] 28.6
median(edad)
[1] 25
var(edad)
[1] 78.2
library("modeest")
# Mode of the ages; the pasted output follows this call.
mlv(edad,method="discrete")
Mode (most frequent value): 23 
Bickel's modal skewness: 0.417 
Call: mlv.integer(x = edad, method = "discrete") 
# Sex (Sexo): recode the values 1/2 into an ordered factor with readable labels.
options(digits = 4) # number of significant digits used when printing
sexo <- sandra[["Sexo"]]
myvar2 <- factor(sexo, levels = c(1, 2), labels = c("Hombre", "Mujer"), ordered = TRUE)
print(myvar2)
 [1] Mujer  Hombre Hombre Mujer  Mujer  Hombre Hombre Mujer  Mujer  Mujer  Mujer  Mujer  Hombre Mujer  Mujer 
[16] Mujer  Mujer  Hombre Hombre Mujer  Hombre Hombre Hombre Hombre
Levels: Hombre < Mujer
# Percentage of respondents of each sex.
EP <- 100 * prop.table(table(myvar2))
print(EP)
myvar2
Hombre  Mujer 
 45.83  54.17 
# Bar chart of the sex percentages, each bar annotated with its value.
par(las = 1, cex.axis = 0.7, cex.lab = 0.5, cex.sub = 0.7)
bp1 <- barplot(height = EP, col = c("blue", "pink"), ylim = c(0, 60), main = "SEXO", cex.main = 0.8)
text(x = bp1, y = EP + 2, labels = format(EP), cex = 0.7)

# Weight (Peso): mean, median and variance, followed by a histogram.
# Lines starting with "[1]" are console output pasted after each call.
sandra<-read.csv("datos codificados.csv")
peso<-sandra$Peso
mean(peso)
[1] 67.25
median(peso)
[1] 67.5
var(peso)
[1] 164.2
# NOTE(review): modeest is loaded here but mlv() is not called for peso.
library("modeest")
# Histogram of the weights with the bin breaks chosen by the "scott" rule.
hist(peso, breaks = "scott",main="histograma de peso", col= "cornflowerblue")

# Density histogram of weights with the fitted normal curve overlaid.
b <- read.csv("datos codificados.csv")
b <- b$Peso

# The title used to say "Estaturas" (copied from the height plot) although the
# data and the x axis are weights.
# freq = FALSE puts the y axis on the density scale so dnorm() is comparable.
hist(b, freq = FALSE,
     ylab = "Densidad",
     xlab = "Peso(Kg)", main = "Pesos estudiantes de seminario")
curve(dnorm(x, mean(b), sd(b)),
      col = "blue", lwd = 5, add = TRUE)

# Siblings (Hermanos): mean, median, variance and mode.
# Lines starting with "[1]" are console output pasted after each call.
sandra<-read.csv("datos codificados.csv")
hermanos<-sandra$Hermanos
mean(hermanos)
[1] 2.417
median(hermanos)
[1] 2
var(hermanos)
[1] 3.471
library("modeest")
# Mode of the sibling counts; the pasted output follows this call.
mlv(hermanos,method="discrete")
Mode (most frequent value): 1 
Bickel's modal skewness: 0.4583 
Call: mlv.integer(x = hermanos, method = "discrete") 
# Siblings: histogram, then the counts as an ordered factor with levels 0..6.
hist(hermanos, col = "cornflowerblue", breaks = 6, main = "histograma de numero de hermanos")
hermanos <- sandra[["Hermanos"]]
myvar2 <- factor(hermanos, levels = 0:6, ordered = TRUE)
print(myvar2)
 [1] 1 1 4 1 6 4 0 0 1 4 5 2 2 0 3 5 2 1 6 2 3 1 3 1
Levels: 0 < 1 < 2 < 3 < 4 < 5 < 6
# Percentage of respondents per sibling count, rounded to three decimals.
EP <- round(100 * prop.table(table(myvar2)), 3)

# Bar chart of those percentages, each bar annotated with its value.
par(las = 1, cex.axis = 0.7, cex.lab = 0.5, cex.sub = 0.7)
bp1 <- barplot(height = EP, ylim = c(0, 35), main = "numero de  hermanos", cex.main = 0.8)
text(x = bp1, y = EP + 2, labels = format(EP), cex = 0.7)

# Children (Hijos): variance, mean, median and mode.
# Lines starting with "[1]" are console output pasted after each call.
sandra<-read.csv("datos codificados.csv")
hijos<-sandra$Hijos
var(hijos)
[1] 0.6793
mean(hijos)
[1] 0.375
median(hijos)
[1] 0
library("modeest")
# Mode of the number of children; the pasted output follows this call.
mlv(hijos,method="discrete")
Mode (most frequent value): 0 
Bickel's modal skewness: 0.2083 
Call: mlv.integer(x = hijos, method = "discrete") 
# Number of children as an ordered factor with levels 0..3.
hijos <- sandra[["Hijos"]]
myvar2 <- factor(hijos, levels = 0:3, ordered = TRUE)
print(myvar2)
 [1] 0 0 1 0 0 0 0 0 2 0 0 0 0 0 0 3 1 0 0 0 2 0 0 0
Levels: 0 < 1 < 2 < 3
# Percentage of respondents per number of children, rounded to two decimals.
EP <- round(100 * prop.table(table(myvar2)), 2)
print(EP)
myvar2
    0     1     2     3 
79.17  8.33  8.33  4.17 
# 3D pie chart of the share of respondents by number of children.
library(ggplot2)
library(plotrix)
lbls <- c("sin hijos", "con 1", "con 2", "con 3")
lbls <- paste(lbls, EP, "%") # labels in the form "<label> <percent> %"
# The title used to be "Ocupacion  en el curso de Seminario", copied from the
# occupation chart, although this chart shows the number of children.
pie3D(EP, labels = lbls, main = "Numero de hijos en el curso de Seminario")

# Bar chart of the proportion of respondents in each stratum (Estrato).
# TRUE/FALSE are spelled out because T and F are ordinary variables that can
# be reassigned.
# NOTE(review): the plot reads from `sandra`, not from the data frame loaded here.
estrato <- read.csv("datos codificados.csv", stringsAsFactors = FALSE, header = TRUE)
barplot(prop.table(table(sandra$Estrato)),
        col = c("blue", "red", "purple", "cornflowerblue", "yellow"),
        legend.text = c("1", "2", "3", "4", "5"))

# Stratum as an ordered factor with levels 1..5.
estrato <- sandra[["Estrato"]]
myvar2 <- factor(estrato, levels = 1:5, ordered = TRUE)
print(myvar2)
 [1] 3 4 3 3 4 3 5 3 3 3 3 3 3 3 5 3 3 3 5 3 1 2 4 3
Levels: 1 < 2 < 3 < 4 < 5
# Percentage of respondents per stratum, rounded to one decimal.
EP <- round(100 * prop.table(table(myvar2)), 1)
print(EP)
myvar2
   1    2    3    4    5 
 4.2  4.2 66.7 12.5 12.5 
# 3D pie chart of the share of respondents per stratum.
library(plotrix)
lbls <- c("estrato 1 ", "estrato 2 ", " estrato 3 ", "estrato 4 ", " estrato 5 ")
lbls <- paste(lbls, EP, "%") # labels in the form "<label> <percent> %"
# The title used to be "Ocupacion  en el curso de Seminario", copied from the
# occupation chart, although this chart shows the stratum.
pie3D(EP, labels = lbls, main = "Estrato en el curso de Seminario")

# Bar chart of the proportion of respondents per marital status.
# TRUE/FALSE are spelled out because T and F are ordinary variables that can
# be reassigned.
# NOTE(review): the plot reads from `sandra`, not from the data frame loaded here.
estado.civil <- read.csv("datos codificados.csv", stringsAsFactors = FALSE, header = TRUE)
barplot(prop.table(table(sandra$Estado.Civil)),
        col = c("red", "purple"),
        legend.text = c("soltero", "casado"))

# Marital status (Estado.Civil): variance, mean, median and mode of the values.
# Lines starting with "[1]" are console output pasted after each call.
# NOTE(review): Estado.Civil appears to be a coded category; the mean and
# variance of the codes may not be meaningful -- confirm with the author.
sandra<-read.csv("datos codificados.csv")
estadocivil<-sandra$Estado.Civil
var(estadocivil)
[1] 0.1449
mean(estadocivil)
[1] 1.167
median(estadocivil)
[1] 1
library("modeest")
# Mode of the marital-status values; the pasted output follows this call.
mlv(estadocivil,method="discrete")
Mode (most frequent value): 1 
Bickel's modal skewness: 0.1667 
Call: mlv.integer(x = estadocivil, method = "discrete") 
# Marital status: rounded percentage per category shown as a 3D pie chart.
library(ggplot2)
library(plotrix)

# Whole-number percentage of respondents for each marital-status value.
EP <- round(100 * prop.table(table(estadocivil)))
# Slice labels in the form "<name> <percent> %".
lbls <- paste(c("soltero", "casado"), EP, "%")
pie3D(EP, labels = lbls, main = "Estado Civil en el curso de Seminario")

# Bar chart of the proportion of respondents by enrolment status (Matricula).
# TRUE/FALSE are spelled out because T and F are ordinary variables that can
# be reassigned.
# NOTE(review): the plot reads from `sandra`, not from the data frame loaded here.
matricula <- read.csv("datos codificados.csv", stringsAsFactors = FALSE, header = TRUE)
barplot(prop.table(table(sandra$Matricula)),
        col = c("yellow", "green"),
        legend.text = c("si", "no"))

# ggplot2 bar chart of the percentage of respondents by enrolment status.
# NOTE(review): sandra1 is not used in the visible part of this file.
sandra1 <- read.csv("datos codificados.csv", stringsAsFactors = FALSE, header = TRUE)
m <- read.csv("datos codificados.csv")
# One row per Matricula value with its percentage of the total.
MA <- as.data.frame(prop.table(table(m$Matricula)) * 100)
colnames(MA) <- c("SE", "PORCENTAJE")
library(ggplot2)
# The title used to call this a "Histograma"; it is a bar chart of a
# categorical variable, so the title now says so.
ggplot(MA, aes(x = SE, y = PORCENTAJE, fill = SE)) +
  geom_bar(stat = "identity", width = 0.8) +
  theme(
    legend.position = "right",
    legend.background = element_rect(fill = "white", size = 0.5, linetype = "solid", colour = "darkgrey")
  ) +
  theme(plot.title = element_text(hjust = 0.5)) +
  # Percentage printed just above each bar.
  geom_text(
    aes(y = PORCENTAJE, label = paste(round(PORCENTAJE, 2), "%")),
    position = position_dodge(width = 0.5),
    size = 4, vjust = -0.5, hjust = 0.5, col = "black"
  ) +
  scale_y_continuous(breaks = seq(0, 60, by = 5), limits = c(0, 60)) +
  labs(
    title = "Diagrama de barras de asistentes al curso de seminario",
    x = "Matriculado", y = "Porcentaje de personas encuestadas", fill = "Matriculado"
  ) +
  scale_fill_brewer(palette = "Set2", labels = c("Si", "No"))

El comando summary() da un resumen de los datos: mínimo y máximo, cuartiles, media y mediana.

# Per-column summary of the whole data frame; the pasted console output follows.
summary(sandra)
               Nombre       Region       Ocupacion       Estatura        Edad           Sexo     
 Alejandro Arcila : 1   Min.   :1.00   Min.   :1.00   Min.   :152   Min.   :21.0   Min.   :1.00  
 Andrés Casanova  : 1   1st Qu.:1.00   1st Qu.:1.00   1st Qu.:160   1st Qu.:23.0   1st Qu.:1.00  
 Angela           : 1   Median :1.00   Median :1.00   Median :168   Median :25.0   Median :2.00  
 Angie Aldemar    : 1   Mean   :1.58   Mean   :1.42   Mean   :168   Mean   :28.6   Mean   :1.54  
 Angie Cabezas    : 1   3rd Qu.:1.25   3rd Qu.:2.00   3rd Qu.:176   3rd Qu.:30.2   3rd Qu.:2.00  
 Carlos Zapata    : 1   Max.   :5.00   Max.   :3.00   Max.   :188   Max.   :49.0   Max.   :2.00  
 (Other)          :18                                                                            
      Peso         Hermanos        Hijos          Estrato      Estado.Civil    Matricula   
 Min.   :50.0   Min.   :0.00   Min.   :0.000   Min.   :1.00   Min.   :1.00   Min.   :1.00  
 1st Qu.:57.0   1st Qu.:1.00   1st Qu.:0.000   1st Qu.:3.00   1st Qu.:1.00   1st Qu.:1.00  
 Median :67.5   Median :2.00   Median :0.000   Median :3.00   Median :1.00   Median :1.00  
 Mean   :67.2   Mean   :2.42   Mean   :0.375   Mean   :3.25   Mean   :1.17   Mean   :1.42  
 3rd Qu.:75.2   3rd Qu.:4.00   3rd Qu.:0.000   3rd Qu.:3.25   3rd Qu.:1.00   3rd Qu.:2.00  
 Max.   :98.0   Max.   :6.00   Max.   :3.000   Max.   :5.00   Max.   :2.00   Max.   :2.00  
                                                                                           
