Se desarrolló una encuesta con los estudiantes de la asignatura Seminarios de Construcciones, para la cual se realizaron preguntas que arrojaron las siguientes variables:
# Load the coded survey responses from the CSV file into the data frame `sandra`.
sandra<-read.csv("datos codificados.csv")
# Print the column names of the data frame (output pasted below).
colnames(sandra)
[1] "Nombre" "Region" "Ocupacion" "Estatura" "Edad" "Sexo" "Peso"
[8] "Hermanos" "Hijos" "Estrato" "Estado.Civil" "Matricula"
#install.packages("ggplot2") #1- Install the ggplot2 package
library(ggplot2) #2- Load the package into memory
Se definieron los tipos de variables como cualitativas o cuantitativas. Las variables cualitativas son variables no numéricas; por tanto, no se puede graficar un histograma, sino un diagrama de barras.
# Region: percentage of respondents per region code, shown first as a 3D pie
# and then as a bar chart with the percentage printed above each bar.
library(ggplot2)
library(plotrix)
options(digits = 3)

regiones <- sandra$Region

# Display names and colours, paired with the table entries by position.
nombres_region <- c("valle", "cauca", "quindio", "nariño", "bogota")
colores_region <- c("blue", "red", "purple", "cornflowerblue", "yellow")

# Share of each region in percent, rounded to 3 decimals.
EP <- round(prop.table(table(regiones)) * 100, 3)

# Slice labels have the form "<name> <percent> %".
lbls <- paste(nombres_region, EP, "%")
pie3D(EP, labels = lbls, col = colores_region, main = "grafica regiones")

# Smaller axis/label text and horizontal axis labels for the bar chart.
par(cex.axis = 0.7, cex.lab = 0.5, cex.sub = 0.7, las = 1)
bp1 <- barplot(
  EP,
  legend.text = nombres_region,
  main = "regiones",
  col = colores_region,
  ylim = c(0, 80),
  cex.main = 0.8
)
# barplot() returns the bar midpoints; write each value 2 units above its bar.
text(x = bp1, y = EP + 2, labels = format(EP), cex = 0.7)
# Occupation: whole-number percentage per occupation code, drawn as a 3D pie.
library(ggplot2)
library(plotrix)

ocupaciones <- sandra$Ocupacion
EP <- round(prop.table(table(ocupaciones)) * 100)

# Slice labels have the form "<name> <percent> %"; the names are paired with
# the table entries by position.
lbls <- paste(c("pregrado", "postgrado", "maestria"), EP, "%")
pie3D(EP, labels = lbls, main = "Ocupacion en el curso de Seminario")
Para poder hallar la moda se instala el paquete modeest; luego se carga la librería con library("modeest") y, con la función mlv, se puede calcular la moda de los datos.
# Height (Estatura): reload the survey data and extract the column.
sandra<-read.csv("datos codificados.csv")
estatura<-sandra$Estatura
# Mean, median, variance and standard deviation; each call is followed by the
# console output it produced.
mean(estatura)
[1] 168
median(estatura)
[1] 167.5
var(estatura)
[1] 108.4
sd(estatura)
[1] 10.41
# NOTE(review): estatura1 is not referenced again in the visible file.
estatura1 <- data.frame(estatura)
# mlv() from modeest with method = "discrete" reports the most frequent value.
library("modeest")
mlv(estatura,method="discrete")
Mode (most frequent value): 160
Bickel's modal skewness: 0.4167
Call: mlv.integer(x = estatura, method = "discrete")
# Height histograms with a normal curve overlaid. Uses `estatura`, the height
# vector extracted from sandra$Estatura earlier in the script. The curve is
# parameterised by the sample mean and standard deviation.

# 1) Histogram on the count scale. dnorm() is a density, so it is multiplied
#    by (number of observations * bin width) to be comparable with bar heights
#    measured in counts. The curve uses `estatura` directly because `b` is
#    only assigned further down.
h_estatura <- hist(estatura, breaks = 8, col = "cornflowerblue",
                   main = "Estatura estudiantes de seminario")
escala <- length(estatura) * diff(h_estatura$breaks)[1]
curve(escala * dnorm(x, mean(estatura), sd(estatura)),
      col = "blue", lwd = 3, add = TRUE)

# NOTE(review): `label` is not used anywhere in the visible file; kept so the
# set of variables created by the script is unchanged.
label <- paste(estatura, "%", sep = " ")

# 2) Histogram on the density scale (freq = FALSE); here the normal density
#    can be overlaid without rescaling.
b <- estatura
hist(b, freq = FALSE,
     ylab = "Densidad",
     xlab = "Alturas (cm)", main = "Estaturas estudiantes de seminario")
curve(dnorm(x, mean(b), sd(b)),
      col = "blue", lwd = 3, add = TRUE)
# Age (Edad): reload the survey data and extract the column.
sandra<-read.csv("datos codificados.csv")
edad<-sandra$Edad
# Histogram of age; breaks = "scott" asks hist() to pick the bins by Scott's rule.
hist(edad, breaks = "scott",main="histograma de edad", col= "cornflowerblue")
# Mean, median and variance; each call is followed by its console output.
mean(edad)
[1] 28.6
median(edad)
[1] 25
var(edad)
[1] 78.2
# Most frequent age, via modeest::mlv with the discrete method.
library("modeest")
mlv(edad,method="discrete")
Mode (most frequent value): 23
Bickel's modal skewness: 0.417
Call: mlv.integer(x = edad, method = "discrete")
# Sex (Sexo): recode the numeric codes to labels and chart the percentages.
sexo<-sandra$Sexo
options(digits=4)# sets how many significant digits are printed
# Codes 1 and 2 are labelled "Hombre" and "Mujer"; `sexo` is assigned a second
# time inside the call.
# NOTE(review): ordered() builds an ordered factor (printed as Hombre < Mujer);
# factor() would avoid implying a ranking - confirm which is intended.
myvar2 <- ordered(sexo<-sandra$Sexo, levels = c(1,2),labels = c("Hombre","Mujer"))
myvar2
[1] Mujer Hombre Hombre Mujer Mujer Hombre Hombre Mujer Mujer Mujer Mujer Mujer Hombre Mujer Mujer
[16] Mujer Mujer Hombre Hombre Mujer Hombre Hombre Hombre Hombre
Levels: Hombre < Mujer
# Counts per label converted to percentages of the total.
EP <- table(myvar2)
EP <- prop.table(EP)*100
EP
myvar2
Hombre Mujer
45.83 54.17
#Barplots
# Smaller axis/label text and horizontal axis labels (las = 1).
par(cex.axis= 0.7, cex.lab= 0.5, cex.sub= 0.7, las=1)
# bp1 receives the bar midpoints returned by barplot(); text() uses them to
# write each percentage 2 units above its bar.
bp1<-barplot(EP, main="SEXO", col=c("blue","pink"), ylim=c(0,60), cex.main=0.8 )
text(bp1, EP+2,format(EP), cex=0.7)
# Weight (Peso): reload the survey data and extract the column.
sandra<-read.csv("datos codificados.csv")
peso<-sandra$Peso
# Mean, median and variance; each call is followed by its console output.
mean(peso)
[1] 67.25
median(peso)
[1] 67.5
var(peso)
[1] 164.2
# Weight histograms. Uses `peso`, the weight vector extracted from sandra$Peso
# earlier in the script.
# NOTE(review): modeest is loaded here but no mlv() call for weight appears in
# the visible file.
library("modeest")

# Count-scale histogram; bins chosen by Scott's rule.
hist(peso, breaks = "scott", main = "histograma de peso",
     col = "cornflowerblue")

# Density-scale histogram with the normal density for the sample mean and
# standard deviation drawn on top. The title names weight, matching the data
# and the x-axis label.
b <- peso
hist(b, freq = FALSE,
     ylab = "Densidad",
     xlab = "Peso(Kg)", main = "Pesos estudiantes de seminario")
curve(dnorm(x, mean(b), sd(b)),
      col = "blue", lwd = 5, add = TRUE)
# Number of siblings (Hermanos): reload the survey data and extract the column.
sandra<-read.csv("datos codificados.csv")
hermanos<-sandra$Hermanos
# Mean, median and variance; each call is followed by its console output.
mean(hermanos)
[1] 2.417
median(hermanos)
[1] 2
var(hermanos)
[1] 3.471
# Most frequent value, via modeest::mlv with the discrete method.
library("modeest")
mlv(hermanos,method="discrete")
Mode (most frequent value): 1
Bickel's modal skewness: 0.4583
Call: mlv.integer(x = hermanos, method = "discrete")
# Histogram of the raw counts; breaks = 6 is a suggested number of bins.
hist(hermanos, breaks = 6,main="histograma de numero de hermanos", col= "cornflowerblue")
# Ordered factor with one level per value 0..6; `hermanos` is assigned a
# second time inside the call.
myvar2 <- ordered(hermanos<-sandra$Hermanos, levels = c(0,1,2,3,4,5,6))
myvar2
[1] 1 1 4 1 6 4 0 0 1 4 5 2 2 0 3 5 2 1 6 2 3 1 3 1
Levels: 0 < 1 < 2 < 3 < 4 < 5 < 6
# Percentage of respondents per sibling count, rounded to 3 decimals.
EP <- table(myvar2)
EP <- round(prop.table(EP)*100,3)
#Barplots
# Smaller axis/label text and horizontal axis labels (las = 1).
par(cex.axis= 0.7, cex.lab= 0.5, cex.sub= 0.7, las=1)
# bp1 receives the bar midpoints; text() writes each percentage 2 units above
# its bar.
bp1<- barplot(EP, main="numero de hermanos", ylim=c(0,35), cex.main=0.8 )
text(bp1, EP+2,format(EP), cex=0.7)
# Number of children (Hijos): reload the survey data and extract the column.
sandra<-read.csv("datos codificados.csv")
hijos<-sandra$Hijos
# Variance, mean and median; each call is followed by its console output.
var(hijos)
[1] 0.6793
mean(hijos)
[1] 0.375
median(hijos)
[1] 0
# Most frequent value, via modeest::mlv with the discrete method.
library("modeest")
mlv(hijos,method="discrete")
Mode (most frequent value): 0
Bickel's modal skewness: 0.2083
Call: mlv.integer(x = hijos, method = "discrete")
# Ordered factor with one level per value 0..3; `hijos` is assigned a second
# time inside the call.
myvar2 <- ordered(hijos<-sandra$Hijos, levels = c(0,1,2,3))
myvar2
[1] 0 0 1 0 0 0 0 0 2 0 0 0 0 0 0 3 1 0 0 0 2 0 0 0
Levels: 0 < 1 < 2 < 3
# Percentage of respondents per number of children, rounded to 2 decimals.
EP <- table(myvar2)
EP <- round(prop.table(EP)*100, 2)
EP
myvar2
0 1 2 3
79.17 8.33 8.33 4.17
# 3D pie of the number of children. Uses `EP`, the percentage table computed
# just before this point in the script.
library(ggplot2)
library(plotrix)

# Slice labels have the form "<name> <percent> %"; the names are paired with
# the entries of EP by position.
lbls <- c("sin hijos", "con 1", "con 2", "con 3")
lbls <- paste(lbls, EP)            # append the percentage
lbls <- paste(lbls, "%", sep = " ") # append the % sign

# The title names the variable actually plotted (number of children).
pie3D(EP, labels = lbls, main = "Numero de hijos en el curso de Seminario")
# Estrato: bar chart of proportions and an ordered factor with levels 1..5.
# The column is taken from the already-loaded `sandra`, which is also what the
# bar chart reads; no separate read of the CSV is needed for `estrato`.
barplot(
  prop.table(table(sandra$Estrato)),
  col = c("blue", "red", "purple", "cornflowerblue", "yellow"),
  legend.text = c("1", "2", "3", "4", "5")
)
estrato <- sandra$Estrato
myvar2 <- ordered(estrato, levels = c(1, 2, 3, 4, 5))
myvar2
[1] 3 4 3 3 4 3 5 3 3 3 3 3 3 3 5 3 3 3 5 3 1 2 4 3
Levels: 1 < 2 < 3 < 4 < 5
# Percentage of respondents in each Estrato level, rounded to one decimal,
# then printed.
EP <- table(myvar2)
EP <- round(prop.table(EP)*100,1)
EP
myvar2
1 2 3 4 5
4.2 4.2 66.7 12.5 12.5
# 3D pie of Estrato. Uses `EP`, the percentage table computed just before this
# point in the script.
library(plotrix)

# Slice labels have the form "<name> <percent> %"; the names are paired with
# the entries of EP by position.
lbls <- c("estrato 1 ", "estrato 2 ", " estrato 3 ", "estrato 4 ", " estrato 5 ")
lbls <- paste(lbls, EP)            # append the percentage
lbls <- paste(lbls, "%", sep = " ") # append the % sign

# The title names the variable actually plotted (Estrato).
pie3D(EP, labels = lbls, main = "Estrato en el curso de Seminario")
# Marital status (Estado.Civil): bar chart of the proportion in each category.
# TRUE/FALSE are spelled out because T and F are ordinary variables that can
# be reassigned.
# NOTE(review): `estado.civil` holds the whole data frame and is not referenced
# again in the visible file; the chart reads from `sandra`.
estado.civil <- read.csv("datos codificados.csv",
                         stringsAsFactors = FALSE, header = TRUE)
barplot(
  prop.table(table(sandra$Estado.Civil)),
  col = c("red", "purple"),
  legend.text = c("soltero", "casado")
)
# Marital status (Estado.Civil): reload the survey data and extract the column.
sandra<-read.csv("datos codificados.csv")
estadocivil<-sandra$Estado.Civil
# Variance, mean and median; each call is followed by its console output.
# NOTE(review): the column holds the codes 1/2 (charted as soltero/casado), so
# mean and variance describe the coding rather than a measured quantity -
# confirm they are wanted.
var(estadocivil)
[1] 0.1449
mean(estadocivil)
[1] 1.167
median(estadocivil)
[1] 1
# Most frequent code, via modeest::mlv with the discrete method.
library("modeest")
mlv(estadocivil,method="discrete")
Mode (most frequent value): 1
Bickel's modal skewness: 0.1667
Call: mlv.integer(x = estadocivil, method = "discrete")
# Marital status: whole-number percentage per code, drawn as a 3D pie.
library(ggplot2)
library(plotrix)

EP <- round(prop.table(table(estadocivil)) * 100)

# Slice labels have the form "<name> <percent> %"; the names are paired with
# the table entries by position.
lbls <- paste(c("soltero", "casado"), EP, "%")
pie3D(EP, labels = lbls, main = "Estado Civil en el curso de Seminario")
# Enrolment (Matricula): base-graphics bar chart of proportions, then a ggplot2
# bar chart of percentages with the value printed above each bar.
# TRUE/FALSE are spelled out because T and F are ordinary variables that can
# be reassigned.
# NOTE(review): `matricula` and `sandra1` are not referenced again in the
# visible file; they are still created so the script defines the same
# variables. `sandra1` reuses the result of the identical read.
matricula <- read.csv("datos codificados.csv",
                      stringsAsFactors = FALSE, header = TRUE)
barplot(
  prop.table(table(sandra$Matricula)),
  col = c("yellow", "green"),
  legend.text = c("si", "no")
)
sandra1 <- matricula
m <- read.csv("datos codificados.csv")

# One row per Matricula code with its percentage of respondents.
MA <- as.data.frame(prop.table(table(m$Matricula)) * 100)
colnames(MA) <- c("SE", "PORCENTAJE")

library(ggplot2)
# NOTE(review): recent ggplot2 releases deprecate `size` in element_rect() in
# favour of `linewidth`; left as-is to stay compatible with older versions.
ggplot(MA, aes(x = SE, y = PORCENTAJE, fill = SE)) +
  geom_bar(stat = "identity", width = 0.8) +
  theme(
    legend.position = "right",
    legend.background = element_rect(
      fill = "white", size = 0.5, linetype = "solid", colour = "darkgrey"
    )
  ) +
  theme(plot.title = element_text(hjust = 0.5)) +
  # Percentage label centred just above each bar.
  geom_text(
    aes(y = PORCENTAJE, label = paste(round(PORCENTAJE, 2), "%")),
    position = position_dodge(width = 0.5),
    size = 4, vjust = -0.5, hjust = 0.5, col = "black"
  ) +
  scale_y_continuous(breaks = seq(0, 60, by = 5), limits = c(0, 60)) +
  labs(
    title = " Histograma asistentes al curso de seminario",
    x = "Matriculado",
    y = "Porcentaje de personas encuestadas",
    fill = "Matriculado"
  ) +
  # Legend entries are paired with the codes in table order.
  scale_fill_brewer(palette = "Set2", labels = c("Si", "No"))
El comando summary da un resumen de los datos: mínimo y máximo, cuartiles, media y mediana.
# Per-column summary of the whole data frame (output pasted below).
# NOTE(review): the coded categorical columns (e.g. Region, Sexo, Matricula)
# are summarised as numbers in that output; converting them to factors first
# would give counts per category instead - confirm which is wanted.
summary(sandra)
Nombre Region Ocupacion Estatura Edad Sexo
Alejandro Arcila : 1 Min. :1.00 Min. :1.00 Min. :152 Min. :21.0 Min. :1.00
Andrés Casanova : 1 1st Qu.:1.00 1st Qu.:1.00 1st Qu.:160 1st Qu.:23.0 1st Qu.:1.00
Angela : 1 Median :1.00 Median :1.00 Median :168 Median :25.0 Median :2.00
Angie Aldemar : 1 Mean :1.58 Mean :1.42 Mean :168 Mean :28.6 Mean :1.54
Angie Cabezas : 1 3rd Qu.:1.25 3rd Qu.:2.00 3rd Qu.:176 3rd Qu.:30.2 3rd Qu.:2.00
Carlos Zapata : 1 Max. :5.00 Max. :3.00 Max. :188 Max. :49.0 Max. :2.00
(Other) :18
Peso Hermanos Hijos Estrato Estado.Civil Matricula
Min. :50.0 Min. :0.00 Min. :0.000 Min. :1.00 Min. :1.00 Min. :1.00
1st Qu.:57.0 1st Qu.:1.00 1st Qu.:0.000 1st Qu.:3.00 1st Qu.:1.00 1st Qu.:1.00
Median :67.5 Median :2.00 Median :0.000 Median :3.00 Median :1.00 Median :1.00
Mean :67.2 Mean :2.42 Mean :0.375 Mean :3.25 Mean :1.17 Mean :1.42
3rd Qu.:75.2 3rd Qu.:4.00 3rd Qu.:0.000 3rd Qu.:3.25 3rd Qu.:1.00 3rd Qu.:2.00
Max. :98.0 Max. :6.00 Max. :3.000 Max. :5.00 Max. :2.00 Max. :2.00