##activar la libreria car
library(car)
## Loading required package: carData
# Copy the Prestige data set (made available by library(car)) into a working
# data frame, then preview its first six rows.
data <- Prestige
head(data, n = 6L)
## education income women prestige census type
## gov.administrators 13.11 12351 11.16 68.8 1113 prof
## general.managers 12.26 25879 4.02 69.1 1130 prof
## accountants 12.77 9271 15.70 63.4 1171 prof
## purchasing.officers 11.42 8865 9.11 56.8 1175 prof
## chemists 14.62 8403 11.68 73.5 2111 prof
## physicists 15.64 11030 5.13 77.6 2113 prof
library(plyr)
# Rename the columns to Spanish names.
# Prestige has no `code` column (the census code is stored in `census`), so it
# is not listed here: asking plyr to rename a missing column only produces a
# "values were not present" message and renames nothing.
# The call is namespace-qualified so another package's rename() cannot mask it.
data <- plyr::rename(
  data,
  c(
    "education" = "educ",
    "income" = "ing",
    "women" = "mujeres",
    "prestige" = "prestigio",
    "type" = "tipo"
  )
)
head(data)
## educ ing mujeres prestigio census tipo
## gov.administrators 13.11 12351 11.16 68.8 1113 prof
## general.managers 12.26 25879 4.02 69.1 1130 prof
## accountants 12.77 9271 15.70 63.4 1171 prof
## purchasing.officers 11.42 8865 9.11 56.8 1175 prof
## chemists 14.62 8403 11.68 73.5 2111 prof
## physicists 15.64 11030 5.13 77.6 2113 prof
# Preview the first six rows of the renamed data frame.
head(data, n = 6L)
## educ ing mujeres prestigio census tipo
## gov.administrators 13.11 12351 11.16 68.8 1113 prof
## general.managers 12.26 25879 4.02 69.1 1130 prof
## accountants 12.77 9271 15.70 63.4 1171 prof
## purchasing.officers 11.42 8865 9.11 56.8 1175 prof
## chemists 14.62 8403 11.68 73.5 2111 prof
## physicists 15.64 11030 5.13 77.6 2113 prof
library(Hmisc)
## Loading required package: lattice
## Loading required package: survival
## Loading required package: Formula
## Loading required package: ggplot2
##
## Attaching package: 'Hmisc'
## The following objects are masked from 'package:plyr':
##
## is.discrete, summarize
## The following objects are masked from 'package:base':
##
## format.pval, units
# Attach a descriptive Hmisc label to each renamed column. The label texts are
# kept in one named vector (name = column, value = label text) and applied in a
# loop so every column is handled the same way.
etiquetas <- c(
  educ = "Promedio de años de estudio del trabajador nombrado",
  ing = "Promedio de ingreso en dolares del nombrado ",
  mujeres = "Porcentajes de mujeres nombradas ",
  prestigio = "Valoración de prestigio de Pineo-Poter",
  tipo = "Tipo de ocupación en 3 niveles"
)
for (columna in names(etiquetas)) {
  label(data[[columna]]) <- etiquetas[[columna]]
}
head(data)
## educ ing mujeres prestigio census tipo
## gov.administrators 13.11 12351 11.16 68.8 1113 prof
## general.managers 12.26 25879 4.02 69.1 1130 prof
## accountants 12.77 9271 15.70 63.4 1171 prof
## purchasing.officers 11.42 8865 9.11 56.8 1175 prof
## chemists 14.62 8403 11.68 73.5 2111 prof
## physicists 15.64 11030 5.13 77.6 2113 prof
# Recode the occupation type into a new factor column with the codes 1/2/3.
# The levels are listed explicitly so the mapping bc -> "1", prof -> "2",
# wc -> "3" is stated by the code itself instead of relying on the default
# level order; factor() ignores any names attached to the `labels` vector.
data$Tipo <- factor(
  data$tipo,
  levels = c("bc", "prof", "wc"),
  labels = c("1", "2", "3")
)
head(data)
## educ ing mujeres prestigio census tipo Tipo
## gov.administrators 13.11 12351 11.16 68.8 1113 prof 2
## general.managers 12.26 25879 4.02 69.1 1130 prof 2
## accountants 12.77 9271 15.70 63.4 1171 prof 2
## purchasing.officers 11.42 8865 9.11 56.8 1175 prof 2
## chemists 14.62 8403 11.68 73.5 2111 prof 2
## physicists 15.64 11030 5.13 77.6 2113 prof 2
# Swap the numeric codes for descriptive Spanish occupation names.
# The labels are applied by position to the existing levels of data$Tipo.
nombres_tipo <- c("Cuello azul", "Profesional", "Cuello blanco")
data$Tipo <- factor(data$Tipo, labels = nombres_tipo)
head(data)
## educ ing mujeres prestigio census tipo Tipo
## gov.administrators 13.11 12351 11.16 68.8 1113 prof Profesional
## general.managers 12.26 25879 4.02 69.1 1130 prof Profesional
## accountants 12.77 9271 15.70 63.4 1171 prof Profesional
## purchasing.officers 11.42 8865 9.11 56.8 1175 prof Profesional
## chemists 14.62 8403 11.68 73.5 2111 prof Profesional
## physicists 15.64 11030 5.13 77.6 2113 prof Profesional
# Pie chart of the occupation categories in data$Tipo.
# The slices are sized by the actual category frequencies. Tabulating the
# percentage strings instead would count each distinct string once and draw
# three equal slices.
frecuencias <- table(data$Tipo)
# Share of each category, in percent, rounded to two decimals.
porcentaje <- round((frecuencias / sum(frecuencias)) * 100, 2)
# Slice labels such as "12.34 %", in the same order as the factor levels.
repartcion <- paste(porcentaje, "%")
color <- c("#0000FF", "#FF00FF", "#F3F781")
pie(frecuencias, labels = repartcion, radius = 1, col = color)
title("GRAFICO CIRCULAR PARA LA VARIABLE TIPO")
# Slices follow the level order of data$Tipo, so the legend takes its entries
# from the levels and each colour stays paired with its category.
legend("topright", legend = levels(data$Tipo), fill = color, cex = 0.6)
# Box plot of the `mujeres` column, with outlying points drawn.
# TRUE is spelled out because `T` is an ordinary variable that can be reassigned.
boxplot(
  data$mujeres,
  main = "DIAGRAMA DE CAJAS DE LA VARIABLE MUJERES",
  outline = TRUE,
  col = "pink"
)
# Density-scaled histograms of `educ` and `ing`, each overlaid with the normal
# curve that has the sample mean and standard deviation of that column.
# TRUE is spelled out because `T` is an ordinary variable that can be reassigned.
hist(
  data$educ,
  probability = TRUE,
  main = "HISTOGRAMA DE LA VARIABLE EDUCACION",
  xlab = "AÑOS PROMEDIO DE EDUCACION"
)
curve(
  dnorm(x, mean = mean(data$educ), sd = sd(data$educ)),
  col = "red",
  add = TRUE
)
hist(
  data$ing,
  probability = TRUE,
  main = "HISTOGRAMA DE LA VARIABLE INGRESO",
  xlab = "INGRESO PROMEDIO"
)
curve(
  dnorm(x, mean = mean(data$ing), sd = sd(data$ing)),
  col = "green",
  add = TRUE
)
# Scatter plot of prestige against income, drawn first on its own ...
plot(x = data$ing, y = data$prestigio, main = "DIAGRAMA DE DISPERCION")
# ... and then drawn again with the fitted least-squares line added in red.
plot(x = data$ing, y = data$prestigio, main = "DIAGRAMA DE DISPERCION")
ajuste <- lm(prestigio ~ ing, data = data)
abline(ajuste, col = "red")
# Scatter-plot matrix of all columns of the original Prestige data set.
pairs(Prestige)
library(ggplot2)
# Density-scaled ggplot histogram of `educ` using bins of width 1.
# after_stat(density) replaces the deprecated `..density..` notation.
p1 <- ggplot(data, aes(educ)) +
  geom_histogram(
    aes(y = after_stat(density)),
    color = "red",
    binwidth = 1,
    fill = "blue",
    alpha = 0.4
  ) +
  theme_get()
p2 <- p1 +
  xlab("AÑOS DE EDUCAION PORMEDIO") +
  ggtitle("HISTOGRAMA DE LA VARIABLE EDUCACION")
# Overlay the normal density with the sample mean and standard deviation of
# `educ`; the combined plot is printed without being stored.
p2 +
  stat_function(
    fun = dnorm,
    args = list(mean = mean(data$educ), sd = sd(data$educ)),
    col = "red"
  )
# Density-scaled ggplot histogram of `ing` using bins of width 2000.
# after_stat(density) replaces the deprecated `..density..` notation.
p1 <- ggplot(data, aes(ing)) +
  geom_histogram(
    aes(y = after_stat(density)),
    color = "#FACC2E",
    binwidth = 2000,
    fill = "#00FFFF",
    alpha = 0.4
  ) +
  theme_get()
p2 <- p1 +
  xlab("INGRESOS PROMEDIO") +
  ggtitle("HISTOGRAMA DE LA VARIABLE INGRESO")
# Overlay the normal density with the sample mean and standard deviation of
# `ing`, keep the result in p3 and print it.
p3 <- p2 +
  stat_function(
    fun = dnorm,
    args = list(mean = mean(data$ing), sd = sd(data$ing)),
    col = "#A4A4A4"
  )
p3
## Don't know how to automatically pick scale for object of type labelled/integer. Defaulting to continuous.
library(scales) # se carga antes esta libreria para los porcentajes de el diagrama circular
# Hand-entered frequency table: one row per occupation group with its count.
# The group names keep their trailing space exactly as entered.
df <- data.frame(
  grupo = c("Cuello azul ", "Profesional ", "Cuello blanco "),
  cantidades = c(44, 31, 23)
)
df
## grupo cantidades
## 1 Cuello azul 44
## 2 Profesional 31
## 3 Cuello blanco 23
# Single stacked bar of the group counts; it is printed on its own first.
bp <- ggplot(df, aes(x = "", y = cantidades, fill = grupo)) +
  geom_bar(width = 1, stat = "identity")
bp
# Mapping y onto polar coordinates turns the stacked bar into a pie chart.
pie <- bp + coord_polar("y", start = 0)
aa <- pie +
  scale_fill_brewer(palette = "Blues") +
  theme_minimal() +
  ggtitle("grafico circular de la varianle tipo")
# Label each slice with its share of the total. The counts add up to 98, not
# 100, so the share is count / sum(counts) rather than count / 100.
aa +
  geom_text(
    aes(label = percent(cantidades / sum(cantidades), accuracy = 0.1)),
    position = position_stack(vjust = 0.5)
  )
# Scatter plot of `prestigio` against `ing` with a linear-model smoother:
# red dashed line (lty = 2) of width 2.5 and a blue fill for its band.
ggplot(data, aes(ing, prestigio)) +
  geom_point() +
  geom_smooth(
    method = "lm",
    formula = "y~x",
    fill = "#0040FF",
    colour = "#FF0000",
    lty = 2,
    lwd = 2.5
  ) +
  ggtitle("REGRESION LINEAL")
## Don't know how to automatically pick scale for object of type labelled/integer. Defaulting to continuous.