##activar la libreria car

library(car)
## Loading required package: carData

Seleccionar la base de datos Prestige

# Work on a copy of the Prestige data set so that the renaming and recoding
# steps below do not touch the original object.
data <- Prestige

# Preview the first six rows (six is the default for head()).
head(data, n = 6L)
##                     education income women prestige census type
## gov.administrators      13.11  12351 11.16     68.8   1113 prof
## general.managers        12.26  25879  4.02     69.1   1130 prof
## accountants             12.77   9271 15.70     63.4   1171 prof
## purchasing.officers     11.42   8865  9.11     56.8   1175 prof
## chemists                14.62   8403 11.68     73.5   2111 prof
## physicists              15.64  11030  5.13     77.6   2113 prof

Cambio de nombre de las variables

# Rename the Prestige columns to Spanish names with plyr::rename(), which takes
# a named vector of the form c("old_name" = "new_name").
#
# The previous mapping also contained "code" = "cod". The data frame has no
# `code` column (its fifth column is `census`), so that entry renamed nothing
# and only triggered a "`from` values were not present" message. The dead
# entry is removed here; `census` keeps its name, exactly as before.
library(plyr)
data <- rename(data, c("education" = "educ", "income" = "ing",
                       "women" = "mujeres", "prestige" = "prestigio",
                       "type" = "tipo"))
head(data)
##                      educ   ing mujeres prestigio census tipo
## gov.administrators  13.11 12351   11.16      68.8   1113 prof
## general.managers    12.26 25879    4.02      69.1   1130 prof
## accountants         12.77  9271   15.70      63.4   1171 prof
## purchasing.officers 11.42  8865    9.11      56.8   1175 prof
## chemists            14.62  8403   11.68      73.5   2111 prof
## physicists          15.64 11030    5.13      77.6   2113 prof

visualizar los datos

# Preview the first rows of `data` after the columns have been renamed.
head(data)
##                      educ   ing mujeres prestigio census tipo
## gov.administrators  13.11 12351   11.16      68.8   1113 prof
## general.managers    12.26 25879    4.02      69.1   1130 prof
## accountants         12.77  9271   15.70      63.4   1171 prof
## purchasing.officers 11.42  8865    9.11      56.8   1175 prof
## chemists            14.62  8403   11.68      73.5   2111 prof
## physicists          15.64 11030    5.13      77.6   2113 prof

Crear las etiquetas de las variables

Primero, cargar la librería Hmisc

library(Hmisc)
## Loading required package: lattice
## Loading required package: survival
## Loading required package: Formula
## Loading required package: ggplot2
## 
## Attaching package: 'Hmisc'
## The following objects are masked from 'package:plyr':
## 
##     is.discrete, summarize
## The following objects are masked from 'package:base':
## 
##     format.pval, units
# Attach a descriptive Hmisc label to each renamed column.
# The labels are kept in one named vector (column name -> label text) and
# applied in a loop, instead of one label() assignment per column.
etiquetas <- c(
  educ      = "Promedio de años de estudio del trabajador nombrado",
  ing       = "Promedio de ingreso en dolares del nombrado ",
  mujeres   = "Porcentajes de mujeres nombradas ",
  prestigio = "Valoración de prestigio de Pineo-Poter",
  tipo      = "Tipo de ocupación en 3 niveles"
)
for (nombre in names(etiquetas)) {
  label(data[[nombre]]) <- etiquetas[[nombre]]
}

# Labels are stored as attributes, so the printed values are unchanged.
head(data)
##                      educ   ing mujeres prestigio census tipo
## gov.administrators  13.11 12351   11.16      68.8   1113 prof
## general.managers    12.26 25879    4.02      69.1   1130 prof
## accountants         12.77  9271   15.70      63.4   1171 prof
## purchasing.officers 11.42  8865    9.11      56.8   1175 prof
## chemists            14.62  8403   11.68      73.5   2111 prof
## physicists          15.64 11030    5.13      77.6   2113 prof

Recodificación de la variable tipo (códigos numéricos)

# Recode `tipo` into a new factor `Tipo` whose levels are the codes "1".."3".
#
# factor() ignores any names given to `labels`; labels are matched to `levels`
# purely by position. The earlier call relied on that implicitly (and one of
# its names, "wx", was a typo for "wc"), so the mapping is made explicit:
#   bc -> "1", prof -> "2", wc -> "3"
data$Tipo <- factor(data$tipo,
                    levels = c("bc", "prof", "wc"),
                    labels = c("1", "2", "3"))
head(data)
##                      educ   ing mujeres prestigio census tipo Tipo
## gov.administrators  13.11 12351   11.16      68.8   1113 prof    2
## general.managers    12.26 25879    4.02      69.1   1130 prof    2
## accountants         12.77  9271   15.70      63.4   1171 prof    2
## purchasing.officers 11.42  8865    9.11      56.8   1175 prof    2
## chemists            14.62  8403   11.68      73.5   2111 prof    2
## physicists          15.64 11030    5.13      77.6   2113 prof    2

Recodificación de la variable Tipo (etiquetas descriptivas)

# Replace the numeric codes of `Tipo` with descriptive labels.
#
# factor() matches `labels` to `levels` by position and ignores names on the
# `labels` vector, so the code -> label mapping is stated through `levels`:
#   "1" -> Cuello azul, "2" -> Profesional, "3" -> Cuello blanco
data$Tipo <- factor(data$Tipo,
                    levels = c("1", "2", "3"),
                    labels = c("Cuello azul", "Profesional", "Cuello blanco"))
head(data)
##                      educ   ing mujeres prestigio census tipo        Tipo
## gov.administrators  13.11 12351   11.16      68.8   1113 prof Profesional
## general.managers    12.26 25879    4.02      69.1   1130 prof Profesional
## accountants         12.77  9271   15.70      63.4   1171 prof Profesional
## purchasing.officers 11.42  8865    9.11      56.8   1175 prof Profesional
## chemists            14.62  8403   11.68      73.5   2111 prof Profesional
## physicists          15.64 11030    5.13      77.6   2113 prof Profesional

grafico circular

# Pie chart of the distribution of `Tipo`.
#
# `porcentaje` holds the percentage of rows in each level (NA rows are left
# out by table()), and `repartcion` the matching "<value> %" slice labels.
porcentaje <- round(prop.table(table(data$Tipo)) * 100, 2)
repartcion <- paste(porcentaje, "%")
color <- c("#0000FF", "#FF00FF", "#F3F781")

# Bug fix: the chart used to be drawn from table(repartcion), i.e. a frequency
# table of the label strings. Each string occurs once, so every slice had the
# same size. The slices are now sized by the percentages themselves and the
# strings are used only as labels.
pie(porcentaje, labels = repartcion, radius = 1, col = color)
title("GRAFICO CIRCULAR PARA LA VARIABLE TIPO")

# Legend entries come from the factor levels, so the colour/category pairing
# always follows the slice order instead of a hand-typed list.
legend("topright", legend = names(porcentaje), fill = color, cex = 0.6)

diagrama de cajas

# Box plot of the `mujeres` column, with outlying points drawn.
# `TRUE` is spelled out: `T` is an ordinary variable that can be reassigned.
boxplot(data$mujeres, main = "DIAGRAMA DE CAJAS DE LA VARIABLE MUJERES",
        outline = TRUE, col = "pink")

Histogramas de las variables educación e ingreso

# Density-scale histograms of `educ` and `ing`, each overlaid with the normal
# density that has the same mean and standard deviation as the sample.
# `TRUE` replaces `T`, which is a reassignable variable rather than a constant.

# Education.
hist(data$educ, probability = TRUE,
     main = "HISTOGRAMA DE LA VARIABLE EDUCACION",
     xlab = "AÑOS PROMEDIO DE EDUCACION")
curve(dnorm(x, mean = mean(data$educ), sd = sd(data$educ)),
      col = "red", add = TRUE)

# Income.
hist(data$ing, probability = TRUE,
     main = "HISTOGRAMA DE LA VARIABLE INGRESO",
     xlab = "INGRESO PROMEDIO")
curve(dnorm(x, mean = mean(data$ing), sd = sd(data$ing)),
      col = "green", add = TRUE)

grafico de dispersion y linea de tendencia

# First figure: plain scatter plot of prestige against income.
plot(x = data$ing, y = data$prestigio, main = "DIAGRAMA DE DISPERCION")

# Second figure: the same scatter plot with the least-squares line of
# prestigio on ing drawn on top in red.
plot(x = data$ing, y = data$prestigio, main = "DIAGRAMA DE DISPERCION")
modelo_lineal <- lm(prestigio ~ ing, data = data)
abline(modelo_lineal, col = "red")

Matriz de dispersión de todas las variables

# Scatter-plot matrix of every pair of columns.
# NOTE(review): this is drawn from the original `Prestige` object, not from
# the renamed/recoded copy `data`, so the panels show the English column
# names -- confirm that this is intended.
pairs(Prestige)

GRAFICAS EN GGPLOT2

cargar la libreria ggplot2

library(ggplot2)

histogramas

# ggplot2 versions of the two histograms: bars on the density scale with the
# matching normal density curve drawn on top.
#
# after_stat(density) replaces the deprecated `..density..` notation for
# referring to a variable computed by the stat.

# Education: bins one year wide.
p1 <- ggplot(data, aes(educ)) +
  geom_histogram(aes(y = after_stat(density)),
                 color = "red", binwidth = 1, fill = "blue", alpha = 0.4) +
  theme_get()
p2 <- p1 +
  xlab("AÑOS DE EDUCAION PORMEDIO") +
  ggtitle("HISTOGRAMA DE LA VARIABLE EDUCACION")
p2 + stat_function(fun = dnorm,
                   args = list(mean = mean(data$educ), sd = sd(data$educ)),
                   col = "red")

# Income: bins 2000 units wide.
p1 <- ggplot(data, aes(ing)) +
  geom_histogram(aes(y = after_stat(density)),
                 color = "#FACC2E", binwidth = 2000, fill = "#00FFFF",
                 alpha = 0.4) +
  theme_get()
p2 <- p1 +
  xlab("INGRESOS PROMEDIO") +
  ggtitle("HISTOGRAMA DE LA VARIABLE INGRESO")
p3 <- p2 + stat_function(fun = dnorm,
                         args = list(mean = mean(data$ing), sd = sd(data$ing)),
                         col = "#A4A4A4")

p3
## Don't know how to automatically pick scale for object of type labelled/integer. Defaulting to continuous.

diagrama circular

library(scales) # se carga antes esta libreria para los porcentajes de el diagrama circular  

# Hand-entered summary table for the ggplot2 pie chart: one row per
# occupation group with its count.
df <- data.frame(
  grupo = c("Cuello azul ", "Profesional ", "Cuello blanco "),
  cantidades = c(44, 31, 23)
)
df
##            grupo cantidades
## 1   Cuello azul          44
## 2   Profesional          31
## 3 Cuello blanco          23
# Build the pie chart in ggplot2: a single stacked bar, then wrapped into
# polar coordinates.
bp <- ggplot(df, aes(x = "", y = cantidades, fill = grupo)) +
  geom_bar(width = 1, stat = "identity")
bp

pie <- bp + coord_polar("y", start = 0)
aa <- pie +
  scale_fill_brewer(palette = "Blues") +
  theme_minimal() +
  ggtitle("grafico circular de la varianle tipo")

# Bug fix: the slice labels used percent(cantidades / 100), which treats the
# raw counts as if they were percentages. The counts add up to 98, so the
# labels did not add up to 100 %. Each count is now divided by the total.
aa + geom_text(
  aes(label = percent(cantidades / sum(cantidades), accuracy = 0.1)),
  position = position_stack(vjust = 0.5)
)

Diagrama de dispersión

# Scatter plot of prestige against income with a fitted linear-regression
# line (dashed red) and its confidence band (blue fill).
ggplot(data, aes(x = ing, y = prestigio)) +
  geom_point() +
  geom_smooth(
    method = "lm", formula = "y~x",
    fill = "#0040FF", colour = "#FF0000",
    lty = 2, lwd = 2.5
  ) +
  ggtitle("REGRESION LINEAL")
## Don't know how to automatically pick scale for object of type labelled/integer. Defaulting to continuous.