# Manipulating data (LAPOP Dataset - BRAZIL) ----
# Read the Brazil 2019 AmericasBarometer (LAPOP) Stata file into `databr`.
# Both factor-related switches are turned on so that labelled Stata variables
# come in as R factors (see the readstata13 documentation for the exact rules).
databr <- readstata13::read.dta13(
  file = "~/Dropbox/PIBIC2021/data/Brazil_LAPOP_AmericasBarometer2019.dta",
  generate.factors = TRUE,
  nonint.factors = TRUE
)

# Number of respondents per state (`prov`).
table(databr$prov)
# Regional 0/1 dummies built from the respondent's state (`prov`).

# Returns 1 when `prov` is one of `states`, 0 when it is any other state and
# NA when `prov` itself is missing (same NA behaviour as chained `==` tests).
region_dummy <- function(prov, states) {
  dummy <- as.numeric(prov %in% states)
  dummy[is.na(prov)] <- NA
  dummy
}

# North. "Tocantins" is included so the list covers every state of the region;
# it has no effect if that state is absent from the sample.
databr$NORTE <- region_dummy(
  databr$prov,
  c("Rondônia", "Acre", "Amazonas", "Roraima", "Pará", "Amapá", "Tocantins")
)

# Northeast. "Piauí" completes the region, and Paraíba is matched with and
# without the accent because every other label in this file is accented.
databr$NORDESTE <- region_dummy(
  databr$prov,
  c(
    "Maranhão", "Piauí", "Ceará", "Rio Grande do Norte",
    "Paraíba", "Paraiba", "Pernambuco", "Alagoas", "Sergipe", "Bahia"
  )
)

# Centre-West.
databr$CENTRO_OESTE <- region_dummy(
  databr$prov,
  c("Mato Grosso do Sul", "Mato Grosso", "Goiás", "Distrito Federal")
)

# Southeast.
# NOTE(review): Paraná, Santa Catarina and Rio Grande do Sul belong to the
# South region; they are kept here to preserve the existing coding (possibly a
# deliberate South + Southeast category) - confirm and rename or split.
databr$SUDESTE <- region_dummy(
  databr$prov,
  c(
    "Minas Gerais", "Espírito Santo", "Rio de Janeiro", "São Paulo",
    "Paraná", "Santa Catarina", "Rio Grande do Sul"
  )
)

# Check how many respondents fall in each region.
table(databr$NORTE)
table(databr$NORDESTE)
table(databr$CENTRO_OESTE)
table(databr$SUDESTE)
# Gender dummies from q1: `mulher` is 1 for respondents coded "Mujer",
# `homem` is 1 for everyone else; a missing q1 stays NA in both.
databr$mulher <- ifelse(databr$q1 == "Mujer", 1, 0)
databr$homem <- ifelse(databr$q1 != "Mujer", 1, 0)
table(databr$mulher)
table(databr$homem)

# `proguns` is 1 when arm2 was answered "Sí" and 0 for any other answer.
databr$proguns <- ifelse(databr$arm2 == "Sí", 1, 0)
table(databr$proguns)

# Shift the underlying codes of w14a down by one so the first category is 0.
# NOTE(review): presumably w14a is a two-level factor, giving a 0/1 dummy -
# confirm against the frequency table printed just below.
table(databr$w14a)
databr$antiabortion <- -1 + as.numeric(databr$w14a)
table(databr$antiabortion)

# Small stand-alone example of car::recode(): values 1 and 2 are mapped to
# "A", value 3 to "B".
x <- rep(1:3, times = 10)
x
car::recode(x, recodes = " 1:2='A'; 3='B' ")
# Reverse Coding Variables ----
# Reverse-code capital1: underlying code 1 becomes 1 and code 2 becomes 0.
# NOTE(review): assumes capital1 has exactly two categories - confirm with the
# table printed first.
table(databr$capital1)
databr$pro_penamorte <- 2 - as.numeric(databr$capital1)
table(databr$pro_penamorte)

# Reverse-code m1: underlying code 1 becomes 5, code 2 becomes 4, and so on.
# NOTE(review): assumes m1 is a five-point item - confirm with the table.
table(databr$m1)
databr$evaljair <- 6 - as.numeric(databr$m1)
table(databr$evaljair)

# Numeric religion code from q3cn: 1, 2 and 3 for the three categories listed
# below (in that order), 0 for any other answer, NA when q3cn is missing.
databr$relig <- as.numeric(match(
  databr$q3cn,
  c(
    "Católico",
    "Evangélica y Pentecostal",
    "Agnóstico o ateo (no cree en Dios)"
  )
))
databr$relig[is.na(databr$relig) & !is.na(databr$q3cn)] <- 0
table(databr$relig)
# Cleaning NAs ----
# Inspect the answer categories of the vote variable before recoding.
levels(databr$vb3n)

# Work on a copy of vb3n and treat blank and spoiled ballots as missing.
databr$votebr18 <- databr$vb3n
databr$votebr18[databr$votebr18 %in% c(
  "Ninguno (fue a votar pero dejó la boleta en blanco)",
  "Ninguno (anuló su voto)"
)] <- NA
table(databr$votebr18)

# Collapse to three numeric codes: 3 = Bolsonaro, 2 = Haddad, 1 = any other
# remaining answer. Missing values stay NA.
databr$votebr18 <- ifelse(
  databr$votebr18 == "Fernando Haddad (PT)",
  2,
  ifelse(databr$votebr18 == "Jair Bolsonaro (PSL)", 3, 1)
)
table(databr$votebr18)
class(databr$votebr18)

# Turn the numeric codes into a labelled factor.
databr$votebr18 <- factor(
  databr$votebr18,
  levels = 1:3,
  labels = c("Outros Candidatos", "Haddad", "Bolsonaro")
)
table(databr$votebr18)
class(databr$votebr18)

# Same treatment for religion: only codes 1-3 are declared as levels, so any
# other code (e.g. 0) becomes NA in the resulting factor.
table(databr$relig)
databr$relig <- factor(
  databr$relig,
  levels = 1:3,
  labels = c("Católicos", "Evangélicos", "Ateus")
)
class(databr$relig)
table(databr$relig)
# Subsetting Voters and Other Groups ----
# Ideological self-placement (l1) recoded to a numeric 1-10 scale, where
# 1 = "Izquierda" (left) and 10 = "Derecha" (right).
table(databr$l1)
databr$ideologia <- databr$l1
levels(databr$ideologia)
databr$ideologia <- factor(
  databr$ideologia,
  levels = c("Izquierda", "2", "3", "4", "5", "6", "7", "8", "9", "Derecha"),
  labels = 1:10
)
table(databr$ideologia)
class(databr$ideologia)
databr$ideologia <- as.numeric(databr$ideologia)
psych::describe(databr$ideologia)

# Because low values are the LEFT end of the scale, left-leaning respondents
# are those below 5 and right-leaning respondents those above 5. Respondents
# at exactly 5, or with a missing answer, are in neither group.
# (The subsets are taken before the dummy columns are added, so they do not
# carry those columns.)
leftideo <- subset(databr, ideologia < 5)
rightideo <- subset(databr, ideologia > 5)

# The same split as 0/1 dummies on the full data set (NA when ideologia is NA).
databr$leftideo <- ifelse(databr$ideologia < 5, 1, 0)
databr$rightideo <- ifelse(databr$ideologia > 5, 1, 0)

# Subsets selected through the dummies; both keep the rows flagged with 1.
# Inside subset() the names resolve to the columns created just above.
leftideo2 <- subset(databr, leftideo == 1)
rightideo2 <- subset(databr, rightideo == 1)
# Creating an Index ----
# Child-rearing items: each dummy is 1 when the respondent picked the answer
# named in the comparison and 0 for any other answer (NA stays NA).
databr$child1 <- ifelse(databr$ab1 != "Respeto a los mayores", 0, 1)
databr$child2 <- ifelse(databr$ab2 != "Obediencia", 0, 1)
databr$child3 <- ifelse(databr$ab5 != "Disciplina", 0, 1)
psych::describe(databr$child1)
psych::describe(databr$child2)
psych::describe(databr$child3)

# Additive index: number of the three items answered that way (0 to 3).
# A missing value on any item makes the index missing.
databr$childrearing <- with(databr, child1 + child2 + child3)
table(databr$childrearing)

# Approval items d5 and d6: convert the labelled 10-point scale to 1-10,
# with "Desaprueba firmemente" = 1 and "Aprueba firmemente" = 10.
# Answers outside these ten categories become NA.
table(databr$d5)
table(databr$d6)
databr$gay1 <- as.numeric(match(
  databr$d5,
  c("Desaprueba firmemente", as.character(2:9), "Aprueba firmemente")
))
databr$gay2 <- as.numeric(match(
  databr$d6,
  c("Desaprueba firmemente", as.character(2:9), "Aprueba firmemente")
))

# Index = simple average of the two items.
databr$gayrights <- (databr$gay1 + databr$gay2) / 2
table(databr$gayrights)
# Dealing with Missing Values ----
# Every atomic type has its own NA constant; print the storage type of each
# one, ending with the plain (logical) NA.
invisible(lapply(
  list(NA_real_, NA_integer_, NA_character_, NA_complex_, NA),
  function(na_value) print(typeof(na_value))
))

# Example integer vector with two missing entries.
x <- c(seq_len(4), NA, 6:7, NA)
x

# Element-wise test, its frequency table, and the two summary questions:
# is anything missing? is everything missing?
is.na(x)
table(is.na(x))
anyNA(x)
all(is.na(x))
# Add a column that is missing for every respondent, then confirm that it
# contains at least one NA and that it contains nothing but NA.
databr[["x"]] <- NA
anyNA(databr$x)
all(is.na(databr$x))
# Toy data frame with missing values in three of its four columns.
df <- data.frame(
  col1 = c(1L, 2L, 3L, NA),
  col2 = c("this", NA, "is", "text"),
  col3 = c(TRUE, FALSE, TRUE, TRUE),
  col4 = c(2.5, 4.2, 3.2, NA),
  stringsAsFactors = FALSE
)

# Listwise deletion: keep only the rows without any NA.
df_noNA <- na.omit(df)
df_noNA
table(is.na(df_noNA))

# Toy data frame in which 99 is used as a missing-value code.
df2 <- data.frame(
  col1 = c(1, 2, 3, 99),
  col2 = c(1, 4.2, 99, 3.2)
)
df2

# Recode 99 to NA in a single column ...
df2$col1 <- replace(df2$col1, df2$col1 == 99, NA)
df2

# ... or in every column of the data frame at once.
df2[df2 == 99] <- NA
df2

# The opposite direction: replace the NAs of one column with 0.
df2$col2 <- replace(df2$col2, is.na(df2$col2), 0)
df2

# Third toy data frame, used to illustrate complete.cases().
df3 <- data.frame(
  col1 = c(1, 2, 3, 0),
  col2 = c("this", NA, "is", "text"),
  col3 = c(TRUE, FALSE, TRUE, TRUE),
  col4 = c(2.5, 4.2, 3.2, NA),
  stringsAsFactors = FALSE
)
df3

# Logical flag per row (TRUE = no missing value), followed by the complete
# rows and then the incomplete rows.
complete.cases(df3)
df3[complete.cases(df3), ]
df3[!complete.cases(df3), ]
# `!is.na(v)` flags the observed (non-missing) entries of `v`. is.na() needs
# an argument - calling it empty is an error that would abort the script - so
# the idiom is applied to the ideology scale: TRUE counts respondents with an
# answer, FALSE those without one.
table(!is.na(databr$ideologia))

# Frequency table of the scale that also reports the number of NAs.
table(databr$ideologia, useNA = "always")