Example #1: Which languages have the greatest number of linguistically isolated households?
STEP 1: Import Pre-Cleaned Data
# Read the pre-cleaned household-language CSV directly from GitHub;
# the first row of the file supplies the column names.
hhl <- read.csv(
  file = "https://raw.githubusercontent.com/kitadasmalley/languageExplorer/main/data/yearHHL_Ex.csv",
  header = TRUE
)
## Warning in scan(file = file, what = what, sep = sep, quote = quote, dec = dec, :
## embedded nul(s) found in input
STEP 2: Import Tidyverse
# Install if needed
#install.packages("tidyverse")
library(tidyverse)
STEP 3: Add Labels
# LABELS
# Lookup tables that translate numeric codes into numbered display labels.

# Response mode codes (RESMODE)
modeLabs <- data.frame(
  RESMODE = c(1, 2, 3),
  resmodeLab = c("1. Mail", "2. CATI/CAPI", "3. Internet")
)

# Household language codes (HHL), plus a per-language jitter offset
langLabs <- data.frame(
  HHL = c(1, 2, 3, 4, 5),
  hhlLab = c(
    "1. English",
    "2. Spanish",
    "3. Indo European",
    "4. Asian / Pacific Islander",
    "5. Other"
  ),
  jitter = c(-.2, -.1, 0, .1, .2)
)

# English proficiency codes (LNGI); note the data frame and its label
# column share the name lngiLab
lngiLab <- data.frame(
  LNGI = c(1, 2),
  lngiLab = c("1. English Proficient", "2. Limited English")
)

# Drop the English rows (HHL code 1), then attach the language and
# proficiency labels; the join keys are the columns the tables share.
hhl <- hhl %>%
  filter(HHL != 1) %>%
  left_join(y = langLabs) %>%
  left_join(y = lngiLab)
## Joining, by = "HHL"
## Joining, by = "LNGI"
STEP 4: Create barchart: Distribution of Household Language (HHL) and English Proficiency
# Stacked horizontal bars of weightHouse for each household language,
# coloured by English proficiency, with one panel row per YEAR.
ggplot(
  data = hhl,
  mapping = aes(
    x = reorder(hhlLab, desc(hhlLab)), # reverse label order for the flipped axis
    y = weightHouse,
    fill = lngiLab
  )
) +
  geom_col() + # bar heights come straight from weightHouse
  facet_grid(rows = vars(YEAR)) +
  scale_fill_discrete(name = "Proficiency") +
  scale_y_continuous(labels = scales::comma) +
  labs(x = "", y = "Number of Households") +
  theme_minimal() +
  coord_flip()

STEP 5: Create barchart for conditional distribution
### Conditional Distributions
# Share of households at each English-proficiency level within every
# household language, one panel row per YEAR.
#
# `hhl` was already restricted to HHL != 1 and joined to langLabs and
# lngiLab in STEP 3, so it is plotted directly: repeating the filter and
# joins here changed nothing and only emitted extra "Joining, by" messages.
ggplot(
  hhl,
  aes(x = reorder(hhlLab, desc(hhlLab)), y = weightHouse, fill = lngiLab)
) +
  # position = "fill" rescales every bar to a total height of 1
  geom_bar(stat = "identity", position = "fill") +
  facet_grid(YEAR ~ .) +
  scale_fill_discrete("Proficiency") +
  xlab("") +
  # the bars are proportions rather than counts, so label and format the
  # axis accordingly
  ylab("Proportion of Households") +
  scale_y_continuous(labels = scales::percent) +
  theme_minimal() +
  coord_flip()
## Joining, by = c("HHL", "hhlLab", "jitter")
## Joining, by = c("LNGI", "lngiLab")

Example #2: How are linguistically isolated households interviewed?
STEP 1: Import Pre-Cleaned and Prepared Data
# Read the prepared conditional-distribution table from GitHub.
lang.prop2 <- read.csv(
  file = "https://raw.githubusercontent.com/kitadasmalley/languageExplorer/main/data/condDistrLang.csv",
  header = TRUE
)

# Fix the display order of the household-language labels; any value not
# listed in `levels` becomes NA.
lang.prop2$hhlLab <- factor(
  lang.prop2$hhlLab,
  levels = c(
    "English",
    "Spanish",
    "Indo European",
    "Asian / Pacific Islander"
  )
)

# Swap the resmodeLab column shipped in the CSV for the numbered labels
# in modeLabs.
lang.prop22 <- lang.prop2 %>%
  select(-resmodeLab) %>%
  left_join(y = modeLabs)
## Joining, by = "RESMODE"
#str(lang.prop22)
STEP 2: Conditional Distribution for Modes
# Side-by-side bars of sampProp by YEAR, one bar per response mode, in a
# grid of panels: proficiency (rows) by household language (columns).
ggplot(
  data = lang.prop22,
  mapping = aes(x = YEAR, y = sampProp, fill = resmodeLab)
) +
  geom_col(position = position_dodge()) +
  facet_grid(rows = vars(lngiLab), cols = vars(hhlLab)) +
  scale_fill_discrete(name = "Response Mode: ") +
  scale_x_continuous(breaks = seq(from = 2007, to = 2017, by = 4)) +
  labs(y = "Conditional Probability") +
  theme_bw() +
  theme(legend.position = "bottom") # must follow theme_bw() to take effect

Example #3: How complete are the data records that are collected from linguistically isolated households?
STEP 1: Import Data from Statistical Models
# Read the model output table from GitHub; the header row supplies the
# column names.
allRHL <- read.csv(
  file = "https://raw.githubusercontent.com/kitadasmalley/languageExplorer/main/data/meanRHLall.csv",
  header = TRUE
)
STEP 3: Timeseries Plot with 95% confidence intervals
# Time series of `rate` against `yearJ`, coloured by response mode, with
# line type and point shape showing proficiency and one panel per
# household language. Error bars span rate +/- 1.96 * se.
# NOTE(review): `allRHL2` is not created anywhere in the visible source
# (only `allRHL` is read in STEP 1) -- confirm the step that derives it,
# including the `yearJ` column, has been run before this plot.
ggplot(
  data = allRHL2,
  mapping = aes(x = yearJ, y = rate, color = as.factor(resmodeLab))
) +
  geom_line(aes(linetype = lngiLab), alpha = .5) +
  geom_point(aes(shape = lngiLab), size = 3) +
  #geom_pointrange(aes(ymin=rate-1.96*se, ymax=rate+1.96*se))+
  geom_errorbar(
    aes(ymin = rate - 1.96 * se, ymax = rate + 1.96 * se),
    alpha = .3,
    size = .5,
    width = .5
  ) +
  facet_grid(cols = vars(hhlLab)) +
  scale_x_continuous(breaks = seq(from = 2007, to = 2017, by = 4)) +
  scale_color_discrete(name = "Mode: ") +
  scale_shape_discrete(name = "Proficiency: ") +
  scale_linetype(name = "Proficiency: ") +
  labs(x = "YEAR", y = "Allocation Rate") +
  theme_bw() +
  theme(legend.position = "bottom") # must follow theme_bw() to take effect
