Language Explorer Demo Examples

Example #1: Which languages have the greatest number of linguistically isolated households?

STEP 1: Import Pre-Cleaned Data

# Pre-cleaned example data; later steps read its HHL, LNGI, YEAR and
# weightHouse columns.
hhl <- read.csv(
  file = "https://raw.githubusercontent.com/kitadasmalley/languageExplorer/main/data/yearHHL_Ex.csv",
  header = TRUE
)

## Warning in scan(file = file, what = what, sep = sep, quote = quote, dec = dec, :
## embedded nul(s) found in input

STEP 2: Import Tidyverse

# Install if needed
#install.packages("tidyverse")

library(tidyverse)

STEP 3: Add Labels

# LABELS
# Lookup tables that map the numeric survey codes to readable labels.
# Each table is keyed on the code column it shares with the data
# (RESMODE, HHL, LNGI) so it can be attached with a join.

# Response mode code -> label
modeLabs <- data.frame(
  RESMODE    = c(1, 2, 3),
  resmodeLab = c("1. Mail", "2. CATI/CAPI", "3. Internet")
)

# Household language code -> label, plus a small per-language offset
# (`jitter`) that later steps add to YEAR when plotting.
langLabs <- data.frame(
  HHL    = c(1, 2, 3, 4, 5),
  hhlLab = c(
    "1. English",
    "2. Spanish",
    "3. Indo European",
    "4. Asian / Pacific Islander",
    "5. Other"
  ),
  jitter = c(-.2, -.1, 0, .1, .2)
)

# English-proficiency code -> label
lngiLab <- data.frame(
  LNGI    = c(1, 2),
  lngiLab = c("1. English Proficient", "2. Limited English")
)

# Drop the English rows (HHL code 1), then attach the language and
# proficiency labels. The join keys are left implicit, so dplyr matches on
# the shared columns and reports them in a message.
hhl <- hhl %>%
  filter(HHL != 1) %>%
  left_join(langLabs) %>%
  left_join(lngiLab)

## Joining, by = "HHL"

## Joining, by = "LNGI"

STEP 4: Create barchart: Distribution of Household Language (HHL) and English Proficiency

# Stacked horizontal bars of weightHouse per household language, filled by
# English proficiency, with one facet row per YEAR.
ggplot(
  data = hhl,
  mapping = aes(
    x = reorder(hhlLab, desc(hhlLab)),
    y = weightHouse,
    fill = lngiLab
  )
) +
  geom_col() + # bar heights are taken directly from weightHouse
  facet_grid(rows = vars(YEAR)) +
  scale_fill_discrete(name = "Proficiency") +
  scale_y_continuous(labels = scales::comma) +
  labs(x = "", y = "Number of Households") +
  theme_minimal() +
  coord_flip() # languages on the vertical axis

STEP 5: Create barchart for conditional distribution

### Conditional Distributions
hhl%>%
  filter(HHL!=1)%>%
  left_join(langLabs)%>%
  left_join(lngiLab)%>%
  ggplot(aes(x=reorder(hhlLab, desc(hhlLab)), y=weightHouse, fill=lngiLab))+
  geom_bar(stat = "identity", position="fill")+
  facet_grid(YEAR~.)+
  scale_fill_discrete("Proficiency")+
  xlab("")+
  ylab("Number of Households")+
  scale_y_continuous(labels = scales::comma)+
  theme_minimal()+
  coord_flip()

## Joining, by = c("HHL", "hhlLab", "jitter")

## Joining, by = c("LNGI", "lngiLab")

Example #2: How are linguistically isolated households interviewed?

STEP 1: Import Pre-Cleaned and Prepared Data

# Pre-computed conditional distributions (sampProp) by YEAR, household
# language, proficiency and response mode.
lang.prop2 <- read.csv(
  file = "https://raw.githubusercontent.com/kitadasmalley/languageExplorer/main/data/condDistrLang.csv",
  header = TRUE
)

# Fix the facet order of the language labels. Any value not listed in
# `levels` becomes NA.
# NOTE(review): assumes the CSV stores unnumbered labels ("English", not
# "1. English") - confirm against the file.
lang.prop2[["hhlLab"]] <- factor(
  lang.prop2[["hhlLab"]],
  levels = c(
    "English",
    "Spanish",
    "Indo European",
    "Asian / Pacific Islander"
  )
)

# Replace the file's own response-mode label with the one from modeLabs.
lang.prop22 <- lang.prop2 %>%
  select(-resmodeLab) %>%
  left_join(modeLabs)

## Joining, by = "RESMODE"

#str(lang.prop22)

STEP 2: Conditional Distribution for Modes

# Side-by-side bars of sampProp per YEAR, coloured by response mode, in a
# grid of proficiency (rows) by household language (columns).
ggplot(
  data = lang.prop22,
  mapping = aes(x = YEAR, y = sampProp, fill = resmodeLab)
) +
  geom_col(position = position_dodge()) +
  facet_grid(lngiLab ~ hhlLab) +
  theme_bw() +
  # Must follow theme_bw(), which would otherwise reset the legend position.
  theme(legend.position = "bottom") +
  scale_fill_discrete(name = "Response Mode: ") +
  scale_x_continuous(breaks = seq(2007, 2017, by = 4)) +
  labs(y = "Conditional Probability")

Example #3: How complete are the data records that are collected from linguistically isolated households?

STEP 1: Import Data from Statistical Models

# Model output; later steps read its RESMODE, HHL, LNGI, YEAR, rate and se
# columns.
allRHL <- read.csv(
  file = "https://raw.githubusercontent.com/kitadasmalley/languageExplorer/main/data/meanRHLall.csv",
  header = TRUE
)

STEP 2: Format data and labels

# Attach the mode, language and proficiency labels, build the plotting
# x-position, and drop the "5. Other" language group.
allRHL2 <- allRHL %>%
  left_join(modeLabs) %>%
  left_join(langLabs) %>%
  left_join(lngiLab) %>%
  # Shift each language by its own offset along the year axis
  # (presumably to keep overlapping error bars legible).
  mutate(yearJ = YEAR + jitter) %>%
  filter(HHL != 5)

## Joining, by = "RESMODE"

## Joining, by = "HHL"

## Joining, by = "LNGI"

STEP 3: Time Series Plot with 95% confidence intervals

 # Rate over time by response mode (colour) and proficiency (line type and
 # point shape), one panel per household language, with rate +/- 1.96 * se
 # error bars.
 ggplot(
   data = allRHL2,
   mapping = aes(x = yearJ, y = rate, color = as.factor(resmodeLab))
 ) +
   geom_line(aes(linetype = lngiLab), alpha = .5) +
   geom_point(aes(shape = lngiLab), size = 3) +
   # Alternative kept from the original:
   # geom_pointrange(aes(ymin = rate - 1.96 * se, ymax = rate + 1.96 * se)) +
   geom_errorbar(
     aes(ymin = rate - 1.96 * se, ymax = rate + 1.96 * se),
     alpha = .3,
     size = .5,
     width = .5
   ) +
   facet_grid(~hhlLab) +
   theme_bw() +
   scale_x_continuous(breaks = seq(2007, 2017, by = 4)) +
   # Must follow theme_bw(), which would otherwise reset the legend position.
   theme(legend.position = "bottom") +
   scale_color_discrete(name = "Mode: ") +
   # Same title on both scales so shape and line type share one legend.
   scale_shape_discrete(name = "Proficiency: ") +
   scale_linetype(name = "Proficiency: ") +
   labs(x = "YEAR", y = "Allocation Rate")

Language Explorer Demo Examples

Heather Kitada Smalley

10/8/2020

Example #1: Which languages have the greatest number of linguistically isolated households?

STEP 1: Import Pre-Cleaned Data

STEP 2: Import Tidyverse

STEP 3: Add Labels

STEP 4: Create barchart: Distribution of Household Language (HHL) and English Proficiency

STEP 5: Create barchart for conditional distribution

Example #2: How are linguistically isolated households interviewed?

STEP 1: Import Pre-Cleaned and Prepared Data

STEP 2: Conditional Distribution for Modes

Example #3: How complete are the data records that are collected from linguistically isolated households?

STEP 1: Import Data from Statistical Models

STEP 2: Format data and labels

STEP 3: Time Series Plot with 95% confidence intervals