VISA Forecast

Click the following link to try the Shiny app: https://orcathekillerwhale.shinyapps.io/VISApredictShiny/

Data Source

The data are compiled from the VISA bulletins from Jan 2014 to Sep 2018, which are published monthly on a site managed by the US Department of State. For example, the Sept 2018 bulletin is available at the following URL: https://travel.state.gov/content/travel/en/legal/visa-law0/visa-bulletin/2018/visa-bulletin-for-september-2018.html . The data are read from the HTML pages into the csv file. The data used fall under the following categories: F1, F2A, F2B, F3 and F4.

Analytic model

A linear least-squares model is fitted to the data. Its R-squared value is displayed to show the proportion of the variance that the linear model explains compared with an untrended (constant) prediction; the closer R-squared is to one, i.e. 100%, the better.

The VISA Bulletin table in section A is read automatically from the above URL into readVISAtableOne.csv as a table with the columns Bulletin Dates, VISA.Category and Priority.Dates.

Instruction for use

In the left panel, the user enters the priority date of the applicant and ticks the checkbox of the category to which the application belongs. Once the entries are complete, the user presses the “submit” button to see the updated curve on the right.

There are two messages for the data entry:
On the checkboxes, if no box or more than one box is checked, an error message will appear. The curve will still display the result of the last choice of the VISA category from the checkboxes. On the date entry, if the date is improper, an error message will appear. The curve will be plotted based on a nominal priority date of 2013-01-01.

Code to read the data into the csv file. This program is not run here, in order to speed up the response. The file readVISAtableOne.csv is saved for later processing.

library(XML)
library(RCurl)
library(lubridate)
# Download every monthly VISA bulletin from January 2014 to September 2018,
# pull rows 2-6 / columns 1-2 out of the first HTML table on each page, and
# save the stacked result to readVISAtableOne.csv.
month_names <- c(
  "january", "february", "march", "april", "may", "june",
  "july", "august", "september", "october", "november", "december"
)
bulletin_root <-
  "https://travel.state.gov/content/travel/en/legal/visa-law0/visa-bulletin/"

# One (year, month) pair per bulletin; 2018 stops at month 9.
periods <- do.call(rbind, lapply(2014:2018, function(yr) {
  data.frame(year = yr, month = seq_len(if (yr == 2018) 9 else 12))
}))

tt <- NULL
for (i in seq_len(nrow(periods))) {
  yr <- periods$year[i]
  mo <- periods$month[i]

  # Months 10-12 live under the following year's URL folder
  # (presumably the fiscal year -- confirm against the site layout).
  folder_year <- if (mo > 9) yr + 1 else yr
  page <- getURL(paste0(
    bulletin_root, folder_year,
    "/visa-bulletin-for-", month_names[mo], "-", yr, ".html"
  ))

  # Each bulletin is stamped with the 11th day of its month.
  bulletin_date <- as.Date(paste0("11-", mo, "-", yr), format = "%d-%m-%Y")

  first_table <- readHTMLTable(page)[[1]]
  cutoffs <- first_table[2:6, 1:2]

  # Prepend, so the most recently fetched bulletin sits at the top.
  tt <- rbind(cbind(bulletin_date, cutoffs), tt)
}

names(tt) <- c("Bulletin Dates", "VISA.Category", "Priority.Dates")
# Convert the day-month-year text of the priority dates to Date values.
tt$Priority.Dates <- dmy(tt$Priority.Dates)
print(tt)
write.table(tt, "readVISAtableOne.csv")

Code to interactively compute the prediction from the data in the csv file. This program is run here.

library(shiny)
# Define UI
# UI layout: the sidebar holds the priority-date text box, one checkbox per
# VISA category, two status messages and the submit button; the main panel
# shows the prediction plot.
ui <- local({
  # Checkbox id and label are the same string for every category.
  category_ids <- c("F1", "F2A", "F2B", "F3", "F4")
  category_boxes <- lapply(category_ids, function(id) {
    checkboxInput(id, id, value = FALSE)
  })

  # Left-hand panel with all user inputs and the two text messages.
  controls <- sidebarPanel(
    textInput("pDate", "Enter Priority Date in YYYY-MM-DD", "2013-01-01"),
    category_boxes,
    textOutput("message"),
    textOutput("m"),
    submitButton("submit")
  )

  # Right-hand panel with the fitted trend and the prediction.
  display <- mainPanel(
    plotOutput("predPlot")
  )

  fluidPage(
    titlePanel("VISA Availability Date Prediction"),
    sidebarLayout(controls, display)
  )
})

# Define server
# Shiny server logic.
#
# Reads the scraped bulletin table from readVISAtableOne.csv, fits a linear
# model of bulletin date on priority date for the selected VISA category, and
# plots the history together with the predicted bulletin date (and its
# prediction interval) for the priority date typed by the user.
#
# Args:
#   input:  Shiny input object; reads pDate and the F1/F2A/F2B/F3/F4 checkboxes.
#   output: Shiny output object; fills message, m and predPlot.
server <- function(input, output) {
  visa_categories <- c("F1", "F2A", "F2B", "F3", "F4")
  # Priority date used when the typed date cannot be parsed.
  fallback_date <- as.Date("2013-01-01", format = "%Y-%m-%d")

  # Which category checkboxes are currently ticked (named logical vector).
  ticked <- reactive({
    vapply(visa_categories, function(id) isTRUE(input[[id]]), logical(1))
  })

  # Category used for the plot: the last ticked box in F1..F4 order wins,
  # and F1 is used when nothing is ticked.
  selected_category <- reactive({
    chosen <- visa_categories[ticked()]
    if (length(chosen) == 0) "F1" else chosen[length(chosen)]
  })

  # Typed priority date; NA when it does not match YYYY-MM-DD.
  typed_date <- reactive({
    as.Date(as.character(input$pDate), format = "%Y-%m-%d")
  })

  # Bulletin table; a reactive without dependencies, so the file is read
  # once per session instead of on every redraw.
  visa_data <- reactive({
    read.table("readVISAtableOne.csv")
  })

  output$message <- renderText({
    if (sum(ticked()) == 1) "VISA cat OK" else "Error: Select one and only one"
  })

  # Defined here rather than inside renderPlot so that drawing the plot has
  # no side effects on other outputs.
  output$m <- renderText({
    if (is.na(typed_date())) {
      "Priority Date error, reverted to 2013-01-01"
    } else {
      "Date ok"
    }
  })

  output$predPlot <- renderPlot({
    category <- selected_category()
    priority_date <- typed_date()
    if (is.na(priority_date)) {
      priority_date <- fallback_date
    }

    # Rows for the selected category; which() drops NA comparisons.
    all_rows <- visa_data()
    history <- all_rows[which(all_rows$VISA.Category == category), , drop = FALSE]
    history$Priority.Dates <- as.Date(as.character(history$Priority.Dates),
                                      format = "%Y-%m-%d")
    history$Bulletin.Dates <- as.Date(as.character(history$Bulletin.Dates),
                                      format = "%Y-%m-%d")

    # Show a readable message instead of an lm() error when there is too
    # little dated history for this category.
    usable <- !is.na(history$Priority.Dates) & !is.na(history$Bulletin.Dates)
    validate(need(
      sum(usable) >= 3,
      "Not enough dated bulletin entries for this VISA category to fit a trend."
    ))

    # The model is fitted on day counts since 1970-01-01.
    model_data <- data.frame(
      Priority.Dates = as.numeric(history$Priority.Dates),
      Bulletin.Dates = as.numeric(history$Bulletin.Dates)
    )
    fit <- lm(Bulletin.Dates ~ Priority.Dates, data = model_data)
    rsq <- round(summary(fit)$r.squared, 2)

    # Point prediction plus lower/upper prediction bounds for the user's date.
    forecast <- predict(
      fit,
      data.frame(Priority.Dates = as.numeric(priority_date)),
      interval = "prediction"
    )
    forecast_dates <- as.Date(as.vector(forecast), origin = "1970-01-01")
    names(forecast_dates) <- c("predicted", "earliest", "latest")

    # Log the values actually used for this redraw.
    message(
      "category=", category,
      " priority date=", format(priority_date),
      " predicted=", format(forecast_dates[1]),
      " Rsquared=", summary(fit)$r.squared
    )

    # Historical points plus the predicted point for the user's date.
    x <- c(history$Priority.Dates, priority_date)
    y <- c(history$Bulletin.Dates, unname(forecast_dates[1]))

    # Fitted line and prediction bounds at every plotted x value.
    band <- predict(
      fit,
      data.frame(Priority.Dates = as.numeric(x)),
      interval = "prediction"
    )

    # Latest bulletin date present, independent of the row order in the file.
    current_bulletin <- max(history$Bulletin.Dates, na.rm = TRUE)

    plot(
      x = x, y = y,
      xlab = "Priority date", ylab = "Bulletin date",
      main = paste(
        "Current", category, "VISA Bulletin Dated", format(current_bulletin),
        "\n Predicted=", format(forecast_dates[1]),
        "for Priority Date=", format(priority_date), paste("Rsquared=", rsq),
        "\n 95% confidence =(", format(forecast_dates[2]), "~",
        format(forecast_dates[3]), ")",
        "\n"
      )
    )

    # Draw the curves in increasing x order; unsorted x would add stray
    # chords across the curved prediction bounds.
    ord <- order(x)
    matlines(x[ord], band[ord, , drop = FALSE], type = "l")
  })
}


# Build the Shiny application from the ui and server objects defined above.
shinyApp(ui = ui, server = server)