# Web scraping
# Download historical weather records for multiple cities over a custom date range from an HTML page
library(XML);library(RCurl)
## Warning: package 'RCurl' was built under R version 3.1.2
## Loading required package: bitops
# Sample request: download one custom-history report as CSV text and preview
# its first row.
# NOTE(review): the path segment names KSFO while the query string carries
# ID=KATL; the pasted output below has a PST date column, so the path airport
# appears to be the one served -- confirm before relying on the ID parameter.
x_file <- 'http://www.wunderground.com/history/airport/KSFO/2015/12/13/CustomHistory.html/cgi-bin/histGraphAll?day=13&year=2015&month=12&dayend=16&yearend=2015&monthend=12&ID=KATL&type=6&width=614&format=1'
x_url <- getURLContent(x_file, ssl.verifypeer = FALSE)
# read.csv(text = ...) opens and closes its own text connection; a bare
# textConnection() passed as the file is already open and would be left open.
x_data <- read.csv(text = x_url, header = TRUE)
head(x_data, 1)
##          PST Max.TemperatureF Mean.TemperatureF Min.TemperatureF
## 1 2015-12-13               57                53               48
##   Max.Dew.PointF MeanDew.PointF Min.DewpointF Max.Humidity Mean.Humidity
## 1             55             46            36           96            79
##   Min.Humidity Max.Sea.Level.PressureIn Mean.Sea.Level.PressureIn
## 1           61                    30.16                        30
##   Min.Sea.Level.PressureIn Max.VisibilityMiles Mean.VisibilityMiles
## 1                    29.86                  10                    8
##   Min.VisibilityMiles Max.Wind.SpeedMPH Mean.Wind.SpeedMPH
## 1                   1                37                 19
##   Max.Gust.SpeedMPH PrecipitationIn CloudCover Events WindDirDegrees.br...
## 1                47            0.76          6   Rain            244<br />
#######################################################
# Build the character vector of station codes to download: read the airport
# table, prefix the values of its 4th column with "K", and keep at most the
# first five codes.
# local() keeps the intermediate data frame out of the global environment.
AirpotList <- local({
  airports <- read.csv("~/Documents/S&K/Kunal/Major_Airports.csv", header = TRUE)
  if (ncol(airports) < 4 || nrow(airports) == 0) {
    stop("Major_Airports.csv must have at least 4 columns and 1 row",
         call. = FALSE)
  }
  # head() returns fewer than five codes for a short file instead of padding
  # the vector with NA the way [1:5] does.
  head(paste0("K", airports[, 4]), 5)
})
#AirpotList <- c("KATL", "KSFO", "KMIA")
#######################################################
# Column names given to the six columns kept from each downloaded report.
V <- c(
  "Date",
  "Max.TemperatureF", "Mean.TemperatureF", "Min.TemperatureF",
  "Max.Humidity",
  "Events"
)
# Reset the global x_data (it held the sample download from earlier).
x_data <- NULL
# Download the custom-history weather report for one airport and reduce it to
# the columns of interest.
#
# Args:
#   i: Index into the global `AirpotList` vector selecting the airport code.
#
# Returns:
#   A data frame made of columns 1-4, 8 and 22 of the downloaded CSV, renamed
#   to the names in the global `V`, plus an `Airport` column holding the
#   airport code.
#
# Stops with an error when `AirpotList[i]` is missing or when the response
# has fewer than 22 columns.
Kunal_fn <- function(i) {
  airport <- AirpotList[i]
  if (length(airport) != 1 || is.na(airport)) {
    stop("No airport code at position ", i, " of AirpotList", call. = FALSE)
  }

  # NOTE(review): the path asks for a start date of 2015/1/1 while the query
  # string says day=13, month=12; the sample output in this script starts at
  # 2015-1-1, so the path date appears to win -- confirm with the service.
  URL <- paste0(
    'http://www.wunderground.com/history/airport/', airport, "/2015/1/1/",
    'CustomHistory.html/cgi-bin/histGraphAll?',
    'day=13&year=2015&month=12&',
    "dayend=25&yearend=2015&monthend=12&",
    'ID=', airport,
    '&type=6&width=614&format=1'
  )

  csv_text <- getURLContent(URL, ssl.verifypeer = FALSE)
  # text = lets read.csv() manage (and close) its own connection, so repeated
  # calls do not accumulate open text connections.
  weather <- read.csv(text = csv_text, header = TRUE)

  if (ncol(weather) < 22) {
    stop("Unexpected response for ", airport, ": only ", ncol(weather),
         " column(s) returned", call. = FALSE)
  }

  # Positional selection is kept because the name of the first (date) column
  # differs between responses (PST in the sample output above).
  weather <- weather[, c(1:4, 8, 22)]
  colnames(weather) <- V
  cbind(weather, Airport = airport)
}
#######################################################
length(AirpotList)
## [1] 5
# Fetch one data frame per airport. seq_along() yields an empty sequence for
# an empty AirpotList, whereas 1:length() would iterate over c(1, 0).
TempData <- lapply(seq_along(AirpotList), Kunal_fn)
head(TempData[[1]]) # Atlanta
##       Date Max.TemperatureF Mean.TemperatureF Min.TemperatureF
## 1 2015-1-1               58                45               32
## 2 2015-1-2               49                46               43
## 3 2015-1-3               62                55               48
## 4 2015-1-4               66                55               44
## 5 2015-1-5               48                41               34
## 6 2015-1-6               54                42               30
##   Max.Humidity                Events Airport
## 1           78                  Rain    KATL
## 2           93                  Rain    KATL
## 3          100              Fog-Rain    KATL
## 4           97 Fog-Rain-Thunderstorm    KATL
## 5           76                          KATL
## 6           72                          KATL
# Preview the last rows of the final airport's data; indexing by length keeps
# this valid when the number of downloaded airports is not exactly five.
tail(TempData[[length(TempData)]])
##           Date Max.TemperatureF Mean.TemperatureF Min.TemperatureF
## 354 2015-12-20               54                39               23
## 355 2015-12-21               56                48               39
## 356 2015-12-22               63                57               50
## 357 2015-12-23               67                63               59
## 358 2015-12-24               73                68               63
## 359 2015-12-25               74                69               63
##     Max.Humidity                Events Airport
## 354           81                          KCLT
## 355           74                          KCLT
## 356          100              Fog-Rain    KCLT
## 357           97 Fog-Rain-Thunderstorm    KCLT
## 358           93                  Rain    KCLT
## 359           93 Fog-Rain-Thunderstorm    KCLT