library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(tidyr)
library(zoo)
## 
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric
library(knitr)
# Load the USCIS spreadsheet export. Empty cells and the literal string "NA"
# are both converted to NA at read time, so no separate ""-to-NA pass is
# needed afterwards.
H1B3 <- read.csv("/Users/christinakasman/Desktop/H1B3.csv", na.strings = c("", "NA"))
# Fill blanks in the first column with the last value seen above them
# (presumably the label is only written on the first row of each group --
# TODO confirm against the raw file).
# na.rm = FALSE keeps any leading NA in place so the result always has one
# value per row; the default (na.rm = TRUE) drops leading NAs and would make
# this assignment fail with a length mismatch.
H1B3$X <- na.locf(H1B3$X, na.rm = FALSE)
head(as.data.frame(H1B3))
##                                                 X  X.1  X.2
## 1  USCIS Field Office or Service Center Location  <NA> <NA>
## 2  USCIS Field Office or Service Center Location  <NA> <NA>
## 3  USCIS Field Office or Service Center Location  <NA> <NA>
## 4  USCIS Field Office or Service Center Location  <NA> <NA>
## 5                                          Total  <NA> <NA>
## 6                          Field Office by State6 <NA> <NA>
##                                                       X.3       X.4
## 1  Applications by Category of Admission and Case Status       <NA>
## 2                                           Family-based       <NA>
## 3                                  Applications Received2 Approved3
## 4                                                    <NA>      <NA>
## 5                                                   93914     71393
## 6                                                    <NA>      <NA>
##       X.5      X.6                                          X.7       X.8
## 1    <NA>     <NA>                                         <NA>      <NA>
## 2    <NA>     <NA> Employment-based received at service center1      <NA>
## 3 Denied4 Pending5                       Applications Received2 Approved3
## 4    <NA>                                                  <NA>      <NA>
## 5    9854   305370                                        41920     29689
## 6    <NA>     <NA>                                         <NA>      <NA>
##   Number.of.I.485.Applications.to.Register.Permanent.Residence.or.Adjust.Status.by.Category.of...............Admission..Case.Status..and.USCIS.Field.Office.or.Service.Center.Location...............April.1...June.30..2017
## 1                                                                                                                                                                                                                       <NA>
## 2                                                                                                                                                                                                                       <NA>
## 3                                                                                                                                                                                                                    Denied4
## 4                                                                                                                                                                                                                       <NA>
## 5                                                                                                                                                                                                                       1705
## 6                                                                                                                                                                                                                       <NA>
##        X.9                   X.10      X.11    X.12     X.13
## 1     <NA>                   <NA>      <NA>    <NA>     <NA>
## 2     <NA>    Humanitarian-based1      <NA>    <NA>     <NA>
## 3 Pending5 Applications Received2 Approved3 Denied4 Pending5
## 4     <NA>                   <NA>      <NA>    <NA>     <NA>
## 5   148547                  47773     36736    1262   122768
## 6     <NA>                   <NA>      <NA>    <NA>     <NA>
##                     X.14      X.15    X.16     X.17                   X.18
## 1                   <NA>      <NA>    <NA>     <NA>                   <NA>
## 2                Others1      <NA>    <NA>     <NA>                 Total 
## 3 Applications Received2 Approved3 Denied4 Pending5 Applications Received2
## 4                   <NA>      <NA>    <NA>     <NA>                   <NA>
## 5                  11367      7986     931    32356                 194974
## 6                   <NA>      <NA>    <NA>     <NA>                   <NA>
##        X.19    X.20     X.21 X.22 X.23 X.24 X.25 X.26 X.27 X.28 X.29 X.30
## 1      <NA>    <NA>     <NA>   NA   NA   NA   NA   NA   NA   NA   NA   NA
## 2      <NA>    <NA>     <NA>   NA   NA   NA   NA   NA   NA   NA   NA   NA
## 3 Approved3 Denied4 Pending5   NA   NA   NA   NA   NA   NA   NA   NA   NA
## 4      <NA>    <NA>     <NA>   NA   NA   NA   NA   NA   NA   NA   NA   NA
## 5    145804   13752   609041   NA   NA   NA   NA   NA   NA   NA   NA   NA
## 6      <NA>    <NA>     <NA>   NA   NA   NA   NA   NA   NA   NA   NA   NA
##   X.31 X.32 X.33 X.34 X.35 X.36 X.37 X.38 X.39 X.40 X.41 X.42
## 1   NA   NA   NA   NA   NA   NA   NA   NA   NA   NA   NA   NA
## 2   NA   NA   NA   NA   NA   NA   NA   NA   NA   NA   NA   NA
## 3   NA   NA   NA   NA   NA   NA   NA   NA   NA   NA   NA   NA
## 4   NA   NA   NA   NA   NA   NA   NA   NA   NA   NA   NA   NA
## 5   NA   NA   NA   NA   NA   NA   NA   NA   NA   NA   NA   NA
## 6   NA   NA   NA   NA   NA   NA   NA   NA   NA   NA   NA   NA

**I was interested in looking at family-based applications received per state.**

# Keep columns 1-7 (location identifiers plus the family-based counts) and
# drop the first seven rows, which hold the sheet's header/total lines rather
# than per-office records (rows 1-6 are visible in the preview; row 7 is
# assumed to be the same kind of line -- TODO confirm).
my.new.data <- H1B3[-c(1:7), -c(8:44)]
colnames(my.new.data)[1:7] <- c(
  "State", "City", "Code",
  "Applications_Received", "Approved", "Denied", "Pending"
)
# Only keep offices where every retained field is present.
my.new.data2 <- my.new.data %>% na.omit()
# Total family-based denials per state.
# The count columns share their CSV column with header text, so read.csv()
# does not parse them as numbers; on R < 4.0 they arrive as factors.
# as.numeric() on a factor returns the internal level codes, not the printed
# values, so convert through as.character() first. Entries that are not
# plain numbers become NA (R warns about the coercion) and are skipped by
# na.rm = TRUE instead of turning the whole state total into NA.
data3 <- my.new.data2 %>%
  group_by(State) %>%
  summarise(Denied = sum(as.numeric(as.character(Denied)), na.rm = TRUE))
# Six states with the most denials.
head(data3[order(data3$Denied, decreasing = TRUE), ])
## # A tibble: 6 x 2
##              State Denied
##             <fctr>  <dbl>
## 1      California     394
## 2         Florida     344
## 3        New York     170
## 4           Texas     170
## 5  North Carolina     129
## 6      Washington     127

California had the highest number of denied family-based I-485 applications.

# Total family-based approvals per state (overwrites the earlier data3).
# Convert through as.character() before as.numeric(): if Approved was read as
# a factor, as.numeric() alone would sum the factor's level codes rather than
# the application counts. Entries that are not plain numbers become NA (with
# a coercion warning) and are skipped by na.rm = TRUE.
data3 <- my.new.data2 %>%
  group_by(State) %>%
  summarise(Approved = sum(as.numeric(as.character(Approved)), na.rm = TRUE))
# Six states with the most approvals.
head(data3[order(data3$Approved, decreasing = TRUE), ])
## # A tibble: 6 x 2
##                   State Approved
##                  <fctr>    <dbl>
## 1           California       509
## 2              Florida       364
## 3                Texas       203
## 4  U.S. Virgin Islands       167
## 5             New York       155
## 6       North Carolina       143

California had the highest number of approved family-based I-485 applications.

Data Set 3: UNICEF Child Mortality Data

# Load the UNICEF table; it is wide, with the yearly values stored in the
# columns U5MR.1950 through U5MR.2015.
unicef <- read.csv("/Users/christinakasman/Desktop/unicef-u5mr.csv")
# Reshape wide -> long: the former column name goes into `year`, the cell
# value into `mortality`. Rows with any missing field are then dropped.
unicef2 <- gather(unicef, year, "mortality", U5MR.1950:U5MR.2015)
unicef2 <- unicef2 %>% na.omit()
library(stringr)
# Extract the four-digit year from each row's `year` label (e.g. "U5MR.1963").
# Working on the column itself yields exactly one value per row. Passing the
# whole data frame to str_extract_all() coerces every column to a single
# deparsed string and only lines up with the rows by accident (any other
# four-digit run in the data would shift the result).
yr <- str_extract(unicef2$year, "[[:digit:]]{4}")
unicef3 <- cbind(unicef2, yr)
# Drop column 2 (the raw `year` label) now that `yr` carries the year.
unicef4 <- unicef3[, -c(2)]
# Six country-years with the highest mortality value.
head(unicef4[order(unicef4$mortality, decreasing = TRUE), ])
##      CountryName mortality   yr
## 2656        Mali     443.5 1963
## 2852        Mali     435.8 1964
## 3048        Mali     428.5 1965
## 3244        Mali     421.8 1966
## 3440        Mali     415.4 1967
## 3636        Mali     409.8 1968

Mali had the highest under-five mortality rate (U5MR) in the data set: 443.5, recorded in 1963.

# Per-country total: add up the mortality values of every year on record.
plot1 <- summarise(group_by(unicef3, CountryName), total = sum(mortality))
# Six countries with the largest totals.
head(plot1[order(plot1$total, decreasing = TRUE), ])
## # A tibble: 6 x 2
##    CountryName   total
##         <fctr>   <dbl>
## 1 Burkina Faso 16312.6
## 2 Sierra Leone 15005.2
## 3         Mali 14506.3
## 4        Benin 14238.7
## 5      Senegal 13285.3
## 6      Liberia 13215.8

Burkina Faso has the highest under-five mortality rate when summed across all years on record.

# Per-year total: add up the mortality values of every country for that year.
# NOTE: the name `plot` shadows base::plot as a variable; it is kept because
# the chart code that follows reads it.
plot <- summarise(group_by(unicef3, yr), total = sum(mortality))
# Six years with the largest totals.
head(plot[order(plot$total, decreasing = TRUE), ])
## # A tibble: 6 x 2
##       yr   total
##   <fctr>   <dbl>
## 1   1969 18279.6
## 2   1970 18244.9
## 3   1972 18239.8
## 4   1971 18150.9
## 5   1975 17814.9
## 6   1973 17750.8

1969 had the highest under-five mortality rate when summed across all countries.

library(ggplot2)
# Bar chart of the per-year totals, one bar per value of `yr`.
# The x-axis tick marks and tick labels are suppressed; only the axis title
# "Year" is shown.
ggplot(data = plot, mapping = aes(x = yr, y = total)) +
  geom_bar(stat = "identity", fill = "#009E73") +
  xlab("Year") +
  ggtitle("Child Mortality over time") +
  theme(
    axis.ticks.x = element_blank(),
    axis.text.x = element_blank()
  )