##Load libraries
library(dplyr) #all data
library(tidyr) #all data
library(readxl)#Import data 2
library(ggplot2)#all data
library(plotrix) #data1
library(plotly) #data 2 https://plot.ly/ggplot2/animations/
library(gapminder) #data2 https://plot.ly/ggplot2/animations/
# Location of the first data set: a CSV export served by data.ny.gov.
theurl <- "https://data.ny.gov/api/views/rikd-mt35/rows.csv?accessType=DOWNLOAD"

# Parse the comma-separated file straight from the URL (first row holds the
# column names), then print a compact column-by-column overview.
thedata <- read.table(theurl, header = TRUE, sep = ",")
glimpse(thedata)
## Observations: 3,055
## Variables: 13
## $ County <fct> Albany, Albany, Albany, Albany, Albany, Alba...
## $ Year <int> 1970, 1971, 1972, 1973, 1974, 1975, 1976, 19...
## $ Total <int> 1226, 1833, 3035, 3573, 4255, 4173, 4601, 48...
## $ Felony.Total <int> 688, 829, 1054, 1134, 1329, 1259, 1435, 1342...
## $ Drug.Felony <int> 97, 131, 211, 244, 281, 209, 201, 122, 85, 1...
## $ Violent.Felony <int> 191, 231, 256, 274, 308, 344, 434, 403, 433,...
## $ DWI.Felony <int> 5, 6, 8, 28, 17, 12, 26, 45, 58, 65, 79, 81,...
## $ Other.Felony <int> 395, 461, 579, 588, 723, 694, 774, 772, 909,...
## $ Misdemeanor.Total <int> 538, 1004, 1981, 2439, 2926, 2914, 3166, 347...
## $ Drug.Misd <int> 207, 204, 285, 369, 437, 398, 362, 270, 157,...
## $ DWI.Misd <int> 48, 111, 297, 497, 619, 463, 574, 858, 1540,...
## $ Property.Misd <int> 95, 272, 541, 668, 885, 977, 1011, 1133, 133...
## $ Other.Misd <int> 188, 417, 858, 905, 985, 1076, 1219, 1216, 1...
Gather all offense totals and display ggplot2 charts
# Preview the first six rows of the raw county/year table.
head(thedata, n = 6L)
## County Year Total Felony.Total Drug.Felony Violent.Felony DWI.Felony
## 1 Albany 1970 1226 688 97 191 5
## 2 Albany 1971 1833 829 131 231 6
## 3 Albany 1972 3035 1054 211 256 8
## 4 Albany 1973 3573 1134 244 274 28
## 5 Albany 1974 4255 1329 281 308 17
## 6 Albany 1975 4173 1259 209 344 12
## Other.Felony Misdemeanor.Total Drug.Misd DWI.Misd Property.Misd
## 1 395 538 207 48 95
## 2 461 1004 204 111 272
## 3 579 1981 285 297 541
## 4 588 2439 369 497 668
## 5 723 2926 437 619 885
## 6 694 2914 398 463 977
## Other.Misd
## 1 188
## 2 417
## 3 858
## 4 905
## 5 985
## 6 1076
# Total count per offense category over every county and year, plus each
# category's share (in percent, rounded to 5 digits) of the grand total in
# `thedata$Total`. Rows are sorted from the largest category to the smallest.
tddata1.1 <- thedata %>%
  # Drop Year and the pre-aggregated totals so only County and the eight
  # individual offense columns remain.
  select(-Felony.Total, -Misdemeanor.Total, -Year, -Total) %>%
  # Long form: one row per County/offense pair. Selecting by name (-County)
  # rather than by position (2:9) stays correct if the columns are reordered.
  gather(offense, value, -County) %>%
  group_by(offense) %>%
  # summarise_each() and funs() are deprecated in dplyr; a plain summarise()
  # yields the same one-row-per-offense sums without deprecation warnings.
  summarise(value = sum(value)) %>%
  mutate(pctallcrime = round(value / sum(thedata$Total) * 100, 5)) %>%
  arrange(desc(value))
tddata1.1
## # A tibble: 8 x 3
## offense value pctallcrime
## <chr> <int> <dbl>
## 1 Other.Misd 4946081 22.4
## 2 Property.Misd 4659585 21.1
## 3 Other.Felony 3358148 15.2
## 4 Drug.Misd 2862559 13.0
## 5 Violent.Felony 2471367 11.2
## 6 DWI.Misd 1925200 8.72
## 7 Drug.Felony 1648439 7.47
## 8 DWI.Felony 198436 0.899
# Strongly prefer fixed over scientific notation when numbers are printed.
options(scipen = 20)

# Pie chart of the offense totals; each slice is labelled
# "<offense> <rounded percent>%".
lbls <- paste0(tddata1.1$offense, " ", round(tddata1.1$pctallcrime, 0), "%")
pie(
  tddata1.1$value,
  labels = lbls,
  col = rainbow(nrow(tddata1.1)),
  main = "Pie Chart of Offenses"
)

# Point chart: percentage share (x) against raw total (y), one point per
# offense, with point size following the share and colour naming the offense.
ggplot(tddata1.1, aes(x = pctallcrime, y = value)) +
  geom_point(aes(size = pctallcrime, color = offense))

# Bar chart of the total per offense. The x tick labels are blanked; the fill
# legend carries the offense names instead. (Alternative kept from the
# original: rotate the labels with element_text(angle = -90).)
ggplot(tddata1.1, aes(x = offense, y = value, fill = offense)) +
  geom_col(position = "identity") +
  theme(axis.text.x = element_blank())
Gather all offense totals and display ggplot2 charts by county
# Total count per county over every offense category and year, plus each
# county's share (in percent, rounded to 5 digits) of the grand total in
# `thedata$Total`. Only the ten counties with the largest totals are kept.
tddata1.2 <- thedata %>%
  # Drop Year and the pre-aggregated totals so only County and the eight
  # individual offense columns remain.
  select(-Felony.Total, -Misdemeanor.Total, -Year, -Total) %>%
  # Long form: one row per County/offense pair. Selecting by name (-County)
  # rather than by position (2:9) stays correct if the columns are reordered.
  gather(offense, value, -County) %>%
  group_by(County) %>%
  # summarise_each() and funs() are deprecated in dplyr; a plain summarise()
  # yields the same one-row-per-county sums without deprecation warnings.
  summarise(value = sum(value)) %>%
  mutate(pctallcrime = round(value / sum(thedata$Total) * 100, 5)) %>%
  arrange(desc(value)) %>%
  # Rank explicitly by `value`; ties at the cut-off can return more than 10 rows.
  top_n(10, value)
tddata1.2
## # A tibble: 10 x 3
## County value pctallcrime
## <fct> <int> <dbl>
## 1 New York 4112305 18.6
## 2 Kings 3289267 14.9
## 3 Bronx 2596596 11.8
## 4 Queens 1881895 8.53
## 5 Erie 1100599 4.99
## 6 Suffolk 1030156 4.67
## 7 Nassau 816633 3.70
## 8 Monroe 766274 3.47
## 9 Westchester 704951 3.19
## 10 Onondaga 492757 2.23
# Pie chart of the top-10 counties; each slice is labelled
# "<County> <rounded percent>%".
# The percentages must come from the county table (tddata1.2). Reading them
# from the per-offense table (tddata1.1) attached unrelated offense shares to
# the county names, recycled because that table has fewer rows.
lbls <- paste0(tddata1.2$County, " ", round(tddata1.2$pctallcrime, 0), "%")
pie(
  tddata1.2$value,
  labels = lbls,
  col = rainbow(nrow(tddata1.2)),
  main = "Pie Chart by Top 10 County"
)

# Point chart: percentage share (x) against raw total (y), one point per
# county, with point size following the share and colour naming the county.
ggplot(tddata1.2, aes(x = pctallcrime, y = value)) +
  geom_point(aes(size = pctallcrime, color = County))
# Total count per County/offense pair over all years, plus each pair's share
# (in percent, rounded to 5 digits) of the grand total in `thedata$Total`,
# sorted from the largest pair to the smallest.
tddata1.3 <- thedata %>%
  # Drop Year and the pre-aggregated totals so only County and the eight
  # individual offense columns remain.
  select(-Felony.Total, -Misdemeanor.Total, -Year, -Total) %>%
  # Long form: one row per County/offense pair. Selecting by name (-County)
  # rather than by position (2:9) stays correct if the columns are reordered.
  gather(offense, value, -County) %>%
  group_by(County, offense) %>%
  # summarise_each() and funs() are deprecated in dplyr; a plain summarise()
  # yields the same sums without deprecation warnings.
  summarise(value = sum(value)) %>%
  # summarise() only peels off the last grouping level; drop the remaining
  # County grouping so later steps work on a plain table.
  ungroup() %>%
  mutate(pctallcrime = round(value / sum(thedata$Total) * 100, 5)) %>%
  arrange(desc(value))

# Bar chart per county, filled by offense. position = "identity" draws the
# offense bars of one county on top of each other from zero instead of
# stacking them. County names are rotated to stay readable.
ggplot(tddata1.3, aes(x = County, y = value, fill = offense)) +
  geom_col(position = "identity") +
  theme(axis.text.x = element_text(angle = -90, size = 9))
# Second data set: an Excel workbook downloaded from un.org to the working
# directory before it is read.
theurl2 <- "http://www.un.org/en/development/desa/population/migration/data/empirical2/data/UN_MigFlow_Totals.xlsx"
destfile <- "UN_MigFlow_Totals.xlsx"

# mode = "wb" writes the download in binary mode.
download.file(url = theurl2, destfile = destfile, mode = "wb")

# Skip the first 16 rows of the sheet before reading the table, then print a
# compact column-by-column overview.
thedata2 <- read_xlsx(destfile, skip = 16)
glimpse(thedata2)
## Observations: 229
## Variables: 38
## $ CntName <chr> "Armenia", "Armenia", "Australia", "Australia", "Aust...
## $ Criteria <chr> "Residence", "Residence", "Residence", "Residence", "...
## $ Type <chr> "Emigrants", "Immigrants", "Emigrants", "Immigrants",...
## $ Coverage <chr> "Both", "Both", "Both", "Both", "Citizens", "Foreigne...
## $ `1980` <chr> "..", "..", "90860", "184290", "..", "..", "..", ".."...
## $ `1981` <chr> "..", "..", "85600", "212690", "..", "..", "..", ".."...
## $ `1982` <chr> "..", "..", "92340", "195200", "..", "..", "..", ".."...
## $ `1983` <chr> "..", "..", "100510", "153570", "..", "..", "..", ".....
## $ `1984` <chr> "..", "..", "96360", "153530", "..", "..", "..", ".."...
## $ `1985` <chr> "..", "..", "93440", "172550", "..", "..", "..", ".."...
## $ `1986` <chr> "..", "..", "92450", "196690", "..", "..", "..", ".."...
## $ `1987` <chr> "..", "..", "97770", "221620", "..", "..", "..", ".."...
## $ `1988` <chr> "..", "..", "104770", "253860", "..", "..", "..", ".....
## $ `1989` <chr> "..", "..", "120040", "238050", "..", "..", "..", ".....
## $ `1990` <chr> "..", "..", "137470", "234050", "..", "..", "..", ".....
## $ `1991` <chr> "..", "..", "143710", "237240", "..", "..", "..", ".....
## $ `1992` <chr> "..", "..", "143660", "220460", "..", "..", "..", ".....
## $ `1993` <chr> "..", "..", "140420", "197940", "..", "..", "..", ".....
## $ `1994` <chr> "..", "..", "141680", "221920", "..", "..", "..", ".....
## $ `1995` <chr> "..", "..", "149360", "253940", "..", "..", "..", ".....
## $ `1996` <chr> "..", "..", "158260", "261330", "17136", "46725", "12...
## $ `1997` <chr> "..", "..", "176560", "260220", "18830", "48264", "13...
## $ `1998` <chr> "..", "..", "179600", "268390", "19407", "44865", "13...
## $ `1999` <chr> "..", "..", "185670", "289870", "19644", "47279", "14...
## $ `2000` <chr> "12030", "1767", "206120", "317560", "18224", "46248"...
## $ `2001` <chr> "11901", "1764", "216130", "356410", "21644", "51010"...
## $ `2002` <chr> "10433", "1715", "222940", "361990", "30353", "44478"...
## $ `2003` <chr> "8482", "1926", "224890", "388450", "23056", "48940",...
## $ `2004` <chr> "8451", "1514", "212200", "350990", "21703", "50018",...
## $ `2005` <chr> "9303", "1497", "206690", "363470", "20333", "49800",...
## $ `2006` <chr> "8053", "1335", "204800", "402210", "19387", "55045",...
## $ `2007` <chr> "7461", "1112", "216580", "460650", "17828", "32070",...
## $ `2008` <chr> "6121", "864", "220280", "535970", "18168", "33395", ...
## $ `2009` <chr> "4100", "861", "..", "..", "16376", "36868", "8988", ...
## $ `2010` <chr> "..", "..", "..", "..", "16059", "35592", "8817", "62...
## $ `2011` <chr> "..", "..", "..", "..", "14401", "36796", "8082", "74...
## $ `2012` <chr> "..", "..", "..", "..", "15443", "36369", "8272", "83...
## $ `2013` <chr> "..", "..", "..", "..", "15368", "38703", "9237", "92...
Gather all yearly totals and display dynamic year-to-year changes with plotly
# Preview the first six rows of the raw migration table.
head(thedata2, n = 6L)
## # A tibble: 6 x 38
## CntName Criteria Type Coverage `1980` `1981` `1982` `1983` `1984`
## <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr>
## 1 Armenia Residence Emigra~ Both .. .. .. .. ..
## 2 Armenia Residence Immigr~ Both .. .. .. .. ..
## 3 Austral~ Residence Emigra~ Both 90860 85600 92340 100510 96360
## 4 Austral~ Residence Immigr~ Both 184290 212690 195200 153570 153530
## 5 Austria Citizensh~ Emigra~ Citizens .. .. .. .. ..
## 6 Austria Citizensh~ Emigra~ Foreigne~ .. .. .. .. ..
## # ... with 29 more variables: `1985` <chr>, `1986` <chr>, `1987` <chr>,
## # `1988` <chr>, `1989` <chr>, `1990` <chr>, `1991` <chr>, `1992` <chr>,
## # `1993` <chr>, `1994` <chr>, `1995` <chr>, `1996` <chr>, `1997` <chr>,
## # `1998` <chr>, `1999` <chr>, `2000` <chr>, `2001` <chr>, `2002` <chr>,
## # `2003` <chr>, `2004` <chr>, `2005` <chr>, `2006` <chr>, `2007` <chr>,
## # `2008` <chr>, `2009` <chr>, `2010` <chr>, `2011` <chr>, `2012` <chr>,
## # `2013` <chr>
# Yearly totals per country: reshape the 1980-2013 year columns to long form,
# drop the ".." placeholders, convert the counts to integer and sum them per
# country and year. Rows are ordered by year.
# NOTE(review): the sum runs over every Type and Coverage row of a country, so
# emigrant and immigrant counts are added together -- confirm this is intended.
tddata2 <- thedata2 %>%
  gather(year, total, `1980`:`2013`) %>%
  # ".." marks a missing value in the workbook; remove it before converting.
  filter(total != "..") %>%
  mutate(total = as.integer(total)) %>%
  group_by(CntName, year) %>%
  # summarise_each() and funs() are deprecated in dplyr; a plain summarise()
  # yields the same sums. As before, the result stays grouped by CntName.
  summarise(total = sum(total)) %>%
  arrange(year)
# To restrict the plot to a few countries, append e.g.
#   %>% filter(CntName %in% c("United States of America", "Germany", "France"))
# Animated scatter plot of migration totals, one frame per year.
# library() instead of require(): a missing package should stop the script
# here rather than return FALSE and fail later with a less clear error.
library(plotly)

# Static ggplot: total (in millions) on x, year on y, one colour per country,
# with countries ordered by descending total. `frame` and `ids` are not
# ggplot2 aesthetics; ggplotly() reads them to build the animation.
# NOTE(review): scale_fill_continuous() looks like a no-op because no fill
# aesthetic is mapped -- kept unchanged, confirm before removing.
p <- ggplot(
  tddata2,
  aes(x = total / 1000000, y = year, color = reorder(CntName, -total))
) +
  geom_point(aes(size = total, frame = year, ids = CntName)) +
  scale_fill_continuous() +
  labs(color = "Country Names", x = "Migration by Millions", y = "Years")

# Convert to plotly, set the animation options and place the heading as a
# paper-referenced annotation above the plot; the layout title is switched off.
p <- ggplotly(p) %>%
  animation_opts(2000, easing = "elastic", redraw = TRUE) %>%
  add_annotations(
    yref = "paper",
    xref = "paper",
    y = 1.1,
    x = 0,
    text = "Migration By Year/Country",
    # FALSE rather than F: F is an ordinary variable that can be reassigned.
    showarrow = FALSE,
    font = list(size = 17)
  ) %>%
  layout(title = FALSE)
p
# Third data set: the airline-safety CSV from the fivethirtyeight/data
# repository on GitHub.
theurl3 <- "https://raw.githubusercontent.com/fivethirtyeight/data/master/airline-safety/airline-safety.csv"

# Parse the comma-separated file straight from the URL (first row holds the
# column names), then print a compact column-by-column overview.
thedata3 <- read.table(theurl3, header = TRUE, sep = ",")
glimpse(thedata3)
## Observations: 56
## Variables: 8
## $ airline <fct> Aer Lingus, Aeroflot*, Aerolineas Argen...
## $ avail_seat_km_per_week <dbl> 320906734, 1197672318, 385803648, 59687...
## $ incidents_85_99 <int> 2, 76, 6, 3, 2, 14, 2, 3, 5, 7, 3, 21, ...
## $ fatal_accidents_85_99 <int> 0, 14, 0, 1, 0, 4, 1, 0, 0, 2, 1, 5, 0,...
## $ fatalities_85_99 <int> 0, 128, 0, 64, 0, 79, 329, 0, 0, 50, 1,...
## $ incidents_00_14 <int> 0, 6, 1, 5, 2, 6, 4, 5, 5, 4, 7, 17, 1,...
## $ fatal_accidents_00_14 <int> 0, 1, 0, 0, 0, 2, 1, 1, 1, 0, 0, 3, 0, ...
## $ fatalities_00_14 <int> 0, 88, 0, 0, 0, 337, 158, 7, 88, 0, 0, ...
Gather all incident totals and display a ggplot of the top 10 airlines by incidents, then compare incidents to available seat kilometers per week.
# Preview the first six rows of the raw airline table.
head(thedata3, n = 6L)
## airline avail_seat_km_per_week incidents_85_99
## 1 Aer Lingus 320906734 2
## 2 Aeroflot* 1197672318 76
## 3 Aerolineas Argentinas 385803648 6
## 4 Aeromexico* 596871813 3
## 5 Air Canada 1865253802 2
## 6 Air France 3004002661 14
## fatal_accidents_85_99 fatalities_85_99 incidents_00_14
## 1 0 0 0
## 2 14 128 6
## 3 0 0 1
## 4 1 64 5
## 5 0 0 2
## 6 4 79 6
## fatal_accidents_00_14 fatalities_00_14
## 1 0 0
## 2 1 88
## 3 0 0
## 4 0 0
## 5 0 0
## 6 2 337
# Per-airline total over the six incident/accident/fatality columns, together
# with the airline's available seat kilometres per week, restricted to the ten
# airlines with the highest totals.
# NOTE(review): `count` adds incidents, fatal accidents and fatalities from
# both periods into one number -- confirm that mixing these is intended.
tddata3 <- thedata3 %>%
  # Long form: one row per airline and measure. Columns are picked by name
  # (they are columns 3 to 8 of the raw table).
  gather(incidents, count, incidents_85_99:fatalities_00_14) %>%
  mutate(count = as.integer(count)) %>%
  filter(count > 0) %>%
  group_by(airline) %>%
  summarise(
    count = sum(count),
    # The seat-km figure is repeated on every gathered row of an airline, so
    # summing it multiplied it by the number of non-zero measures. Take the
    # single per-airline value instead.
    avail_seat_km_per_week = first(avail_seat_km_per_week)
  ) %>%
  arrange(desc(count)) %>%
  # Rank explicitly by `count`: without `wt`, top_n() uses the last column
  # (avail_seat_km_per_week), which picked the ten largest carriers instead of
  # the ten highest counts. Ties at the cut-off can return more than 10 rows.
  top_n(10, count)
# Bar chart of the total per airline, bars ordered from the highest to the
# lowest count. The legend is suppressed and the axis labels are rotated.
ggplot(tddata3, aes(x = reorder(airline, -count), y = count, fill = airline)) +
  geom_col(position = "identity", show.legend = FALSE) +
  theme(axis.text.x = element_text(angle = -90))

# Point chart: available seat kilometres per week (x) against the count (y),
# one point per airline, with point size following the count.
# (Alternative kept from the original: hide the x tick labels with
#  theme(axis.text.x = element_blank()).)
ggplot(tddata3, aes(x = avail_seat_km_per_week, y = count)) +
  geom_point(aes(color = airline, size = count))