library(tidyr)
library(dplyr)
## Warning: package 'dplyr' was built under R version 3.4.2
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(ggplot2)
## Warning: package 'ggplot2' was built under R version 3.4.2
Load the Data
# Download the USCIS H-1B petition trend sheet (FY2007-FY2017). Text is kept as
# character so the comma-formatted counts can be cleaned up later.
h1b <- read.csv(
  file = url("https://raw.githubusercontent.com/mikegankhuyag/607-Projects/master/Project%202/2007_2017_H1B_trend.csv"),
  stringsAsFactors = FALSE
)
# Show the type and leading values of every column.
str(h1b)
## 'data.frame': 149 obs. of 13 variables:
## $ USCIS : chr "" "Note: Unless noted otherwise, all data are based on petitions received during a fiscal year. Note: FY2017 dat"| __truncated__ "" "Trend of H1B Petitions FY 2007 Through 2017: Receipt Volume Overview" ...
## $ Number.of.H.1B.Petition.Filings..FY2007...FY2017: chr "" "" "" "" ...
## $ X : chr "" "" "" "" ...
## $ X.1 : chr "" "" "" "" ...
## $ X.2 : chr "" "" "" "" ...
## $ X.3 : chr "" "" "" "" ...
## $ X.4 : chr "" "" "" "" ...
## $ X.5 : chr "" "" "" "" ...
## $ X.6 : chr "" "" "" "" ...
## $ X.7 : chr "" "" "" "" ...
## $ X.8 : chr "" "" "" "" ...
## $ X.9 : chr "" "" "" "" ...
## $ X.10 : chr "" "" "" "" ...
# View() opens a spreadsheet-style viewer and is only useful in an interactive
# session; skip it when the script is run by Rscript or knitted, where no
# viewer (and possibly no display) is available.
if (interactive()) {
  View(h1b)
}
I’m particularly interested in which continent has the most H-1B petitions, so let’s take the part of the data that is broken down by country.
# Rows 18-37 of the raw sheet hold the per-country petition counts.
Country <- h1b[18:37, ]
# First column is the country name, followed by one column per fiscal year
# and a grand total.
names(Country) <- c("Countries", as.character(2007:2017), "Total")
head(Country, n = 6L)
## Countries 2007 2008 2009 2010 2011
## 18 India 166,575 157,608 122,475 135,931 155,791
## 19 China, People's Republic of 26,370 24,434 22,411 21,119 23,227
## 20 Philippines 12,230 10,713 10,407 8,887 9,098
## 21 South Korea 10,730 10,277 10,704 8,721 7,480
## 22 Canada 8,562 7,111 7,871 7,342 6,761
## 23 Taiwan 5,394 4,088 4,308 4,325 4,511
## 2012 2013 2014 2015 2016 2017 Total
## 18 197,940 201,114 227,172 269,677 300,902 247,927 2,183,112
## 19 22,528 23,924 27,733 32,485 35,720 36,362 296,313
## 20 9,400 7,399 6,772 4,147 3,704 3,161 85,918
## 21 7,204 5,576 4,897 4,298 4,269 3,203 77,359
## 22 6,688 5,478 5,267 5,050 4,547 3,551 68,228
## 23 4,172 3,520 3,267 2,555 2,287 2,200 40,627
Let’s add a column identifying the continent of each country.
# Continent label for each of the 20 country rows, in row order.
# NOTE(review): Russia is filed under Asia and Turkey under Europe; both span
# two continents, so confirm this is the intended classification.
Country[["Continent"]] <- c(
  "Asia",           # India
  "Asia",           # China, People's Republic of
  "Asia",           # Philippines
  "Asia",           # South Korea
  "North America",  # Canada
  "Asia",           # Taiwan
  "North America",  # Mexico
  "Europe",         # United Kingdom
  "Asia",           # Pakistan
  "Europe",         # France
  "South America",  # Brazil
  "Asia",           # Nepal
  "Asia",           # Japan
  "Europe",         # Turkey
  "Europe",         # Germany
  "Asia",           # Iran
  "Europe",         # Italy
  "Asia",           # Russia
  "South America",  # Venezuela
  "Europe"          # Spain
)
Country
## Countries 2007 2008 2009 2010 2011
## 18 India 166,575 157,608 122,475 135,931 155,791
## 19 China, People's Republic of 26,370 24,434 22,411 21,119 23,227
## 20 Philippines 12,230 10,713 10,407 8,887 9,098
## 21 South Korea 10,730 10,277 10,704 8,721 7,480
## 22 Canada 8,562 7,111 7,871 7,342 6,761
## 23 Taiwan 5,394 4,088 4,308 4,325 4,511
## 24 Mexico 4,259 3,680 3,599 3,260 3,439
## 25 United Kingdom 5,105 4,241 4,270 3,651 3,241
## 26 Pakistan 4,259 3,803 3,683 3,012 3,033
## 27 France 4,112 3,687 3,035 2,660 2,531
## 28 Brazil 3,056 2,498 2,495 2,595 2,644
## 29 Nepal 2,775 2,538 2,724 2,467 2,169
## 30 Japan 2,913 2,374 2,253 2,225 2,172
## 31 Turkey 2,415 2,028 2,041 2,023 2,020
## 32 Germany 3,168 2,482 2,182 1,875 1,737
## 33 Iran 2,531 1,930 1,952 1,897 1,755
## 34 Italy 1,353 1,533 1,437 1,361 1,613
## 35 Russia 2,446 1,760 1,544 1,434 1,570
## 36 Venezuela 1,262 1,159 1,302 1,299 1,398
## 37 Spain 1,079 974 933 1,018 1,233
## 2012 2013 2014 2015 2016 2017 Total Continent
## 18 197,940 201,114 227,172 269,677 300,902 247,927 2,183,112 Asia
## 19 22,528 23,924 27,733 32,485 35,720 36,362 296,313 Asia
## 20 9,400 7,399 6,772 4,147 3,704 3,161 85,918 Asia
## 21 7,204 5,576 4,897 4,298 4,269 3,203 77,359 Asia
## 22 6,688 5,478 5,267 5,050 4,547 3,551 68,228 North America
## 23 4,172 3,520 3,267 2,555 2,287 2,200 40,627 Asia
## 24 3,602 2,985 2,769 2,462 2,315 2,239 34,609 North America
## 25 3,130 2,330 1,988 1,697 1,528 1,783 32,964 Europe
## 26 2,765 2,381 2,497 2,512 2,401 1,536 31,882 Asia
## 27 2,292 2,192 2,024 2,048 1,998 1,474 28,053 Europe
## 28 2,557 2,346 2,353 2,111 1,992 1,517 26,164 South America
## 29 2,066 1,788 1,598 1,512 1,504 1,249 22,390 Asia
## 30 2,030 1,755 1,664 1,553 1,481 1,077 21,497 Asia
## 31 1,966 1,658 1,665 1,711 1,709 1,177 20,413 Europe
## 32 1,650 1,319 1,256 1,164 1,006 1,127 18,966 Europe
## 33 1,676 1,362 1,331 1,230 1,152 1,332 18,148 Asia
## 34 1,922 1,722 1,865 1,894 1,639 918 17,257 Europe
## 35 1,499 1,318 1,323 1,275 1,154 948 16,271 Asia
## 36 1,540 1,370 1,339 1,247 1,208 873 13,997 South America
## 37 1,140 1,230 1,201 1,110 1,094 861 11,873 Europe
The counts are stored as text with thousands separators, so remove the commas and convert them to numbers.
# Keep the country, its continent and the five most recent fiscal years
# (columns 8-12 of Country are 2013 through 2017).
cont <- select(Country, Countries, Continent, 8:12)
# Strip the thousands separators and parse each year column as numeric.
cont[as.character(2013:2017)] <- lapply(
  cont[as.character(2013:2017)],
  function(counts) as.numeric(gsub(",", "", counts))
)
cont
## Countries Continent 2013 2014 2015 2016
## 18 India Asia 201114 227172 269677 300902
## 19 China, People's Republic of Asia 23924 27733 32485 35720
## 20 Philippines Asia 7399 6772 4147 3704
## 21 South Korea Asia 5576 4897 4298 4269
## 22 Canada North America 5478 5267 5050 4547
## 23 Taiwan Asia 3520 3267 2555 2287
## 24 Mexico North America 2985 2769 2462 2315
## 25 United Kingdom Europe 2330 1988 1697 1528
## 26 Pakistan Asia 2381 2497 2512 2401
## 27 France Europe 2192 2024 2048 1998
## 28 Brazil South America 2346 2353 2111 1992
## 29 Nepal Asia 1788 1598 1512 1504
## 30 Japan Asia 1755 1664 1553 1481
## 31 Turkey Europe 1658 1665 1711 1709
## 32 Germany Europe 1319 1256 1164 1006
## 33 Iran Asia 1362 1331 1230 1152
## 34 Italy Europe 1722 1865 1894 1639
## 35 Russia Asia 1318 1323 1275 1154
## 36 Venezuela South America 1370 1339 1247 1208
## 37 Spain Europe 1230 1201 1110 1094
## 2017
## 18 247927
## 19 36362
## 20 3161
## 21 3203
## 22 3551
## 23 2200
## 24 2239
## 25 1783
## 26 1536
## 27 1474
## 28 1517
## 29 1249
## 30 1077
## 31 1177
## 32 1127
## 33 1332
## 34 918
## 35 948
## 36 873
## 37 861
Let’s group the data by the continents we assigned.
# Per-country total over the five year columns (positions 3-7 of cont).
cont$Total5yrs <- rowSums(cont[3:7])
# Collapse to one row per continent with the summed five-year totals.
h1b_continents <- cont %>%
  group_by(Continent) %>%
  summarise(totalvisas = sum(Total5yrs))
h1b_continents
## # A tibble: 4 x 2
## Continent totalvisas
## <chr> <dbl>
## 1 Asia 1503204
## 2 Europe 46388
## 3 North America 36663
## 4 South America 16356
Create a visual graph containing the data.
# Single stacked bar: each continent's share of the five-year total.
ggplot(
  data = h1b_continents,
  mapping = aes(x = "", y = totalvisas, fill = Continent)
) +
  geom_col(width = 1)
# The same stacked bar wrapped around polar coordinates, i.e. a pie chart.
ggplot(
  data = h1b_continents,
  mapping = aes(x = "", y = totalvisas, fill = Continent)
) +
  geom_col(width = 1) +
  coord_polar(theta = "y", start = 0)
Load the business data set and inspect its structure.
# Download the 1988-2014 firm-size sheet (firms, establishments, employment,
# payroll and receipts by size class).
# stringsAsFactors = FALSE matches the H-1B import above and keeps the
# comma-formatted counts as plain text; as factors, as.numeric() on them would
# silently return level codes instead of the printed numbers.
Businesses <- read.csv(
  url("https://raw.githubusercontent.com/mikegankhuyag/607-Projects/master/Project%202/1988_2014_payroll_firmsize.csv"),
  stringsAsFactors = FALSE
)
str(Businesses)
## 'data.frame': 132 obs. of 16 variables:
## $ Private.Firms..Establishments..Employment..Annual.Payroll.and.Receipts.by.Firm.Size..1988.2014: Factor w/ 13 levels "","($000)","* Employment is measured in March, thus some firms (start-ups after March, closures before March, and seasonal "| __truncated__,..: 1 1 9 1 8 1 1 1 1 1 ...
## $ X : Factor w/ 29 levels "","1988","1989",..: 1 1 29 1 28 27 26 25 24 23 ...
## $ X.1 : Factor w/ 27 levels "","0","1,030,932,886",..: 1 1 27 1 22 21 20 19 18 16 ...
## $ X.2 : Factor w/ 116 levels "","1,858,652,147",..: 1 115 116 1 74 73 69 67 70 72 ...
## $ X.3 : Factor w/ 53 levels "","0","0 *","110,778,665",..: 1 53 3 1 52 52 52 52 52 52 ...
## $ X.4 : Factor w/ 115 levels "","0-4 *","1,038,627,904",..: 1 1 2 1 78 73 68 66 72 70 ...
## $ X.5 : Factor w/ 115 levels "","1,001,313",..: 1 1 61 1 115 110 111 105 103 2 ...
## $ X.6 : Factor w/ 115 levels "","1,085,595,864",..: 1 1 5 1 53 50 46 44 60 56 ...
## $ X.7 : Factor w/ 115 levels "","<20","18,319,642",..: 1 1 2 1 83 79 74 73 78 80 ...
## $ X.8 : Factor w/ 115 levels "","16,833,702",..: 1 1 23 1 63 61 56 54 52 59 ...
## $ X.9 : Factor w/ 115 levels "","1,717,787,820",..: 1 1 3 1 112 108 105 100 102 104 ...
## $ X.10 : Factor w/ 115 levels "","<500","1,007,156,385",..: 1 1 2 1 63 61 56 54 57 59 ...
## $ X.11 : Factor w/ 115 levels "","1,015,309",..: 1 1 83 1 56 55 51 49 45 48 ...
## $ X.12 : logi NA NA NA NA NA NA ...
## $ X.13 : logi NA NA NA NA NA NA ...
## $ X.14 : logi NA NA NA NA NA NA ...
Get rid of the empty rows and columns.
# Drop the rows and columns that carry no data:
#   rows 1-4          leading title/header rows (presumably; row 3 holds the
#                     size-class labels such as "0-4 *" and "<20")
#   rows 32,60,88,116 the rows between the stacked tables
#   rows 122-132      trailing rows after the last table
#   columns 14-16     logical columns that show only NA in str()
B2 <- Businesses[
  -c(1:4, 32, 60, 88, 116, 122:132),
  -(14:16)
]
B2
## Private.Firms..Establishments..Employment..Annual.Payroll.and.Receipts.by.Firm.Size..1988.2014
## 5 Firms
## 6
## 7
## 8
## 9
## 10
## 11
## 12
## 13
## 14
## 15
## 16
## 17
## 18
## 19
## 20
## 21
## 22
## 23
## 24
## 25
## 26
## 27
## 28
## 29
## 30
## 31
## 33 Establishments
## 34
## 35
## 36
## 37
## 38
## 39
## 40
## 41
## 42
## 43
## 44
## 45
## 46
## 47
## 48
## 49
## 50
## 51
## 52
## 53
## 54
## 55
## 56
## 57
## 58
## 59
## 61 Employment
## 62
## 63
## 64
## 65
## 66
## 67
## 68
## 69
## 70
## 71
## 72
## 73
## 74
## 75
## 76
## 77
## 78
## 79
## 80
## 81
## 82
## 83
## 84
## 85
## 86
## 87
## 89 Annual payroll
## 90 ($000)
## 91
## 92
## 93
## 94
## 95
## 96
## 97
## 98
## 99
## 100
## 101
## 102
## 103
## 104
## 105
## 106
## 107
## 108
## 109
## 110
## 111
## 112
## 113
## 114
## 115
## 117 Estimated
## 118 Receipts
## 119 ($000)
## 120
## 121
## X X.1 X.2 X.3 X.4
## 5 2014 23,836,937 5,825,458 N.A. 3,598,185
## 6 2013 23,005,620 5,775,055 N.A. 3,575,290
## 7 2012 22,735,915 5,726,160 N.A. 3,543,991
## 8 2011 22,491,080 5,684,424 N.A. 3,532,058
## 9 2010 22,110,628 5,734,538 N.A. 3,575,240
## 10 2009 21,695,828 5,767,306 N.A. 3,558,708
## 11 2008 21,351,320 5,930,132 N.A. 3,617,764
## 12 2007 21,708,021 6,049,655 N.A. 3,705,275
## 13 2006 20,768,555 6,022,127 794,622 3,670,028
## 14 2005 20,392,068 5,983,546 823,832 3,677,879
## 15 2004 19,523,741 5,885,784 802,034 3,579,714
## 16 2003 18,649,114 5,767,127 770,299 3,504,432
## 17 2002 17,646,062 5,697,759 770,041 3,465,647
## 18 2001 16,979,498 5,657,774 703,837 3,401,676
## 19 2000 16,529,955 5,652,544 726,862 3,396,732
## 20 1999 16,152,604 5,607,743 709,074 3,389,161
## 21 1998 15,708,727 5,579,177 711,899 3,376,351
## 22 1997 15,439,609 5,541,918 719,978 3,358,048
## 23 1996 N.A. 5,478,047 717,991 3,327,783
## 24 1995 N.A. 5,369,068 688,584 3,249,573
## 25 1994 N.A. 5,276,964 691,141 3,208,235
## 26 1993 N.A. 5,193,642 671,306 3,139,518
## 27 1992 14,325,000 5,095,356 644,453 3,075,280
## 28 1991 N.A. 5,051,025 N.A. 3,036,304
## 29 1990 N.A. 5,073,795 N.A. 3,020,935
## 30 1989 N.A. 5,021,315 N.A. 3,003,224
## 31 1988 N.A. 4,954,645 N.A. 2,979,905
## 33 2014 23,836,937 7,563,084 N.A. 3,603,935
## 34 2013 23,005,620 7,488,353 N.A. 3,580,637
## 35 2012 22,735,915 7,431,808 N.A. 3,549,102
## 36 2011 22,491,080 7,354,043 N.A. 3,540,155
## 37 2010 22,110,628 7,396,629 N.A. 3,582,826
## 38 2009 21,695,828 7,433,465 N.A. 3,565,433
## 39 2008 21,351,320 7,601,170 N.A. 3,624,614
## 40 2007 21,708,021 7,705,018 N.A. 3,710,700
## 41 2006 20,768,555 7,601,160 796,218 3,677,153
## 42 2005 20,392,068 7,499,702 824,952 3,684,047
## 43 2004 19,523,741 7,387,724 803,355 3,585,607
## 44 2003 18,649,114 7,254,745 772,325 3,510,352
## 45 2002 17,646,062 7,200,770 771,135 3,470,515
## 46 2001 16,979,498 7,095,302 705,612 3,409,596
## 47 2000 16,529,955 7,070,048 730,027 3,406,001
## 48 1999 16,152,604 7,008,444 711,990 3,397,778
## 49 1998 15,708,727 6,941,822 713,512 3,382,819
## 50 1997 15,439,609 6,894,869 721,844 3,364,434
## 51 1996 N.A. 6,738,476 720,241 3,338,051
## 52 1995 N.A. 6,612,721 690,772 3,259,795
## 53 1994 N.A. 6,509,065 693,992 3,218,076
## 54 1993 N.A. 6,401,233 673,408 3,147,991
## 55 1992 14,325,000 6,319,300 646,065 3,082,325
## 56 1991 N.A. 6,200,859 N.A. 3,048,830
## 57 1990 N.A. 6,175,559 N.A. 3,032,253
## 58 1989 N.A. 6,106,922 N.A. 3,014,009
## 59 1988 N.A. 6,016,367 N.A. 2,989,964
## 61 2014 0 121,069,944 0 5,940,248
## 62 2013 0 118,266,253 0 5,926,660
## 63 2012 0 115,938,468 0 5,906,506
## 64 2011 0 113,425,965 0 5,857,662
## 65 2010 0 111,970,095 0 5,926,452
## 66 2009 0 114,509,626 0 5,966,190
## 67 2008 0 120,903,551 0 6,086,291
## 68 2007 0 120,604,265 0 6,139,463
## 69 2006 0 119,917,165 0 5,959,585
## 70 2005 0 116,317,003 0 5,936,859
## 71 2004 0 115,074,924 0 5,844,637
## 72 2003 0 113,398,043 0 5,768,407
## 73 2002 0 112,400,654 0 5,697,652
## 74 2001 0 115,061,184 0 5,630,017
## 75 2000 0 114,064,976 0 5,592,980
## 76 1999 0 110,705,661 0 5,606,302
## 77 1998 0 108,117,731 0 5,584,470
## 78 1997 0 105,299,123 0 5,546,306
## 79 1996 0 102,187,297 0 5,485,712
## 80 1995 0 100,314,946 0 5,395,432
## 81 1994 0 96,721,594 0 5,318,961
## 82 1993 0 94,773,913 0 5,258,195
## 83 1992 0 92,825,797 0 5,178,909
## 84 1991 0 92,307,559 0 5,151,143
## 85 1990 0 93,469,275 0 5,116,914
## 86 1989 0 91,626,094 0 5,054,429
## 87 1988 0 87,844,303 0 5,006,203
## 89 2014 N.A. 5,940,186,911 N.A. 251,757,114
## 90 2013 N.A. 5,621,697,325 N.A. 241,347,624
## 91 2012 N.A. 5,414,255,995 N.A. 237,897,059
## 92 2011 N.A. 5,164,897,905 N.A. 230,422,086
## 93 2010 N.A. 4,940,983,370 N.A. 226,541,056
## 94 2009 N.A. 4,855,545,239 N.A. 219,913,105
## 95 2008 N.A. 5,130,509,179 N.A. 232,062,907
## 96 2007 N.A. 5,026,778,232 N.A. 234,921,325
## 97 2006 N.A. 4,792,429,911 42,278,863 229,730,040
## 98 2005 N.A. 4,482,722,481 42,182,002 220,009,104
## 99 2004 N.A. 4,253,995,732 40,043,549 205,948,113
## 100 2003 N.A. 4,040,888,841 38,404,329 197,241,064
## 101 2002 N.A. 3,943,179,606 38,127,022 193,789,233
## 102 2001 N.A. 3,989,086,323 34,289,996 187,981,555
## 103 2000 N.A. 3,879,430,052 38,594,167 186,175,556
## 104 1999 N.A. 3,554,692,909 34,264,682 177,377,607
## 105 1998 N.A. 3,309,405,533 31,634,539 168,432,551
## 106 1997 N.A. 3,047,907,469 29,732,398 158,448,270
## 107 1996 N.A. 2,848,623,049 27,583,182 150,825,321
## 108 1995 N.A. 2,665,921,824 25,787,172 141,537,925
## 109 1994 N.A. 2,487,959,727 24,081,138 134,649,352
## 110 1993 N.A. 2,363,208,106 22,361,727 128,968,107
## 111 1992 N.A. 2,272,392,408 21,432,778 124,592,441
## 112 1991 N.A. 2,145,015,851 N.A. 118,233,813
## 113 1990 N.A. 2,103,971,179 N.A. 116,856,518
## 114 1989 N.A. 1,989,941,554 N.A. 112,462,139
## 115 1988 N.A. 1,858,652,147 N.A. 108,800,891
## 117 2012 1,030,932,886 32,637,809,977 N.A. 1,442,441,113
## 118 2007 991,791,563 29,746,741,904 N.A. 1,434,680,823
## 119 2002 770,032,328 22,062,528,196 215,139,058 1,152,672,423
## 120 1997 586,315,756 18,242,632,687 190,570,902 1,038,627,904
## 121 1992 13,605,183,510 110,778,665 820,739,417
## X.5 X.6 X.7 X.8 X.9
## 5 998,953 608,502 5,205,640 513,179 87,563
## 6 992,281 600,551 5,168,122 503,033 85,264
## 7 992,716 593,641 5,130,348 494,170 83,423
## 8 978,993 592,963 5,104,014 481,496 81,243
## 9 968,075 617,089 5,160,404 475,125 81,773
## 10 1,001,313 610,777 5,170,798 495,673 83,326
## 11 1,044,065 633,141 5,294,970 526,307 90,386
## 12 1,060,250 644,842 5,410,367 532,391 88,586
## 13 1,060,787 646,816 5,377,631 535,865 90,560
## 14 1,050,062 629,946 5,357,887 520,897 87,285
## 15 1,043,448 632,682 5,255,844 526,355 86,538
## 16 1,025,497 620,387 5,150,316 515,056 84,829
## 17 1,010,804 613,880 5,090,331 508,249 82,334
## 18 1,019,105 616,064 5,036,845 518,258 85,304
## 19 1,021,210 617,087 5,035,029 515,977 84,385
## 20 1,012,954 605,693 5,007,808 501,848 81,347
## 21 1,011,849 600,167 4,988,367 494,357 80,075
## 22 1,006,897 593,696 4,958,641 487,491 79,707
## 23 996,356 585,844 4,909,983 476,312 76,136
## 24 981,094 576,866 4,807,533 469,869 76,222
## 25 964,985 563,097 4,736,317 452,383 73,267
## 26 962,481 559,602 4,661,601 445,900 71,512
## 27 945,802 551,912 4,572,994 439,084 69,156
## 28 941,296 551,299 4,528,899 439,811 68,338
## 29 952,030 562,610 4,535,575 453,732 70,465
## 30 937,202 553,449 4,493,875 443,959 69,608
## 31 923,580 540,988 4,444,473 430,640 66,708
## 33 1,010,467 641,096 5,255,498 690,583 360,894
## 34 1,003,971 634,233 5,218,841 684,963 360,590
## 35 1,005,042 630,811 5,184,955 687,272 360,207
## 36 993,101 626,981 5,160,237 651,624 350,197
## 37 982,019 652,662 5,217,507 648,386 354,313
## 38 1,015,178 646,145 5,226,756 672,753 353,510
## 39 1,056,947 667,463 5,349,024 705,430 359,902
## 40 1,073,875 682,410 5,466,985 723,385 355,853
## 41 1,073,496 678,524 5,429,173 697,755 345,719
## 42 1,062,907 662,197 5,409,151 679,382 331,999
## 43 1,055,937 666,574 5,308,118 692,677 330,447
## 44 1,037,709 655,427 5,203,488 687,107 331,496
## 45 1,024,081 652,930 5,147,526 692,775 332,508
## 46 1,033,719 650,345 5,093,660 670,477 315,856
## 47 1,035,370 652,461 5,093,832 674,106 312,112
## 48 1,027,212 643,106 5,068,096 670,822 309,211
## 49 1,025,904 639,805 5,048,528 674,503 307,294
## 50 1,022,901 639,090 5,026,425 682,580 308,633
## 51 1,013,353 624,610 4,976,014 636,285 280,635
## 52 998,264 618,268 4,876,327 638,616 283,993
## 53 982,695 608,804 4,809,575 631,324 283,782
## 54 980,865 608,922 4,737,778 631,873 285,184
## 55 964,863 606,276 4,653,464 634,713 283,719
## 56 961,391 593,302 4,603,523 593,248 260,595
## 57 970,580 599,529 4,602,362 590,496 254,747
## 58 956,347 592,901 4,563,257 586,494 252,335
## 59 943,442 583,301 4,516,707 581,622 244,697
## 61 6,570,776 8,176,519 20,687,543 20,121,588 17,085,461
## 62 6,523,516 8,058,077 20,508,253 19,697,707 16,617,417
## 63 6,527,943 7,974,340 20,408,789 19,387,249 16,266,855
## 64 6,431,931 7,961,281 20,250,874 18,880,001 15,867,437
## 65 6,358,931 8,288,385 20,573,768 18,554,372 15,868,540
## 66 6,580,830 8,191,289 20,738,309 19,389,940 16,153,254
## 67 6,878,051 8,497,391 21,461,733 20,684,691 17,547,567
## 68 6,974,591 8,656,182 21,770,236 20,922,960 17,173,728
## 69 6,973,537 8,676,398 21,609,520 21,076,875 17,537,345
## 70 6,898,483 8,453,854 21,289,196 20,444,349 16,911,040
## 71 6,852,769 8,499,681 21,197,087 20,642,614 16,757,751
## 72 6,732,132 8,329,813 20,830,352 20,186,989 16,430,229
## 73 6,639,666 8,246,053 20,583,371 19,874,069 15,908,852
## 74 6,698,077 8,274,541 20,602,635 20,370,447 16,410,367
## 75 6,708,674 8,285,731 20,587,385 20,276,634 16,260,025
## 76 6,652,370 8,129,615 20,388,287 19,703,162 15,637,643
## 77 6,643,285 8,047,650 20,275,405 19,377,614 15,411,390
## 78 6,610,374 7,962,136 20,118,816 19,109,691 15,316,863
## 79 6,541,288 7,854,502 19,881,502 18,643,192 14,649,808
## 80 6,440,349 7,734,080 19,569,861 18,422,228 14,660,421
## 81 6,332,580 7,543,777 19,195,318 17,693,995 14,118,375
## 82 6,313,651 7,498,345 19,070,191 17,420,634 13,825,238
## 83 6,202,861 7,390,874 18,772,644 17,121,010 13,307,187
## 84 6,174,730 7,386,939 18,712,812 17,146,411 13,143,390
## 85 6,251,632 7,543,360 18,911,906 17,710,042 13,544,849
## 86 6,152,151 7,420,196 18,626,776 17,353,444 13,373,640
## 87 6,060,724 7,252,715 18,319,642 16,833,702 12,761,379
## 89 235,546,762 309,924,445 797,228,321 838,405,832 803,652,747
## 90 228,080,290 297,246,083 766,673,997 799,075,150 752,414,284
## 91 224,438,258 290,990,699 753,326,016 783,571,581 730,638,284
## 92 218,085,669 284,251,614 732,759,369 746,085,051 690,509,553
## 93 212,039,611 283,246,473 721,827,140 719,061,251 665,644,629
## 94 212,718,822 278,321,099 710,953,026 719,054,001 654,811,946
## 95 222,504,912 293,534,352 748,102,171 774,589,335 706,476,693
## 96 222,419,546 292,088,277 749,429,148 768,546,555 686,862,018
## 97 214,137,111 282,193,078 726,060,229 741,917,153 660,815,715
## 98 206,178,084 269,416,918 695,604,106 700,453,403 616,524,232
## 99 195,519,100 257,802,789 659,270,002 670,418,442 587,676,161
## 100 187,418,785 246,561,569 631,221,418 635,269,094 552,003,350
## 101 182,383,776 241,410,588 617,583,597 623,716,021 535,749,956
## 102 178,881,075 236,986,003 603,848,633 624,313,095 539,384,914
## 103 174,383,913 230,564,411 591,123,880 608,446,434 527,544,627
## 104 166,598,812 217,571,005 561,547,424 564,974,625 474,607,339
## 105 159,689,162 207,062,798 535,184,511 531,231,157 446,353,485
## 106 150,877,445 193,804,539 503,130,254 494,617,183 418,452,574
## 107 144,692,446 185,490,873 481,008,640 465,229,685 384,020,002
## 108 137,083,047 175,388,093 454,009,065 437,065,364 361,060,815
## 109 131,666,587 166,475,972 432,791,911 408,053,078 335,573,696
## 110 127,133,193 159,153,336 415,254,636 385,005,072 316,183,732
## 111 122,381,613 152,830,640 399,804,694 368,969,129 298,174,483
## 112 116,794,212 146,516,583 381,544,608 352,032,797 279,436,898
## 113 114,006,469 144,450,673 375,313,660 352,390,861 279,451,864
## 114 108,002,714 136,794,734 357,259,587 332,733,188 264,144,335
## 115 103,041,106 130,326,463 342,168,460 315,751,201 244,647,178
## 117 1,148,667,880 1,403,816,975 3,994,925,968 3,910,542,918 3,911,370,787
## 118 1,144,930,232 1,395,498,431 3,975,109,486 3,792,920,977 3,612,050,221
## 119 888,342,543 1,085,595,864 3,126,610,830 2,884,696,648 2,547,423,855
## 120 797,161,654 951,050,012 2,786,839,570 2,519,756,576 2,161,615,554
## 121 705,146,922 859,446,404 2,385,332,743 2,292,331,108 1,717,787,820
## X.10 X.11
## 5 5,806,382 19,076
## 6 5,756,419 18,636
## 7 5,707,941 18,219
## 8 5,666,753 17,671
## 9 5,717,302 17,236
## 10 5,749,797 17,509
## 11 5,911,663 18,469
## 12 6,031,344 18,311
## 13 6,004,056 18,071
## 14 5,966,069 17,477
## 15 5,868,737 17,047
## 16 5,750,201 16,926
## 17 5,680,914 16,845
## 18 5,640,407 17,367
## 19 5,635,391 17,153
## 20 5,591,003 16,740
## 21 5,562,799 16,378
## 22 5,525,839 16,079
## 23 5,462,431 15,616
## 24 5,353,624 15,444
## 25 5,261,967 14,997
## 26 5,179,013 14,629
## 27 5,081,234 14,122
## 28 5,037,048 13,977
## 29 5,059,772 14,023
## 30 5,007,442 13,873
## 31 4,941,821 12,824
## 33 6,306,975 1,256,109
## 34 6,264,394 1,223,959
## 35 6,232,434 1,199,374
## 36 6,162,058 1,191,985
## 37 6,220,206 1,176,422
## 38 6,253,019 1,180,446
## 39 6,414,356 1,186,813
## 40 6,546,223 1,158,795
## 41 6,472,647 1,128,513
## 42 6,420,532 1,079,170
## 43 6,331,242 1,056,482
## 44 6,222,091 1,032,654
## 45 6,172,809 1,027,961
## 46 6,079,993 1,015,309
## 47 6,080,050 989,998
## 48 6,048,129 960,315
## 49 6,030,325 911,497
## 50 6,017,638 877,231
## 51 5,892,934 845,542
## 52 5,798,936 813,785
## 53 5,724,681 784,384
## 54 5,654,835 746,398
## 55 5,571,896 747,404
## 56 5,457,366 743,493
## 57 5,447,605 727,954
## 58 5,402,086 704,836
## 59 5,343,026 673,341
## 61 57,894,592 63,175,352
## 62 56,823,377 61,442,876
## 63 56,062,893 59,875,575
## 64 54,998,312 58,427,653
## 65 54,996,680 56,973,415
## 66 56,281,503 58,228,123
## 67 59,693,991 61,209,560
## 68 59,866,924 60,737,341
## 69 60,223,740 59,693,425
## 70 58,644,585 57,672,418
## 71 58,597,452 56,477,472
## 72 57,447,570 55,950,473
## 73 56,366,292 56,034,362
## 74 57,383,449 57,677,735
## 75 57,124,044 56,940,932
## 76 55,729,092 54,976,569
## 77 55,064,409 53,053,322
## 78 54,545,370 50,753,753
## 79 53,174,502 49,012,795
## 80 52,652,510 47,662,436
## 81 51,007,688 45,713,906
## 82 50,316,063 44,457,850
## 83 49,200,841 43,624,956
## 84 49,002,613 43,304,946
## 85 50,166,797 43,302,478
## 86 49,353,860 42,272,234
## 87 47,914,723 39,929,580
## 89 2,439,286,900 3,500,900,011
## 90 2,318,163,431 3,303,533,894
## 91 2,267,535,881 3,146,720,114
## 92 2,169,353,973 2,995,543,932
## 93 2,106,533,020 2,834,450,349
## 94 2,084,818,973 2,770,726,266
## 95 2,229,168,199 2,901,340,980
## 96 2,204,837,721 2,821,940,511
## 97 2,128,793,097 2,663,636,814
## 98 2,012,581,741 2,470,140,740
## 99 1,917,364,605 2,336,631,127
## 100 1,818,493,862 2,222,394,979
## 101 1,777,049,574 2,166,130,032
## 102 1,767,546,642 2,221,539,681
## 103 1,727,114,941 2,152,315,111
## 104 1,601,129,388 1,953,563,521
## 105 1,512,769,153 1,796,636,380
## 106 1,416,200,011 1,631,707,458
## 107 1,330,258,327 1,518,364,722
## 108 1,252,135,244 1,413,786,580
## 109 1,176,418,685 1,311,541,042
## 110 1,116,443,440 1,246,764,666
## 111 1,066,948,306 1,205,444,102
## 112 1,013,014,303 1,132,001,548
## 113 1,007,156,385 1,096,814,794
## 114 954,137,110 1,035,804,444
## 115 902,566,839 956,085,308
## 117 11,816,839,673 20,820,970,304
## 118 11,380,080,684 18,366,661,220
## 119 8,558,731,333 13,503,796,863
## 120 7,468,211,700 10,774,420,987
## 121 6,395,451,671 7,209,731,839
Separate the four tables that are stacked together in the sheet.
# B2 stacks the tables in blocks of 27 yearly rows (2014 down to 1988).
# Column 1 only holds the table label, so keep columns 2-13 (year + 11 counts).
# The trailing "Estimated Receipts" rows are not extracted.
Firms         <- B2[1:27, 2:13]
Establishment <- B2[28:54, 2:13]
Employment    <- B2[55:81, 2:13]
Payroll       <- B2[82:108, 2:13]
Assign the same column names to all four tables.
# All four tables share one layout: the year, the non-employer and employer
# totals, then one column per firm-size class. The assignment is chained so
# the label vector is written only once.
colnames(Firms) <- colnames(Establishment) <- colnames(Employment) <-
  colnames(Payroll) <- c(
    "Year", "Non-employers", "Employer Totals", "0*", "Less than 4",
    "5 to 9", "10 to 19", "Less than 20", "20 to 99", "100 to 499",
    "Less than 500", "over 500"
  )
Sort each table in ascending order by year.
# The sheet lists years newest-first; reorder every table oldest-first.
Firms         <- Firms %>% arrange(Year)
Establishment <- Establishment %>% arrange(Year)
Employment    <- Employment %>% arrange(Year)
Payroll       <- Payroll %>% arrange(Year)
# Preview the first six years of the firms table.
head(Firms, n = 6L)
## Year Non-employers Employer Totals 0* Less than 4 5 to 9 10 to 19
## 1 1988 N.A. 4,954,645 N.A. 2,979,905 923,580 540,988
## 2 1989 N.A. 5,021,315 N.A. 3,003,224 937,202 553,449
## 3 1990 N.A. 5,073,795 N.A. 3,020,935 952,030 562,610
## 4 1991 N.A. 5,051,025 N.A. 3,036,304 941,296 551,299
## 5 1992 14,325,000 5,095,356 644,453 3,075,280 945,802 551,912
## 6 1993 N.A. 5,193,642 671,306 3,139,518 962,481 559,602
## Less than 20 20 to 99 100 to 499 Less than 500 over 500
## 1 4,444,473 430,640 66,708 4,941,821 12,824
## 2 4,493,875 443,959 69,608 5,007,442 13,873
## 3 4,535,575 453,732 70,465 5,059,772 14,023
## 4 4,528,899 439,811 68,338 5,037,048 13,977
## 5 4,572,994 439,084 69,156 5,081,234 14,122
## 6 4,661,601 445,900 71,512 5,179,013 14,629
# Preview the first six years of the establishments table.
head(Establishment, n = 6L)
## Year Non-employers Employer Totals 0* Less than 4 5 to 9 10 to 19
## 1 1988 N.A. 6,016,367 N.A. 2,989,964 943,442 583,301
## 2 1989 N.A. 6,106,922 N.A. 3,014,009 956,347 592,901
## 3 1990 N.A. 6,175,559 N.A. 3,032,253 970,580 599,529
## 4 1991 N.A. 6,200,859 N.A. 3,048,830 961,391 593,302
## 5 1992 14,325,000 6,319,300 646,065 3,082,325 964,863 606,276
## 6 1993 N.A. 6,401,233 673,408 3,147,991 980,865 608,922
## Less than 20 20 to 99 100 to 499 Less than 500 over 500
## 1 4,516,707 581,622 244,697 5,343,026 673,341
## 2 4,563,257 586,494 252,335 5,402,086 704,836
## 3 4,602,362 590,496 254,747 5,447,605 727,954
## 4 4,603,523 593,248 260,595 5,457,366 743,493
## 5 4,653,464 634,713 283,719 5,571,896 747,404
## 6 4,737,778 631,873 285,184 5,654,835 746,398
# Preview the first six years of the employment table.
head(Employment, n = 6L)
## Year Non-employers Employer Totals 0* Less than 4 5 to 9 10 to 19
## 1 1988 0 87,844,303 0 5,006,203 6,060,724 7,252,715
## 2 1989 0 91,626,094 0 5,054,429 6,152,151 7,420,196
## 3 1990 0 93,469,275 0 5,116,914 6,251,632 7,543,360
## 4 1991 0 92,307,559 0 5,151,143 6,174,730 7,386,939
## 5 1992 0 92,825,797 0 5,178,909 6,202,861 7,390,874
## 6 1993 0 94,773,913 0 5,258,195 6,313,651 7,498,345
## Less than 20 20 to 99 100 to 499 Less than 500 over 500
## 1 18,319,642 16,833,702 12,761,379 47,914,723 39,929,580
## 2 18,626,776 17,353,444 13,373,640 49,353,860 42,272,234
## 3 18,911,906 17,710,042 13,544,849 50,166,797 43,302,478
## 4 18,712,812 17,146,411 13,143,390 49,002,613 43,304,946
## 5 18,772,644 17,121,010 13,307,187 49,200,841 43,624,956
## 6 19,070,191 17,420,634 13,825,238 50,316,063 44,457,850
# Preview the first six years of the payroll table.
head(Payroll, n = 6L)
## Year Non-employers Employer Totals 0* Less than 4 5 to 9
## 1 1988 N.A. 1,858,652,147 N.A. 108,800,891 103,041,106
## 2 1989 N.A. 1,989,941,554 N.A. 112,462,139 108,002,714
## 3 1990 N.A. 2,103,971,179 N.A. 116,856,518 114,006,469
## 4 1991 N.A. 2,145,015,851 N.A. 118,233,813 116,794,212
## 5 1992 N.A. 2,272,392,408 21,432,778 124,592,441 122,381,613
## 6 1993 N.A. 2,363,208,106 22,361,727 128,968,107 127,133,193
## 10 to 19 Less than 20 20 to 99 100 to 499 Less than 500
## 1 130,326,463 342,168,460 315,751,201 244,647,178 902,566,839
## 2 136,794,734 357,259,587 332,733,188 264,144,335 954,137,110
## 3 144,450,673 375,313,660 352,390,861 279,451,864 1,007,156,385
## 4 146,516,583 381,544,608 352,032,797 279,436,898 1,013,014,303
## 5 152,830,640 399,804,694 368,969,129 298,174,483 1,066,948,306
## 6 159,153,336 415,254,636 385,005,072 316,183,732 1,116,443,440
## over 500
## 1 956,085,308
## 2 1,035,804,444
## 3 1,096,814,794
## 4 1,132,001,548
## 5 1,205,444,102
## 6 1,246,764,666
library(lubridate) # for working with dates
## Warning: package 'lubridate' was built under R version 3.4.2
##
## Attaching package: 'lubridate'
## The following object is masked from 'package:base':
##
## date
library(ggplot2) # for creating graphs
library(scales) # to access breaks/formatting functions
## Warning: package 'scales' was built under R version 3.4.2
library(gridExtra) # for arranging plots
## Warning: package 'gridExtra' was built under R version 3.4.2
##
## Attaching package: 'gridExtra'
## The following object is masked from 'package:dplyr':
##
## combine
# Quick scatter plot of the number of firms in the smallest size class by year.
# The size-class columns hold comma-formatted text (e.g. "3,598,185"), so the
# commas are stripped and the values parsed as numbers. Plotting the raw text
# yields a categorical y-axis with evenly spaced, alphabetically sorted labels
# instead of a numeric one.
# Columns are referenced by name because `data = Firms` is supplied.
qplot(
  x = Year,
  y = as.numeric(gsub(",", "", `Less than 4`)),
  data = Firms, na.rm = TRUE,
  main = "Number of 0-4 Size Firms",
  xlab = "Year", ylab = "Number of Firms"
) +
  scale_y_continuous(labels = scales::comma)
# Scatter plot of the firms in the smallest size class by year.
# The counts are stored as comma-formatted text, so they are parsed to numeric
# inside aes(); otherwise ggplot treats every distinct string as a category.
# Columns are referenced by bare name rather than `Firms$...`, as aes() is
# evaluated against the `data` argument.
# NOTE(review): the title says "Employers" although the size classes appear to
# be employee counts; the wording is left unchanged, confirm.
ggplot(Firms, aes(x = Year, y = as.numeric(gsub(",", "", `Less than 4`)))) +
  geom_point(na.rm = TRUE, color = "red") +
  scale_y_continuous(labels = scales::comma) +
  ggtitle("Number of Small Businesses with Less than 4 Employers") +
  xlab("Year") + ylab("Number of Businesses")
# Overlay the smallest size class from three tables, one colour per table:
#   red = Firms, blue = Employment, green = Establishment.
# The counts are comma-formatted text, so each layer parses them to numeric.
# Left as text they form a categorical axis on which the three series cannot
# be compared by magnitude.
# Columns are referenced by bare name because every layer supplies its `data`.
Less_than_4 <- ggplot() +
  geom_point(
    data = Firms,
    aes(x = Year, y = as.numeric(gsub(",", "", `Less than 4`))),
    color = "red"
  ) +
  geom_point(
    data = Employment,
    aes(x = Year, y = as.numeric(gsub(",", "", `Less than 4`))),
    color = "blue"
  ) +
  geom_point(
    data = Establishment,
    aes(x = Year, y = as.numeric(gsub(",", "", `Less than 4`))),
    color = "green"
  ) +
  ggtitle("Number of Small Businesses with Less than 4 Employers") +
  xlab("Year") + ylab("Number of Businesses") +
  theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5))
# The y values are now numeric, so use a continuous scale with comma-formatted
# labels. The former discrete scale listed breaks that matched none of the
# plotted values and therefore drew no tick labels.
Less_than_4 + scale_y_continuous(labels = scales::comma)
# Overlay the "5 to 9" size class from all four tables, one colour per table:
#   red = Firms, blue = Employment, green = Establishment, black = Payroll.
# The counts are comma-formatted text, so each layer parses them to numeric.
# Sorted as text, "103,041,106" comes before "923,580", which put the payroll
# points in the wrong place on the former categorical axis.
# NOTE(review): payroll is reported in $000 and is orders of magnitude larger
# than the counts, so it dominates the shared y-axis; consider a separate
# panel or a log scale.
five_9 <- ggplot() +
  geom_point(
    data = Firms,
    aes(x = Year, y = as.numeric(gsub(",", "", `5 to 9`))),
    color = "red"
  ) +
  geom_point(
    data = Employment,
    aes(x = Year, y = as.numeric(gsub(",", "", `5 to 9`))),
    color = "blue"
  ) +
  geom_point(
    data = Establishment,
    aes(x = Year, y = as.numeric(gsub(",", "", `5 to 9`))),
    color = "green"
  ) +
  geom_point(
    data = Payroll,
    aes(x = Year, y = as.numeric(gsub(",", "", `5 to 9`))),
    color = "black"
  ) +
  scale_y_continuous(labels = scales::comma) +
  ggtitle("Number of Small Businesses with 5 to 9 Employers") +
  xlab("Year") + ylab("Number of Businesses") +
  theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5))
five_9
# Put the firms, employment and establishment tables side by side (row i of
# each table is the same year after sorting). The three tables share column
# names, so bind_cols() de-duplicates the repeated ones.
Small_business <- Firms %>%
  bind_cols(Employment, Establishment)
str(Small_business)
## 'data.frame': 27 obs. of 36 variables:
## $ Year : Factor w/ 29 levels "","1988","1989",..: 2 3 4 5 6 7 8 9 10 11 ...
## $ Non-employers : Factor w/ 27 levels "","0","1,030,932,886",..: 26 26 26 26 4 26 26 26 26 5 ...
## $ Employer Totals : Factor w/ 116 levels "","1,858,652,147",..: 48 49 52 51 53 56 57 58 60 61 ...
## $ 0* : Factor w/ 53 levels "","0","0 *","110,778,665",..: 52 52 52 52 22 24 28 26 36 37 ...
## $ Less than 4 : Factor w/ 115 levels "","0-4 *","1,038,627,904",..: 23 36 38 40 42 44 46 48 50 52 ...
## $ 5 to 9 : Factor w/ 115 levels "","1,001,313",..: 92 93 97 94 96 100 102 107 113 5 ...
## $ 10 to 19 : Factor w/ 115 levels "","1,085,595,864",..: 33 36 38 34 35 37 39 40 42 47 ...
## $ Less than 20 : Factor w/ 115 levels "","<20","18,319,642",..: 40 41 44 43 46 50 51 53 56 57 ...
## $ 20 to 99 : Factor w/ 115 levels "","16,833,702",..: 42 46 49 45 44 47 48 51 53 55 ...
## $ 100 to 499 : Factor w/ 115 levels "","1,717,787,820",..: 81 87 89 84 86 91 92 96 95 97 ...
## $ Less than 500 : Factor w/ 115 levels "","<500","1,007,156,385",..: 30 35 37 36 38 39 40 42 46 47 ...
## $ over 500 : Factor w/ 115 levels "","1,015,309",..: 28 30 32 31 33 34 35 36 37 38 ...
## $ Year1 : Factor w/ 29 levels "","1988","1989",..: 2 3 4 5 6 7 8 9 10 11 ...
## $ Non-employers1 : Factor w/ 27 levels "","0","1,030,932,886",..: 2 2 2 2 2 2 2 2 2 2 ...
## $ Employer Totals1: Factor w/ 116 levels "","1,858,652,147",..: 108 109 112 110 111 113 114 4 5 6 ...
## $ 0*1 : Factor w/ 53 levels "","0","0 *","110,778,665",..: 2 2 2 2 2 2 2 2 2 2 ...
## $ Less than 41 : Factor w/ 115 levels "","0-4 *","1,038,627,904",..: 88 89 90 91 92 93 94 95 96 97 ...
## $ 5 to 91 : Factor w/ 115 levels "","1,001,313",..: 62 63 66 64 65 67 68 71 74 77 ...
## $ 10 to 191 : Factor w/ 115 levels "","1,085,595,864",..: 87 90 92 88 89 91 93 94 95 97 ...
## $ Less than 201 : Factor w/ 115 levels "","<20","18,319,642",..: 3 4 7 5 6 8 9 10 11 14 ...
## $ 20 to 991 : Factor w/ 115 levels "","16,833,702",..: 2 5 8 4 3 6 7 9 11 13 ...
## $ 100 to 4991 : Factor w/ 115 levels "","1,717,787,820",..: 4 7 8 5 6 9 10 12 11 13 ...
## $ Less than 5001 : Factor w/ 115 levels "","<500","1,007,156,385",..: 31 34 68 32 33 69 70 71 72 73 ...
## $ over 5001 : Factor w/ 115 levels "","1,015,309",..: 73 74 75 76 77 78 79 80 81 82 ...
## $ Year2 : Factor w/ 29 levels "","1988","1989",..: 2 3 4 5 6 7 8 9 10 11 ...
## $ Non-employers2 : Factor w/ 27 levels "","0","1,030,932,886",..: 26 26 26 26 4 26 26 26 26 5 ...
## $ Employer Totals2: Factor w/ 116 levels "","1,858,652,147",..: 79 82 83 84 85 86 87 88 89 90 ...
## $ 0*2 : Factor w/ 53 levels "","0","0 *","110,778,665",..: 52 52 52 52 23 25 29 27 38 39 ...
## $ Less than 42 : Factor w/ 115 levels "","0-4 *","1,038,627,904",..: 24 37 39 41 43 45 47 49 51 53 ...
## $ 5 to 92 : Factor w/ 115 levels "","1,001,313",..: 95 98 104 99 101 106 109 114 10 14 ...
## $ 10 to 192 : Factor w/ 115 levels "","1,085,595,864",..: 41 43 48 45 52 55 54 61 63 70 ...
## $ Less than 202 : Factor w/ 115 levels "","<20","18,319,642",..: 42 45 47 48 49 52 54 55 58 65 ...
## $ 20 to 992 : Factor w/ 115 levels "","16,833,702",..: 74 75 76 77 83 82 81 86 85 96 ...
## $ 100 to 4992 : Factor w/ 115 levels "","1,717,787,820",..: 34 35 36 37 42 45 43 44 41 50 ...
## $ Less than 5002 : Factor w/ 115 levels "","<500","1,007,156,385",..: 41 43 44 45 49 53 58 62 65 91 ...
## $ over 5002 : Factor w/ 115 levels "","1,015,309",..: 101 103 104 105 107 106 108 109 110 111 ...
# Keep Year plus the three "Less than 4" columns (positions 1, 5, 17 and 29 of
# the combined frame) and give them short names in one step.
L4 <- setNames(
  Small_business[, c(1, 5, 17, 29)],
  c("Year", "Firms", "Employer", "Establishments")
)
L4
## Year Firms Employer Establishments
## 1 1988 2,979,905 5,006,203 2,989,964
## 2 1989 3,003,224 5,054,429 3,014,009
## 3 1990 3,020,935 5,116,914 3,032,253
## 4 1991 3,036,304 5,151,143 3,048,830
## 5 1992 3,075,280 5,178,909 3,082,325
## 6 1993 3,139,518 5,258,195 3,147,991
## 7 1994 3,208,235 5,318,961 3,218,076
## 8 1995 3,249,573 5,395,432 3,259,795
## 9 1996 3,327,783 5,485,712 3,338,051
## 10 1997 3,358,048 5,546,306 3,364,434
## 11 1998 3,376,351 5,584,470 3,382,819
## 12 1999 3,389,161 5,606,302 3,397,778
## 13 2000 3,396,732 5,592,980 3,406,001
## 14 2001 3,401,676 5,630,017 3,409,596
## 15 2002 3,465,647 5,697,652 3,470,515
## 16 2003 3,504,432 5,768,407 3,510,352
## 17 2004 3,579,714 5,844,637 3,585,607
## 18 2005 3,677,879 5,936,859 3,684,047
## 19 2006 3,670,028 5,959,585 3,677,153
## 20 2007 3,705,275 6,139,463 3,710,700
## 21 2008 3,617,764 6,086,291 3,624,614
## 22 2009 3,558,708 5,966,190 3,565,433
## 23 2010 3,575,240 5,926,452 3,582,826
## 24 2011 3,532,058 5,857,662 3,540,155
## 25 2012 3,543,991 5,906,506 3,549,102
## 26 2013 3,575,290 5,926,660 3,580,637
## 27 2014 3,598,185 5,940,248 3,603,935
# Scatter of the three L4 series against Year with a fixed colour per series:
# Employer (blue), Establishments (green), Firms (red).
# Each layer sets exactly one colour; passing both `color` and `col` to the
# same layer supplies the colour twice and triggers a duplicate-name warning.
# Columns are referenced by their full names inside aes() rather than through
# `L4$...`, which also avoids relying on `$` partial matching.
# NOTE(review): the title says "Employers"; presumably "Employees" is meant.
# Confirm before changing the label.
ggplot(data = L4) +
  geom_point(aes(x = Year, y = Employer), color = "blue") +
  geom_point(aes(x = Year, y = Establishments), color = "green") +
  geom_point(aes(x = Year, y = Firms), color = "red") +
  ggtitle("Number of Small Businesses with Less than 4 Employers") +
  xlab("Year") +
  ylab("Number of Businesses") +
  # Rotate the year labels so they do not overlap.
  theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5))
## Warning: The plyr::rename operation has created duplicates for the
## following name(s): (`colour`)
# Same three L4 series against Year, but with the colour mapped inside aes()
# to a constant label per layer so that ggplot draws a legend.
# The plot-level mapping holds only what all layers share (x = Year); the
# previous `y = value, color = variable` named columns that L4 does not have
# and only worked because every layer overrode both.
# The legend title is set explicitly because it is no longer derived from a
# plot-level colour mapping.
# NOTE(review): the title says "Employers"; presumably "Employees" is meant.
# Confirm before changing the label.
ggplot(L4, aes(x = Year)) +
  geom_point(aes(y = Employer, colour = "Employers")) +
  geom_point(aes(y = Establishments, colour = "Establishments")) +
  geom_point(aes(y = Firms, colour = "Firms")) +
  ggtitle("Number of Small Businesses with Less than 4 Employers") +
  xlab("Year") +
  ylab("Number of Businesses") +
  labs(colour = "Measure") +
  # Rotate the year labels so they do not overlap.
  theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5))
Load the data
# Read the NYC collision report straight from GitHub, keeping text columns as
# character vectors, and preview the first rows.
collision <-
  url("https://raw.githubusercontent.com/mikegankhuyag/607-Projects/master/Project%202/NYC_Collision.csv") %>%
  read.csv(stringsAsFactors = FALSE)
head(collision)
## Motor.Vehicle.Collision.Report.Statistics.Citywide X
## 1 August 2017
## 2 Collisions
## 3 GeoCode GeoCodeLabel
## 4 C CITYWIDE
## 5 M MANHATTAN
## 6 001 1st Precinct
## X.1 X.2
## 1
## 2
## 3 Number_of_Motor_Vehicle_Collisions Vehicles_or_Motorists_Involved
## 4 18727 37086
## 5 3916 7571
## 6 288 561
## X.3 X.4 X.5
## 1
## 2
## 3 Injury_or_Fatal_Collisions MotoristsInjured MotoristsKilled
## 4 3701 2325 9
## 5 579 249 1
## 6 42 24 0
## X.6 X.7 X.8 X.9
## 1
## 2
## 3 PassengInjured PassengKilled CyclistsInjured CyclistsKilled
## 4 1648 2 477 1
## 5 205 0 130 0
## 6 12 0 6 0
## X.10 X.11 X.12
## 1
## 2
## 3 PedestrInjured PedestrKilled Bicycle
## 4 705 7 606
## 5 139 3 183
## 6 10 1 8
# Show the column types of the raw import.
utils::str(collision)
## 'data.frame': 94 obs. of 14 variables:
## $ Motor.Vehicle.Collision.Report.Statistics.Citywide: chr "August 2017" "Collisions" "GeoCode" "C" ...
## $ X : chr "" "" "GeoCodeLabel" "CITYWIDE" ...
## $ X.1 : chr "" "" "Number_of_Motor_Vehicle_Collisions" "18727" ...
## $ X.2 : chr "" "" "Vehicles_or_Motorists_Involved" "37086" ...
## $ X.3 : chr "" "" "Injury_or_Fatal_Collisions" "3701" ...
## $ X.4 : chr "" "" "MotoristsInjured" "2325" ...
## $ X.5 : chr "" "" "MotoristsKilled" "9" ...
## $ X.6 : chr "" "" "PassengInjured" "1648" ...
## $ X.7 : chr "" "" "PassengKilled" "2" ...
## $ X.8 : chr "" "" "CyclistsInjured" "477" ...
## $ X.9 : chr "" "" "CyclistsKilled" "1" ...
## $ X.10 : chr "" "" "PedestrInjured" "705" ...
## $ X.11 : chr "" "" "PedestrKilled" "7" ...
## $ X.12 : chr "" "" "Bicycle" "606" ...
I currently live in Manhattan, so I am most interested in the types of collisions in Manhattan.
# Rows 6 through 27 of the raw import are the individual precinct rows that
# follow the MANHATTAN summary row; keep only those.
collision2 <- data.frame(collision[seq(from = 6, to = 27), ])
collision2
## Motor.Vehicle.Collision.Report.Statistics.Citywide
## 6 001
## 7 005
## 8 006
## 9 007
## 10 009
## 11 010
## 12 013
## 13 014
## 14 017
## 15 018
## 16 019
## 17 020
## 18 022
## 19 023
## 20 024
## 21 025
## 22 026
## 23 028
## 24 030
## 25 032
## 26 033
## 27 034
## X X.1 X.2 X.3 X.4 X.5 X.6 X.7 X.8 X.9 X.10 X.11
## 6 1st Precinct 288 561 42 24 0 12 0 6 0 10 1
## 7 5th Precinct 203 376 40 12 0 14 0 13 0 9 0
## 8 6th Precinct 130 249 18 6 0 2 0 7 0 3 0
## 9 7th Precinct 102 195 22 9 0 8 0 6 0 6 0
## 10 9th Precinct 126 241 21 9 0 6 0 6 0 5 0
## 11 10th Precinct 246 479 20 7 0 7 0 6 0 3 1
## 12 13th Precinct 220 414 47 16 0 11 0 13 0 16 0
## 13 Midtown South Precinct 323 630 28 9 0 2 0 9 0 9 0
## 14 17th Precinct 282 544 54 27 0 25 0 10 0 10 0
## 15 Midtown North Precinct 333 636 34 11 0 10 0 10 0 8 0
## 16 19th Precinct 405 792 59 32 0 11 0 11 0 14 0
## 17 20th Precinct 134 256 16 10 0 1 0 4 0 5 0
## 18 Central Park Precinct 5 6 1 0 0 0 0 0 0 1 0
## 19 23rd Precinct 135 259 12 7 0 7 0 1 0 2 0
## 20 24th Precinct 109 209 21 10 1 10 0 4 0 5 0
## 21 25th Precinct 219 432 45 24 0 32 0 5 0 8 1
## 22 26th Precinct 76 148 10 2 0 1 0 5 0 2 0
## 23 28th Precinct 85 161 20 4 0 5 0 5 0 7 0
## 24 30th Precinct 79 157 7 5 0 8 0 0 0 1 0
## 25 32nd Precinct 133 252 24 8 0 10 0 4 0 5 0
## 26 33rd Precinct 136 277 20 11 0 18 0 2 0 2 0
## 27 34th Precinct 147 297 18 6 0 5 0 3 0 8 0
## X.12
## 6 8
## 7 17
## 8 8
## 9 6
## 10 8
## 11 8
## 12 16
## 13 10
## 14 11
## 15 19
## 16 18
## 17 6
## 18 2
## 19 10
## 20 6
## 21 5
## 22 5
## 23 6
## 24 1
## 25 7
## 26 3
## 27 3
# Flip the precinct table so each precinct becomes a column: drop the GeoCode
# column (keep columns 2 to 14) and transpose.
manhattan_collision <- collision2[, 2:14] %>%
  data.frame() %>%
  t()
# The first transposed row holds the precinct labels; use it as column names.
colnames(manhattan_collision) <- manhattan_collision[1, ]
# Rows 2 to 13 are the twelve measures; the label row is left out.
manhattan_collision2 <- data.frame(manhattan_collision[2:13, ])
manhattan_collision2
## X1st.Precinct X5th.Precinct X6th.Precinct X7th.Precinct X9th.Precinct
## X.1 288 203 130 102 126
## X.2 561 376 249 195 241
## X.3 42 40 18 22 21
## X.4 24 12 6 9 9
## X.5 0 0 0 0 0
## X.6 12 14 2 8 6
## X.7 0 0 0 0 0
## X.8 6 13 7 6 6
## X.9 0 0 0 0 0
## X.10 10 9 3 6 5
## X.11 1 0 0 0 0
## X.12 8 17 8 6 8
## X10th.Precinct X13th.Precinct Midtown.South.Precinct X17th.Precinct
## X.1 246 220 323 282
## X.2 479 414 630 544
## X.3 20 47 28 54
## X.4 7 16 9 27
## X.5 0 0 0 0
## X.6 7 11 2 25
## X.7 0 0 0 0
## X.8 6 13 9 10
## X.9 0 0 0 0
## X.10 3 16 9 10
## X.11 1 0 0 0
## X.12 8 16 10 11
## Midtown.North.Precinct X19th.Precinct X20th.Precinct
## X.1 333 405 134
## X.2 636 792 256
## X.3 34 59 16
## X.4 11 32 10
## X.5 0 0 0
## X.6 10 11 1
## X.7 0 0 0
## X.8 10 11 4
## X.9 0 0 0
## X.10 8 14 5
## X.11 0 0 0
## X.12 19 18 6
## Central.Park.Precinct X23rd.Precinct X24th.Precinct X25th.Precinct
## X.1 5 135 109 219
## X.2 6 259 209 432
## X.3 1 12 21 45
## X.4 0 7 10 24
## X.5 0 0 1 0
## X.6 0 7 10 32
## X.7 0 0 0 0
## X.8 0 1 4 5
## X.9 0 0 0 0
## X.10 1 2 5 8
## X.11 0 0 0 1
## X.12 2 10 6 5
## X26th.Precinct X28th.Precinct X30th.Precinct X32nd.Precinct
## X.1 76 85 79 133
## X.2 148 161 157 252
## X.3 10 20 7 24
## X.4 2 4 5 8
## X.5 0 0 0 0
## X.6 1 5 8 10
## X.7 0 0 0 0
## X.8 5 5 0 4
## X.9 0 0 0 0
## X.10 2 7 1 5
## X.11 0 0 0 0
## X.12 5 6 1 7
## X33rd.Precinct X34th.Precinct
## X.1 136 147
## X.2 277 297
## X.3 20 18
## X.4 11 6
## X.5 0 0
## X.6 18 5
## X.7 0 0
## X.8 2 3
## X.9 0 0
## X.10 2 8
## X.11 0 0
## X.12 3 3
Change the data to numeric values
# Convert every column of manhattan_collision2 to numeric in one pass instead
# of repeating the same statement once per precinct. At this point the frame
# holds only the 22 precinct columns, so all of them are converted.
# Values go through as.character() first so that a factor column would be
# converted by its label rather than by its internal level code.
# Assigning into `manhattan_collision2[]` keeps the row and column names.
manhattan_collision2[] <- lapply(
  manhattan_collision2,
  function(column) as.numeric(as.character(column))
)
manhattan_collision2
## X1st.Precinct X5th.Precinct X6th.Precinct X7th.Precinct X9th.Precinct
## X.1 288 203 130 102 126
## X.2 561 376 249 195 241
## X.3 42 40 18 22 21
## X.4 24 12 6 9 9
## X.5 0 0 0 0 0
## X.6 12 14 2 8 6
## X.7 0 0 0 0 0
## X.8 6 13 7 6 6
## X.9 0 0 0 0 0
## X.10 10 9 3 6 5
## X.11 1 0 0 0 0
## X.12 8 17 8 6 8
## X10th.Precinct X13th.Precinct Midtown.South.Precinct X17th.Precinct
## X.1 246 220 323 282
## X.2 479 414 630 544
## X.3 20 47 28 54
## X.4 7 16 9 27
## X.5 0 0 0 0
## X.6 7 11 2 25
## X.7 0 0 0 0
## X.8 6 13 9 10
## X.9 0 0 0 0
## X.10 3 16 9 10
## X.11 1 0 0 0
## X.12 8 16 10 11
## Midtown.North.Precinct X19th.Precinct X20th.Precinct
## X.1 333 405 134
## X.2 636 792 256
## X.3 34 59 16
## X.4 11 32 10
## X.5 0 0 0
## X.6 10 11 1
## X.7 0 0 0
## X.8 10 11 4
## X.9 0 0 0
## X.10 8 14 5
## X.11 0 0 0
## X.12 19 18 6
## Central.Park.Precinct X23rd.Precinct X24th.Precinct X25th.Precinct
## X.1 5 135 109 219
## X.2 6 259 209 432
## X.3 1 12 21 45
## X.4 0 7 10 24
## X.5 0 0 1 0
## X.6 0 7 10 32
## X.7 0 0 0 0
## X.8 0 1 4 5
## X.9 0 0 0 0
## X.10 1 2 5 8
## X.11 0 0 0 1
## X.12 2 10 6 5
## X26th.Precinct X28th.Precinct X30th.Precinct X32nd.Precinct
## X.1 76 85 79 133
## X.2 148 161 157 252
## X.3 10 20 7 24
## X.4 2 4 5 8
## X.5 0 0 0 0
## X.6 1 5 8 10
## X.7 0 0 0 0
## X.8 5 5 0 4
## X.9 0 0 0 0
## X.10 2 7 1 5
## X.11 0 0 0 0
## X.12 5 6 1 7
## X33rd.Precinct X34th.Precinct
## X.1 136 147
## X.2 277 297
## X.3 20 18
## X.4 11 6
## X.5 0 0
## X.6 18 5
## X.7 0 0
## X.8 2 3
## X.9 0 0
## X.10 2 8
## X.11 0 0
## X.12 3 3
Find the sum and the percentages
# Row totals across ALL precinct columns. The columns are selected by name
# (every precinct column ends in "Precinct") rather than by position: the
# previous positional range 2:22 silently left out the first column
# (X1st.Precinct), so every total was too low.
manhattan_collision2$Total <- rowSums(
  manhattan_collision2[grepl("Precinct$", names(manhattan_collision2))]
)
# Grand total over all rows, used as the denominator for the percentages.
# NOTE(review): the rows are different measures (collisions, vehicles
# involved, injuries, ...), so this sum mixes units; confirm that a share of
# this grand total is the intended statistic.
Total_incidents <- sum(manhattan_collision2$Total)
# Share of each row in the grand total, in percent, rounded to 2 decimals.
manhattan_collision2$Percent <- round(
  manhattan_collision2$Total / Total_incidents * 100,
  digits = 2
)
manhattan_collision2
## X1st.Precinct X5th.Precinct X6th.Precinct X7th.Precinct X9th.Precinct
## X.1 288 203 130 102 126
## X.2 561 376 249 195 241
## X.3 42 40 18 22 21
## X.4 24 12 6 9 9
## X.5 0 0 0 0 0
## X.6 12 14 2 8 6
## X.7 0 0 0 0 0
## X.8 6 13 7 6 6
## X.9 0 0 0 0 0
## X.10 10 9 3 6 5
## X.11 1 0 0 0 0
## X.12 8 17 8 6 8
## X10th.Precinct X13th.Precinct Midtown.South.Precinct X17th.Precinct
## X.1 246 220 323 282
## X.2 479 414 630 544
## X.3 20 47 28 54
## X.4 7 16 9 27
## X.5 0 0 0 0
## X.6 7 11 2 25
## X.7 0 0 0 0
## X.8 6 13 9 10
## X.9 0 0 0 0
## X.10 3 16 9 10
## X.11 1 0 0 0
## X.12 8 16 10 11
## Midtown.North.Precinct X19th.Precinct X20th.Precinct
## X.1 333 405 134
## X.2 636 792 256
## X.3 34 59 16
## X.4 11 32 10
## X.5 0 0 0
## X.6 10 11 1
## X.7 0 0 0
## X.8 10 11 4
## X.9 0 0 0
## X.10 8 14 5
## X.11 0 0 0
## X.12 19 18 6
## Central.Park.Precinct X23rd.Precinct X24th.Precinct X25th.Precinct
## X.1 5 135 109 219
## X.2 6 259 209 432
## X.3 1 12 21 45
## X.4 0 7 10 24
## X.5 0 0 1 0
## X.6 0 7 10 32
## X.7 0 0 0 0
## X.8 0 1 4 5
## X.9 0 0 0 0
## X.10 1 2 5 8
## X.11 0 0 0 1
## X.12 2 10 6 5
## X26th.Precinct X28th.Precinct X30th.Precinct X32nd.Precinct
## X.1 76 85 79 133
## X.2 148 161 157 252
## X.3 10 20 7 24
## X.4 2 4 5 8
## X.5 0 0 0 0
## X.6 1 5 8 10
## X.7 0 0 0 0
## X.8 5 5 0 4
## X.9 0 0 0 0
## X.10 2 7 1 5
## X.11 0 0 0 0
## X.12 5 6 1 7
## X33rd.Precinct X34th.Precinct Total Percent
## X.1 136 147 3628 30.17
## X.2 277 297 7010 58.30
## X.3 20 18 537 4.47
## X.4 11 6 225 1.87
## X.5 0 0 1 0.01
## X.6 18 5 193 1.61
## X.7 0 0 0 0.00
## X.8 2 3 124 1.03
## X.9 0 0 0 0.00
## X.10 2 8 129 1.07
## X.11 0 0 2 0.02
## X.12 3 3 175 1.46
Add a column for the type of collisions.
# Label each of the twelve rows with the measure it holds, in row order.
manhattan_collision2$Type <- c(
  "Number_of_Motor_Vehicle_Collisions",
  "Vehicles_or_Motorists_Involved",
  "Injury_or_Fatal_Collisions",
  "MotoristsInjured",
  "MotoristsKilled",
  "PassengInjured",
  "PassengKilled",
  "CyclistsInjured",
  "CyclistsKilled",
  "PedestrInjured",
  "PedestrKilled",
  "Bicycle"
)
# Same data with Type moved to the front for easier reading.
manhattan_collision3 <- select(manhattan_collision2, Type, everything())
manhattan_collision3
## Type X1st.Precinct X5th.Precinct
## X.1 Number_of_Motor_Vehicle_Collisions 288 203
## X.2 Vehicles_or_Motorists_Involved 561 376
## X.3 Injury_or_Fatal_Collisions 42 40
## X.4 MotoristsInjured 24 12
## X.5 MotoristsKilled 0 0
## X.6 PassengInjured 12 14
## X.7 PassengKilled 0 0
## X.8 CyclistsInjured 6 13
## X.9 CyclistsKilled 0 0
## X.10 PedestrInjured 10 9
## X.11 PedestrKilled 1 0
## X.12 Bicycle 8 17
## X6th.Precinct X7th.Precinct X9th.Precinct X10th.Precinct
## X.1 130 102 126 246
## X.2 249 195 241 479
## X.3 18 22 21 20
## X.4 6 9 9 7
## X.5 0 0 0 0
## X.6 2 8 6 7
## X.7 0 0 0 0
## X.8 7 6 6 6
## X.9 0 0 0 0
## X.10 3 6 5 3
## X.11 0 0 0 1
## X.12 8 6 8 8
## X13th.Precinct Midtown.South.Precinct X17th.Precinct
## X.1 220 323 282
## X.2 414 630 544
## X.3 47 28 54
## X.4 16 9 27
## X.5 0 0 0
## X.6 11 2 25
## X.7 0 0 0
## X.8 13 9 10
## X.9 0 0 0
## X.10 16 9 10
## X.11 0 0 0
## X.12 16 10 11
## Midtown.North.Precinct X19th.Precinct X20th.Precinct
## X.1 333 405 134
## X.2 636 792 256
## X.3 34 59 16
## X.4 11 32 10
## X.5 0 0 0
## X.6 10 11 1
## X.7 0 0 0
## X.8 10 11 4
## X.9 0 0 0
## X.10 8 14 5
## X.11 0 0 0
## X.12 19 18 6
## Central.Park.Precinct X23rd.Precinct X24th.Precinct X25th.Precinct
## X.1 5 135 109 219
## X.2 6 259 209 432
## X.3 1 12 21 45
## X.4 0 7 10 24
## X.5 0 0 1 0
## X.6 0 7 10 32
## X.7 0 0 0 0
## X.8 0 1 4 5
## X.9 0 0 0 0
## X.10 1 2 5 8
## X.11 0 0 0 1
## X.12 2 10 6 5
## X26th.Precinct X28th.Precinct X30th.Precinct X32nd.Precinct
## X.1 76 85 79 133
## X.2 148 161 157 252
## X.3 10 20 7 24
## X.4 2 4 5 8
## X.5 0 0 0 0
## X.6 1 5 8 10
## X.7 0 0 0 0
## X.8 5 5 0 4
## X.9 0 0 0 0
## X.10 2 7 1 5
## X.11 0 0 0 0
## X.12 5 6 1 7
## X33rd.Precinct X34th.Precinct Total Percent
## X.1 136 147 3628 30.17
## X.2 277 297 7010 58.30
## X.3 20 18 537 4.47
## X.4 11 6 225 1.87
## X.5 0 0 1 0.01
## X.6 18 5 193 1.61
## X.7 0 0 0 0.00
## X.8 2 3 124 1.03
## X.9 0 0 0 0.00
## X.10 2 8 129 1.07
## X.11 0 0 2 0.02
## X.12 3 3 175 1.46
Observe the data
# Bar chart of the row totals, one bar per measure, coloured by measure.
ggplot(manhattan_collision2, aes(x = Type, y = Total, fill = Type)) +
  geom_bar(stat = "identity", width = 1) +
  labs(
    title = "Manhattan Collisions in August",
    x = "Type",
    y = "Amount"
  ) +
  # Rotate the measure names so they do not overlap.
  theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5))
# Single stacked bar showing each measure's share of the grand total.
# `Percent` is taken from the plot's own data frame; the earlier version
# pulled it from a different object (manhattan_collision3$Percent), which
# bypasses the `data` argument and breaks if the two frames ever differ.
# The y axis is labelled as a percentage because that is what is plotted.
ggplot(manhattan_collision2, aes(x = "", y = Percent, fill = Type)) +
  geom_bar(width = 1, stat = "identity") +
  ggtitle("Percentage of Manhattan Collisions in August") +
  xlab("") +
  ylab("Percent") +
  theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5))