library(tidyverse)
## -- Attaching packages ---------------------------------------------------------------------------------------------------- tidyverse 1.2.1 --
## v ggplot2 3.2.1     v purrr   0.3.2
## v tibble  2.1.3     v dplyr   0.8.3
## v tidyr   0.8.3     v stringr 1.4.0
## v readr   1.3.1     v forcats 0.4.0
## -- Conflicts ------------------------------------------------------------------------------------------------------- tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
library(dplyr)
library(ggplot2)
library(plotly)
## 
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
## 
##     last_plot
## The following object is masked from 'package:stats':
## 
##     filter
## The following object is masked from 'package:graphics':
## 
##     layout

Principles of Data Visualization and Introduction to ggplot2

I have provided you with data about the 5,000 fastest growing companies in the US, as compiled by Inc. magazine. Let's read this in:

# Load the Inc. 5000 data set from the course repository.
# `stringsAsFactors = TRUE` is spelled out so text columns are read as factors
# on every R version (the default changed to FALSE in R 4.0.0); the summaries
# printed below show factor level counts and therefore depend on it.
inc <- read.csv(
  "https://raw.githubusercontent.com/charleyferrari/CUNY_DATA_608/master/module1/Data/inc5000_data.csv",
  header = TRUE,
  stringsAsFactors = TRUE
)

And let's preview this data:

# Print the first six rows of the data frame.
head(x = inc, n = 6L)
##   Rank                         Name Growth_Rate   Revenue
## 1    1                         Fuhu      421.48 1.179e+08
## 2    2        FederalConference.com      248.31 4.960e+07
## 3    3                The HCI Group      245.45 2.550e+07
## 4    4                      Bridger      233.08 1.900e+09
## 5    5                       DataXu      213.37 8.700e+07
## 6    6 MileStone Community Builders      179.38 4.570e+07
##                       Industry Employees         City State
## 1 Consumer Products & Services       104   El Segundo    CA
## 2          Government Services        51     Dumfries    VA
## 3                       Health       132 Jacksonville    FL
## 4                       Energy        50      Addison    TX
## 5      Advertising & Marketing       220       Boston    MA
## 6                  Real Estate        63       Austin    TX
# Print a per-column summary of the data frame.
summary(object = inc)
##       Rank                          Name       Growth_Rate     
##  Min.   :   1   (Add)ventures         :   1   Min.   :  0.340  
##  1st Qu.:1252   @Properties           :   1   1st Qu.:  0.770  
##  Median :2502   1-Stop Translation USA:   1   Median :  1.420  
##  Mean   :2502   110 Consulting        :   1   Mean   :  4.612  
##  3rd Qu.:3751   11thStreetCoffee.com  :   1   3rd Qu.:  3.290  
##  Max.   :5000   123 Exteriors         :   1   Max.   :421.480  
##                 (Other)               :4995                    
##     Revenue                                  Industry      Employees      
##  Min.   :2.000e+06   IT Services                 : 733   Min.   :    1.0  
##  1st Qu.:5.100e+06   Business Products & Services: 482   1st Qu.:   25.0  
##  Median :1.090e+07   Advertising & Marketing     : 471   Median :   53.0  
##  Mean   :4.822e+07   Health                      : 355   Mean   :  232.7  
##  3rd Qu.:2.860e+07   Software                    : 342   3rd Qu.:  132.0  
##  Max.   :1.010e+10   Financial Services          : 260   Max.   :66803.0  
##                      (Other)                     :2358   NA's   :12       
##             City          State     
##  New York     : 160   CA     : 701  
##  Chicago      :  90   TX     : 387  
##  Austin       :  88   NY     : 311  
##  Houston      :  76   VA     : 283  
##  San Francisco:  75   FL     : 282  
##  Atlanta      :  74   IL     : 273  
##  (Other)      :4438   (Other):2764

Think a bit on what these summaries mean. Use the space below to add some more relevant non-visual exploratory information you think helps you understand this data:

# Compact overview: row count, column count, and each column's type and
# leading values.
glimpse(x = inc)
## Observations: 5,001
## Variables: 8
## $ Rank        <int> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,...
## $ Name        <fct> Fuhu, FederalConference.com, The HCI Group, Bridge...
## $ Growth_Rate <dbl> 421.48, 248.31, 245.45, 233.08, 213.37, 179.38, 17...
## $ Revenue     <dbl> 1.179e+08, 4.960e+07, 2.550e+07, 1.900e+09, 8.700e...
## $ Industry    <fct> Consumer Products & Services, Government Services,...
## $ Employees   <int> 104, 51, 132, 50, 220, 63, 27, 75, 97, 15, 149, 16...
## $ City        <fct> El Segundo, Dumfries, Jacksonville, Addison, Bosto...
## $ State       <fct> CA, VA, FL, TX, MA, TX, TN, CA, UT, RI, VA, CA, FL...

The output above confirms that the data set contains 5,001 rows of company information (one more than the 5,000 mentioned in the introduction) described by 8 variables: Rank, Name, Growth_Rate, Revenue, Industry, Employees, City and State.

# Ten companies with the highest growth rate: sort descending by Growth_Rate,
# keep the first ten rows, then keep only the columns of interest.
Top10_Rank <- inc %>%
  arrange(desc(Growth_Rate)) %>%
  head(n = 10) %>%
  select(Rank, Name, Growth_Rate, Industry, State)
Top10_Rank
##    Rank                         Name Growth_Rate
## 1     1                         Fuhu      421.48
## 2     2        FederalConference.com      248.31
## 3     3                The HCI Group      245.45
## 4     4                      Bridger      233.08
## 5     5                       DataXu      213.37
## 6     6 MileStone Community Builders      179.38
## 7     7        Value Payment Systems      174.04
## 8     8         Emerge Digital Group      170.64
## 9     9                    Goal Zero      169.81
## 10   10                     Yagoozon      166.89
##                        Industry State
## 1  Consumer Products & Services    CA
## 2           Government Services    VA
## 3                        Health    FL
## 4                        Energy    TX
## 5       Advertising & Marketing    MA
## 6                   Real Estate    TX
## 7            Financial Services    TN
## 8       Advertising & Marketing    CA
## 9  Consumer Products & Services    UT
## 10                       Retail    RI

The table above shows the 10 companies with the highest growth rate.

# List the New York companies, highest growth rate first, showing only the
# rank, name, growth rate and state columns.
inc %>%
  filter(State == "NY") %>%
  select(Rank, Name, Growth_Rate, State) %>%
  arrange(desc(Growth_Rate))
##     Rank                                         Name Growth_Rate State
## 1     26                                 BeenVerified       84.43    NY
## 2     30                                     Sailthru       73.22    NY
## 3     37                                 YellowHammer       67.40    NY
## 4     38                                    Conductor       67.02    NY
## 5     48                    Cinium Financial Services       53.65    NY
## 6     70                                     33Across       44.99    NY
## 7     71                                   LiveIntent       44.85    NY
## 8    124                             Quantum Networks       29.44    NY
## 9    126                     Renegade Furniture Group       29.26    NY
## 10   153                                  Regal Wings       25.13    NY
## 11   174                                   Refinery29       23.01    NY
## 12  3424                          Stemp Systems Group       19.37    NY
## 13   219                           Novisync Solutions       19.28    NY
## 14   232                               Rethink Autism       18.27    NY
## 15   250                                Gravity Media       17.24    NY
## 16   264                                    MSR Promo       16.30    NY
## 17   267                                   NUE Agency       16.04    NY
## 18   275                          Technical Solutions       15.85    NY
## 19   280                                 RosettaBooks       15.49    NY
## 20   285                                          SET       15.28    NY
## 21   299                 Ultraflex Power Technologies       14.54    NY
## 22   309                                     Ivy Exec       14.01    NY
## 23   314                                      Panjiva       13.77    NY
## 24   388                   Odyssey Telecommunications       11.40    NY
## 25   400                               Forward Health       11.07    NY
## 26   418                                         Grok       10.77    NY
## 27   450                              Carrot Creative       10.24    NY
## 28   465                                Bustin Boards        9.92    NY
## 29   490                          MASS Communications        9.33    NY
## 30   497                                  RCS Capital        9.19    NY
## 31   502                                    Largetail        9.10    NY
## 32   515                             Integra Networks        8.83    NY
## 33   521                          Happy Family Brands        8.70    NY
## 34   522                         Rockhedge Herb Farms        8.70    NY
## 35   526                             Surface Magazine        8.63    NY
## 36   568                                LocalResponse        7.94    NY
## 37   609                             SmartWatt Energy        7.54    NY
## 38   632                             Armadale Capital        7.27    NY
## 39   656                           ForwardThink Group        7.01    NY
## 40   660                                       OnDeck        6.98    NY
## 41   661                         Quality In Real Time        6.94    NY
## 42   669                                   Touchstorm        6.84    NY
## 43   701                        Thrillist Media Group        6.52    NY
## 44   709                                 Driven Local        6.45    NY
## 45   714                              The Classy Home        6.43    NY
## 46   717                            Merchant Industry        6.42    NY
## 47   728                           Vector Media Group        6.30    NY
## 48   744                                  BulbAmerica        6.16    NY
## 49   750                                   Livestream        6.13    NY
## 50   752                                 Private Prep        6.12    NY
## 51   769                                Peeled Snacks        5.94    NY
## 52   788                               Digital Energy        5.79    NY
## 53   809                               The Ruby Group        5.57    NY
## 54   815                                  Fingerpaint        5.55    NY
## 55   818                          Salzinger & Company        5.53    NY
## 56   830                        Launchpad Advertising        5.40    NY
## 57   832                      In the Know Experiences        5.39    NY
## 58   854                                    Hipercept        5.23    NY
## 59   883                 Envisage Information Systems        5.04    NY
## 60   890                                     Payoneer        5.00    NY
## 61   896                                    Optimatic        4.97    NY
## 62   922                              Rebecca Minkoff        4.75    NY
## 63   935                                        Ioxus        4.67    NY
## 64   971                       Imagine Easy Solutions        4.45    NY
## 65  1010                             Resolute Digital        4.26    NY
## 66  1020                                BarkerGilmore        4.22    NY
## 67  1028                     Direct Transport Systems        4.15    NY
## 68  1036                                       Enstoa        4.11    NY
## 69  1048                                 Curbed Media        4.06    NY
## 70  1054                                        Tarte        4.03    NY
## 71  1067                          Dionysus Consulting        3.96    NY
## 72  1069                          Systems Made Simple        3.94    NY
## 73  1091                          Blue Fountain Media        3.85    NY
## 74  1095                          UpSurge Media Group        3.84    NY
## 75  1100                                 ArkNet Media        3.82    NY
## 76  1119                                     Adafruit        3.75    NY
## 77  1129                               Likeable Media        3.72    NY
## 78  1166                     Manhattan Strategy Group        3.60    NY
## 79  1181                                     Sub Rosa        3.56    NY
## 80  1190                                    Usablenet        3.52    NY
## 81  1203                                     InRhythm        3.47    NY
## 82  1210                                     Trepoint        3.43    NY
## 83  1214                           ALAC International        3.42    NY
## 84  1234                                     Berricle        3.34    NY
## 85  1259                              Decorplanet.com        3.25    NY
## 86  1293                                Madison Logic        3.14    NY
## 87  1371                                     Impelsys        2.94    NY
## 88  1382                            City Sounds of NY        2.92    NY
## 89  1423                                Media6Degrees        2.82    NY
## 90  1442                        Global Warranty Group        2.78    NY
## 91  1443                              Oliver Staffing        2.78    NY
## 92  1451                    Professional Data Systems        2.76    NY
## 93  1458                            Dynomighty Design        2.74    NY
## 94  1485                        Cipher Tech Solutions        2.69    NY
## 95  1499                         Sterling Infosystems        2.66    NY
## 96  1503                           AdCorp Media Group        2.65    NY
## 97  1540                                   Gaffos.com        2.58    NY
## 98  1559                                     ModiFace        2.55    NY
## 99  1565                                       Droga5        2.54    NY
## 100 1609                              Enter:Marketing        2.47    NY
## 101 1612                             Andiamo Partners        2.46    NY
## 102 1622                              LJM Consultants        2.43    NY
## 103 1640                                     BlueWolf        2.38    NY
## 104 1646     New York Technology Partners - Rochester        2.37    NY
## 105 1706                                          MKG        2.27    NY
## 106 1709                                  Tribeca Nik        2.27    NY
## 107 1717                               Society Awards        2.26    NY
## 108 1723                                 Spicer Group        2.25    NY
## 109 1755                     Deep Blue Communications        2.19    NY
## 110 1767                               Vcorp Services        2.17    NY
## 111 1876                                  Servomation        2.03    NY
## 112 1877                                 MaeTec Power        2.03    NY
## 113 1879                              Durante Rentals        2.03    NY
## 114 1895                                nfrastructure        2.00    NY
## 115 1920               Reliance Star Payment Services        1.97    NY
## 116 1954                                  CleanEdison        1.94    NY
## 117 1982                             Imagine Swimming        1.89    NY
## 118 1985                      U.S. Energy Development        1.89    NY
## 119 1986                         Medical Supply Depot        1.89    NY
## 120 2067                      Signature Systems Group        1.81    NY
## 121 2077                                  Sachin Babi        1.80    NY
## 122 2089                                  React2Media        1.78    NY
## 123 2116           Charter School Business Management        1.76    NY
## 124 2142                       Liberty Jet Management        1.73    NY
## 125 2173                                PM Pediatrics        1.70    NY
## 126 2179                            Christmas Central        1.69    NY
## 127 2198                                    KJT Group        1.68    NY
## 128 2209                                        inVNT        1.67    NY
## 129 2218                                 Globo Mobile        1.67    NY
## 130 2234                                  WorkCompEDI        1.66    NY
## 131 2280                          Celerant Technology        1.60    NY
## 132 2292                         IntraLogic Solutions        1.59    NY
## 133 2336                                   BountyJobs        1.56    NY
## 134 2352                              LDJ Productions        1.54    NY
## 135 2372                     DiGennaro Communications        1.52    NY
## 136 2399                                         Rain        1.51    NY
## 137 2405                                     Sullivan        1.50    NY
## 138 2432                             Bear Metal Works        1.48    NY
## 139 2436                           Shinetech Software        1.48    NY
## 140 2452                                  Cafe Grumpy        1.46    NY
## 141 2475                  Systems Management/Planning        1.45    NY
## 142 2494                                          RVM        1.42    NY
## 143 2503                      Marvin Traub Associates        1.42    NY
## 144 2506                                       QueBIT        1.42    NY
## 145 2526                         Soleo Communications        1.40    NY
## 146 2542                             The Fortus Group        1.39    NY
## 147 2549                  Central Park West Dentistry        1.39    NY
## 148 2556                 Precision Pipeline Solutions        1.39    NY
## 149 2561                                   1st Equity        1.38    NY
## 150 2562               Advantage Communications Group        1.38    NY
## 151 2585                              Primary Support        1.36    NY
## 152 2622                                    Leviathan        1.34    NY
## 153 2656                                     TekScape        1.32    NY
## 154 2666                    Adventure in Food Trading        1.32    NY
## 155 2675                         Cedar Petrochemcials        1.31    NY
## 156 2678                               Treo Solutions        1.31    NY
## 157 2702                                Ryan Printing        1.30    NY
## 158 2706                           Primary Wave Media        1.30    NY
## 159 2718                                 CreativeFeed        1.29    NY
## 160 2747                             Stage 2 Networks        1.27    NY
## 161 2751                                      IOMedia        1.27    NY
## 162 2764                           Weiss & Associates        1.26    NY
## 163 2784                                 SportsSignup        1.25    NY
## 164 2795                                      Onco360        1.24    NY
## 165 2799                  School Choice International        1.24    NY
## 166 2805                      LeadDog Marketing Group        1.23    NY
## 167 2828                                           D4        1.22    NY
## 168 2830                                 ReSource Pro        1.22    NY
## 169 2831                              Taylor Creative        1.22    NY
## 170 2877                                Myriad Supply        1.19    NY
## 171 2889                            Logistic Dynamics        1.18    NY
## 172 2896                                         Telx        1.18    NY
## 173 2924                                 WTP Advisors        1.16    NY
## 174 2964                                      DataArt        1.14    NY
## 175 2971                                     ConServe        1.14    NY
## 176 2981                             Flexible Systems        1.13    NY
## 177 2995                           Pride Technologies        1.13    NY
## 178 2997                           Fibertech Networks        1.13    NY
## 179 3009                            5Linx Enterprises        1.12    NY
## 180 3021                                       Linium        1.11    NY
## 181 3054                                DDS Companies        1.09    NY
## 182 3056                                   VHMnetwork        1.09    NY
## 183 3060                                    Nasty Pig        1.09    NY
## 184 3097                             Gates Automotive        1.07    NY
## 185 3103                                SmartSign.com        1.07    NY
## 186 3115                               MyOTCStore.com        1.06    NY
## 187 3117                                Team Epiphany        1.06    NY
## 188 3136                       FSO Onsite Outsourcing        1.05    NY
## 189 3146                              Verge Marketing        1.04    NY
## 190 3153                       Juice Pharma Worldwide        1.04    NY
## 191 3168                              Atrium Staffing        1.03    NY
## 192 3193                         Access Display Group        1.02    NY
## 193 3210                                Dance With Me        1.01    NY
## 194 3251                             Modus Associates        0.99    NY
## 195 3262                               Sriven Systems        0.98    NY
## 196 3289                                  Butler/Till        0.97    NY
## 197 3313                            MyDailyMoment.com        0.96    NY
## 198 3351                            Bespoke Education        0.94    NY
## 199 3355                              Home Clean Home        0.94    NY
## 200 3358                                     HR Works        0.93    NY
## 201 3383                             Graduationsource        0.92    NY
## 202 3387                              Mitchell/Martin        0.92    NY
## 203 3401                      Alphaserve Technologies        0.92    NY
## 204 3500                              Magnolia Bakery        0.88    NY
## 205 3532                              The Urban Group        0.86    NY
## 206 3535                                Tully Rinckey        0.86    NY
## 207 3542                                    SweetRiot        0.86    NY
## 208 3544                       Proactive Technologies        0.86    NY
## 209 3549                           Influence Graphics        0.86    NY
## 210 3584                                     Infusion        0.84    NY
## 211 3592                                Liberty Pumps        0.84    NY
## 212 3594                                    Post Beam        0.84    NY
## 213 3601                                Best Cleaners        0.84    NY
## 214 3603                              Gerstein Fisher        0.84    NY
## 215 3632                Integrated Systems Management        0.83    NY
## 216 3643                                    Docutrend        0.82    NY
## 217 3661 Environmental Products & Services of Vermont        0.81    NY
## 218 3663                            Liquid Technology        0.81    NY
## 219 3685                 Eminent Technology Solutions        0.80    NY
## 220 3704                Infinity Consulting Solutions        0.79    NY
## 221 3716                              LISS Consulting        0.79    NY
## 222 3764       American Portfolios Financial Services        0.77    NY
## 223 3787                              Optimax Systems        0.76    NY
## 224 3789                                 Xtreme Trips        0.76    NY
## 225 3864                                ClassBook.com        0.73    NY
## 226 3871                       eTransMedia Technology        0.72    NY
## 227 3891                                     Net@Work        0.72    NY
## 228 3899                    Denihan Hospitality Group        0.71    NY
## 229 3924              Finger Lakes Technologies Group        0.70    NY
## 230 3925                                         Sinu        0.70    NY
## 231 3943                                Gersh Academy        0.70    NY
## 232 4000                                Send Word Now        0.67    NY
## 233 4003                         Ovation Travel Group        0.67    NY
## 234 4007                       Spear Physical Therapy        0.67    NY
## 235 4008                                Nutricap Labs        0.67    NY
## 236 4039                                Empire Office        0.66    NY
## 237 4054                                         BUMI        0.65    NY
## 238 4117                                   NorthPoint        0.64    NY
## 239 4128                         Cvision Technologies        0.63    NY
## 240 4153                              Everyday Health        0.62    NY
## 241 4154           Business Communications Management        0.62    NY
## 242 4158                                     Edulence        0.62    NY
## 243 4162                                  JR Products        0.62    NY
## 244 4166                               Motivators.com        0.62    NY
## 245 4170               Creative Environment Solutions        0.62    NY
## 246 4224                                     Fragomen        0.60    NY
## 247 4245                Computer Resources of America        0.59    NY
## 248 4250                                     NiCoForm        0.59    NY
## 249 4287                                   LinguaLinx        0.57    NY
## 250 4307                                   ItsHot.com        0.57    NY
## 251 4335                           New York Kids Club        0.56    NY
## 252 4336                             Cyrus Innovation        0.56    NY
## 253 4352                                      Kitware        0.55    NY
## 254 4363                                 TransPerfect        0.55    NY
## 255 4399                                Direct Agents        0.54    NY
## 256 4422                            BPA International        0.53    NY
## 257 4432                                  C2C Outdoor        0.53    NY
## 258 4447                         Select Office Suites        0.52    NY
## 259 4448                           The Atlantic Group        0.52    NY
## 260 4465                                     OpenLink        0.52    NY
## 261 4467                               Blue Telescope        0.52    NY
## 262 4471                     KJ Technology Consulting        0.51    NY
## 263 4474                     Sam Schwartz Engineering        0.51    NY
## 264 4475                        Presentation Products        0.51    NY
## 265 4490                         Catseye Pest Control        0.51    NY
## 266 4522                             Geneva Worldwide        0.50    NY
## 267 4535                       Capital Access Network        0.49    NY
## 268 4544                 ePromos Promotional Products        0.49    NY
## 269 4552                                     Paradysz        0.49    NY
## 270 4559                                       Tabush        0.48    NY
## 271 4560             Sentry Communications & Security        0.48    NY
## 272 4565                                 EquipSystems        0.48    NY
## 273 4569                          Bogota Latin Bistro        0.48    NY
## 274 4577                   Sutherland Global Services        0.48    NY
## 275 4595                                      Numerix        0.48    NY
## 276 4602                              Brite Computers        0.47    NY
## 277 4605                                 QED National        0.47    NY
## 278 4611                                      Inergex        0.47    NY
## 279 4632                                  Transfinder        0.47    NY
## 280 4639  American Petroleum Equipment & Construction        0.46    NY
## 281 4645                                DatapointLabs        0.46    NY
## 282 4646                                    Mimeo.com        0.46    NY
## 283 4652                                   SaveAround        0.46    NY
## 284 4670                                      VarData        0.45    NY
## 285 4671                                    Webucator        0.45    NY
## 286 4702             Copier Fax Business Technologies        0.44    NY
## 287 4716                                Westcon Group        0.44    NY
## 288 4727                               Arrow Security        0.44    NY
## 289 4728                         DurAmerica Brokerage        0.43    NY
## 290 4731                                  PlayScripts        0.43    NY
## 291 4732                                      Mycroft        0.43    NY
## 292 4738                                  McMurry/TMG        0.43    NY
## 293 4747                                  TravelClick        0.43    NY
## 294 4755                              Greyston Bakery        0.42    NY
## 295 4774                              The Lloyd Group        0.42    NY
## 296 4782               Life Safety Engineered Systems        0.42    NY
## 297 4800                     Documentation Strategies        0.41    NY
## 298 4803                             Benefit Resource        0.41    NY
## 299 4820                                Aluf Plastics        0.40    NY
## 300 4839                                     Makovsky        0.40    NY
## 301 4849                         GTM Payroll Services        0.40    NY
## 302 4853                                       Affect        0.39    NY
## 303 4891                 Express Air Freight Unlmited        0.38    NY
## 304 4896              Rand Engineering & Architecture        0.38    NY
## 305 4913                                Jackson Lewis        0.37    NY
## 306 4934                               Buffalo Filter        0.37    NY
## 307 4936                                         Coty        0.36    NY
## 308 4944                              McElroy Deutsch        0.36    NY
## 309 4950             Colorspec Coatings International        0.36    NY
## 310 4954                          Premium Productions        0.36    NY
## 311 4981  SmartSource Computer & Audio Visual Rentals        0.35    NY

The output above lists the companies in the data set that are located in New York, ordered from highest to lowest growth rate. There are 311 of them.

# Number of companies per state, largest first. The count column is named
# total_companies; the result stays grouped by State.
by_state <- inc %>%
  group_by(State) %>%
  count(sort = TRUE, name = "total_companies")
by_state
## # A tibble: 52 x 2
## # Groups:   State [52]
##    State total_companies
##    <fct>           <int>
##  1 CA                701
##  2 TX                387
##  3 NY                311
##  4 VA                283
##  5 FL                282
##  6 IL                273
##  7 GA                212
##  8 OH                186
##  9 MA                182
## 10 PA                164
## # ... with 42 more rows

The table above shows the number of fastest-growing companies in each state. California has the most, followed by Texas and New York.

Question 1

Create a graph that shows the distribution of companies in the dataset by State (i.e., how many are in each state). There are a lot of States, so consider which axis you should use. This visualization is ultimately going to be consumed on a ‘portrait’ oriented screen (i.e., taller than wide), which should further guide your layout choices.

# Horizontal bar chart of the number of companies per state.
# - States are ordered by their count so the ranking can be read top to
#   bottom instead of hunting through an alphabetical axis.
# - coord_flip() puts the 52 state labels on the vertical axis, which suits a
#   portrait-oriented screen.
graph <- ggplot(
  by_state,
  aes(x = reorder(State, total_companies), y = total_companies)
) +
  geom_col(color = "black", fill = "blue") +
  coord_flip() +
  labs(
    title = "Number of Companies by State",
    x = "State",
    y = "Number of companies"
  )

# A taller widget keeps the individual state labels legible.
ggplotly(graph, height = 900)

Question 2

Let's dig in on the state with the 3rd most companies in the data set. Imagine you work for the state and are interested in how many people are employed by companies in different industries. Create a plot that shows the average and/or median employment by industry for companies in this state (only use cases with full data, use R’s complete.cases() function.) In addition to this, your graph should show how variable the ranges are, and you should deal with outliers.

Note: The state-level counts computed earlier show that the state with the third most companies is New York.

# Mean and median number of employees per industry for New York companies,
# using only rows without missing values. The result is reshaped to long
# form: one row per industry and statistic, with the statistic's name in
# statType and its value in Employees.
third_place <- inc %>%
  filter(State == "NY", complete.cases(.)) %>%
  group_by(Industry) %>%
  summarise(
    Mean = mean(Employees),
    Median = median(Employees)
  ) %>%
  gather(key = "statType", value = "Employees", Mean, Median)

third_place
## # A tibble: 50 x 3
##    Industry                     statType Employees
##    <fct>                        <chr>        <dbl>
##  1 Advertising & Marketing      Mean          58.4
##  2 Business Products & Services Mean        1492. 
##  3 Computer Hardware            Mean          44  
##  4 Construction                 Mean          61  
##  5 Consumer Products & Services Mean         626. 
##  6 Education                    Mean          59.9
##  7 Energy                       Mean         129. 
##  8 Engineering                  Mean          53.5
##  9 Environmental Services       Mean         155  
## 10 Financial Services           Mean         144. 
## # ... with 40 more rows
# Side-by-side horizontal bars comparing the mean and the median number of
# employees per industry (one bar per statistic). The title and labels name
# the statistics shown: the bars are means and medians, not totals.
q2_graph <- ggplot(
  data = third_place,
  aes(x = Industry, y = Employees, fill = statType)
) +
  geom_col(position = "dodge") +
  coord_flip() +
  labs(
    title = "Mean and Median Employees by Industry in New York",
    x = "Industry",
    y = "Employees per company",
    fill = "Statistic"
  )

ggplotly(q2_graph)

Question 3

Now imagine you work for an investor and want to see which industries generate the most revenue per employee. Create a chart that makes this information clear. Once again, the distribution per industry should be shown.

# Revenue per employee by industry, using only rows without missing values:
# total revenue and total employees are summed per industry, their ratio is
# taken, and industries are sorted from highest to lowest ratio.
by_industry <- inc %>%
  filter(complete.cases(.)) %>%
  group_by(Industry) %>%
  summarize(
    Total_Rev = sum(Revenue),
    Total_Emp = sum(Employees)
  ) %>%
  mutate(Rev_per_Emp = Total_Rev / Total_Emp) %>%
  arrange(desc(Rev_per_Emp))

by_industry
## # A tibble: 25 x 4
##    Industry                       Total_Rev Total_Emp Rev_per_Emp
##    <fct>                              <dbl>     <int>       <dbl>
##  1 Computer Hardware            11885700000      9714    1223564.
##  2 Energy                       13771600000     26437     520921.
##  3 Construction                 13174300000     29099     452741.
##  4 Logistics & Transportation   14837800000     39994     371001.
##  5 Consumer Products & Services 14956400000     45464     328972.
##  6 Insurance                     2337900000      7339     318558.
##  7 Manufacturing                12603600000     43942     286824.
##  8 Retail                       10257400000     37068     276718.
##  9 Financial Services           13150900000     47693     275741.
## 10 Environmental Services        2638800000     10155     259852.
## # ... with 15 more rows
# Horizontal bar chart of revenue per employee, with industries ordered by
# that value.
q3_graph <- ggplot(
  data = by_industry,
  mapping = aes(x = reorder(Industry, Rev_per_Emp), y = Rev_per_Emp)
) +
  geom_bar(stat = "identity", color = "black", fill = "Blue") +
  coord_flip() +
  ggtitle("Revenue per Employee by Industry") +
  xlab("Industry") +
  ylab("Revenue per Employee")

ggplotly(q3_graph)

Note: The graph clearly shows that the Computer Hardware industry generates the most revenue per employee.