library(tidyverse)
## -- Attaching packages ---------------------------------------------------------------------------------------------------- tidyverse 1.2.1 --
## v ggplot2 3.2.1 v purrr 0.3.2
## v tibble 2.1.3 v dplyr 0.8.3
## v tidyr 0.8.3 v stringr 1.4.0
## v readr 1.3.1 v forcats 0.4.0
## -- Conflicts ------------------------------------------------------------------------------------------------------- tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
library(dplyr)
library(ggplot2)
library(plotly)
##
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
##
## last_plot
## The following object is masked from 'package:stats':
##
## filter
## The following object is masked from 'package:graphics':
##
## layout
Principles of Data Visualization and Introduction to ggplot2
I have provided you with data about the 5,000 fastest-growing companies in the US, as compiled by Inc. magazine. Let's read this in:
# Load the Inc. 5000 data set straight from the course repository.
# stringsAsFactors is set explicitly because its default changed from TRUE to
# FALSE in R 4.0.0; the factor columns shown in the summaries below (level
# counts in summary(), <fct> in glimpse()) rely on TRUE.
inc <- read.csv(
  "https://raw.githubusercontent.com/charleyferrari/CUNY_DATA_608/master/module1/Data/inc5000_data.csv",
  header = TRUE,
  stringsAsFactors = TRUE
)
And let's preview this data:
# Show the first six rows to get a feel for the columns.
head(inc, n = 6L)
## Rank Name Growth_Rate Revenue
## 1 1 Fuhu 421.48 1.179e+08
## 2 2 FederalConference.com 248.31 4.960e+07
## 3 3 The HCI Group 245.45 2.550e+07
## 4 4 Bridger 233.08 1.900e+09
## 5 5 DataXu 213.37 8.700e+07
## 6 6 MileStone Community Builders 179.38 4.570e+07
## Industry Employees City State
## 1 Consumer Products & Services 104 El Segundo CA
## 2 Government Services 51 Dumfries VA
## 3 Health 132 Jacksonville FL
## 4 Energy 50 Addison TX
## 5 Advertising & Marketing 220 Boston MA
## 6 Real Estate 63 Austin TX
# Per-column summary of the whole data frame.
inc %>% summary()
## Rank Name Growth_Rate
## Min. : 1 (Add)ventures : 1 Min. : 0.340
## 1st Qu.:1252 @Properties : 1 1st Qu.: 0.770
## Median :2502 1-Stop Translation USA: 1 Median : 1.420
## Mean :2502 110 Consulting : 1 Mean : 4.612
## 3rd Qu.:3751 11thStreetCoffee.com : 1 3rd Qu.: 3.290
## Max. :5000 123 Exteriors : 1 Max. :421.480
## (Other) :4995
## Revenue Industry Employees
## Min. :2.000e+06 IT Services : 733 Min. : 1.0
## 1st Qu.:5.100e+06 Business Products & Services: 482 1st Qu.: 25.0
## Median :1.090e+07 Advertising & Marketing : 471 Median : 53.0
## Mean :4.822e+07 Health : 355 Mean : 232.7
## 3rd Qu.:2.860e+07 Software : 342 3rd Qu.: 132.0
## Max. :1.010e+10 Financial Services : 260 Max. :66803.0
## (Other) :2358 NA's :12
## City State
## New York : 160 CA : 701
## Chicago : 90 TX : 387
## Austin : 88 NY : 311
## Houston : 76 VA : 283
## San Francisco: 75 FL : 282
## Atlanta : 74 IL : 273
## (Other) :4438 (Other):2764
Think a bit on what these summaries mean. Use the space below to add some more relevant non-visual exploratory information you think helps you understand this data:
# Compact overview: dimensions plus each column's type and first values.
inc %>% glimpse()
## Observations: 5,001
## Variables: 8
## $ Rank <int> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,...
## $ Name <fct> Fuhu, FederalConference.com, The HCI Group, Bridge...
## $ Growth_Rate <dbl> 421.48, 248.31, 245.45, 233.08, 213.37, 179.38, 17...
## $ Revenue <dbl> 1.179e+08, 4.960e+07, 2.550e+07, 1.900e+09, 8.700e...
## $ Industry <fct> Consumer Products & Services, Government Services,...
## $ Employees <int> 104, 51, 132, 50, 220, 63, 27, 75, 97, 15, 149, 16...
## $ City <fct> El Segundo, Dumfries, Jacksonville, Addison, Bosto...
## $ State <fct> CA, VA, FL, TX, MA, TX, TN, CA, UT, RI, VA, CA, FL...
The output above shows that the data set contains 5,001 observations (one row per company) described by 8 variables (Rank, Name, Growth_Rate, Revenue, Industry, Employees, City and State). Note that this is one more row than the 5,000 companies mentioned in the introduction.
# Ten companies with the largest growth rates, restricted to the columns
# needed to identify them.
Top10_Rank <- inc %>%
  select(Rank, Name, Growth_Rate, Industry, State) %>%
  arrange(desc(Growth_Rate)) %>%
  head(n = 10)
Top10_Rank
## Rank Name Growth_Rate
## 1 1 Fuhu 421.48
## 2 2 FederalConference.com 248.31
## 3 3 The HCI Group 245.45
## 4 4 Bridger 233.08
## 5 5 DataXu 213.37
## 6 6 MileStone Community Builders 179.38
## 7 7 Value Payment Systems 174.04
## 8 8 Emerge Digital Group 170.64
## 9 9 Goal Zero 169.81
## 10 10 Yagoozon 166.89
## Industry State
## 1 Consumer Products & Services CA
## 2 Government Services VA
## 3 Health FL
## 4 Energy TX
## 5 Advertising & Marketing MA
## 6 Real Estate TX
## 7 Financial Services TN
## 8 Advertising & Marketing CA
## 9 Consumer Products & Services UT
## 10 Retail RI
The table above lists the 10 companies with the highest growth rates.
# All New York companies, fastest growing first.
inc %>%
  select(Rank, Name, Growth_Rate, State) %>%
  filter(State == "NY") %>%
  arrange(desc(Growth_Rate))
## Rank Name Growth_Rate State
## 1 26 BeenVerified 84.43 NY
## 2 30 Sailthru 73.22 NY
## 3 37 YellowHammer 67.40 NY
## 4 38 Conductor 67.02 NY
## 5 48 Cinium Financial Services 53.65 NY
## 6 70 33Across 44.99 NY
## 7 71 LiveIntent 44.85 NY
## 8 124 Quantum Networks 29.44 NY
## 9 126 Renegade Furniture Group 29.26 NY
## 10 153 Regal Wings 25.13 NY
## 11 174 Refinery29 23.01 NY
## 12 3424 Stemp Systems Group 19.37 NY
## 13 219 Novisync Solutions 19.28 NY
## 14 232 Rethink Autism 18.27 NY
## 15 250 Gravity Media 17.24 NY
## 16 264 MSR Promo 16.30 NY
## 17 267 NUE Agency 16.04 NY
## 18 275 Technical Solutions 15.85 NY
## 19 280 RosettaBooks 15.49 NY
## 20 285 SET 15.28 NY
## 21 299 Ultraflex Power Technologies 14.54 NY
## 22 309 Ivy Exec 14.01 NY
## 23 314 Panjiva 13.77 NY
## 24 388 Odyssey Telecommunications 11.40 NY
## 25 400 Forward Health 11.07 NY
## 26 418 Grok 10.77 NY
## 27 450 Carrot Creative 10.24 NY
## 28 465 Bustin Boards 9.92 NY
## 29 490 MASS Communications 9.33 NY
## 30 497 RCS Capital 9.19 NY
## 31 502 Largetail 9.10 NY
## 32 515 Integra Networks 8.83 NY
## 33 521 Happy Family Brands 8.70 NY
## 34 522 Rockhedge Herb Farms 8.70 NY
## 35 526 Surface Magazine 8.63 NY
## 36 568 LocalResponse 7.94 NY
## 37 609 SmartWatt Energy 7.54 NY
## 38 632 Armadale Capital 7.27 NY
## 39 656 ForwardThink Group 7.01 NY
## 40 660 OnDeck 6.98 NY
## 41 661 Quality In Real Time 6.94 NY
## 42 669 Touchstorm 6.84 NY
## 43 701 Thrillist Media Group 6.52 NY
## 44 709 Driven Local 6.45 NY
## 45 714 The Classy Home 6.43 NY
## 46 717 Merchant Industry 6.42 NY
## 47 728 Vector Media Group 6.30 NY
## 48 744 BulbAmerica 6.16 NY
## 49 750 Livestream 6.13 NY
## 50 752 Private Prep 6.12 NY
## 51 769 Peeled Snacks 5.94 NY
## 52 788 Digital Energy 5.79 NY
## 53 809 The Ruby Group 5.57 NY
## 54 815 Fingerpaint 5.55 NY
## 55 818 Salzinger & Company 5.53 NY
## 56 830 Launchpad Advertising 5.40 NY
## 57 832 In the Know Experiences 5.39 NY
## 58 854 Hipercept 5.23 NY
## 59 883 Envisage Information Systems 5.04 NY
## 60 890 Payoneer 5.00 NY
## 61 896 Optimatic 4.97 NY
## 62 922 Rebecca Minkoff 4.75 NY
## 63 935 Ioxus 4.67 NY
## 64 971 Imagine Easy Solutions 4.45 NY
## 65 1010 Resolute Digital 4.26 NY
## 66 1020 BarkerGilmore 4.22 NY
## 67 1028 Direct Transport Systems 4.15 NY
## 68 1036 Enstoa 4.11 NY
## 69 1048 Curbed Media 4.06 NY
## 70 1054 Tarte 4.03 NY
## 71 1067 Dionysus Consulting 3.96 NY
## 72 1069 Systems Made Simple 3.94 NY
## 73 1091 Blue Fountain Media 3.85 NY
## 74 1095 UpSurge Media Group 3.84 NY
## 75 1100 ArkNet Media 3.82 NY
## 76 1119 Adafruit 3.75 NY
## 77 1129 Likeable Media 3.72 NY
## 78 1166 Manhattan Strategy Group 3.60 NY
## 79 1181 Sub Rosa 3.56 NY
## 80 1190 Usablenet 3.52 NY
## 81 1203 InRhythm 3.47 NY
## 82 1210 Trepoint 3.43 NY
## 83 1214 ALAC International 3.42 NY
## 84 1234 Berricle 3.34 NY
## 85 1259 Decorplanet.com 3.25 NY
## 86 1293 Madison Logic 3.14 NY
## 87 1371 Impelsys 2.94 NY
## 88 1382 City Sounds of NY 2.92 NY
## 89 1423 Media6Degrees 2.82 NY
## 90 1442 Global Warranty Group 2.78 NY
## 91 1443 Oliver Staffing 2.78 NY
## 92 1451 Professional Data Systems 2.76 NY
## 93 1458 Dynomighty Design 2.74 NY
## 94 1485 Cipher Tech Solutions 2.69 NY
## 95 1499 Sterling Infosystems 2.66 NY
## 96 1503 AdCorp Media Group 2.65 NY
## 97 1540 Gaffos.com 2.58 NY
## 98 1559 ModiFace 2.55 NY
## 99 1565 Droga5 2.54 NY
## 100 1609 Enter:Marketing 2.47 NY
## 101 1612 Andiamo Partners 2.46 NY
## 102 1622 LJM Consultants 2.43 NY
## 103 1640 BlueWolf 2.38 NY
## 104 1646 New York Technology Partners - Rochester 2.37 NY
## 105 1706 MKG 2.27 NY
## 106 1709 Tribeca Nik 2.27 NY
## 107 1717 Society Awards 2.26 NY
## 108 1723 Spicer Group 2.25 NY
## 109 1755 Deep Blue Communications 2.19 NY
## 110 1767 Vcorp Services 2.17 NY
## 111 1876 Servomation 2.03 NY
## 112 1877 MaeTec Power 2.03 NY
## 113 1879 Durante Rentals 2.03 NY
## 114 1895 nfrastructure 2.00 NY
## 115 1920 Reliance Star Payment Services 1.97 NY
## 116 1954 CleanEdison 1.94 NY
## 117 1982 Imagine Swimming 1.89 NY
## 118 1985 U.S. Energy Development 1.89 NY
## 119 1986 Medical Supply Depot 1.89 NY
## 120 2067 Signature Systems Group 1.81 NY
## 121 2077 Sachin Babi 1.80 NY
## 122 2089 React2Media 1.78 NY
## 123 2116 Charter School Business Management 1.76 NY
## 124 2142 Liberty Jet Management 1.73 NY
## 125 2173 PM Pediatrics 1.70 NY
## 126 2179 Christmas Central 1.69 NY
## 127 2198 KJT Group 1.68 NY
## 128 2209 inVNT 1.67 NY
## 129 2218 Globo Mobile 1.67 NY
## 130 2234 WorkCompEDI 1.66 NY
## 131 2280 Celerant Technology 1.60 NY
## 132 2292 IntraLogic Solutions 1.59 NY
## 133 2336 BountyJobs 1.56 NY
## 134 2352 LDJ Productions 1.54 NY
## 135 2372 DiGennaro Communications 1.52 NY
## 136 2399 Rain 1.51 NY
## 137 2405 Sullivan 1.50 NY
## 138 2432 Bear Metal Works 1.48 NY
## 139 2436 Shinetech Software 1.48 NY
## 140 2452 Cafe Grumpy 1.46 NY
## 141 2475 Systems Management/Planning 1.45 NY
## 142 2494 RVM 1.42 NY
## 143 2503 Marvin Traub Associates 1.42 NY
## 144 2506 QueBIT 1.42 NY
## 145 2526 Soleo Communications 1.40 NY
## 146 2542 The Fortus Group 1.39 NY
## 147 2549 Central Park West Dentistry 1.39 NY
## 148 2556 Precision Pipeline Solutions 1.39 NY
## 149 2561 1st Equity 1.38 NY
## 150 2562 Advantage Communications Group 1.38 NY
## 151 2585 Primary Support 1.36 NY
## 152 2622 Leviathan 1.34 NY
## 153 2656 TekScape 1.32 NY
## 154 2666 Adventure in Food Trading 1.32 NY
## 155 2675 Cedar Petrochemcials 1.31 NY
## 156 2678 Treo Solutions 1.31 NY
## 157 2702 Ryan Printing 1.30 NY
## 158 2706 Primary Wave Media 1.30 NY
## 159 2718 CreativeFeed 1.29 NY
## 160 2747 Stage 2 Networks 1.27 NY
## 161 2751 IOMedia 1.27 NY
## 162 2764 Weiss & Associates 1.26 NY
## 163 2784 SportsSignup 1.25 NY
## 164 2795 Onco360 1.24 NY
## 165 2799 School Choice International 1.24 NY
## 166 2805 LeadDog Marketing Group 1.23 NY
## 167 2828 D4 1.22 NY
## 168 2830 ReSource Pro 1.22 NY
## 169 2831 Taylor Creative 1.22 NY
## 170 2877 Myriad Supply 1.19 NY
## 171 2889 Logistic Dynamics 1.18 NY
## 172 2896 Telx 1.18 NY
## 173 2924 WTP Advisors 1.16 NY
## 174 2964 DataArt 1.14 NY
## 175 2971 ConServe 1.14 NY
## 176 2981 Flexible Systems 1.13 NY
## 177 2995 Pride Technologies 1.13 NY
## 178 2997 Fibertech Networks 1.13 NY
## 179 3009 5Linx Enterprises 1.12 NY
## 180 3021 Linium 1.11 NY
## 181 3054 DDS Companies 1.09 NY
## 182 3056 VHMnetwork 1.09 NY
## 183 3060 Nasty Pig 1.09 NY
## 184 3097 Gates Automotive 1.07 NY
## 185 3103 SmartSign.com 1.07 NY
## 186 3115 MyOTCStore.com 1.06 NY
## 187 3117 Team Epiphany 1.06 NY
## 188 3136 FSO Onsite Outsourcing 1.05 NY
## 189 3146 Verge Marketing 1.04 NY
## 190 3153 Juice Pharma Worldwide 1.04 NY
## 191 3168 Atrium Staffing 1.03 NY
## 192 3193 Access Display Group 1.02 NY
## 193 3210 Dance With Me 1.01 NY
## 194 3251 Modus Associates 0.99 NY
## 195 3262 Sriven Systems 0.98 NY
## 196 3289 Butler/Till 0.97 NY
## 197 3313 MyDailyMoment.com 0.96 NY
## 198 3351 Bespoke Education 0.94 NY
## 199 3355 Home Clean Home 0.94 NY
## 200 3358 HR Works 0.93 NY
## 201 3383 Graduationsource 0.92 NY
## 202 3387 Mitchell/Martin 0.92 NY
## 203 3401 Alphaserve Technologies 0.92 NY
## 204 3500 Magnolia Bakery 0.88 NY
## 205 3532 The Urban Group 0.86 NY
## 206 3535 Tully Rinckey 0.86 NY
## 207 3542 SweetRiot 0.86 NY
## 208 3544 Proactive Technologies 0.86 NY
## 209 3549 Influence Graphics 0.86 NY
## 210 3584 Infusion 0.84 NY
## 211 3592 Liberty Pumps 0.84 NY
## 212 3594 Post Beam 0.84 NY
## 213 3601 Best Cleaners 0.84 NY
## 214 3603 Gerstein Fisher 0.84 NY
## 215 3632 Integrated Systems Management 0.83 NY
## 216 3643 Docutrend 0.82 NY
## 217 3661 Environmental Products & Services of Vermont 0.81 NY
## 218 3663 Liquid Technology 0.81 NY
## 219 3685 Eminent Technology Solutions 0.80 NY
## 220 3704 Infinity Consulting Solutions 0.79 NY
## 221 3716 LISS Consulting 0.79 NY
## 222 3764 American Portfolios Financial Services 0.77 NY
## 223 3787 Optimax Systems 0.76 NY
## 224 3789 Xtreme Trips 0.76 NY
## 225 3864 ClassBook.com 0.73 NY
## 226 3871 eTransMedia Technology 0.72 NY
## 227 3891 Net@Work 0.72 NY
## 228 3899 Denihan Hospitality Group 0.71 NY
## 229 3924 Finger Lakes Technologies Group 0.70 NY
## 230 3925 Sinu 0.70 NY
## 231 3943 Gersh Academy 0.70 NY
## 232 4000 Send Word Now 0.67 NY
## 233 4003 Ovation Travel Group 0.67 NY
## 234 4007 Spear Physical Therapy 0.67 NY
## 235 4008 Nutricap Labs 0.67 NY
## 236 4039 Empire Office 0.66 NY
## 237 4054 BUMI 0.65 NY
## 238 4117 NorthPoint 0.64 NY
## 239 4128 Cvision Technologies 0.63 NY
## 240 4153 Everyday Health 0.62 NY
## 241 4154 Business Communications Management 0.62 NY
## 242 4158 Edulence 0.62 NY
## 243 4162 JR Products 0.62 NY
## 244 4166 Motivators.com 0.62 NY
## 245 4170 Creative Environment Solutions 0.62 NY
## 246 4224 Fragomen 0.60 NY
## 247 4245 Computer Resources of America 0.59 NY
## 248 4250 NiCoForm 0.59 NY
## 249 4287 LinguaLinx 0.57 NY
## 250 4307 ItsHot.com 0.57 NY
## 251 4335 New York Kids Club 0.56 NY
## 252 4336 Cyrus Innovation 0.56 NY
## 253 4352 Kitware 0.55 NY
## 254 4363 TransPerfect 0.55 NY
## 255 4399 Direct Agents 0.54 NY
## 256 4422 BPA International 0.53 NY
## 257 4432 C2C Outdoor 0.53 NY
## 258 4447 Select Office Suites 0.52 NY
## 259 4448 The Atlantic Group 0.52 NY
## 260 4465 OpenLink 0.52 NY
## 261 4467 Blue Telescope 0.52 NY
## 262 4471 KJ Technology Consulting 0.51 NY
## 263 4474 Sam Schwartz Engineering 0.51 NY
## 264 4475 Presentation Products 0.51 NY
## 265 4490 Catseye Pest Control 0.51 NY
## 266 4522 Geneva Worldwide 0.50 NY
## 267 4535 Capital Access Network 0.49 NY
## 268 4544 ePromos Promotional Products 0.49 NY
## 269 4552 Paradysz 0.49 NY
## 270 4559 Tabush 0.48 NY
## 271 4560 Sentry Communications & Security 0.48 NY
## 272 4565 EquipSystems 0.48 NY
## 273 4569 Bogota Latin Bistro 0.48 NY
## 274 4577 Sutherland Global Services 0.48 NY
## 275 4595 Numerix 0.48 NY
## 276 4602 Brite Computers 0.47 NY
## 277 4605 QED National 0.47 NY
## 278 4611 Inergex 0.47 NY
## 279 4632 Transfinder 0.47 NY
## 280 4639 American Petroleum Equipment & Construction 0.46 NY
## 281 4645 DatapointLabs 0.46 NY
## 282 4646 Mimeo.com 0.46 NY
## 283 4652 SaveAround 0.46 NY
## 284 4670 VarData 0.45 NY
## 285 4671 Webucator 0.45 NY
## 286 4702 Copier Fax Business Technologies 0.44 NY
## 287 4716 Westcon Group 0.44 NY
## 288 4727 Arrow Security 0.44 NY
## 289 4728 DurAmerica Brokerage 0.43 NY
## 290 4731 PlayScripts 0.43 NY
## 291 4732 Mycroft 0.43 NY
## 292 4738 McMurry/TMG 0.43 NY
## 293 4747 TravelClick 0.43 NY
## 294 4755 Greyston Bakery 0.42 NY
## 295 4774 The Lloyd Group 0.42 NY
## 296 4782 Life Safety Engineered Systems 0.42 NY
## 297 4800 Documentation Strategies 0.41 NY
## 298 4803 Benefit Resource 0.41 NY
## 299 4820 Aluf Plastics 0.40 NY
## 300 4839 Makovsky 0.40 NY
## 301 4849 GTM Payroll Services 0.40 NY
## 302 4853 Affect 0.39 NY
## 303 4891 Express Air Freight Unlmited 0.38 NY
## 304 4896 Rand Engineering & Architecture 0.38 NY
## 305 4913 Jackson Lewis 0.37 NY
## 306 4934 Buffalo Filter 0.37 NY
## 307 4936 Coty 0.36 NY
## 308 4944 McElroy Deutsch 0.36 NY
## 309 4950 Colorspec Coatings International 0.36 NY
## 310 4954 Premium Productions 0.36 NY
## 311 4981 SmartSource Computer & Audio Visual Rentals 0.35 NY
The table above lists the New York companies in the data set, ordered from highest to lowest growth rate. There are 311 of them.
# Number of companies per state, largest first.
# summarise() drops the single grouping level, so the result is a plain
# (ungrouped) tibble; the previous group_by() + count() left it grouped by
# State, which would silently affect any later dplyr verb applied to it.
by_state <- inc %>%
  group_by(State) %>%
  summarise(total_companies = n()) %>%
  arrange(desc(total_companies))
by_state
## # A tibble: 52 x 2
## # Groups: State [52]
## State total_companies
## <fct> <int>
## 1 CA 701
## 2 TX 387
## 3 NY 311
## 4 VA 283
## 5 FL 282
## 6 IL 273
## 7 GA 212
## 8 OH 186
## 9 MA 182
## 10 PA 164
## # ... with 42 more rows
The table above shows how many of the fastest-growing companies are located in each state. California has by far the most, followed by Texas and New York.
Create a graph that shows the distribution of companies in the dataset by State (ie how many are in each state). There are a lot of States, so consider which axis you should use. This visualization is ultimately going to be consumed on a ‘portrait’ oriented screen (ie taller than wide), which should further guide your layout choices.
# Horizontal bar chart of the number of companies per state.
# States are reordered by their count so the bars read from largest to
# smallest instead of alphabetically. The reordering is done on the data
# (after ungroup(), since a grouping column cannot be modified) rather than
# inside aes(), which keeps the axis and hover labels as plain "State".
graph <- by_state %>%
  ungroup() %>%
  mutate(State = reorder(State, total_companies)) %>%
  ggplot(aes(x = State, y = total_companies)) +
  geom_col(color = "black", fill = "blue") +
  # Flip the axes: with this many states, horizontal bars suit a portrait layout.
  coord_flip() +
  labs(
    title = "Number of Companies by State",
    x = "State",
    y = "Number of companies"
  )
ggplotly(graph)
Let's dig in on the state with the 3rd most companies in the data set. Imagine you work for the state and are interested in how many people are employed by companies in different industries. Create a plot that shows the average and/or median employment by industry for companies in this state (only use cases with full data, use R’s complete.cases() function.) In addition to this, your graph should show how variable the ranges are, and you should deal with outliers.
Note: The previous section already showed which state has the third most companies: New York.
# Mean and median number of employees per industry for New York companies,
# using only rows without missing values, reshaped to long form
# (one row per industry and statistic) for plotting.
third_place <- inc %>%
  filter(State == "NY", complete.cases(.)) %>%
  group_by(Industry) %>%
  summarise(
    Mean = mean(Employees),
    Median = median(Employees)
  ) %>%
  gather(key = statType, value = Employees, Mean, Median)
third_place
## # A tibble: 50 x 3
## Industry statType Employees
## <fct> <chr> <dbl>
## 1 Advertising & Marketing Mean 58.4
## 2 Business Products & Services Mean 1492.
## 3 Computer Hardware Mean 44
## 4 Construction Mean 61
## 5 Consumer Products & Services Mean 626.
## 6 Education Mean 59.9
## 7 Energy Mean 129.
## 8 Engineering Mean 53.5
## 9 Environmental Services Mean 155
## 10 Financial Services Mean 144.
## # ... with 40 more rows
# Side-by-side horizontal bars of the mean and median employees per company
# for each New York industry.
# The title previously read "Total Employed", but third_place holds per-industry
# mean and median values, not totals.
q2_graph <- ggplot(
  data = third_place,
  aes(x = Industry, y = Employees, fill = statType)
) +
  geom_col(position = "dodge") +
  coord_flip() +
  labs(
    title = "Mean and Median Employees by Industry in New York",
    x = "Industry",
    y = "Employees per company",
    fill = "Statistic"
  )
ggplotly(q2_graph)
Now imagine you work for an investor and want to see which industries generate the most revenue per employee. Create a chart that makes this information clear. Once again, the distribution per industry should be shown.
# Revenue per employee for each industry: total revenue divided by total
# head count, computed over rows without missing values and sorted from the
# highest ratio to the lowest.
by_industry <- inc[complete.cases(inc), ] %>%
  group_by(Industry) %>%
  summarise(
    Total_Rev = sum(Revenue),
    Total_Emp = sum(Employees)
  ) %>%
  mutate(Rev_per_Emp = Total_Rev / Total_Emp) %>%
  arrange(desc(Rev_per_Emp))
by_industry
## # A tibble: 25 x 4
## Industry Total_Rev Total_Emp Rev_per_Emp
## <fct> <dbl> <int> <dbl>
## 1 Computer Hardware 11885700000 9714 1223564.
## 2 Energy 13771600000 26437 520921.
## 3 Construction 13174300000 29099 452741.
## 4 Logistics & Transportation 14837800000 39994 371001.
## 5 Consumer Products & Services 14956400000 45464 328972.
## 6 Insurance 2337900000 7339 318558.
## 7 Manufacturing 12603600000 43942 286824.
## 8 Retail 10257400000 37068 276718.
## 9 Financial Services 13150900000 47693 275741.
## 10 Environmental Services 2638800000 10155 259852.
## # ... with 15 more rows
# Horizontal bar chart of revenue per employee, industries ordered by value.
q3_graph <- ggplot(
  by_industry,
  aes(x = reorder(Industry, Rev_per_Emp), y = Rev_per_Emp)
) +
  geom_bar(stat = "identity", color = "black", fill = "Blue") +
  coord_flip() +
  ggtitle("Revenue per Employee by Industry") +
  xlab("Industry") +
  ylab("Revenue per Employee")
ggplotly(q3_graph)
Note: The graph clearly shows that the Computer Hardware industry generates the most revenue per employee.