library(tidyverse)
## ── Attaching packages ────────────────────────────────────────────────────────── tidyverse 1.3.0 ──
## ✓ ggplot2 3.2.1 ✓ purrr 0.3.3
## ✓ tibble 2.1.3 ✓ dplyr 0.8.4
## ✓ tidyr 1.0.2 ✓ stringr 1.4.0
## ✓ readr 1.3.1 ✓ forcats 0.4.0
## ── Conflicts ───────────────────────────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
library(dplyr)
# Download the CEO salaries CSV and keep it as the tibble used throughout the
# report; column types are guessed by read_csv.
ceo.data2 <- read_csv(file = "http://asayanalytics.com/ceo_salaries-csv")
## Parsed with column specification:
## cols(
## .default = col_double(),
## WideIndustry = col_character(),
## Company = col_character(),
## CEO = col_character(),
## CityofBirth = col_character(),
## StateofBirth = col_character(),
## Undergrad = col_character(),
## UGDegree = col_character(),
## Graduate = col_character(),
## GradDegree = col_character(),
## Bonus = col_character(),
## Industry = col_character()
## )
## See spec(...) for full column specifications.
This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown see http://rmarkdown.rstudio.com.
When you click the Knit button, a document will be generated that includes both content and the output of any embedded R code chunks within the document. You can embed an R code chunk like this:
### Summary of CEO data
# Print per-column summary statistics for the CEO data set.
ceo.data2 %>%
  summary()
## TotalComp WideIndustry Company CEO
## Min. : 28816 Length:800 Length:800 Length:800
## 1st Qu.: 788446 Class :character Class :character Class :character
## Median : 1304470 Mode :character Mode :character Mode :character
## Mean : 2818743
## 3rd Qu.: 2511430
## Max. :203020000
## NA's :10
## CityofBirth StateofBirth Age Undergrad
## Length:800 Length:800 Min. :29.00 Length:800
## Class :character Class :character 1st Qu.:52.00 Class :character
## Mode :character Mode :character Median :57.00 Mode :character
## Mean :56.33
## 3rd Qu.:61.00
## Max. :81.00
##
## UGDegree UGDate AgeOfUnder Graduate
## Length:800 Min. :35.00 Min. :15.00 Length:800
## Class :character 1st Qu.:55.00 1st Qu.:21.00 Class :character
## Mode :character Median :60.00 Median :22.00 Mode :character
## Mean :59.87 Mean :21.99
## 3rd Qu.:64.00 3rd Qu.:22.00
## Max. :90.00 Max. :47.00
## NA's :82 NA's :82
## GradDegree MBA? MasterPhd? G_date
## Length:800 Min. :0.0000 Min. :0.000 Min. :37.00
## Class :character 1st Qu.:0.0000 1st Qu.:0.000 1st Qu.:60.00
## Mode :character Median :0.0000 Median :1.000 Median :65.00
## Mean :0.2637 Mean :0.505 Mean :65.02
## 3rd Qu.:1.0000 3rd Qu.:1.000 3rd Qu.:70.00
## Max. :1.0000 Max. :1.000 Max. :90.00
## NA's :396
## AgeOfGradu YearsFirm YearsCEO Salary
## Min. :18.00 Min. : 1.00 Min. : 1.000 Min. : 18600
## 1st Qu.:24.00 1st Qu.:11.00 1st Qu.: 3.000 1st Qu.: 433333
## Median :25.00 Median :23.00 Median : 6.000 Median : 569231
## Mean :26.57 Mean :22.05 Mean : 8.127 Mean : 613458
## 3rd Qu.:28.00 3rd Qu.:32.00 3rd Qu.:11.000 3rd Qu.: 750000
## Max. :51.00 Max. :58.00 Max. :55.000 Max. :2700000
## NA's :396 NA's :11
## Bonus Other StGains Compfor5Yrs
## Length:800 Min. : 174 Min. : 2188 Min. : 113181
## Class :character 1st Qu.: 26446 1st Qu.: 193017 1st Qu.: 3483990
## Mode :character Median : 116910 Median : 597988 Median : 5804310
## Mean : 531641 Mean : 3125219 Mean : 9781009
## 3rd Qu.: 500568 3rd Qu.: 2025438 3rd Qu.: 10747750
## Max. :13730800 Max. :202261000 Max. :236771000
## NA's :54 NA's :500 NA's :176
## StockOwned Sales Profits ReturnOver5Yrs
## Min. : 0.00001 Min. : 62 Min. :-7987.0 Min. :-57.00
## 1st Qu.: 0.04575 1st Qu.: 1164 1st Qu.: 53.1 1st Qu.: 8.00
## Median : 0.17000 Median : 2440 Median : 116.7 Median : 15.00
## Mean : 2.15093 Mean : 5033 Mean : 241.3 Mean : 16.62
## 3rd Qu.: 0.78150 3rd Qu.: 4828 3rd Qu.: 276.0 3rd Qu.: 21.00
## Max. :46.11000 Max. :138220 Max. : 5280.0 Max. :173.00
## NA's :4 NA's :22
## Industry IndustryCode
## Length:800 Min. : 0.000
## Class :character 1st Qu.: 2.000
## Mode :character Median : 6.000
## Mean : 8.495
## 3rd Qu.:16.000
## Max. :19.000
##
## # A tibble: 800 x 3
## Company CEO StateofBirth
## <chr> <chr> <chr>
## 1 Teledyne Mr. William P Rutledge PA
## 2 Boeing Mr. Frank A Shrontz ID
## 3 Northrop Mr. KentKresa NY
## 4 Martin Marietta Mr. Norman R Augustine CO
## 5 McDonnell Douglas Mr. John F McDonnell MD
## 6 GenCorp Mr. A WilliamReynolds OH
## 7 Lockheed Mr. Daniel M Tellep PA
## 8 General Electric Mr. John F Welch Jr MA
## 9 Textron Mr. James F Hardymon KY
## 10 General Dynamics Mr. James R Mellor MI
## # … with 790 more rows
## # A tibble: 1 x 33
## TotalComp WideIndustry Company CEO CityofBirth StateofBirth Age Undergrad
## <dbl> <chr> <chr> <chr> <chr> <chr> <dbl> <chr>
## 1 14899200 Aerospacede… Genera… Mr. … Detroit MI 64 U of Mic…
## # … with 25 more variables: UGDegree <chr>, UGDate <dbl>, AgeOfUnder <dbl>,
## # Graduate <chr>, GradDegree <chr>, `MBA?` <dbl>, `MasterPhd?` <dbl>,
## # G_date <dbl>, AgeOfGradu <dbl>, YearsFirm <dbl>, YearsCEO <dbl>,
## # Salary <dbl>, Bonus <chr>, Other <dbl>, StGains <dbl>, Compfor5Yrs <dbl>,
## # StockOwned <dbl>, Sales <dbl>, Profits <dbl>, ReturnOver5Yrs <dbl>,
## # Industry <chr>, IndustryCode <dbl>, young <lgl>, middle_aged <lgl>,
## # senior <lgl>
## # A tibble: 47 x 3
## CEO Salary Bonus
## <chr> <dbl> <chr>
## 1 Mr. John F Welch Jr 1750000 2.2e+006
## 2 Mr. Lawrence A Bossidy 1100000 1.5e+006
## 3 Mr. Dean L Buntrock 1400000 <NA>
## 4 Mr. Josh S Weston 1005000 <NA>
## 5 Mr. Louis V Gerstner Jr 1500000 1.125e+006
## 6 Dr. LeonardTow 1003000 485000
## 7 Mr. Robert E Allen 1032000 1.3567e+006
## 8 Mr. John F Smith Jr 1375000 <NA>
## 9 Mr. Edwin L Artzt 1137500 625000
## 10 Mr. John W Teets 1150330 1.1733e+006
## # … with 37 more rows
## # A tibble: 47 x 33
## TotalComp WideIndustry Company CEO CityofBirth StateofBirth Age Undergrad
## <dbl> <chr> <chr> <chr> <chr> <chr> <dbl> <chr>
## 1 7285420 Aerospacede… Genera… Mr. … Peabody MA 58 U of Mas…
## 2 9544050 Aerospacede… Allied… Mr. … Pittsfield MA 59 Colgate U
## 3 1486110 Business WMX Te… Mr. … Columbia SD 62 St Olaf C
## 4 8110850 Business Automa… Mr. … Brooklyn NY 65 CUNY City
## 5 7709730 ComputersCo… Intern… Mr. … Mineola NY 52 Dartmouth
## 6 5409690 ComputersCo… Citize… Dr. … Brooklyn NY 66 CUNY Bro…
## 7 3983410 ComputersCo… ATT Mr. … Joplin MO 59 Wabash C
## 8 1440930 Consumer Genera… Mr. … Worcester MA 56 U of Mas…
## 9 8378200 Consumer Procte… Mr. … New York NY 64 U of Ore…
## 10 5685500 Consumer Dial Mr. … Elgin IL 60 <NA>
## # … with 37 more rows, and 25 more variables: UGDegree <chr>, UGDate <dbl>,
## # AgeOfUnder <dbl>, Graduate <chr>, GradDegree <chr>, `MBA?` <dbl>,
## # `MasterPhd?` <dbl>, G_date <dbl>, AgeOfGradu <dbl>, YearsFirm <dbl>,
## # YearsCEO <dbl>, Salary <dbl>, Bonus <chr>, Other <dbl>, StGains <dbl>,
## # Compfor5Yrs <dbl>, StockOwned <dbl>, Sales <dbl>, Profits <dbl>,
## # ReturnOver5Yrs <dbl>, Industry <chr>, IndustryCode <dbl>, young <lgl>,
## # middle_aged <lgl>, senior <lgl>
## # A tibble: 47 x 33
## TotalComp WideIndustry Company CEO CityofBirth StateofBirth Age Undergrad
## <dbl> <chr> <chr> <chr> <chr> <chr> <dbl> <chr>
## 1 7285420 Aerospacede… Genera… Mr. … Peabody MA 58 U of Mas…
## 2 9544050 Aerospacede… Allied… Mr. … Pittsfield MA 59 Colgate U
## 3 1486110 Business WMX Te… Mr. … Columbia SD 62 St Olaf C
## 4 8110850 Business Automa… Mr. … Brooklyn NY 65 CUNY City
## 5 7709730 ComputersCo… Intern… Mr. … Mineola NY 52 Dartmouth
## 6 5409690 ComputersCo… Citize… Dr. … Brooklyn NY 66 CUNY Bro…
## 7 3983410 ComputersCo… ATT Mr. … Joplin MO 59 Wabash C
## 8 1440930 Consumer Genera… Mr. … Worcester MA 56 U of Mas…
## 9 8378200 Consumer Procte… Mr. … New York NY 64 U of Ore…
## 10 5685500 Consumer Dial Mr. … Elgin IL 60 <NA>
## # … with 37 more rows, and 25 more variables: UGDegree <chr>, UGDate <dbl>,
## # AgeOfUnder <dbl>, Graduate <chr>, GradDegree <chr>, `MBA?` <dbl>,
## # `MasterPhd?` <dbl>, G_date <dbl>, AgeOfGradu <dbl>, YearsFirm <dbl>,
## # YearsCEO <dbl>, Salary <dbl>, Bonus <chr>, Other <dbl>, StGains <dbl>,
## # Compfor5Yrs <dbl>, StockOwned <dbl>, Sales <dbl>, Profits <dbl>,
## # ReturnOver5Yrs <dbl>, Industry <chr>, IndustryCode <dbl>, young <lgl>,
## # middle_aged <lgl>, senior <lgl>
# Scatter plot of company Sales against CEO Age.
# The point colour is a fixed value, so it is set on the geom instead of inside
# aes(). Inside aes() the string "blue" is treated as a one-level data variable:
# the points are drawn in the default palette colour and a meaningless legend
# titled "colour" is added.
ggplot(data = ceo.data2, aes(x = Age, y = Sales)) +
  geom_point(color = "blue")
# Scatter plot of Age against Bonus, coloured by the middle_aged flag.
# Bonus is read in as a character column (see the column specification printed
# by read_csv), so it is converted to numeric for plotting; left as text,
# ggplot would place every distinct Bonus string on a discrete y axis.
# Entries that are not valid numbers become NA and are dropped from the plot
# with a warning.
ggplot(
  data = ceo.data2,
  aes(x = Age, y = as.numeric(Bonus), color = middle_aged)
) +
  geom_point() +
  # keep the plain column name as the axis label instead of the expression
  ylab("Bonus")
# Scatter plot of Profits against Age, coloured by the middle_aged flag.
# No group_by() step is needed here: nothing in this pipeline summarises by
# group, and the colour aesthetic already separates the two sets of points.
ceo.data2 %>%
  ggplot(aes(x = Age, y = Profits, color = middle_aged)) +
  geom_point()
# Bar graph of total compensation summed by industry for Business,
# Consumer durables, Aerospace/defense and Energy.
# dplyr is used to keep only those industries, group by Industry and sum
# TotalComp within each group.
ceo.data2 %>%
  filter(
    Industry %in% c(
      "Business", "Consumerdurables", "Energy", "Aerospacedefense"
    )
  ) %>%
  group_by(Industry) %>%
  # na.rm expects a logical; TotalComp contains missing values to be skipped
  summarise(TotalComp1 = sum(TotalComp, na.rm = TRUE)) %>%
  # geom_col() uses the precomputed totals directly as bar heights
  ggplot(aes(x = Industry, y = TotalComp1)) +
  geom_col()
#You could further the analysis of this graph by running a count on each industry to compare the number of CEOs in each industry, thus helping to judge the significance of the comparison.
#Counting the number of CEOs in the industries of Business, Aerospacedefense, Energy, Consumer durable goods, and Consumer nondurable goods
###Data Wrangling uses the same methods from self analysis one and the data wrangling of the original document
###Self Analysis of Question 2
###We can compare the number of CEOs across the industries they work in. A further analysis could show which industry has the most CEOs, and we could compare the chances of becoming a CEO within each industry. Or we could see how pay differs between industries based on the number of CEOs in each industry.
#CEOs who are over 30 years of age, comparing those whose salaries are at or above 1 million dollars
## # A tibble: 47 x 3
## Company CEO StateofBirth
## <chr> <chr> <chr>
## 1 General Electric Mr. John F Welch Jr MA
## 2 AlliedSignal Mr. Lawrence A Bossidy MA
## 3 WMX Technologies Mr. Dean L Buntrock SD
## 4 Automatic Data Proce Mr. Josh S Weston NY
## 5 International Busine Mr. Louis V Gerstner Jr NY
## 6 Citizens Utilities Dr. LeonardTow NY
## 7 ATT Mr. Robert E Allen MO
## 8 General Motors Mr. John F Smith Jr MA
## 9 ProcterGamble Mr. Edwin L Artzt NY
## 10 Dial Mr. John W Teets IL
## # … with 37 more rows
## # A tibble: 0 x 34
## # … with 34 variables: TotalComp <dbl>, WideIndustry <chr>, Company <chr>,
## # CEO <chr>, CityofBirth <chr>, StateofBirth <chr>, Age <dbl>,
## # Undergrad <chr>, UGDegree <chr>, UGDate <dbl>, AgeOfUnder <dbl>,
## # Graduate <chr>, GradDegree <chr>, `MBA?` <dbl>, `MasterPhd?` <dbl>,
## # G_date <dbl>, AgeOfGradu <dbl>, YearsFirm <dbl>, YearsCEO <dbl>,
## # Salary <dbl>, Bonus <chr>, Other <dbl>, StGains <dbl>, Compfor5Yrs <dbl>,
## # StockOwned <dbl>, Sales <dbl>, Profits <dbl>, ReturnOver5Yrs <dbl>,
## # Industry <chr>, IndustryCode <dbl>, young <lgl>, middle_aged <lgl>,
## # senior <lgl>, Sales_Profit_Difference <dbl>
## # A tibble: 47 x 3
## CEO Salary Bonus
## <chr> <dbl> <chr>
## 1 Mr. John F Welch Jr 1750000 2.2e+006
## 2 Mr. Lawrence A Bossidy 1100000 1.5e+006
## 3 Mr. Dean L Buntrock 1400000 <NA>
## 4 Mr. Josh S Weston 1005000 <NA>
## 5 Mr. Louis V Gerstner Jr 1500000 1.125e+006
## 6 Dr. LeonardTow 1003000 485000
## 7 Mr. Robert E Allen 1032000 1.3567e+006
## 8 Mr. John F Smith Jr 1375000 <NA>
## 9 Mr. Edwin L Artzt 1137500 625000
## 10 Mr. John W Teets 1150330 1.1733e+006
## # … with 37 more rows
## # A tibble: 47 x 34
## TotalComp WideIndustry Company CEO CityofBirth StateofBirth Age Undergrad
## <dbl> <chr> <chr> <chr> <chr> <chr> <dbl> <chr>
## 1 7285420 Aerospacede… Genera… Mr. … Peabody MA 58 U of Mas…
## 2 9544050 Aerospacede… Allied… Mr. … Pittsfield MA 59 Colgate U
## 3 1486110 Business WMX Te… Mr. … Columbia SD 62 St Olaf C
## 4 8110850 Business Automa… Mr. … Brooklyn NY 65 CUNY City
## 5 7709730 ComputersCo… Intern… Mr. … Mineola NY 52 Dartmouth
## 6 5409690 ComputersCo… Citize… Dr. … Brooklyn NY 66 CUNY Bro…
## 7 3983410 ComputersCo… ATT Mr. … Joplin MO 59 Wabash C
## 8 1440930 Consumer Genera… Mr. … Worcester MA 56 U of Mas…
## 9 8378200 Consumer Procte… Mr. … New York NY 64 U of Ore…
## 10 5685500 Consumer Dial Mr. … Elgin IL 60 <NA>
## # … with 37 more rows, and 26 more variables: UGDegree <chr>, UGDate <dbl>,
## # AgeOfUnder <dbl>, Graduate <chr>, GradDegree <chr>, `MBA?` <dbl>,
## # `MasterPhd?` <dbl>, G_date <dbl>, AgeOfGradu <dbl>, YearsFirm <dbl>,
## # YearsCEO <dbl>, Salary <dbl>, Bonus <chr>, Other <dbl>, StGains <dbl>,
## # Compfor5Yrs <dbl>, StockOwned <dbl>, Sales <dbl>, Profits <dbl>,
## # ReturnOver5Yrs <dbl>, Industry <chr>, IndustryCode <dbl>, young <lgl>,
## # middle_aged <lgl>, senior <lgl>, Sales_Profit_Difference <dbl>
## # A tibble: 47 x 34
## TotalComp WideIndustry Company CEO CityofBirth StateofBirth Age Undergrad
## <dbl> <chr> <chr> <chr> <chr> <chr> <dbl> <chr>
## 1 7285420 Aerospacede… Genera… Mr. … Peabody MA 58 U of Mas…
## 2 9544050 Aerospacede… Allied… Mr. … Pittsfield MA 59 Colgate U
## 3 1486110 Business WMX Te… Mr. … Columbia SD 62 St Olaf C
## 4 8110850 Business Automa… Mr. … Brooklyn NY 65 CUNY City
## 5 7709730 ComputersCo… Intern… Mr. … Mineola NY 52 Dartmouth
## 6 5409690 ComputersCo… Citize… Dr. … Brooklyn NY 66 CUNY Bro…
## 7 3983410 ComputersCo… ATT Mr. … Joplin MO 59 Wabash C
## 8 1440930 Consumer Genera… Mr. … Worcester MA 56 U of Mas…
## 9 8378200 Consumer Procte… Mr. … New York NY 64 U of Ore…
## 10 5685500 Consumer Dial Mr. … Elgin IL 60 <NA>
## # … with 37 more rows, and 26 more variables: UGDegree <chr>, UGDate <dbl>,
## # AgeOfUnder <dbl>, Graduate <chr>, GradDegree <chr>, `MBA?` <dbl>,
## # `MasterPhd?` <dbl>, G_date <dbl>, AgeOfGradu <dbl>, YearsFirm <dbl>,
## # YearsCEO <dbl>, Salary <dbl>, Bonus <chr>, Other <dbl>, StGains <dbl>,
## # Compfor5Yrs <dbl>, StockOwned <dbl>, Sales <dbl>, Profits <dbl>,
## # ReturnOver5Yrs <dbl>, Industry <chr>, IndustryCode <dbl>, young <lgl>,
## # middle_aged <lgl>, senior <lgl>, Sales_Profit_Difference <dbl>
###Visualization of Question 3
# Scatter plot of Sales against the Sales_Profit_Difference column.
# The constant string mapped to colour inside aes() is kept as-is: it produces
# the single-entry legend labelled "Sales-Profit Difference".
ceo.data2 %>%
  ggplot(
    mapping = aes(
      x = Sales,
      y = Sales_Profit_Difference,
      color = "Sales-Profit Difference"
    )
  ) +
  geom_point() +
  ggtitle("Sales & Profit Difference")
###Statistical Analysis for Question 3
#Checking linearity between sales and the sales-profit difference for CEOs over the age of 30 who make over a million dollars. Further analysis could check the correlation between sales and the sales-profit difference. By looking for a correlation we could see the likelihood of how much CEOs drive profits in a company.
Note that the echo = FALSE parameter was added to the code chunk to prevent printing of the R code that generated the plot.