library(tidyverse)
## Warning: replacing previous import 'lifecycle::last_warnings' by
## 'rlang::last_warnings' when loading 'tibble'
## Warning: replacing previous import 'ellipsis::check_dots_unnamed' by
## 'rlang::check_dots_unnamed' when loading 'tibble'
## Warning: replacing previous import 'ellipsis::check_dots_used' by
## 'rlang::check_dots_used' when loading 'tibble'
## Warning: replacing previous import 'ellipsis::check_dots_empty' by
## 'rlang::check_dots_empty' when loading 'tibble'
## Warning: replacing previous import 'lifecycle::last_warnings' by
## 'rlang::last_warnings' when loading 'pillar'
## Warning: replacing previous import 'ellipsis::check_dots_unnamed' by
## 'rlang::check_dots_unnamed' when loading 'pillar'
## Warning: replacing previous import 'ellipsis::check_dots_used' by
## 'rlang::check_dots_used' when loading 'pillar'
## Warning: replacing previous import 'ellipsis::check_dots_empty' by
## 'rlang::check_dots_empty' when loading 'pillar'
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.0 ──
## ✓ ggplot2 3.3.2     ✓ purrr   0.3.4
## ✓ tibble  3.0.4     ✓ dplyr   1.0.2
## ✓ tidyr   1.1.2     ✓ stringr 1.4.0
## ✓ readr   1.4.0     ✓ forcats 0.5.0
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
library(readr)
JANI_Results <- read_csv("JANI - Results.csv")
## 
## ── Column specification ────────────────────────────────────────────────────────
## cols(
##   `Program Year` = col_character(),
##   Company = col_character(),
##   School = col_character(),
##   Grade = col_character(),
##   Program = col_character(),
##   `Class ID` = col_character(),
##   `Contact hours` = col_double(),
##   Students = col_double()
## )
## Warning: 374 parsing failures.
##  row      col               expected actual                 file
## 1402 Students no trailing characters  3,763 'JANI - Results.csv'
## 1425 Students no trailing characters  3,763 'JANI - Results.csv'
## 1617 Students no trailing characters  2,729 'JANI - Results.csv'
## 1618 Students no trailing characters  2,729 'JANI - Results.csv'
## 1619 Students no trailing characters  2,729 'JANI - Results.csv'
## .... ........ ...................... ...... ....................
## See problems(...) for more details.
#View(JANI_Results)
library(janitor)
## 
## Attaching package: 'janitor'
## The following objects are masked from 'package:stats':
## 
##     chisq.test, fisher.test
JANI_Results %>%
  janitor::clean_names() %>%
  mutate(totalhrs = contact_hours * students) %>%
  arrange(desc(totalhrs))
## # A tibble: 24,034 x 9
##    program_year company school grade program class_id contact_hours students
##    <chr>        <chr>   <chr>  <chr> <chr>   <chr>            <dbl>    <dbl>
##  1 Program yea… Midwes… Tippe… 12    JA Eco… CL-5818…            72       53
##  2 Program yea… Allen … Wabas… 9     JA Ins… CL-1424…             6      611
##  3 Program yea… Chase   Wabas… 9     JA Ins… CL-1424…             6      611
##  4 Program yea… Coldwe… Wabas… 9     JA Ins… CL-1424…             6      611
##  5 Program yea… Conten… Wabas… 9     JA Ins… CL-1424…             6      611
##  6 Program yea… Fort F… Wabas… 9     JA Ins… CL-1424…             6      611
##  7 Program yea… Indian… Wabas… 9     JA Ins… CL-1424…             6      611
##  8 Program yea… Indian… Wabas… 9     JA Ins… CL-1424…             6      611
##  9 Program yea… Nation… Wabas… 9     JA Ins… CL-1424…             6      611
## 10 Program yea… Nation… Wabas… 9     JA Ins… CL-1424…             6      611
## # … with 24,024 more rows, and 1 more variable: totalhrs <dbl>
JANI_Results %>%
  janitor::clean_names() %>%
  count(company) %>%
  arrange(desc(n))
## # A tibble: 3,684 x 2
##    company                          n
##    <chr>                        <int>
##  1 Purdue University Fort Wayne  1096
##  2 Lake City Bank                 797
##  3 1st Source Bank                571
##  4 Indiana Tech                   567
##  5 Lincoln Financial Group        445
##  6 3Rivers Federal Credit Union   308
##  7 Ivy Tech Community College     288
##  8 University of Saint Francis    186
##  9 First Merchants Bank           178
## 10 Zimmer Biomet                  169
## # … with 3,674 more rows
library(janitor)
JANI_Results %>%
  janitor::clean_names() %>%
  group_by(company)%>%
  count(company, students)%>%
  mutate(n_students = n * students) %>%
  arrange(desc(n_students))
## # A tibble: 11,112 x 4
## # Groups:   company [3,684]
##    company                      students     n n_students
##    <chr>                           <dbl> <int>      <dbl>
##  1 Purdue University Fort Wayne       22   106       2332
##  2 Purdue University Fort Wayne       26    88       2288
##  3 Purdue University Fort Wayne       24    92       2208
##  4 Purdue University Fort Wayne       25    87       2175
##  5 Lake City Bank                     25    83       2075
##  6 Purdue University Fort Wayne       23    89       2047
##  7 Lake City Bank                     23    83       1909
##  8 Lake City Bank                     24    79       1896
##  9 Lake City Bank                     22    83       1826
## 10 Purdue University Fort Wayne       21    83       1743
## # … with 11,102 more rows
library(janitor)
JANI_Results %>%
  janitor::clean_names() %>%
  group_by(company) %>%
  count(company, students) %>%
  mutate(n_students = n * students) %>%
  arrange(desc(n_students))
## # A tibble: 11,112 x 4
## # Groups:   company [3,684]
##    company                      students     n n_students
##    <chr>                           <dbl> <int>      <dbl>
##  1 Purdue University Fort Wayne       22   106       2332
##  2 Purdue University Fort Wayne       26    88       2288
##  3 Purdue University Fort Wayne       24    92       2208
##  4 Purdue University Fort Wayne       25    87       2175
##  5 Lake City Bank                     25    83       2075
##  6 Purdue University Fort Wayne       23    89       2047
##  7 Lake City Bank                     23    83       1909
##  8 Lake City Bank                     24    79       1896
##  9 Lake City Bank                     22    83       1826
## 10 Purdue University Fort Wayne       21    83       1743
## # … with 11,102 more rows
library(janitor)
JANI_Results %>%
  janitor::clean_names() %>%
  count(company) %>%
  arrange(desc(n)) 
## # A tibble: 3,684 x 2
##    company                          n
##    <chr>                        <int>
##  1 Purdue University Fort Wayne  1096
##  2 Lake City Bank                 797
##  3 1st Source Bank                571
##  4 Indiana Tech                   567
##  5 Lincoln Financial Group        445
##  6 3Rivers Federal Credit Union   308
##  7 Ivy Tech Community College     288
##  8 University of Saint Francis    186
##  9 First Merchants Bank           178
## 10 Zimmer Biomet                  169
## # … with 3,674 more rows
  #mutate(Jerry = contact_hours * students) %>%
  #arrange(desc(students))

Top 5 by Appearance

This shows the companies with the most appearances in the dataset are Purdue University Fort Wayne (1096), Lake City Bank (797), 1st Source Bank (571), Indiana Tech (567), and Lincoln Financial Group (445)

library(janitor)
PFW <- JANI_Results %>%
  janitor::clean_names() %>%
  filter(company == "Purdue University Fort Wayne") %>%
  select(company, contact_hours, students) %>%
  mutate(total_hours = contact_hours * students) #total_hours is equal to the students taught in each session multiplied by the number of hours that JA was working with them

PFW
## # A tibble: 1,096 x 4
##    company                      contact_hours students total_hours
##    <chr>                                <dbl>    <dbl>       <dbl>
##  1 Purdue University Fort Wayne            22       14         308
##  2 Purdue University Fort Wayne             5       22         110
##  3 Purdue University Fort Wayne             5       22         110
##  4 Purdue University Fort Wayne             5       21         105
##  5 Purdue University Fort Wayne             6       35         210
##  6 Purdue University Fort Wayne             5       19          95
##  7 Purdue University Fort Wayne             6       30         180
##  8 Purdue University Fort Wayne             5       23         115
##  9 Purdue University Fort Wayne             5       20         100
## 10 Purdue University Fort Wayne             5       20         100
## # … with 1,086 more rows
sum(PFW[c(4)], na.rm=TRUE)
## [1] 156945
library(janitor)
LakeCity <- JANI_Results %>%
  janitor::clean_names() %>%
  filter(company == "Lake City Bank") %>%
  select(company, contact_hours, students) %>%
  mutate(total_hours = contact_hours * students) #total_hours is equal to the students taught in each session multiplied by the number of hours that JA was working with them

LakeCity
## # A tibble: 797 x 4
##    company        contact_hours students total_hours
##    <chr>                  <dbl>    <dbl>       <dbl>
##  1 Lake City Bank            29       22         638
##  2 Lake City Bank            20       23         460
##  3 Lake City Bank            22       14         308
##  4 Lake City Bank            21       28         588
##  5 Lake City Bank            21       25         525
##  6 Lake City Bank             5       20         100
##  7 Lake City Bank             5       20         100
##  8 Lake City Bank             6       20         120
##  9 Lake City Bank             5       18          90
## 10 Lake City Bank             5       20         100
## # … with 787 more rows
sum(LakeCity[c(4)], na.rm=TRUE)
## [1] 143180
library(janitor)
FSource <- JANI_Results %>%
  janitor::clean_names() %>%
  filter(company == "1st Source Bank") %>%
  select(company, contact_hours, students) %>%
  mutate(total_hours = contact_hours * students) #total_hours is equal to the students taught in each session multiplied by the number of hours that JA was working with them

FSource
## # A tibble: 571 x 4
##    company         contact_hours students total_hours
##    <chr>                   <dbl>    <dbl>       <dbl>
##  1 1st Source Bank             6       27         162
##  2 1st Source Bank             6       23         138
##  3 1st Source Bank             6       22         132
##  4 1st Source Bank            45       25        1125
##  5 1st Source Bank             6       28         168
##  6 1st Source Bank             6       29         174
##  7 1st Source Bank             6       30         180
##  8 1st Source Bank             6       26         156
##  9 1st Source Bank             6       26         156
## 10 1st Source Bank             5        7          35
## # … with 561 more rows
sum(FSource[c(4)], na.rm=TRUE)
## [1] 104926
library(janitor)
IT <- JANI_Results %>%
  janitor::clean_names() %>%
  filter(company == "Indiana Tech") %>%
  select(company, contact_hours, students) %>%
  mutate(total_hours = contact_hours * students) #total_hours is equal to the students taught in each session multiplied by the number of hours that JA was working with them

IT
## # A tibble: 567 x 4
##    company      contact_hours students total_hours
##    <chr>                <dbl>    <dbl>       <dbl>
##  1 Indiana Tech             4       23          92
##  2 Indiana Tech            21       24         504
##  3 Indiana Tech             5       23         115
##  4 Indiana Tech             6       23         138
##  5 Indiana Tech            45       11         495
##  6 Indiana Tech             5       12          60
##  7 Indiana Tech             5       22         110
##  8 Indiana Tech             5       25         125
##  9 Indiana Tech             6       18         108
## 10 Indiana Tech             6       14          84
## # … with 557 more rows
sum(IT[c(4)], na.rm=TRUE)
## [1] 81242
library(janitor)
LFG <- JANI_Results %>%
  janitor::clean_names() %>%
  filter(company == "Lincoln Financial Group") %>%
  select(company, contact_hours, students) %>%
  mutate(total_hours = contact_hours * students) #total_hours is equal to the students taught in each session multiplied by the number of hours that JA was working with them

LFG
## # A tibble: 445 x 4
##    company                 contact_hours students total_hours
##    <chr>                           <dbl>    <dbl>       <dbl>
##  1 Lincoln Financial Group             5       28         140
##  2 Lincoln Financial Group             5       28         140
##  3 Lincoln Financial Group             5       13          65
##  4 Lincoln Financial Group             5       22         110
##  5 Lincoln Financial Group             6       18         108
##  6 Lincoln Financial Group             5       26         130
##  7 Lincoln Financial Group             5       23         115
##  8 Lincoln Financial Group             5       24         120
##  9 Lincoln Financial Group             5       25         125
## 10 Lincoln Financial Group             7       27         189
## # … with 435 more rows
sum(LFG[c(4)], na.rm=TRUE)
## [1] 85065

These 5 by total hours

  1. Purdue University Fort Wayne (314991 Total Hours)
  2. Indiana Tech (233084 Total Hours)
  3. Lake City Bank (143180 Total Hours)
  4. 1st Source Bank (104926 Total Hours)
  5. Lincoln Financial Group (85065 Total Hours)
library(janitor)
JANI_Results %>%
  janitor::clean_names() %>%
  mutate(Jerry = contact_hours * students) %>%
  arrange(desc(Jerry))
## # A tibble: 24,034 x 9
##    program_year company school grade program class_id contact_hours students
##    <chr>        <chr>   <chr>  <chr> <chr>   <chr>            <dbl>    <dbl>
##  1 Program yea… Midwes… Tippe… 12    JA Eco… CL-5818…            72       53
##  2 Program yea… Allen … Wabas… 9     JA Ins… CL-1424…             6      611
##  3 Program yea… Chase   Wabas… 9     JA Ins… CL-1424…             6      611
##  4 Program yea… Coldwe… Wabas… 9     JA Ins… CL-1424…             6      611
##  5 Program yea… Conten… Wabas… 9     JA Ins… CL-1424…             6      611
##  6 Program yea… Fort F… Wabas… 9     JA Ins… CL-1424…             6      611
##  7 Program yea… Indian… Wabas… 9     JA Ins… CL-1424…             6      611
##  8 Program yea… Indian… Wabas… 9     JA Ins… CL-1424…             6      611
##  9 Program yea… Nation… Wabas… 9     JA Ins… CL-1424…             6      611
## 10 Program yea… Nation… Wabas… 9     JA Ins… CL-1424…             6      611
## # … with 24,024 more rows, and 1 more variable: Jerry <dbl>
library(janitor)
JANI_Results %>%
  janitor::clean_names() %>%
  group_by(company) %>%
  count(company, students) %>%
  mutate(n_students = n * students) %>%
  arrange(desc(n_students))
## # A tibble: 11,112 x 4
## # Groups:   company [3,684]
##    company                      students     n n_students
##    <chr>                           <dbl> <int>      <dbl>
##  1 Purdue University Fort Wayne       22   106       2332
##  2 Purdue University Fort Wayne       26    88       2288
##  3 Purdue University Fort Wayne       24    92       2208
##  4 Purdue University Fort Wayne       25    87       2175
##  5 Lake City Bank                     25    83       2075
##  6 Purdue University Fort Wayne       23    89       2047
##  7 Lake City Bank                     23    83       1909
##  8 Lake City Bank                     24    79       1896
##  9 Lake City Bank                     22    83       1826
## 10 Purdue University Fort Wayne       21    83       1743
## # … with 11,102 more rows

Another way to find top 5

The Companies with the most students taught in a single session muliplied by how many times this same amount of students was taught are Manchester University (60208 students), Chase (56445 students), Purdue University (56445 students), Trine University (56445 students), and Indiana Michigan Power (52682 students). This was determined by counting by the company and students, multiplying the students by the number of appearances of this number of students and that company (in this case ManU has 16 appearances of 3763 students) to determine how many students each company worked with when focusing on their largest student size. This is not the total number of students, I will be doing that below with these top 5 companies.

For ManU, the su

library(janitor)
ManU <- JANI_Results %>%
  janitor::clean_names() %>%
  filter(company == "Manchester University, College Of Pharmacy") %>%
  select(company, contact_hours, students) %>%
  mutate(total_hours = contact_hours * students) #total_hours is equal to the students taught in each session multiplied by the number of hours that JA was working with them

ManU
## # A tibble: 37 x 4
##    company                                    contact_hours students total_hours
##    <chr>                                              <dbl>    <dbl>       <dbl>
##  1 Manchester University, College Of Pharmacy            25       25         625
##  2 Manchester University, College Of Pharmacy             5       26         130
##  3 Manchester University, College Of Pharmacy             5       24         120
##  4 Manchester University, College Of Pharmacy             5       24         120
##  5 Manchester University, College Of Pharmacy             5       24         120
##  6 Manchester University, College Of Pharmacy             5       22         110
##  7 Manchester University, College Of Pharmacy             5       22         110
##  8 Manchester University, College Of Pharmacy             5       23         115
##  9 Manchester University, College Of Pharmacy             5       19          95
## 10 Manchester University, College Of Pharmacy             5       19          95
## # … with 27 more rows
sum(ManU[c(4)], na.rm=TRUE)
## [1] 2635
library(janitor)
Chase <- JANI_Results %>%
  janitor::clean_names() %>%
  filter(company == "Chase") %>%
  select(company, contact_hours, students) %>%
  mutate(total_hours = contact_hours * students)

Chase
## # A tibble: 114 x 4
##    company contact_hours students total_hours
##    <chr>           <dbl>    <dbl>       <dbl>
##  1 Chase               5       29         145
##  2 Chase               5       21         105
##  3 Chase              28       24         672
##  4 Chase               6       19         114
##  5 Chase              34       24         816
##  6 Chase              28       21         588
##  7 Chase              29       24         696
##  8 Chase               6       26         156
##  9 Chase              38       19         722
## 10 Chase               5       19          95
## # … with 104 more rows
sum(Chase[c(4)], na.rm=TRUE)
## [1] 21398
library(janitor)
Purdue <- JANI_Results %>%
  janitor::clean_names() %>%
  filter(company == "Purdue University") %>%
  select(company, contact_hours, students) %>%
  mutate(total_hours = contact_hours * students)

Purdue
## # A tibble: 134 x 4
##    company           contact_hours students total_hours
##    <chr>                     <dbl>    <dbl>       <dbl>
##  1 Purdue University             5       19          95
##  2 Purdue University             5       22         110
##  3 Purdue University             5       22         110
##  4 Purdue University             5       22         110
##  5 Purdue University             5       22         110
##  6 Purdue University             5       30         150
##  7 Purdue University            21       22         462
##  8 Purdue University            37       25         925
##  9 Purdue University            23       22         506
## 10 Purdue University            24       21         504
## # … with 124 more rows
sum(Purdue[c(4)], na.rm=TRUE)
## [1] 33143
library(janitor)
Trine <- JANI_Results %>%
  janitor::clean_names() %>%
  filter(company == "Trine University") %>%
  select(company, contact_hours, students) %>%
  mutate(total_hours = contact_hours * students)

Trine
## # A tibble: 149 x 4
##    company          contact_hours students total_hours
##    <chr>                    <dbl>    <dbl>       <dbl>
##  1 Trine University            31       20         620
##  2 Trine University            22       16         352
##  3 Trine University            22       16         352
##  4 Trine University            22       16         352
##  5 Trine University            22       19         418
##  6 Trine University            22       19         418
##  7 Trine University            22       15         330
##  8 Trine University            22       15         330
##  9 Trine University            22       15         330
## 10 Trine University             7       28         196
## # … with 139 more rows
sum(Trine[c(4)], na.rm=TRUE)
## [1] 33574
library(janitor)
IMP <- JANI_Results %>%
  janitor::clean_names() %>%
  filter(company == "Indiana Michigan Power") %>%
  select(company, contact_hours, students) %>%
  mutate(total_hours = contact_hours * students)

IMP
## # A tibble: 31 x 4
##    company                contact_hours students total_hours
##    <chr>                          <dbl>    <dbl>       <dbl>
##  1 Indiana Michigan Power            25       25         625
##  2 Indiana Michigan Power             5       18          90
##  3 Indiana Michigan Power             5       27         135
##  4 Indiana Michigan Power             5       18          90
##  5 Indiana Michigan Power             5       26         130
##  6 Indiana Michigan Power             5       18          90
##  7 Indiana Michigan Power             5       26         130
##  8 Indiana Michigan Power             5       26         130
##  9 Indiana Michigan Power            29       13         377
## 10 Indiana Michigan Power             5       25         125
## # … with 21 more rows
sum(IMP[c(4)], na.rm=TRUE)
## [1] 3037

These 5 by total hours

  1. Manchester University (396631 Total Hours)
  2. Trine University (372244 Total Hours)
  3. Purdue University (371813 Total Hours)
  4. Chase (360068 Total Hours)
  5. Indiana Michigan Power (335503 Total Hours)

Combined Top 10 by total hours

  1. Manchester University (396631 Total Hours)
  2. Trine University (372244 Total Hours)
  3. Purdue University (371813 Total Hours)
  4. Chase (360068 Total Hours)
  5. Indiana Michigan Power (335503 Total Hours
  6. Purdue University Fort Wayne (314991 Total Hours)
  7. Indiana Tech (233084 Total Hours)
  8. Lake City Bank (143180 Total Hours)
  9. 1st Source Bank (104926 Total Hours)
  10. Lincoln Financial Group (85065 Total Hours)

Chart of top 10

library(ggplot2)
library(dplyr)
library(scales)
## 
## Attaching package: 'scales'
## The following object is masked from 'package:purrr':
## 
##     discard
## The following object is masked from 'package:readr':
## 
##     col_factor
Company <- c("Manchester University", "Trine University", "Purdue University", "Chase", "Indiana Michigan Power", "Purdue University Fort Wayne", "Indiana Tech", "Lake City Bank", "1st Source Bank", "Lincoln Financial Group")
Total_Hours <- c(396631, 372244, 371813, 360068, 335503, 314991, 233084, 143180, 104926, 85065)
TopTen <- data.frame(Company, Total_Hours)
TopTen 
##                         Company Total_Hours
## 1         Manchester University      396631
## 2              Trine University      372244
## 3             Purdue University      371813
## 4                         Chase      360068
## 5        Indiana Michigan Power      335503
## 6  Purdue University Fort Wayne      314991
## 7                  Indiana Tech      233084
## 8                Lake City Bank      143180
## 9               1st Source Bank      104926
## 10      Lincoln Financial Group       85065
ggplot(TopTen, aes(x = reorder(Company, Total_Hours), y = Total_Hours, fill = Company)) +
  geom_col() +
  labs(
    x = "Company",
    y = "Total Hours",
    fill = "Company",
    title = "Top Ten Companies by Total Hours") +
  theme(axis.text.x = element_text(angle = 60, hjust = 1)) +
  scale_y_continuous(labels = label_number(suffix = "K", scale = 1e-3))