library(tidyverse)
## Warning: replacing previous import 'lifecycle::last_warnings' by
## 'rlang::last_warnings' when loading 'tibble'
## Warning: replacing previous import 'ellipsis::check_dots_unnamed' by
## 'rlang::check_dots_unnamed' when loading 'tibble'
## Warning: replacing previous import 'ellipsis::check_dots_used' by
## 'rlang::check_dots_used' when loading 'tibble'
## Warning: replacing previous import 'ellipsis::check_dots_empty' by
## 'rlang::check_dots_empty' when loading 'tibble'
## Warning: replacing previous import 'lifecycle::last_warnings' by
## 'rlang::last_warnings' when loading 'pillar'
## Warning: replacing previous import 'ellipsis::check_dots_unnamed' by
## 'rlang::check_dots_unnamed' when loading 'pillar'
## Warning: replacing previous import 'ellipsis::check_dots_used' by
## 'rlang::check_dots_used' when loading 'pillar'
## Warning: replacing previous import 'ellipsis::check_dots_empty' by
## 'rlang::check_dots_empty' when loading 'pillar'
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.0 ──
## ✓ ggplot2 3.3.2 ✓ purrr 0.3.4
## ✓ tibble 3.0.4 ✓ dplyr 1.0.2
## ✓ tidyr 1.1.2 ✓ stringr 1.4.0
## ✓ readr 1.4.0 ✓ forcats 0.5.0
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
library(readr)
# Load the JA results export.
# `Students` contains values written with a thousands separator (e.g. "3,763").
# The guessed col_double() parser rejects those ("no trailing characters"),
# producing parsing failures and NA student counts, which silently drop the
# largest sessions from every later sum. col_number() parses them correctly.
# All other columns keep readr's guessed types.
JANI_Results <- read_csv(
  "JANI - Results.csv",
  col_types = cols(Students = col_number())
)
##
## ── Column specification ────────────────────────────────────────────────────────
## cols(
## `Program Year` = col_character(),
## Company = col_character(),
## School = col_character(),
## Grade = col_character(),
## Program = col_character(),
## `Class ID` = col_character(),
## `Contact hours` = col_double(),
## Students = col_double()
## )
## Warning: 374 parsing failures.
## row col expected actual file
## 1402 Students no trailing characters 3,763 'JANI - Results.csv'
## 1425 Students no trailing characters 3,763 'JANI - Results.csv'
## 1617 Students no trailing characters 2,729 'JANI - Results.csv'
## 1618 Students no trailing characters 2,729 'JANI - Results.csv'
## 1619 Students no trailing characters 2,729 'JANI - Results.csv'
## .... ........ ...................... ...... ....................
## See problems(...) for more details.
#View(JANI_Results)
library(janitor)
##
## Attaching package: 'janitor'
## The following objects are masked from 'package:stats':
##
## chisq.test, fisher.test
# Add totalhrs (contact hours x students) to every row and list the rows
# from the largest totalhrs to the smallest.
JANI_Results %>%
  janitor::clean_names() %>%
  mutate(totalhrs = students * contact_hours) %>%
  arrange(desc(totalhrs))
## # A tibble: 24,034 x 9
## program_year company school grade program class_id contact_hours students
## <chr> <chr> <chr> <chr> <chr> <chr> <dbl> <dbl>
## 1 Program yea… Midwes… Tippe… 12 JA Eco… CL-5818… 72 53
## 2 Program yea… Allen … Wabas… 9 JA Ins… CL-1424… 6 611
## 3 Program yea… Chase Wabas… 9 JA Ins… CL-1424… 6 611
## 4 Program yea… Coldwe… Wabas… 9 JA Ins… CL-1424… 6 611
## 5 Program yea… Conten… Wabas… 9 JA Ins… CL-1424… 6 611
## 6 Program yea… Fort F… Wabas… 9 JA Ins… CL-1424… 6 611
## 7 Program yea… Indian… Wabas… 9 JA Ins… CL-1424… 6 611
## 8 Program yea… Indian… Wabas… 9 JA Ins… CL-1424… 6 611
## 9 Program yea… Nation… Wabas… 9 JA Ins… CL-1424… 6 611
## 10 Program yea… Nation… Wabas… 9 JA Ins… CL-1424… 6 611
## # … with 24,024 more rows, and 1 more variable: totalhrs <dbl>
# Number of rows per company, most frequent company first.
JANI_Results %>%
  janitor::clean_names() %>%
  count(company, sort = TRUE)
## # A tibble: 3,684 x 2
## company n
## <chr> <int>
## 1 Purdue University Fort Wayne 1096
## 2 Lake City Bank 797
## 3 1st Source Bank 571
## 4 Indiana Tech 567
## 5 Lincoln Financial Group 445
## 6 3Rivers Federal Credit Union 308
## 7 Ivy Tech Community College 288
## 8 University of Saint Francis 186
## 9 First Merchants Bank 178
## 10 Zimmer Biomet 169
## # … with 3,674 more rows
library(janitor)
# For each company, tally how many rows share the same student count, then
# weight that tally by the student count (n * students) and list the largest
# products first.
# count() already tallies by the columns it is given, so the former
# group_by(company) was redundant and left the printed result grouped.
JANI_Results %>%
  janitor::clean_names() %>%
  count(company, students) %>%
  mutate(n_students = n * students) %>%
  arrange(desc(n_students))
## # A tibble: 11,112 x 4
## # Groups: company [3,684]
## company students n n_students
## <chr> <dbl> <int> <dbl>
## 1 Purdue University Fort Wayne 22 106 2332
## 2 Purdue University Fort Wayne 26 88 2288
## 3 Purdue University Fort Wayne 24 92 2208
## 4 Purdue University Fort Wayne 25 87 2175
## 5 Lake City Bank 25 83 2075
## 6 Purdue University Fort Wayne 23 89 2047
## 7 Lake City Bank 23 83 1909
## 8 Lake City Bank 24 79 1896
## 9 Lake City Bank 22 83 1826
## 10 Purdue University Fort Wayne 21 83 1743
## # … with 11,102 more rows
library(janitor)
# Tally rows per (company, students) pair, multiply each tally by its student
# count, and sort by that product in descending order.
# No group_by() is needed: count() tallies by the listed columns itself, and
# omitting it returns an ungrouped result.
JANI_Results %>%
  janitor::clean_names() %>%
  count(company, students) %>%
  mutate(n_students = n * students) %>%
  arrange(desc(n_students))
## # A tibble: 11,112 x 4
## # Groups: company [3,684]
## company students n n_students
## <chr> <dbl> <int> <dbl>
## 1 Purdue University Fort Wayne 22 106 2332
## 2 Purdue University Fort Wayne 26 88 2288
## 3 Purdue University Fort Wayne 24 92 2208
## 4 Purdue University Fort Wayne 25 87 2175
## 5 Lake City Bank 25 83 2075
## 6 Purdue University Fort Wayne 23 89 2047
## 7 Lake City Bank 23 83 1909
## 8 Lake City Bank 24 79 1896
## 9 Lake City Bank 22 83 1826
## 10 Purdue University Fort Wayne 21 83 1743
## # … with 11,102 more rows
library(janitor)
# Rows per company, sorted so the company with the most rows comes first.
JANI_Results %>%
  janitor::clean_names() %>%
  count(company, sort = TRUE)
## # A tibble: 3,684 x 2
## company n
## <chr> <int>
## 1 Purdue University Fort Wayne 1096
## 2 Lake City Bank 797
## 3 1st Source Bank 571
## 4 Indiana Tech 567
## 5 Lincoln Financial Group 445
## 6 3Rivers Federal Credit Union 308
## 7 Ivy Tech Community College 288
## 8 University of Saint Francis 186
## 9 First Merchants Bank 178
## 10 Zimmer Biomet 169
## # … with 3,674 more rows
#mutate(Jerry = contact_hours * students) %>%
#arrange(desc(students))
This shows that the companies with the most appearances in the dataset are Purdue University Fort Wayne (1096), Lake City Bank (797), 1st Source Bank (571), Indiana Tech (567), and Lincoln Financial Group (445).
library(janitor)
# Rows whose company is "Purdue University Fort Wayne", reduced to company,
# contact_hours and students, plus total_hours: the students taught in each
# session multiplied by the number of hours that JA was working with them.
PFW <- JANI_Results %>%
  janitor::clean_names() %>%
  filter(company == "Purdue University Fort Wayne") %>%
  mutate(total_hours = contact_hours * students) %>%
  select(company, contact_hours, students, total_hours)
PFW
## # A tibble: 1,096 x 4
## company contact_hours students total_hours
## <chr> <dbl> <dbl> <dbl>
## 1 Purdue University Fort Wayne 22 14 308
## 2 Purdue University Fort Wayne 5 22 110
## 3 Purdue University Fort Wayne 5 22 110
## 4 Purdue University Fort Wayne 5 21 105
## 5 Purdue University Fort Wayne 6 35 210
## 6 Purdue University Fort Wayne 5 19 95
## 7 Purdue University Fort Wayne 6 30 180
## 8 Purdue University Fort Wayne 5 23 115
## 9 Purdue University Fort Wayne 5 20 100
## 10 Purdue University Fort Wayne 5 20 100
## # … with 1,086 more rows
# Sum of total_hours over all PFW rows, ignoring missing values.
sum(PFW[["total_hours"]], na.rm = TRUE)
## [1] 156945
library(janitor)
# Rows whose company is "Lake City Bank", reduced to company, contact_hours
# and students, plus total_hours: the students taught in each session
# multiplied by the number of hours that JA was working with them.
LakeCity <- JANI_Results %>%
  janitor::clean_names() %>%
  filter(company == "Lake City Bank") %>%
  mutate(total_hours = contact_hours * students) %>%
  select(company, contact_hours, students, total_hours)
LakeCity
## # A tibble: 797 x 4
## company contact_hours students total_hours
## <chr> <dbl> <dbl> <dbl>
## 1 Lake City Bank 29 22 638
## 2 Lake City Bank 20 23 460
## 3 Lake City Bank 22 14 308
## 4 Lake City Bank 21 28 588
## 5 Lake City Bank 21 25 525
## 6 Lake City Bank 5 20 100
## 7 Lake City Bank 5 20 100
## 8 Lake City Bank 6 20 120
## 9 Lake City Bank 5 18 90
## 10 Lake City Bank 5 20 100
## # … with 787 more rows
# Sum of total_hours over all LakeCity rows, ignoring missing values.
sum(LakeCity[["total_hours"]], na.rm = TRUE)
## [1] 143180
library(janitor)
# Rows whose company is "1st Source Bank", reduced to company, contact_hours
# and students, plus total_hours: the students taught in each session
# multiplied by the number of hours that JA was working with them.
FSource <- JANI_Results %>%
  janitor::clean_names() %>%
  filter(company == "1st Source Bank") %>%
  mutate(total_hours = contact_hours * students) %>%
  select(company, contact_hours, students, total_hours)
FSource
## # A tibble: 571 x 4
## company contact_hours students total_hours
## <chr> <dbl> <dbl> <dbl>
## 1 1st Source Bank 6 27 162
## 2 1st Source Bank 6 23 138
## 3 1st Source Bank 6 22 132
## 4 1st Source Bank 45 25 1125
## 5 1st Source Bank 6 28 168
## 6 1st Source Bank 6 29 174
## 7 1st Source Bank 6 30 180
## 8 1st Source Bank 6 26 156
## 9 1st Source Bank 6 26 156
## 10 1st Source Bank 5 7 35
## # … with 561 more rows
# Sum of total_hours over all FSource rows, ignoring missing values.
sum(FSource[["total_hours"]], na.rm = TRUE)
## [1] 104926
library(janitor)
# Rows whose company is "Indiana Tech", reduced to company, contact_hours and
# students, plus total_hours: the students taught in each session multiplied
# by the number of hours that JA was working with them.
IT <- JANI_Results %>%
  janitor::clean_names() %>%
  filter(company == "Indiana Tech") %>%
  mutate(total_hours = contact_hours * students) %>%
  select(company, contact_hours, students, total_hours)
IT
## # A tibble: 567 x 4
## company contact_hours students total_hours
## <chr> <dbl> <dbl> <dbl>
## 1 Indiana Tech 4 23 92
## 2 Indiana Tech 21 24 504
## 3 Indiana Tech 5 23 115
## 4 Indiana Tech 6 23 138
## 5 Indiana Tech 45 11 495
## 6 Indiana Tech 5 12 60
## 7 Indiana Tech 5 22 110
## 8 Indiana Tech 5 25 125
## 9 Indiana Tech 6 18 108
## 10 Indiana Tech 6 14 84
## # … with 557 more rows
# Sum of total_hours over all IT rows, ignoring missing values.
sum(IT[["total_hours"]], na.rm = TRUE)
## [1] 81242
library(janitor)
# Rows whose company is "Lincoln Financial Group", reduced to company,
# contact_hours and students, plus total_hours: the students taught in each
# session multiplied by the number of hours that JA was working with them.
LFG <- JANI_Results %>%
  janitor::clean_names() %>%
  filter(company == "Lincoln Financial Group") %>%
  mutate(total_hours = contact_hours * students) %>%
  select(company, contact_hours, students, total_hours)
LFG
## # A tibble: 445 x 4
## company contact_hours students total_hours
## <chr> <dbl> <dbl> <dbl>
## 1 Lincoln Financial Group 5 28 140
## 2 Lincoln Financial Group 5 28 140
## 3 Lincoln Financial Group 5 13 65
## 4 Lincoln Financial Group 5 22 110
## 5 Lincoln Financial Group 6 18 108
## 6 Lincoln Financial Group 5 26 130
## 7 Lincoln Financial Group 5 23 115
## 8 Lincoln Financial Group 5 24 120
## 9 Lincoln Financial Group 5 25 125
## 10 Lincoln Financial Group 7 27 189
## # … with 435 more rows
# Sum of total_hours over all LFG rows, ignoring missing values.
sum(LFG[["total_hours"]], na.rm = TRUE)
## [1] 85065
library(janitor)
# Add total_hours (contact hours x students) to every row and list the rows
# from the largest value to the smallest.
# The derived column was previously called "Jerry"; it is named total_hours
# here to match the per-company tables, which use that name for the same
# product.
JANI_Results %>%
  janitor::clean_names() %>%
  mutate(total_hours = contact_hours * students) %>%
  arrange(desc(total_hours))
## # A tibble: 24,034 x 9
## program_year company school grade program class_id contact_hours students
## <chr> <chr> <chr> <chr> <chr> <chr> <dbl> <dbl>
## 1 Program yea… Midwes… Tippe… 12 JA Eco… CL-5818… 72 53
## 2 Program yea… Allen … Wabas… 9 JA Ins… CL-1424… 6 611
## 3 Program yea… Chase Wabas… 9 JA Ins… CL-1424… 6 611
## 4 Program yea… Coldwe… Wabas… 9 JA Ins… CL-1424… 6 611
## 5 Program yea… Conten… Wabas… 9 JA Ins… CL-1424… 6 611
## 6 Program yea… Fort F… Wabas… 9 JA Ins… CL-1424… 6 611
## 7 Program yea… Indian… Wabas… 9 JA Ins… CL-1424… 6 611
## 8 Program yea… Indian… Wabas… 9 JA Ins… CL-1424… 6 611
## 9 Program yea… Nation… Wabas… 9 JA Ins… CL-1424… 6 611
## 10 Program yea… Nation… Wabas… 9 JA Ins… CL-1424… 6 611
## # … with 24,024 more rows, and 1 more variable: Jerry <dbl>
library(janitor)
# Count rows for every (company, students) combination, multiply each count by
# its student count, and show the largest products first.
# The earlier group_by(company) is dropped: count() tallies by the listed
# columns on its own, and without it the result is returned ungrouped.
JANI_Results %>%
  janitor::clean_names() %>%
  count(company, students) %>%
  mutate(n_students = n * students) %>%
  arrange(desc(n_students))
## # A tibble: 11,112 x 4
## # Groups: company [3,684]
## company students n n_students
## <chr> <dbl> <int> <dbl>
## 1 Purdue University Fort Wayne 22 106 2332
## 2 Purdue University Fort Wayne 26 88 2288
## 3 Purdue University Fort Wayne 24 92 2208
## 4 Purdue University Fort Wayne 25 87 2175
## 5 Lake City Bank 25 83 2075
## 6 Purdue University Fort Wayne 23 89 2047
## 7 Lake City Bank 23 83 1909
## 8 Lake City Bank 24 79 1896
## 9 Lake City Bank 22 83 1826
## 10 Purdue University Fort Wayne 21 83 1743
## # … with 11,102 more rows
The companies with the most students taught in a single session, multiplied by the number of times that same number of students was taught, are Manchester University (60208 students), Chase (56445 students), Purdue University (56445 students), Trine University (56445 students), and Indiana Michigan Power (52682 students). This was determined by counting by company and students, then multiplying the student count by the number of appearances of that student count for that company (in this case ManU has 16 appearances of 3763 students), to determine how many students each company worked with when focusing on its largest session size. This is not the total number of students; I will compute that below for these top 5 companies.
For ManU, the su
library(janitor)
# Rows whose company is "Manchester University, College Of Pharmacy", reduced
# to company, contact_hours and students, plus total_hours: the students
# taught in each session multiplied by the number of hours that JA was working
# with them.
ManU <- JANI_Results %>%
  janitor::clean_names() %>%
  filter(company == "Manchester University, College Of Pharmacy") %>%
  mutate(total_hours = contact_hours * students) %>%
  select(company, contact_hours, students, total_hours)
ManU
## # A tibble: 37 x 4
## company contact_hours students total_hours
## <chr> <dbl> <dbl> <dbl>
## 1 Manchester University, College Of Pharmacy 25 25 625
## 2 Manchester University, College Of Pharmacy 5 26 130
## 3 Manchester University, College Of Pharmacy 5 24 120
## 4 Manchester University, College Of Pharmacy 5 24 120
## 5 Manchester University, College Of Pharmacy 5 24 120
## 6 Manchester University, College Of Pharmacy 5 22 110
## 7 Manchester University, College Of Pharmacy 5 22 110
## 8 Manchester University, College Of Pharmacy 5 23 115
## 9 Manchester University, College Of Pharmacy 5 19 95
## 10 Manchester University, College Of Pharmacy 5 19 95
## # … with 27 more rows
# Sum of total_hours over all ManU rows, ignoring missing values.
sum(ManU[["total_hours"]], na.rm = TRUE)
## [1] 2635
library(janitor)
# Rows whose company is "Chase", reduced to company, contact_hours and
# students, plus total_hours (contact_hours * students).
Chase <- JANI_Results %>%
  janitor::clean_names() %>%
  filter(company == "Chase") %>%
  mutate(total_hours = contact_hours * students) %>%
  select(company, contact_hours, students, total_hours)
Chase
## # A tibble: 114 x 4
## company contact_hours students total_hours
## <chr> <dbl> <dbl> <dbl>
## 1 Chase 5 29 145
## 2 Chase 5 21 105
## 3 Chase 28 24 672
## 4 Chase 6 19 114
## 5 Chase 34 24 816
## 6 Chase 28 21 588
## 7 Chase 29 24 696
## 8 Chase 6 26 156
## 9 Chase 38 19 722
## 10 Chase 5 19 95
## # … with 104 more rows
# Sum of total_hours over all Chase rows, ignoring missing values.
sum(Chase[["total_hours"]], na.rm = TRUE)
## [1] 21398
library(janitor)
# Rows whose company is "Purdue University", reduced to company,
# contact_hours and students, plus total_hours (contact_hours * students).
Purdue <- JANI_Results %>%
  janitor::clean_names() %>%
  filter(company == "Purdue University") %>%
  mutate(total_hours = contact_hours * students) %>%
  select(company, contact_hours, students, total_hours)
Purdue
## # A tibble: 134 x 4
## company contact_hours students total_hours
## <chr> <dbl> <dbl> <dbl>
## 1 Purdue University 5 19 95
## 2 Purdue University 5 22 110
## 3 Purdue University 5 22 110
## 4 Purdue University 5 22 110
## 5 Purdue University 5 22 110
## 6 Purdue University 5 30 150
## 7 Purdue University 21 22 462
## 8 Purdue University 37 25 925
## 9 Purdue University 23 22 506
## 10 Purdue University 24 21 504
## # … with 124 more rows
# Sum of total_hours over all Purdue rows, ignoring missing values.
sum(Purdue[["total_hours"]], na.rm = TRUE)
## [1] 33143
library(janitor)
# Rows whose company is "Trine University", reduced to company, contact_hours
# and students, plus total_hours (contact_hours * students).
Trine <- JANI_Results %>%
  janitor::clean_names() %>%
  filter(company == "Trine University") %>%
  mutate(total_hours = contact_hours * students) %>%
  select(company, contact_hours, students, total_hours)
Trine
## # A tibble: 149 x 4
## company contact_hours students total_hours
## <chr> <dbl> <dbl> <dbl>
## 1 Trine University 31 20 620
## 2 Trine University 22 16 352
## 3 Trine University 22 16 352
## 4 Trine University 22 16 352
## 5 Trine University 22 19 418
## 6 Trine University 22 19 418
## 7 Trine University 22 15 330
## 8 Trine University 22 15 330
## 9 Trine University 22 15 330
## 10 Trine University 7 28 196
## # … with 139 more rows
# Sum of total_hours over all Trine rows, ignoring missing values.
sum(Trine[["total_hours"]], na.rm = TRUE)
## [1] 33574
library(janitor)
# Rows whose company is "Indiana Michigan Power", reduced to company,
# contact_hours and students, plus total_hours (contact_hours * students).
IMP <- JANI_Results %>%
  janitor::clean_names() %>%
  filter(company == "Indiana Michigan Power") %>%
  mutate(total_hours = contact_hours * students) %>%
  select(company, contact_hours, students, total_hours)
IMP
## # A tibble: 31 x 4
## company contact_hours students total_hours
## <chr> <dbl> <dbl> <dbl>
## 1 Indiana Michigan Power 25 25 625
## 2 Indiana Michigan Power 5 18 90
## 3 Indiana Michigan Power 5 27 135
## 4 Indiana Michigan Power 5 18 90
## 5 Indiana Michigan Power 5 26 130
## 6 Indiana Michigan Power 5 18 90
## 7 Indiana Michigan Power 5 26 130
## 8 Indiana Michigan Power 5 26 130
## 9 Indiana Michigan Power 29 13 377
## 10 Indiana Michigan Power 5 25 125
## # … with 21 more rows
# Sum of total_hours over all IMP rows, ignoring missing values.
sum(IMP[["total_hours"]], na.rm = TRUE)
## [1] 3037
library(ggplot2)
library(dplyr)
library(scales)
##
## Attaching package: 'scales'
## The following object is masked from 'package:purrr':
##
## discard
## The following object is masked from 'package:readr':
##
## col_factor
# Hand-entered total hours for ten companies, keyed by company name.
# NOTE(review): these figures are typed in rather than computed from
# JANI_Results - confirm they match the intended per-company totals.
hours_by_company <- c(
  "Manchester University" = 396631,
  "Trine University" = 372244,
  "Purdue University" = 371813,
  "Chase" = 360068,
  "Indiana Michigan Power" = 335503,
  "Purdue University Fort Wayne" = 314991,
  "Indiana Tech" = 233084,
  "Lake City Bank" = 143180,
  "1st Source Bank" = 104926,
  "Lincoln Financial Group" = 85065
)

# Split into the two parallel vectors and combine them into a data frame
# with columns Company and Total_Hours.
Company <- names(hours_by_company)
Total_Hours <- unname(hours_by_company)
TopTen <- data.frame(Company, Total_Hours)
TopTen
## Company Total_Hours
## 1 Manchester University 396631
## 2 Trine University 372244
## 3 Purdue University 371813
## 4 Chase 360068
## 5 Indiana Michigan Power 335503
## 6 Purdue University Fort Wayne 314991
## 7 Indiana Tech 233084
## 8 Lake City Bank 143180
## 9 1st Source Bank 104926
## 10 Lincoln Financial Group 85065
# Column chart of TopTen: one bar per company, bars ordered along the x axis
# by Total_Hours, each bar filled by company, y axis labelled in thousands
# with a "K" suffix.
ggplot(
  data = TopTen,
  mapping = aes(
    x = reorder(Company, Total_Hours),
    y = Total_Hours,
    fill = Company
  )
) +
  geom_col() +
  scale_y_continuous(labels = label_number(scale = 1e-3, suffix = "K")) +
  labs(
    title = "Top Ten Companies by Total Hours",
    x = "Company",
    y = "Total Hours",
    fill = "Company"
  ) +
  # Rotate the x-axis labels 60 degrees and right-align them.
  theme(axis.text.x = element_text(angle = 60, hjust = 1))