library(readr)
## Warning: replacing previous import 'lifecycle::last_warnings' by
## 'rlang::last_warnings' when loading 'tibble'
## Warning: replacing previous import 'ellipsis::check_dots_unnamed' by
## 'rlang::check_dots_unnamed' when loading 'tibble'
## Warning: replacing previous import 'ellipsis::check_dots_used' by
## 'rlang::check_dots_used' when loading 'tibble'
## Warning: replacing previous import 'ellipsis::check_dots_empty' by
## 'rlang::check_dots_empty' when loading 'tibble'
## Warning: replacing previous import 'lifecycle::last_warnings' by
## 'rlang::last_warnings' when loading 'pillar'
## Warning: replacing previous import 'ellipsis::check_dots_unnamed' by
## 'rlang::check_dots_unnamed' when loading 'pillar'
## Warning: replacing previous import 'ellipsis::check_dots_used' by
## 'rlang::check_dots_used' when loading 'pillar'
## Warning: replacing previous import 'ellipsis::check_dots_empty' by
## 'rlang::check_dots_empty' when loading 'pillar'
library(readxl)
library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.0 ──
## ✓ ggplot2 3.3.2     ✓ dplyr   1.0.2
## ✓ tibble  3.0.4     ✓ stringr 1.4.0
## ✓ tidyr   1.1.2     ✓ forcats 0.5.0
## ✓ purrr   0.3.4
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
library(janitor)
## 
## Attaching package: 'janitor'
## The following objects are masked from 'package:stats':
## 
##     chisq.test, fisher.test
library(ggplot2)
# Top 5 big events ----
# Load the volunteer records and score each one as contact hours * students.
# The score is stored in `percentage`, the column name used further down.
#
# `Students` contains values written with a thousands separator (for example
# "3,763"). When readr guesses the column as col_double() those values fail to
# parse and are turned into NA, so the largest events end up with an NA score.
# col_number() accepts the grouping mark; the other columns keep the types
# readr was guessing already.
JANIEVENTS <- read_csv(
  "JANI Volunteer data set - Results.csv",
  col_types = cols(
    .default = col_character(),
    `Contact hours` = col_double(),
    Students = col_number()
  )
) %>%
  janitor::clean_names() %>%
  mutate(percentage = contact_hours * students) %>%
  arrange(desc(percentage))
## 
## ── Column specification ────────────────────────────────────────────────────────
## cols(
##   `Program Year` = col_character(),
##   Company = col_character(),
##   School = col_character(),
##   Grade = col_character(),
##   Program = col_character(),
##   `Class ID` = col_character(),
##   `Contact hours` = col_double(),
##   Students = col_double()
## )
## Warning: 374 parsing failures.
##  row      col               expected actual                                    file
## 1402 Students no trailing characters  3,763 'JANI Volunteer data set - Results.csv'
## 1425 Students no trailing characters  3,763 'JANI Volunteer data set - Results.csv'
## 1617 Students no trailing characters  2,729 'JANI Volunteer data set - Results.csv'
## 1618 Students no trailing characters  2,729 'JANI Volunteer data set - Results.csv'
## 1619 Students no trailing characters  2,729 'JANI Volunteer data set - Results.csv'
## .... ........ ...................... ...... .......................................
## See problems(...) for more details.
  # Keep only the records whose score reaches the hard-coded 3666 cut-off,
  # then display what is left.
  JANIEVENTS <- JANIEVENTS %>%
    filter(percentage >= 3666)
JANIEVENTS
## # A tibble: 14 x 9
##    program_year company school grade program class_id contact_hours students
##    <chr>        <chr>   <chr>  <chr> <chr>   <chr>            <dbl>    <dbl>
##  1 Program yea… Midwes… Tippe… 12    JA Eco… CL-5818…            72       53
##  2 Program yea… Allen … Wabas… 9     JA Ins… CL-1424…             6      611
##  3 Program yea… Chase   Wabas… 9     JA Ins… CL-1424…             6      611
##  4 Program yea… Coldwe… Wabas… 9     JA Ins… CL-1424…             6      611
##  5 Program yea… Conten… Wabas… 9     JA Ins… CL-1424…             6      611
##  6 Program yea… Fort F… Wabas… 9     JA Ins… CL-1424…             6      611
##  7 Program yea… Indian… Wabas… 9     JA Ins… CL-1424…             6      611
##  8 Program yea… Indian… Wabas… 9     JA Ins… CL-1424…             6      611
##  9 Program yea… Nation… Wabas… 9     JA Ins… CL-1424…             6      611
## 10 Program yea… Nation… Wabas… 9     JA Ins… CL-1424…             6      611
## 11 Program yea… Northe… Wabas… 9     JA Ins… CL-1424…             6      611
## 12 Program yea… Northe… Wabas… 9     JA Ins… CL-1424…             6      611
## 13 Program yea… One Lu… Wabas… 9     JA Ins… CL-1424…             6      611
## 14 Program yea… The Wo… Wabas… 9     JA Ins… CL-1424…             6      611
## # … with 1 more variable: percentage <dbl>
# Spread of the score within each company. A box plot summarises a continuous
# variable, so `percentage` is mapped to y; mapping only the categorical
# `company` leaves nothing meaningful to compute quartiles from.
ggplot(JANIEVENTS, aes(x = company, y = percentage)) +
  geom_boxplot()

# Column chart of the score by company. geom_col() stacks by default, so a
# company that appears in several rows gets one bar showing their total.
ggplot(data = JANIEVENTS, mapping = aes(x = company, y = percentage)) +
  geom_col() +
  xlab("company") +
  ylab("percentage") +
  ggtitle(
    label = "Each Company's Percentage (contact hours * students)",
    subtitle = "Top Companies"
  )

# Top 5 interactions ----
# Count how many records each company has in the data set, largest first.
#
# Only `Company` is needed for the count, so only that column is read. Letting
# readr guess every column parses `Students` as a double and raises parse
# failures on values such as "3,763", which have no bearing on this result.
JANIINTERACTIONS <- read_csv(
  "JANI Volunteer data set - Results.csv",
  col_types = cols_only(Company = col_character())
) %>%
  janitor::clean_names() %>%
  count(company, sort = TRUE)
## 
## ── Column specification ────────────────────────────────────────────────────────
## cols(
##   `Program Year` = col_character(),
##   Company = col_character(),
##   School = col_character(),
##   Grade = col_character(),
##   Program = col_character(),
##   `Class ID` = col_character(),
##   `Contact hours` = col_double(),
##   Students = col_double()
## )
## Warning: 374 parsing failures.
##  row      col               expected actual                                    file
## 1402 Students no trailing characters  3,763 'JANI Volunteer data set - Results.csv'
## 1425 Students no trailing characters  3,763 'JANI Volunteer data set - Results.csv'
## 1617 Students no trailing characters  2,729 'JANI Volunteer data set - Results.csv'
## 1618 Students no trailing characters  2,729 'JANI Volunteer data set - Results.csv'
## 1619 Students no trailing characters  2,729 'JANI Volunteer data set - Results.csv'
## .... ........ ...................... ...... .......................................
## See problems(...) for more details.
# Retain the companies with at least 445 records (hard-coded cut-off).
JANIINTERACTIONS <- JANIINTERACTIONS %>%
  filter(n >= 445)
 
# Box plot of the record count per company. The continuous variable `n` is
# mapped to y; with only the categorical `company` mapped there is no numeric
# value for the box plot to summarise.
ggplot(JANIINTERACTIONS, aes(x = company, y = n)) +
  geom_boxplot()

# Column chart of the record count per company. `n` is produced by
# count(company), i.e. the number of rows (interactions) each company has,
# not a sum of contact hours, so the axis label and title describe a count.
ggplot(JANIINTERACTIONS, aes(x = company, y = n)) +
  geom_col() +
  labs(
    x = "Companies",
    y = "Number of Interactions",
    title = "Number of Interactions with Students per Company",
    subtitle = "Top 5 Companies"
  )