# class_activity_4

library(tidyverse)

# Make the data directory the working directory so the relative file names
# used for reading and writing below resolve against it.
# NOTE(review): this absolute path is machine-specific; the script will stop
# here on any computer that does not have this directory.
setwd("C:/Users/chesl/Desktop/DATA110")

# Load the two CSV inputs as tibbles.
pfizer <- read_csv(file = "pfizer.csv")
fda <- read_csv(file = "fda.csv")

# Preview the first rows of the payments table.
pfizer |> head()
# A tibble: 6 × 10
#   org_indiv     first_plus first_name last_name city  state category  cash other
#   <chr>         <chr>      <chr>      <chr>     <chr> <chr> <chr>    <dbl> <dbl>
# 1 3-D MEDICAL … STEVEN BR… STEVEN     DEITELZW… NEW … LA    Profess…  2625     0
# 2 AA DOCTORS, … AAKASH MO… AAKASH     AHUJA     PASO… CA    Expert-…  1000     0
# 3 ABBO, LILIAN… LILIAN MA… LILIAN     ABBO      MIAMI FL    Busines…     0   448
# 4 ABBO, LILIAN… LILIAN MA… LILIAN     ABBO      MIAMI FL    Meals        0   119
# 5 ABBO, LILIAN… LILIAN MA… LILIAN     ABBO      MIAMI FL    Profess…  1800     0
# 6 ABDULLAH RAF… ABDULLAH   ABDULLAH   RAFFEE    FLINT MI    Expert-…   750     0
# ℹ 1 more variable: total <dbl>
# Preview the first rows of the FDA warning-letter table.
fda |> head()
# A tibble: 6 × 5
#   name_last name_first name_middle issued     office
#   <chr>     <chr>      <chr>       <chr>      <chr>
# 1 ADELGLASS JEFFREY    M.          5/25/1999  Center for Drug Evaluation and Re…
# 2 ADKINSON  N.         FRANKLIN    4/19/2000  Center for Biologics Evaluation a…
# 3 ALLEN     MARK       S.          1/28/2002  Center for Devices and Radiologic…
# 4 AMSTERDAM DANIEL     <NA>        11/17/2004 Center for Biologics Evaluation a…
# 5 AMSTUTZ   HARLAN     C.          7/19/2004  Center for Devices and Radiologic…
# 6 ANDERSON  C.         JOSEPH      2/25/2000  Center for Devices and Radiologic…
# Per-column summary of the payments table (type info for character columns,
# quantiles and NA counts for numeric ones).
pfizer |> summary()
#   org_indiv          first_plus         first_name         last_name
#  Length:10087       Length:10087       Length:10087       Length:10087
#  Class :character   Class :character   Class :character   Class :character
#  Mode  :character   Mode  :character   Mode  :character   Mode  :character
#
#
#
#
#      city              state             category              cash
#  Length:10087       Length:10087       Length:10087       Min.   :      0
#  Class :character   Class :character   Class :character   1st Qu.:      0
#  Mode  :character   Mode  :character   Mode  :character   Median :      0
#                                                           Mean   :   3241
#                                                           3rd Qu.:   2000
#                                                           Max.   :1185466
#                                                           NA's   :1
#      other             total
#  Min.   :    0.0   Min.   :      0
#  1st Qu.:    0.0   1st Qu.:    191
#  Median :   41.0   Median :    750
#  Mean   :  266.5   Mean   :   3507
#  3rd Qu.:  262.0   3rd Qu.:   2000
#  Max.   :27681.0   Max.   :1185466
#  NA's   :3
# "Expert-Led Forums" rows with a total of at least 10000, split by whether
# the state is California. Each table is sorted from the largest total down.
ca_expert_10000 <- pfizer |>
  filter(
    category == "Expert-Led Forums",
    state == "CA",
    total >= 10000
  ) |>
  arrange(desc(total))

not_ca_expert_10000 <- pfizer |>
  filter(
    category == "Expert-Led Forums",
    state != "CA",
    total >= 10000
  ) |>
  arrange(desc(total))
# The 20 largest "Professional Advising" rows (by total) among recipients in
# CA, NY, TX or FL.
# Sort first and truncate afterwards: truncating before sorting would keep
# whichever rows happen to come first in the file rather than the largest ones.
ca_ny_tx_fl_prof_top20 <- pfizer |>
  filter(
    state %in% c("CA", "NY", "TX", "FL"),
    category == "Professional Advising"
  ) |>
  arrange(desc(total)) |>
  head(20)
# Rows whose category contains "Expert" or "Professional", ordered by
# last name and then first name.
expert_advice <- pfizer |>
  filter(grepl(pattern = "Expert|Professional", x = category)) |>
  arrange(last_name, first_name)

# The complement: every row whose category matches neither word, same ordering.
not_expert_advice <- pfizer |>
  filter(!grepl(pattern = "Expert|Professional", x = category)) |>
  arrange(last_name, first_name)

# Stack the two subsets back into one table (matching rows first).
pfizer2 <- expert_advice |>
  bind_rows(not_expert_advice)

# Export the matching rows; missing values are written as empty fields.
expert_advice |>
  write_csv("expert_advice.csv", na = "")
# Sum, median and row count of `total` for every state/category combination,
# ordered by state and then category.
# `.groups = "drop"` returns an ungrouped tibble; without it the result stays
# grouped by `state` and summarize() prints a regrouping message.
state_category_summary <- pfizer |>
  group_by(state, category) |>
  summarize(
    sum = sum(total),
    median = median(total),
    count = n(),
    .groups = "drop"
  ) |>
  arrange(state, category)
# Parse the month/day/year text in `issued` into Date values.
fda <- fda |>
  mutate(issued = as.Date(issued, format = "%m/%d/%Y"))

# Letters issued on or after 1 January 2005, oldest first.
post2005 <- fda |>
  filter(issued >= as.Date("2005-01-01")) |>
  arrange(issued)
# Number of letters per calendar year; `year` is the four-digit year of
# `issued`, stored as text.
letters_year <- fda |>
  mutate(year = format(issued, "%Y")) |>
  count(year, name = "letters")
# Add the time elapsed between each letter's issue date and the day the script
# runs, once in days and once in weeks.
fda <- fda |>
  mutate(
    days_elapsed = difftime(Sys.Date(), issued, units = "days"),
    weeks_elapsed = difftime(Sys.Date(), issued, units = "weeks")
  )
# Match payment rows to FDA letters on first and last name, then keep only
# the "Expert-Led Forums" rows. The inner join carries the FDA columns along.
expert_warned_inner <- pfizer |>
  inner_join(
    fda,
    by = c("first_name" = "name_first", "last_name" = "name_last")
  ) |>
  filter(category == "Expert-Led Forums")

# Same name match as a semi join: payment rows with at least one matching
# letter are kept, and no FDA columns are added.
expert_warned_semi <- pfizer |>
  semi_join(
    fda,
    by = c("first_name" = "name_first", "last_name" = "name_last")
  ) |>
  filter(category == "Expert-Led Forums")
# Name-matched "Expert-Led Forums" rows, reduced to six columns chosen by name.
expert_warned <- pfizer |>
  inner_join(
    fda,
    by = c("first_name" = "name_first", "last_name" = "name_last")
  ) |>
  filter(category == "Expert-Led Forums") |>
  select(first_plus, last_name, city, state, total, issued)

# Name-matched "Expert-Led Forums" rows, reduced to six columns.
# The columns are spelled out by name rather than by position (2:5, 10, 12) so
# the selection no longer depends on column order or on how many columns the
# join produces. Note that this set keeps `first_name` and omits `state`.
expert_warned <- pfizer |>
  inner_join(
    fda,
    by = c("first_name" = "name_first", "last_name" = "name_last")
  ) |>
  filter(category == "Expert-Led Forums") |>
  select(first_plus, first_name, last_name, city, total, issued)