class activity

getwd()
[1] "/Users/rahwahagos"
library(tidyverse)
── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
✔ dplyr     1.2.0     ✔ readr     2.2.0
✔ forcats   1.0.1     ✔ stringr   1.6.0
✔ ggplot2   4.0.3     ✔ tibble    3.3.1
✔ lubridate 1.9.5     ✔ tidyr     1.3.2
✔ purrr     1.2.1     
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag()    masks stats::lag()
ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
pfizer <- read_csv("/Users/rahwahagos/Downloads/pfizer.csv")
Rows: 10087 Columns: 10
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (7): org_indiv, first_plus, first_name, last_name, city, state, category
dbl (3): cash, other, total

ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
fda <- read_csv("/Users/rahwahagos/Downloads/fda.csv")
Rows: 272 Columns: 5
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (5): name_last, name_first, name_middle, issued, office

ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
head(pfizer)
# A tibble: 6 × 10
  org_indiv     first_plus first_name last_name city  state category  cash other
  <chr>         <chr>      <chr>      <chr>     <chr> <chr> <chr>    <dbl> <dbl>
1 3-D MEDICAL … STEVEN BR… STEVEN     DEITELZW… NEW … LA    Profess…  2625     0
2 AA DOCTORS, … AAKASH MO… AAKASH     AHUJA     PASO… CA    Expert-…  1000     0
3 ABBO, LILIAN… LILIAN MA… LILIAN     ABBO      MIAMI FL    Busines…     0   448
4 ABBO, LILIAN… LILIAN MA… LILIAN     ABBO      MIAMI FL    Meals        0   119
5 ABBO, LILIAN… LILIAN MA… LILIAN     ABBO      MIAMI FL    Profess…  1800     0
6 ABDULLAH RAF… ABDULLAH   ABDULLAH   RAFFEE    FLINT MI    Expert-…   750     0
# ℹ 1 more variable: total <dbl>
head(fda)
# A tibble: 6 × 5
  name_last name_first name_middle issued     office                            
  <chr>     <chr>      <chr>       <chr>      <chr>                             
1 ADELGLASS JEFFREY    M.          5/25/1999  Center for Drug Evaluation and Re…
2 ADKINSON  N.         FRANKLIN    4/19/2000  Center for Biologics Evaluation a…
3 ALLEN     MARK       S.          1/28/2002  Center for Devices and Radiologic…
4 AMSTERDAM DANIEL     <NA>        11/17/2004 Center for Biologics Evaluation a…
5 AMSTUTZ   HARLAN     C.          7/19/2004  Center for Devices and Radiologic…
6 ANDERSON  C.         JOSEPH      2/25/2000  Center for Devices and Radiologic…
class(fda$issued) 
[1] "character"
summary(pfizer)  # summary of pfizer data
  org_indiv          first_plus         first_name         last_name        
 Length:10087       Length:10087       Length:10087       Length:10087      
 Class :character   Class :character   Class :character   Class :character  
 Mode  :character   Mode  :character   Mode  :character   Mode  :character  
                                                                            
                                                                            
                                                                            
                                                                            
     city              state             category              cash        
 Length:10087       Length:10087       Length:10087       Min.   :      0  
 Class :character   Class :character   Class :character   1st Qu.:      0  
 Mode  :character   Mode  :character   Mode  :character   Median :      0  
                                                          Mean   :   3241  
                                                          3rd Qu.:   2000  
                                                          Max.   :1185466  
                                                          NA's   :1        
     other             total        
 Min.   :    0.0   Min.   :      0  
 1st Qu.:    0.0   1st Qu.:    191  
 Median :   41.0   Median :    750  
 Mean   :  266.5   Mean   :   3507  
 3rd Qu.:  262.0   3rd Qu.:   2000  
 Max.   :27681.0   Max.   :1185466  
 NA's   :3                          
# doctors in California who were paid $10,000 or more by Pfizer to run "Expert-Led Forums."
ca_expert_10000 <- pfizer %>%
  filter(state == "CA" & total >= 10000 & category == "Expert-Led Forums")
ca_expert_10000
# A tibble: 31 × 10
   org_indiv    first_plus first_name last_name city  state category  cash other
   <chr>        <chr>      <chr>      <chr>     <chr> <chr> <chr>    <dbl> <dbl>
 1 ANDREW M BL… ANDREW M   ANDREW     BLUMENFE… DEL … CA    Expert-… 20500     0
 2 BANDARI, DA… DANIEL SH… DANIEL     BANDARI   LOS … CA    Expert-… 21000     0
 3 BAROYA, IVA… IVAN STEP… IVAN       BAROYA    BONI… CA    Expert-… 26400     0
 4 BECK, CLIFF… CLIFFORD … CLIFFORD   BECK      TORR… CA    Expert-… 10500     0
 5 CHOPRA, SHA… SHAGUN     SHAGUN     CHOPRA    SAN … CA    Expert-… 17250     0
 6 COLE, EMILY… EMILY ELI… EMILY      COLE      SAN … CA    Expert-… 12000     0
 7 EHRESMANN, … GLENN RIC… GLENN      EHRESMANN LOS … CA    Expert-… 12000     0
 8 FONAROW, GR… GREGG CUR… GREGG      FONAROW   LOS … CA    Expert-… 15000     0
 9 GINSBERG, D… DAVID ALAN DAVID      GINSBERG  LOS … CA    Expert-… 45750     0
10 HARBOUR, MI… MICHAEL J… MICHAEL    HARBOUR   PALO… CA    Expert-… 13500     0
# ℹ 21 more rows
# ℹ 1 more variable: total <dbl>
# doctors in California who were paid $10,000 or more by Pfizer to run "Expert-Led Forums."
ca_expert_10000_sorted <- pfizer %>%
  filter(state == "CA" & total >= 10000 & category == "Expert-Led Forums") %>%
  arrange(desc(total))
ca_expert_10000_sorted
# A tibble: 31 × 10
   org_indiv   first_plus first_name last_name city  state category   cash other
   <chr>       <chr>      <chr>      <chr>     <chr> <chr> <chr>     <dbl> <dbl>
 1 SACKS, GER… GERALD MI… GERALD     SACKS     SANT… CA    Expert-… 146500     0
 2 NIDES, MIT… MITCHELL   MITCHELL   NIDES     LOS … CA    Expert-…  70500     0
 3 POTKIN, ST… STEVEN GA… STEVEN     POTKIN    ORAN… CA    Expert-…  48350     0
 4 GINSBERG, … DAVID ALAN DAVID      GINSBERG  LOS … CA    Expert-…  45750     0
 5 LOUIE, SAM… SAMUEL     SAMUEL     LOUIE     SACR… CA    Expert-…  41250     0
 6 INSTITUTE … GURKIPAL   GURKIPAL   SINGH     WOOD… CA    Expert-…  40000     0
 7 BAROYA, IV… IVAN STEP… IVAN       BAROYA    BONI… CA    Expert-…  26400     0
 8 MATTHEW BU… MATTHEW J… MATTHEW    BUDOFF    MANH… CA    Expert-…  24000     0
 9 QUANG H NG… QUANG H    QUANG      NGUYEN    LA J… CA    Expert-…  22500     0
10 SCHROEDER,… JOHN SPEER JOHN       SCHROEDER STAN… CA    Expert-…  21500     0
# ℹ 21 more rows
# ℹ 1 more variable: total <dbl>
# Find doctors in states other than California who were paid $10,000 or more by Pfizer to run "Expert-Led Forums."
not_ca_expert_10000 <- pfizer %>%
  filter(state != "CA" & total >= 10000 & category=="Expert-Led Forums") %>%
  arrange(desc(total))
not_ca_expert_10000
# A tibble: 133 × 10
   org_indiv    first_plus first_name last_name city  state category  cash other
   <chr>        <chr>      <chr>      <chr>     <chr> <chr> <chr>    <dbl> <dbl>
 1 HESS, TODD … TODD MICH… TODD       HESS      SAIN… MN    Expert-… 79000     0
 2 MILLER, JOH… JOHN JOSE… JOHN       MILLER    EXET… NH    Expert-… 78000     0
 3 WEATHERS, V… VIVIAN JOY VIVIAN     WEATHERS  APEX  NC    Expert-… 75400     0
 4 ROBERT B NE… ROBERT BU… ROBERT     NETT      SAN … TX    Expert-… 60750     0
 5 SOLERA CONS… STEVEN AB… STEVEN     KAPLAN    CHAP… NY    Expert-… 56500     0
 6 GRIFFIN, JA… JAMES DALE JAMES      GRIFFIN   DALL… TX    Expert-… 54250     0
 7 D BRENT JOY… DAVID BRE… DAVID      JOYE      SUMM… NC    Expert-… 53500     0
 8 STUBBLEFIEL… MICHAEL D  MICHAEL    STUBBLEF… NEW … NY    Expert-… 50500     0
 9 COHEN, SETH… SETH ALEX… SETH       COHEN     SEAT… WA    Expert-… 44500     0
10 SUSSMAN, DA… DAVID OWEN DAVID      SUSSMAN   VOOR… NJ    Expert-… 42500     0
# ℹ 123 more rows
# ℹ 1 more variable: total <dbl>
# Find the 20 doctors across the four largest states (CA, TX, FL, NY) who were paid the most for professional advice.
ca_ny_tx_fl_prof_top20 <- pfizer %>%
  filter((state=="CA" | state == "NY" | state == "TX" | state == "FL") &     category == "Professional Advising") %>%
  arrange(desc(total)) %>%
  head(6)
ca_ny_tx_fl_prof_top20
# A tibble: 6 × 10
  org_indiv    first_plus first_name last_name city  state category   cash other
  <chr>        <chr>      <chr>      <chr>     <chr> <chr> <chr>     <dbl> <dbl>
1 BAILES, JOS… JOSEPH SW… JOSEPH     BAILES    AUST… TX    Profess… 105000     0
2 SAWYERS, CH… CHARLES L… CHARLES    SAWYERS   NEW … NY    Profess… 100000     0
3 MALENKA, RO… ROBERT CH… ROBERT     MALENKA   STAN… CA    Profess…  75566     0
4 BEUTLER, BR… BRUCE ALAN BRUCE      BEUTLER   DALL… TX    Profess…  50000     0
5 REGENTS OF … DAVID RAY… DAVID      GANDARA   IRVI… CA    Profess…  38500     0
6 PTACEK, LOU… LOUIS JOHN LOUIS      PTACEK    SAN … CA    Profess…  37588     0
# ℹ 1 more variable: total <dbl>
expert_advice <- pfizer %>%
  filter(category == "Expert-Led Forums" | category == "Professional Advising") %>%
  arrange(last_name, first_name) %>%
  head(20)
expert_advice
# A tibble: 20 × 10
   org_indiv    first_plus first_name last_name city  state category  cash other
   <chr>        <chr>      <chr>      <chr>     <chr> <chr> <chr>    <dbl> <dbl>
 1 ABBO, LILIA… LILIAN MA… LILIAN     ABBO      MIAMI FL    Profess…  1800     0
 2 ABEBE, SHEI… SHEILA Y   SHEILA     ABEBE     INDI… IN    Expert-…   825     0
 3 NEW YORK UN… JUDITH ANN JUDITH     ABERG     NEW … NY    Profess…  1750     0
 4 ABOLNIK, IG… IGOR Z     IGOR       ABOLNIK   PROVO UT    Expert-…  1750     0
 5 ABRAKSIA, S… SAMIR      SAMIR      ABRAKSIA  BEAC… OH    Expert-…  2000     0
 6 ABRAKSIA, S… SAMIR      SAMIR      ABRAKSIA  BEAC… OH    Profess…  2500     0
 7 ABRAMSON, S… STEVEN BA… STEVEN     ABRAMSON  NEW … NY    Profess…  4400     0
 8 ABUZZAHAB, … FARUK S    FARUK      ABUZZAHAB MINN… MN    Profess…  1750     0
 9 ABUZZAHAB, … MARY JENN… MARY       ABUZZAHAB SAIN… MN    Expert-…  1000     0
10 ACCACHA, SI… SIHAM DON… SIHAM      ACCACHA   MINE… NY    Expert-…  1250     0
11 ACEVEDO MAR… IRIS ARLE… IRIS       ACEVEDO … CAGU… PR    Expert-…   750     0
12 ACKERMAN, I… IVAN FOST… IVAN       ACKERMAN  BRAN… FL    Expert-…  1250     0
13 PAIN MEDICI… WILLIAM E… WILLIAM    ACKERMAN  LITT… AR    Expert-…  1000     0
14 ACOSTA, LUI… LUIS SILV… LUIS       ACOSTA    HOUS… TX    Expert-…  1000     0
15 ADAMS, SAND… SANDRA GA… SANDRA     ADAMS     SAN … TX    Profess… 12840     0
16 ADDONA, TOM… TOMMASO    TOMMASO    ADDONA    NEW … NY    Expert-…   750     0
17 HEALTH RESE… ALEX ASIE… ALEX       ADJEI     BUFF… NY    Profess…  2000     0
18 ADLER, DAVI… DAVID ELL… DAVID      ADLER     PORT… OR    Profess…    71     0
19 ADLER, JERE… JEREMY A   JEREMY     ADLER     ENCI… CA    Expert-…   850     0
20 ADMANI, ARI… ARIFF      ARIFF      ADMANI    PARA… NJ    Expert-…  2000     0
# ℹ 1 more variable: total <dbl>
# use pattern matching with grepl to filter text
expert_advice <- pfizer %>%
  filter(grepl("Expert|Professional", category)) %>%
  arrange(last_name, first_name)
expert_advice
# A tibble: 4,382 × 10
   org_indiv    first_plus first_name last_name city  state category  cash other
   <chr>        <chr>      <chr>      <chr>     <chr> <chr> <chr>    <dbl> <dbl>
 1 ABBO, LILIA… LILIAN MA… LILIAN     ABBO      MIAMI FL    Profess…  1800     0
 2 ABEBE, SHEI… SHEILA Y   SHEILA     ABEBE     INDI… IN    Expert-…   825     0
 3 NEW YORK UN… JUDITH ANN JUDITH     ABERG     NEW … NY    Profess…  1750     0
 4 ABOLNIK, IG… IGOR Z     IGOR       ABOLNIK   PROVO UT    Expert-…  1750     0
 5 ABRAKSIA, S… SAMIR      SAMIR      ABRAKSIA  BEAC… OH    Expert-…  2000     0
 6 ABRAKSIA, S… SAMIR      SAMIR      ABRAKSIA  BEAC… OH    Profess…  2500     0
 7 ABRAMSON, S… STEVEN BA… STEVEN     ABRAMSON  NEW … NY    Profess…  4400     0
 8 ABUZZAHAB, … FARUK S    FARUK      ABUZZAHAB MINN… MN    Profess…  1750     0
 9 ABUZZAHAB, … MARY JENN… MARY       ABUZZAHAB SAIN… MN    Expert-…  1000     0
10 ACCACHA, SI… SIHAM DON… SIHAM      ACCACHA   MINE… NY    Expert-…  1250     0
# ℹ 4,372 more rows
# ℹ 1 more variable: total <dbl>
not_expert_advice <- pfizer %>%
  filter(!grepl("Expert|Professional", category)) %>%
  arrange(last_name, first_name)
# merge/append data frames
pfizer2 <- bind_rows(expert_advice, not_expert_advice)
pfizer2
# A tibble: 10,087 × 10
   org_indiv    first_plus first_name last_name city  state category  cash other
   <chr>        <chr>      <chr>      <chr>     <chr> <chr> <chr>    <dbl> <dbl>
 1 ABBO, LILIA… LILIAN MA… LILIAN     ABBO      MIAMI FL    Profess…  1800     0
 2 ABEBE, SHEI… SHEILA Y   SHEILA     ABEBE     INDI… IN    Expert-…   825     0
 3 NEW YORK UN… JUDITH ANN JUDITH     ABERG     NEW … NY    Profess…  1750     0
 4 ABOLNIK, IG… IGOR Z     IGOR       ABOLNIK   PROVO UT    Expert-…  1750     0
 5 ABRAKSIA, S… SAMIR      SAMIR      ABRAKSIA  BEAC… OH    Expert-…  2000     0
 6 ABRAKSIA, S… SAMIR      SAMIR      ABRAKSIA  BEAC… OH    Profess…  2500     0
 7 ABRAMSON, S… STEVEN BA… STEVEN     ABRAMSON  NEW … NY    Profess…  4400     0
 8 ABUZZAHAB, … FARUK S    FARUK      ABUZZAHAB MINN… MN    Profess…  1750     0
 9 ABUZZAHAB, … MARY JENN… MARY       ABUZZAHAB SAIN… MN    Expert-…  1000     0
10 ACCACHA, SI… SIHAM DON… SIHAM      ACCACHA   MINE… NY    Expert-…  1250     0
# ℹ 10,077 more rows
# ℹ 1 more variable: total <dbl>
# write expert_advice data to a csv file
write_csv(expert_advice, "expert_advice.csv", na="")
# calculate total payments by state
state_sum <- pfizer %>%
  group_by(state) %>%
  summarize(tot_pay = sum(total)) %>%
  arrange(desc(tot_pay))
state_sum
# A tibble: 52 × 2
   state tot_pay
   <chr>   <dbl>
 1 CA    4737807
 2 TX    2802196
 3 FL    2564047
 4 PA    2484505
 5 NC    2328435
 6 NY    2065042
 7 MA    1764771
 8 IL    1256825
 9 MI    1146285
10 OH    1019450
# ℹ 42 more rows