getwd()[1] "/Users/rahwahagos"
getwd()[1] "/Users/rahwahagos"
library(tidyverse)── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
✔ dplyr 1.2.0 ✔ readr 2.2.0
✔ forcats 1.0.1 ✔ stringr 1.6.0
✔ ggplot2 4.0.3 ✔ tibble 3.3.1
✔ lubridate 1.9.5 ✔ tidyr 1.3.2
✔ purrr 1.2.1
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag() masks stats::lag()
ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
pfizer <- read_csv("/Users/rahwahagos/Downloads/pfizer.csv")Rows: 10087 Columns: 10
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (7): org_indiv, first_plus, first_name, last_name, city, state, category
dbl (3): cash, other, total
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
fda <- read_csv("/Users/rahwahagos/Downloads/fda.csv")Rows: 272 Columns: 5
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (5): name_last, name_first, name_middle, issued, office
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
head(pfizer)# A tibble: 6 × 10
org_indiv first_plus first_name last_name city state category cash other
<chr> <chr> <chr> <chr> <chr> <chr> <chr> <dbl> <dbl>
1 3-D MEDICAL … STEVEN BR… STEVEN DEITELZW… NEW … LA Profess… 2625 0
2 AA DOCTORS, … AAKASH MO… AAKASH AHUJA PASO… CA Expert-… 1000 0
3 ABBO, LILIAN… LILIAN MA… LILIAN ABBO MIAMI FL Busines… 0 448
4 ABBO, LILIAN… LILIAN MA… LILIAN ABBO MIAMI FL Meals 0 119
5 ABBO, LILIAN… LILIAN MA… LILIAN ABBO MIAMI FL Profess… 1800 0
6 ABDULLAH RAF… ABDULLAH ABDULLAH RAFFEE FLINT MI Expert-… 750 0
# ℹ 1 more variable: total <dbl>
head(fda)# A tibble: 6 × 5
name_last name_first name_middle issued office
<chr> <chr> <chr> <chr> <chr>
1 ADELGLASS JEFFREY M. 5/25/1999 Center for Drug Evaluation and Re…
2 ADKINSON N. FRANKLIN 4/19/2000 Center for Biologics Evaluation a…
3 ALLEN MARK S. 1/28/2002 Center for Devices and Radiologic…
4 AMSTERDAM DANIEL <NA> 11/17/2004 Center for Biologics Evaluation a…
5 AMSTUTZ HARLAN C. 7/19/2004 Center for Devices and Radiologic…
6 ANDERSON C. JOSEPH 2/25/2000 Center for Devices and Radiologic…
class(fda$issued) [1] "character"
summary(pfizer) # summary of pfizer data org_indiv first_plus first_name last_name
Length:10087 Length:10087 Length:10087 Length:10087
Class :character Class :character Class :character Class :character
Mode :character Mode :character Mode :character Mode :character
city state category cash
Length:10087 Length:10087 Length:10087 Min. : 0
Class :character Class :character Class :character 1st Qu.: 0
Mode :character Mode :character Mode :character Median : 0
Mean : 3241
3rd Qu.: 2000
Max. :1185466
NA's :1
other total
Min. : 0.0 Min. : 0
1st Qu.: 0.0 1st Qu.: 191
Median : 41.0 Median : 750
Mean : 266.5 Mean : 3507
3rd Qu.: 262.0 3rd Qu.: 2000
Max. :27681.0 Max. :1185466
NA's :3
# doctors in California who were paid $10,000 or more by Pfizer to run "Expert-Led Forums."
ca_expert_10000 <- pfizer %>%
filter(state == "CA" & total >= 10000 & category == "Expert-Led Forums")
ca_expert_10000# A tibble: 31 × 10
org_indiv first_plus first_name last_name city state category cash other
<chr> <chr> <chr> <chr> <chr> <chr> <chr> <dbl> <dbl>
1 ANDREW M BL… ANDREW M ANDREW BLUMENFE… DEL … CA Expert-… 20500 0
2 BANDARI, DA… DANIEL SH… DANIEL BANDARI LOS … CA Expert-… 21000 0
3 BAROYA, IVA… IVAN STEP… IVAN BAROYA BONI… CA Expert-… 26400 0
4 BECK, CLIFF… CLIFFORD … CLIFFORD BECK TORR… CA Expert-… 10500 0
5 CHOPRA, SHA… SHAGUN SHAGUN CHOPRA SAN … CA Expert-… 17250 0
6 COLE, EMILY… EMILY ELI… EMILY COLE SAN … CA Expert-… 12000 0
7 EHRESMANN, … GLENN RIC… GLENN EHRESMANN LOS … CA Expert-… 12000 0
8 FONAROW, GR… GREGG CUR… GREGG FONAROW LOS … CA Expert-… 15000 0
9 GINSBERG, D… DAVID ALAN DAVID GINSBERG LOS … CA Expert-… 45750 0
10 HARBOUR, MI… MICHAEL J… MICHAEL HARBOUR PALO… CA Expert-… 13500 0
# ℹ 21 more rows
# ℹ 1 more variable: total <dbl>
# doctors in California who were paid $10,000 or more by Pfizer to run "Expert-Led Forums."
ca_expert_10000_sorted <- pfizer %>%
filter(state == "CA" & total >= 10000 & category == "Expert-Led Forums") %>%
arrange(desc(total))
ca_expert_10000_sorted# A tibble: 31 × 10
org_indiv first_plus first_name last_name city state category cash other
<chr> <chr> <chr> <chr> <chr> <chr> <chr> <dbl> <dbl>
1 SACKS, GER… GERALD MI… GERALD SACKS SANT… CA Expert-… 146500 0
2 NIDES, MIT… MITCHELL MITCHELL NIDES LOS … CA Expert-… 70500 0
3 POTKIN, ST… STEVEN GA… STEVEN POTKIN ORAN… CA Expert-… 48350 0
4 GINSBERG, … DAVID ALAN DAVID GINSBERG LOS … CA Expert-… 45750 0
5 LOUIE, SAM… SAMUEL SAMUEL LOUIE SACR… CA Expert-… 41250 0
6 INSTITUTE … GURKIPAL GURKIPAL SINGH WOOD… CA Expert-… 40000 0
7 BAROYA, IV… IVAN STEP… IVAN BAROYA BONI… CA Expert-… 26400 0
8 MATTHEW BU… MATTHEW J… MATTHEW BUDOFF MANH… CA Expert-… 24000 0
9 QUANG H NG… QUANG H QUANG NGUYEN LA J… CA Expert-… 22500 0
10 SCHROEDER,… JOHN SPEER JOHN SCHROEDER STAN… CA Expert-… 21500 0
# ℹ 21 more rows
# ℹ 1 more variable: total <dbl>
# Find doctors in states other than California who were paid $10,000 or more by Pfizer to run "Expert-Led Forums."
not_ca_expert_10000 <- pfizer %>%
filter(state != "CA" & total >= 10000 & category=="Expert-Led Forums") %>%
arrange(desc(total))
not_ca_expert_10000# A tibble: 133 × 10
org_indiv first_plus first_name last_name city state category cash other
<chr> <chr> <chr> <chr> <chr> <chr> <chr> <dbl> <dbl>
1 HESS, TODD … TODD MICH… TODD HESS SAIN… MN Expert-… 79000 0
2 MILLER, JOH… JOHN JOSE… JOHN MILLER EXET… NH Expert-… 78000 0
3 WEATHERS, V… VIVIAN JOY VIVIAN WEATHERS APEX NC Expert-… 75400 0
4 ROBERT B NE… ROBERT BU… ROBERT NETT SAN … TX Expert-… 60750 0
5 SOLERA CONS… STEVEN AB… STEVEN KAPLAN CHAP… NY Expert-… 56500 0
6 GRIFFIN, JA… JAMES DALE JAMES GRIFFIN DALL… TX Expert-… 54250 0
7 D BRENT JOY… DAVID BRE… DAVID JOYE SUMM… NC Expert-… 53500 0
8 STUBBLEFIEL… MICHAEL D MICHAEL STUBBLEF… NEW … NY Expert-… 50500 0
9 COHEN, SETH… SETH ALEX… SETH COHEN SEAT… WA Expert-… 44500 0
10 SUSSMAN, DA… DAVID OWEN DAVID SUSSMAN VOOR… NJ Expert-… 42500 0
# ℹ 123 more rows
# ℹ 1 more variable: total <dbl>
# Find the 20 doctors across the four largest states (CA, TX, FL, NY) who were paid the most for professional advice.
ca_ny_tx_fl_prof_top20 <- pfizer %>%
filter((state=="CA" | state == "NY" | state == "TX" | state == "FL") & category == "Professional Advising") %>%
arrange(desc(total)) %>%
head(6)
ca_ny_tx_fl_prof_top20# A tibble: 6 × 10
org_indiv first_plus first_name last_name city state category cash other
<chr> <chr> <chr> <chr> <chr> <chr> <chr> <dbl> <dbl>
1 BAILES, JOS… JOSEPH SW… JOSEPH BAILES AUST… TX Profess… 105000 0
2 SAWYERS, CH… CHARLES L… CHARLES SAWYERS NEW … NY Profess… 100000 0
3 MALENKA, RO… ROBERT CH… ROBERT MALENKA STAN… CA Profess… 75566 0
4 BEUTLER, BR… BRUCE ALAN BRUCE BEUTLER DALL… TX Profess… 50000 0
5 REGENTS OF … DAVID RAY… DAVID GANDARA IRVI… CA Profess… 38500 0
6 PTACEK, LOU… LOUIS JOHN LOUIS PTACEK SAN … CA Profess… 37588 0
# ℹ 1 more variable: total <dbl>
expert_advice <- pfizer %>%
filter(category == "Expert-Led Forums" | category == "Professional Advising") %>%
arrange(last_name, first_name) %>%
head(20)
expert_advice# A tibble: 20 × 10
org_indiv first_plus first_name last_name city state category cash other
<chr> <chr> <chr> <chr> <chr> <chr> <chr> <dbl> <dbl>
1 ABBO, LILIA… LILIAN MA… LILIAN ABBO MIAMI FL Profess… 1800 0
2 ABEBE, SHEI… SHEILA Y SHEILA ABEBE INDI… IN Expert-… 825 0
3 NEW YORK UN… JUDITH ANN JUDITH ABERG NEW … NY Profess… 1750 0
4 ABOLNIK, IG… IGOR Z IGOR ABOLNIK PROVO UT Expert-… 1750 0
5 ABRAKSIA, S… SAMIR SAMIR ABRAKSIA BEAC… OH Expert-… 2000 0
6 ABRAKSIA, S… SAMIR SAMIR ABRAKSIA BEAC… OH Profess… 2500 0
7 ABRAMSON, S… STEVEN BA… STEVEN ABRAMSON NEW … NY Profess… 4400 0
8 ABUZZAHAB, … FARUK S FARUK ABUZZAHAB MINN… MN Profess… 1750 0
9 ABUZZAHAB, … MARY JENN… MARY ABUZZAHAB SAIN… MN Expert-… 1000 0
10 ACCACHA, SI… SIHAM DON… SIHAM ACCACHA MINE… NY Expert-… 1250 0
11 ACEVEDO MAR… IRIS ARLE… IRIS ACEVEDO … CAGU… PR Expert-… 750 0
12 ACKERMAN, I… IVAN FOST… IVAN ACKERMAN BRAN… FL Expert-… 1250 0
13 PAIN MEDICI… WILLIAM E… WILLIAM ACKERMAN LITT… AR Expert-… 1000 0
14 ACOSTA, LUI… LUIS SILV… LUIS ACOSTA HOUS… TX Expert-… 1000 0
15 ADAMS, SAND… SANDRA GA… SANDRA ADAMS SAN … TX Profess… 12840 0
16 ADDONA, TOM… TOMMASO TOMMASO ADDONA NEW … NY Expert-… 750 0
17 HEALTH RESE… ALEX ASIE… ALEX ADJEI BUFF… NY Profess… 2000 0
18 ADLER, DAVI… DAVID ELL… DAVID ADLER PORT… OR Profess… 71 0
19 ADLER, JERE… JEREMY A JEREMY ADLER ENCI… CA Expert-… 850 0
20 ADMANI, ARI… ARIFF ARIFF ADMANI PARA… NJ Expert-… 2000 0
# ℹ 1 more variable: total <dbl>
# use pattern matching with grepl to filter text
expert_advice <- pfizer %>%
filter(grepl("Expert|Professional", category)) %>%
arrange(last_name, first_name)
expert_advice# A tibble: 4,382 × 10
org_indiv first_plus first_name last_name city state category cash other
<chr> <chr> <chr> <chr> <chr> <chr> <chr> <dbl> <dbl>
1 ABBO, LILIA… LILIAN MA… LILIAN ABBO MIAMI FL Profess… 1800 0
2 ABEBE, SHEI… SHEILA Y SHEILA ABEBE INDI… IN Expert-… 825 0
3 NEW YORK UN… JUDITH ANN JUDITH ABERG NEW … NY Profess… 1750 0
4 ABOLNIK, IG… IGOR Z IGOR ABOLNIK PROVO UT Expert-… 1750 0
5 ABRAKSIA, S… SAMIR SAMIR ABRAKSIA BEAC… OH Expert-… 2000 0
6 ABRAKSIA, S… SAMIR SAMIR ABRAKSIA BEAC… OH Profess… 2500 0
7 ABRAMSON, S… STEVEN BA… STEVEN ABRAMSON NEW … NY Profess… 4400 0
8 ABUZZAHAB, … FARUK S FARUK ABUZZAHAB MINN… MN Profess… 1750 0
9 ABUZZAHAB, … MARY JENN… MARY ABUZZAHAB SAIN… MN Expert-… 1000 0
10 ACCACHA, SI… SIHAM DON… SIHAM ACCACHA MINE… NY Expert-… 1250 0
# ℹ 4,372 more rows
# ℹ 1 more variable: total <dbl>
not_expert_advice <- pfizer %>%
filter(!grepl("Expert|Professional", category)) %>%
arrange(last_name, first_name)# merge/append data frames
pfizer2 <- bind_rows(expert_advice, not_expert_advice)
pfizer2# A tibble: 10,087 × 10
org_indiv first_plus first_name last_name city state category cash other
<chr> <chr> <chr> <chr> <chr> <chr> <chr> <dbl> <dbl>
1 ABBO, LILIA… LILIAN MA… LILIAN ABBO MIAMI FL Profess… 1800 0
2 ABEBE, SHEI… SHEILA Y SHEILA ABEBE INDI… IN Expert-… 825 0
3 NEW YORK UN… JUDITH ANN JUDITH ABERG NEW … NY Profess… 1750 0
4 ABOLNIK, IG… IGOR Z IGOR ABOLNIK PROVO UT Expert-… 1750 0
5 ABRAKSIA, S… SAMIR SAMIR ABRAKSIA BEAC… OH Expert-… 2000 0
6 ABRAKSIA, S… SAMIR SAMIR ABRAKSIA BEAC… OH Profess… 2500 0
7 ABRAMSON, S… STEVEN BA… STEVEN ABRAMSON NEW … NY Profess… 4400 0
8 ABUZZAHAB, … FARUK S FARUK ABUZZAHAB MINN… MN Profess… 1750 0
9 ABUZZAHAB, … MARY JENN… MARY ABUZZAHAB SAIN… MN Expert-… 1000 0
10 ACCACHA, SI… SIHAM DON… SIHAM ACCACHA MINE… NY Expert-… 1250 0
# ℹ 10,077 more rows
# ℹ 1 more variable: total <dbl>
# write expert_advice data to a csv file
write_csv(expert_advice, "expert_advice.csv", na="")# calculate total payments by state
state_sum <- pfizer %>%
group_by(state) %>%
summarize(tot_pay = sum(total)) %>%
arrange(desc(tot_pay))
state_sum# A tibble: 52 × 2
state tot_pay
<chr> <dbl>
1 CA 4737807
2 TX 2802196
3 FL 2564047
4 PA 2484505
5 NC 2328435
6 NY 2065042
7 MA 1764771
8 IL 1256825
9 MI 1146285
10 OH 1019450
# ℹ 42 more rows