library(ggplot2)
# Base plot: number of cylinders on the x-axis, bars filled by transmission
cyl.am <- ggplot(mtcars, aes(x = factor(cyl), fill = factor(am)))

# Stacked bars (position = "stack" is the default)
cyl.am + geom_bar()

# Proportional bars: every bar is rescaled to the same total height
cyl.am + geom_bar(position = "fill")

# Dodging - principles of similarity and proximity
cyl.am + geom_bar(position = "dodge")

# Fill colours and legend labels, in the order of the factor(am) levels.
# In mtcars, am is coded 0 = automatic, 1 = manual, so level "0" must be
# labelled "Automatic" and level "1" "Manual".
val <- c("#E41A1C", "#377EB8")
lab <- c("Automatic", "Manual")

# Dodged bars with titled axes and a manual fill scale
cyl.am +
  geom_bar(position = "dodge") +
  scale_x_discrete("Cylinders") +
  scale_y_continuous("Numbers") +
  scale_fill_manual("Transmission", values = val, labels = lab)
# Strip plot: place every car on one horizontal line via a constant y value
mtcars$group <- 0
ggplot(mtcars, aes(x = mpg, y = group)) + geom_point()
# Jitter the points and widen the y limits so the jitter has room.
# geom_jitter() already draws the points, so no separate geom_point() layer
# is added; stacking both layers would draw every observation twice (once on
# the line, once jittered).
ggplot(mtcars, aes(x = mpg, y = group)) +
  geom_jitter() +
  scale_y_continuous(limits = c(-2, 2))
# Scatter plot of mpg against wt, coloured by cyl, drawn with large points
ggplot(mtcars, aes(x = wt, y = mpg, col = cyl)) +
  geom_point(size = 4)
# Same plot with hollow circles (shape 1) to reduce overplotting
ggplot(mtcars, aes(x = wt, y = mpg, col = cyl)) +
  geom_point(size = 4, shape = 1)
# Same plot with solid but semi-transparent points
ggplot(mtcars, aes(x = wt, y = mpg, col = cyl)) +
  geom_point(alpha = 0.6, size = 4)
head(iris)
# Histogram of sepal width, one fill colour per species.
# binwidth is a fixed layer parameter, not an aesthetic: when placed inside
# aes() ggplot2 ignores it ("Ignoring unknown aesthetics: binwidth"), so it
# is passed directly to geom_histogram().
ggplot(iris, aes(x = Sepal.Width, fill = Species)) +
  geom_histogram(binwidth = 0.2)
# Density on the y-axis instead of counts
ggplot(iris, aes(x = Sepal.Width, fill = Species)) +
  geom_histogram(aes(y = ..density..), binwidth = 0.1)
# Proportion of each species within every bin
ggplot(iris, aes(x = Sepal.Width, fill = Species)) +
  geom_histogram(binwidth = 0.1, position = "fill")
# Species side by side within every bin
ggplot(iris, aes(x = Sepal.Width, fill = Species)) +
  geom_histogram(binwidth = 0.1, position = "dodge")
# Univariate histogram of mpg with the default binning
ggplot(mtcars, aes(mpg)) + geom_histogram()
# Bins one mpg unit wide
ggplot(mtcars, aes(mpg)) + geom_histogram(binwidth = 1)
# Density rather than count on the y-axis
ggplot(mtcars, aes(mpg)) + geom_histogram(aes(y = ..density..), binwidth = 1)
# Custom colour code
myBlue <- "#377EB8"
# Density histogram with the bars filled in myBlue
ggplot(mtcars, aes(mpg)) +
  geom_histogram(aes(y = ..density..), fill = myBlue, binwidth = 1)
# Bar plot of cyl with bars filled by am; default position
ggplot(mtcars, aes(x = cyl, fill = factor(am))) +
  geom_bar()
# Explicit "stack" position
ggplot(mtcars, aes(x = cyl, fill = factor(am))) +
  geom_bar(position = "stack")
# "fill" position: bars rescaled to proportions
ggplot(mtcars, aes(x = cyl, fill = factor(am))) +
  geom_bar(position = "fill")
# "dodge" position: groups side by side
ggplot(mtcars, aes(x = cyl, fill = factor(am))) +
  geom_bar(position = "dodge")
# Default position again, as the starting point for the dodge variants
ggplot(mtcars, aes(x = cyl, fill = factor(am))) +
  geom_bar()
# "dodge" given as a string
ggplot(mtcars, aes(x = cyl, fill = factor(am))) +
  geom_bar(position = "dodge")
# posn_d: a position_dodge() object with a dodge width of 0.2
posn_d <- position_dodge(width = 0.2)
# Use posn_d as the position
ggplot(mtcars, aes(x = cyl, fill = factor(am))) +
  geom_bar(position = posn_d)
# Use posn_d and make the bars semi-transparent (alpha = 0.6)
ggplot(mtcars, aes(x = cyl, fill = factor(am))) +
  geom_bar(alpha = 0.6, position = posn_d)
# data.world API token
# SECURITY: the token must never be written into the script. It is read from
# the DW_API_TOKEN environment variable (set it in ~/.Renviron, for example).
# The token that was previously hard-coded here should be treated as leaked
# and revoked in the data.world account settings.
dw_api_token <- Sys.getenv("DW_API_TOKEN")
if (!nzchar(dw_api_token)) {
  stop("Set the DW_API_TOKEN environment variable to your data.world API token",
       call. = FALSE)
}
saved_cfg <- data.world::save_config(dw_api_token)
library(data.world)
library(tidyverse)
# Datasets are identified by their URL
df <- read.csv(
  "https://query.data.world/s/5s3rdju1vng0j5ij7675hcpto",
  header = TRUE
)
head(df)
# NOTE(review): drugs_ds is used by the queries in this script but is never
# defined in it. The table names listed in the output below look like the
# data4democracy drug-spending dataset on data.world; confirm this URL before
# relying on it. An existing definition is left untouched.
if (!exists("drugs_ds")) {
  drugs_ds <- "https://data.world/data4democracy/drug-spending"
}
# List tables
data_list <- data.world::query(
  qry_sql("SELECT * FROM Tables"),
  dataset = drugs_ds
)
# data_list is a tbl_df with two columns: tableId and tableName.
# The console output of each call is kept below as "#>" comments so that the
# script still parses.
data_list$tableName
#>  [1] "FDA_NDC_Product" "Data" "Methods"
#>  [4] "Variables" "Pharma_Lobby" "atc-codes"
#>  [7] "companies_drugs_keyed" "drug_list" "drug_uses"
#> [10] "drugdata_clean" "drugnames_withclasses" "lobbying_keyed"
#> [13] "manufacturers_drugs_cleaned" "meps_full_2014" "spending-2011"
#> [16] "spending-2012" "spending-2013" "spending-2014"
#> [19] "spending-2015" "spending_all_top100" "usp_drug_classification"
data_list$tableId
#>  [1] "FDA_NDC_Product.csv/FDA_NDC_Product"
#>  [2] "Medicare_Drug_Spending_PartD_All_Drugs_YTD_2015_12_06_2016.xlsx/Data"
#>  [3] "Medicare_Drug_Spending_PartD_All_Drugs_YTD_2015_12_06_2016.xlsx/Methods"
#>  [4] "Medicare_Drug_Spending_PartD_All_Drugs_YTD_2015_12_06_2016.xlsx/Variables"
#>  [5] "Pharma_Lobby.csv/Pharma_Lobby"
#>  [6] "atc-codes.csv/atc-codes"
#>  [7] "companies_drugs_keyed.csv/companies_drugs_keyed"
#>  [8] "drug_list.json/drug_list"
#>  [9] "drug_uses.csv/drug_uses"
#> [10] "drugdata_clean.csv/drugdata_clean"
#> [11] "drugnames_withclasses.csv/drugnames_withclasses"
#> [12] "lobbying_keyed.csv/lobbying_keyed"
#> [13] "manufacturers_drugs_cleaned.csv/manufacturers_drugs_cleaned"
#> [14] "meps_full_2014.zip/meps_full_2014/meps_full_2014.csv/meps_full_2014"
#> [15] "spending-2011.csv/spending-2011"
#> [16] "spending-2012.csv/spending-2012"
#> [17] "spending-2013.csv/spending-2013"
#> [18] "spending-2014.csv/spending-2014"
#> [19] "spending-2015.csv/spending-2015"
#> [20] "spending_all_top100.csv/spending_all_top100"
#> [21] "usp_drug_classification.csv/usp_drug_classification"
#' Fetch one year of the drug-spending data
#'
#' Runs `SELECT *` against the `spending-<yr>` table, drops the first column
#' of the result and tags every row with the year.
#'
#' @param yr Year suffix of the table to read (e.g. 2011).
#' @param dataset Dataset identifier handed to `data.world::query()`.
#'   Defaults to the global `drugs_ds`, which is what the function always
#'   used before this parameter existed.
#' @return The query result without its first column, plus a `year` column
#'   set to `yr`.
get_year <- function(yr, dataset = drugs_ds) {
  sql <- qry_sql(paste0("SELECT * FROM `spending-", yr, "`"))
  ## First column is a row number; don't need that.
  ## drop = FALSE keeps the result a table even if only one column remains.
  data.world::query(sql, dataset = dataset)[, -1, drop = FALSE] %>%
    mutate(year = yr)
}
# Call get_year() for each of 2011 through 2015 and row-bind the results
spend <- map_df(.x = 2011:2015, .f = get_year)
head(spend)
# Per generic and year, total the count and spending variables -----------------
# The resulting rows are labelled "ALL BRAND NAMES" in drugname_brand.
spend_overall <- spend %>%
  group_by(drugname_generic, year) %>%
  summarise(
    claim_count              = sum(claim_count, na.rm = TRUE),
    total_spending           = sum(total_spending, na.rm = TRUE),
    user_count               = sum(user_count, na.rm = TRUE),
    unit_count               = sum(unit_count, na.rm = TRUE),
    user_count_non_lowincome = sum(user_count_non_lowincome, na.rm = TRUE),
    user_count_lowincome     = sum(user_count_lowincome, na.rm = TRUE)
  ) %>%
  mutate(
    # Spending per user, computed from the totals above
    total_spending_per_user = total_spending / user_count,
    drugname_brand = "ALL BRAND NAMES",
    # Brand-specific variables are set to NA on these summary rows
    unit_cost_wavg = NA,
    out_of_pocket_avg_lowincome = NA,
    out_of_pocket_avg_non_lowincome = NA
  ) %>%
  ungroup()
# Top 100 generics ranked by user count summed over all years -----------------
by_user_top100 <- spend_overall %>%
  group_by(drugname_generic) %>%
  summarise(total_users = sum(user_count, na.rm = TRUE)) %>%
  arrange(desc(total_users)) %>%
  slice(1:100)
# Stack the by-brand rows and the ALL BRAND NAMES rows, keep only the top 100
# generics, and sort by generic name -------------------------------------------
spend_all_top100 <- spend %>%
  bind_rows(spend_overall) %>%
  filter(drugname_generic %in% by_user_top100$drugname_generic) %>%
  arrange(drugname_generic)
head(spend_all_top100)
library(dplyr)
# Read the travel records and parse the date columns (month/day/year text)
df <- read.csv(
  "https://query.data.world/s/7ezifc8eqig9vdazaoa1noecv",
  header = TRUE
)
df$arrival_date <- as.Date(df$arrival_date, format = "%m/%d/%Y")
df$departure_date <- as.Date(df$departure_date, format = "%m/%d/%Y")
# arrival_date is already a Date here, so the year can be formatted directly
# without re-parsing it; stored as an integer.
df$ArrivalYear <- as.integer(format(df$arrival_date, "%Y"))
head(df)
dim(df)
#> [1] 48237 8
# Rows whose country mentions Russia, reduced to name, country and arrival
# year, sorted by name in descending order
whowentwhere <- df %>%
  filter(grepl("Russia", country)) %>%
  select(name, country, ArrivalYear) %>%
  arrange(desc(name))
head(whowentwhere)
dim(whowentwhere)
#> [1] 673 3
# Row counts per name, country and year for arrivals from 2012 onwards
whowentwhere1 <- whowentwhere %>%
  filter(ArrivalYear >= 2012) %>%
  group_by(name, country, ArrivalYear) %>%
  summarise(n = n()) %>%
  arrange(desc(name))
whowentwhere1
# geom_bar() counts the rows of whowentwhere1 (one per name/country/year
# combination); it does not use the n column.
ggplot(whowentwhere1, aes(ArrivalYear, fill = factor(name))) +
  geom_bar()
# Number of Russia rows per name, largest count first
whowentwhere2 <- whowentwhere %>%
  group_by(name) %>%
  summarise(n = n()) %>%
  arrange(desc(n))
whowentwhere2
# Rows for names matching "Weldon" with a country matching "Russia", most
# recent departure first
PanAm <- df %>%
  filter(grepl("Weldon", name)) %>%
  filter(grepl("Russia", country)) %>%
  arrange(desc(departure_date))
dim(PanAm)
#> [1] 22 8
PanAm
# Store the arrival year as a character string so that the bar plot treats it
# as a discrete axis. arrival_date is already a Date, so no re-parsing needed.
PanAm$ArrivalYear <- format(PanAm$arrival_date, "%Y")
head(PanAm)
# "none" is the legend.position value that hides the legend; 'null' is not a
# valid position.
xyz <- ggplot(PanAm, aes(x = ArrivalYear, fill = factor(country))) +
  geom_bar() +
  theme(legend.position = "none") +
  ggtitle("Weldon in Russia")
xyz
# Every row for names matching "Weldon", most recent departure first
abc <- df %>%
  filter(grepl("Weldon", name)) %>%
  arrange(desc(departure_date))
# Rows per arrival year, filled by country
ggplot(abc, aes(x = ArrivalYear, fill = factor(country))) +
  geom_bar() +
  ggtitle("Weldon")
# Weldon
# Bar plot of PanAm by arrival year with an untitled legend at the bottom
yz <- ggplot(PanAm, aes(x = ArrivalYear, fill = factor(country))) +
  geom_bar() +
  theme(
    legend.position = "bottom",
    legend.title = element_blank()
  )
yz
library(gridExtra)
#' Extract the legend grob from a ggplot
#'
#' Builds the plot's gtable and returns the grob whose name is "guide-box".
#'
#' @param a.gplot A ggplot object that draws a legend.
#' @return The first grob named "guide-box" in the plot's gtable.
#' Stops with an error when the plot has no such grob (e.g. the legend is
#' hidden).
g_legend <- function(a.gplot) {
  tmp <- ggplot_gtable(ggplot_build(a.gplot))
  # vapply() always yields a character vector, even for an empty grob list
  grob_names <- vapply(tmp$grobs, function(x) x$name, character(1))
  leg <- which(grob_names == "guide-box")
  if (length(leg) == 0) {
    stop("No legend found: the plot has no \"guide-box\" grob", call. = FALSE)
  }
  # Index with a single position; a longer index vector would be treated by
  # [[ as a recursive index.
  tmp$grobs[[leg[1]]]
}
# Put the extracted legend in a narrow left column and the legend-free plot in
# the wide right column
legend <- g_legend(yz)
grid.arrange(
  legend,
  yz + theme(legend.position = "none"),
  nrow = 1,
  ncol = 2,
  widths = c(1 / 6, 5 / 6)
)
# Nunes
# Rows for names matching "nunes" with a country matching "Russia", most
# recent departure first.
# grepl() is case-sensitive by default and the other name filters in this
# script use capitalised patterns ("Weldon", "Rohr"), so the lowercase
# pattern is matched case-insensitively here.
# NOTE(review): confirm how names are capitalised in the data.
PanAm <- df %>%
  filter(grepl("nunes", name, ignore.case = TRUE)) %>%
  filter(grepl("Russia", country)) %>%
  arrange(desc(departure_date))
# arrival_date is already a Date; store the year as a character string
PanAm$ArrivalYear <- format(PanAm$arrival_date, "%Y")
PanAm
# Dana Rohrabacher
# Rows of whowentwhere for names matching "Rohr" with a country matching
# "Russia", latest arrival year first
PanAm <- whowentwhere %>%
  filter(grepl("Rohr", name)) %>%
  filter(grepl("Russia", country)) %>%
  arrange(desc(ArrivalYear))
PanAm
#PanAm$ArrivalYear <- format(as.Date(PanAm$arrival_date, format="%Y/%m/%d"),"%Y")
# Rows per arrival year, filled by country
xyz <- ggplot(PanAm, aes(x = ArrivalYear, fill = factor(country))) +
  geom_bar()
# + theme(legend.position='null')
xyz