#Question 2
library(ggplot2)
library(plotly)
##
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
##
## last_plot
## The following object is masked from 'package:stats':
##
## filter
## The following object is masked from 'package:graphics':
##
## layout
# Load the class-interest survey; fill = TRUE pads rows that have fewer fields
# than the header so that the ragged file still parses.
class_data <- read.table(
  "https://raw.githubusercontent.com/bcaffo/ds4bme/master/data/classInterests.txt",
  header = TRUE, fill = TRUE
)
## Bar plot for year
# The year categories to be counted
year <- c("PhD", "Master's", "Senior", "Junior", "Sophomore")
# Number of students in each year. Missing entries are ignored, exactly as
# length(which(...)) would ignore them.
year_freq <- vapply(
  year,
  function(yr) sum(class_data$Year == yr, na.rm = TRUE),
  integer(1),
  USE.NAMES = FALSE
)
# Build the data frame column by column. cbind() would coerce the counts to
# character, and ggplot would then place the bars on a discrete y axis ordered
# by string value instead of by count.
year_data <- data.frame(year = year, year_freq = year_freq)
# Create bar plot for year and make it interactive
p1 <- ggplot(data = year_data, aes(x = year, y = year_freq)) +
  geom_bar(stat = "identity") +
  labs(x = "Year", y = "Number of Students")
p1 <- ggplotly(p1)
p1
## Bar plot for program
# The program categories to be counted
program <- c(
  "BME", "Computer_Science", "Materials_Science",
  "ChemBE", "JHSPH-MMI", "BCMB_SOM"
)
# Number of students in each program. Missing entries are ignored, exactly as
# length(which(...)) would ignore them.
program_freq <- vapply(
  program,
  function(pr) sum(class_data$Program == pr, na.rm = TRUE),
  integer(1),
  USE.NAMES = FALSE
)
# Build the data frame column by column. cbind() would coerce the counts to
# character, and ggplot would then place the bars on a discrete y axis ordered
# by string value instead of by count.
program_data <- data.frame(program = program, program_freq = program_freq)
# Create bar plot for program; the labels are rotated because the program
# names are long.
p2 <- ggplot(data = program_data, aes(x = program, y = program_freq)) +
  geom_bar(stat = "identity") +
  labs(x = "Program", y = "Number of Students") +
  theme(axis.text.x = element_text(angle = 90))
p2 <- ggplotly(p2)
p2
#Question 3
library(ggmosaic) # ggplot library for mosaic plots
library(ggplot2)
library(plotly)
# Mosaic plot of Year against Program, with tiles filled by Year.
# Both axis titles and the y-axis text and ticks are hidden; the x-axis labels
# are rotated by 90 degrees. The finished ggplot is converted to an
# interactive plotly widget.
p3 <- ggplotly(
  ggplot(data = class_data) +
    geom_mosaic(aes(x = product(Year, Program), fill = Year)) +
    theme(
      axis.title.y = element_blank(),
      axis.text.y = element_blank(),
      axis.ticks.y = element_blank(),
      axis.title.x = element_blank(),
      axis.text.x = element_text(angle = 90)
    )
)
p3
#Question 5
library(ggplot2)
library(tidyr)
library(plotly)
# Read data. The file is read without a header, so every cell comes in as
# text and the table is sliced by position below.
health_data <- read.csv(
  "https://raw.githubusercontent.com/jhu-advdatasci/2018/master/data/KFF/healthcare-spending.csv",
  header = FALSE, fill = TRUE
)
# Keep row 3 (the column headings) and rows 5:55 (Alabama through Wyoming),
# then transpose so that each row is one year and each column is one state.
health_mat <- as.matrix(t(health_data[c(3, 5:55), ]))
# Shorten the year headings to their first four characters so that they fit
# on the axis.
health_mat[2:25, 1] <- substr(health_mat[2:25, 1], 1, 4)
# Set state names as column names of the matrix
colnames(health_mat) <- as.vector(health_mat[1, ])
# Year labels for the x axis. They are kept as text, so no assumption is made
# about whether they parse as numbers.
xx <- health_mat[2:25, 1]
# Spending values arrive as text; convert them to numbers so that ggplot draws
# a continuous y axis. storage.mode<- keeps the matrix shape and state names.
spending <- health_mat[2:25, 2:52]
storage.mode(spending) <- "double"
# Assemble with data.frame() rather than cbind(): cbind() would coerce the
# numeric columns back to character. check.names = FALSE keeps state names
# that contain spaces intact.
health_frame <- data.frame(
  xx = xx, spending,
  check.names = FALSE, stringsAsFactors = FALSE
)
# Reshape to long form: one row per (year, state) pair
hf <- health_frame %>%
  gather(variable, value, Alabama:Wyoming)
# One line per state: year on the x axis, spending on the y axis. Grouping by
# state is required; a single shared group would join every state's points
# into one line.
p4 <- ggplot(hf, aes(x = xx, y = value, colour = variable, group = variable)) +
  geom_line() +
  labs(x = "Year", y = "Statewise expenditure") +
  theme(
    axis.text.x = element_text(angle = 90),
    legend.title = element_blank()
  )
#p4 = ggplotly(p4)
p4

# Question 6
# Read data
library(plotly)
library(reshape2)
library(ggplot2)
# The file is read without a header, so every cell comes in as text and the
# table is sliced by position below.
health_data <- read.csv(
  "https://raw.githubusercontent.com/jhu-advdatasci/2018/master/data/KFF/healthcare-spending.csv",
  header = FALSE, fill = TRUE
)
# Rows 5:55 are the per-state rows; column 1 holds the state name and columns
# 2:25 hold the yearly spending figures.
state_rows <- health_data[5:55, ]
health_mat2 <- as.matrix(state_rows[, 2:25])
# The figures arrive as text; convert in place so that the matrix keeps its
# 51 x 24 shape.
storage.mode(health_mat2) <- "double"
# Calculate average spending as the row mean for each state
avgSpending <- unname(rowMeans(health_mat2))
# Build the plotting frame directly. Binding names and means into one matrix
# would turn the means into text, and melting that matrix would mix state
# names and numbers in a single column.
data <- data.frame(
  state = as.character(state_rows[[1]]),
  avg_spending = avgSpending,
  stringsAsFactors = FALSE
)
# One bar per state, labelled with the state name; the labels are rotated so
# that all of them stay readable.
p5 <- ggplot(data, aes(x = state, y = avg_spending)) +
  geom_bar(position = "dodge", stat = "identity") +
  labs(x = "States", y = "Average Spending") +
  theme(axis.text.x = element_text(angle = 90))
p5 <- ggplotly(p5)
p5