# Echo the source of every chunk in the knitted document.
knitr::opts_chunk$set(echo = TRUE)
###### DO NOT MODIFY. This will load required packages and data.

library(tidyverse)   # Data wrangling (dplyr, tidyr, readr) and ggplot2
library(gganimate)   # Animated ggplot2 charts (transition_states(), anim_save())
library(gifski)      # GIF encoder (presumably the renderer gganimate uses here - confirm)
library(plotly)      # Interactive graphs (ggplotly(), plot_ly(), api_create())
library(rmarkdown)   # Converts R Markdown documents into a variety of formats
library(pastecs)     # stat.desc() descriptive statistics (by() itself is base R)
library(formattable) # percent(): formats numeric values as percentages
library(DT)          # datatable() interactive tables
library(htmlwidgets) # saveWidget(): saves an interactive widget as an .html file


# Survey sample; rows containing any missing value are dropped.
cces <- drop_na(read_csv(url("https://www.dropbox.com/s/ahmt12y39unicd2/cces_sample_coursera.csv?raw=1")))

# Legislative data; rows containing any missing value are dropped.
cel <- drop_na(read_csv(url("https://www.dropbox.com/s/4ebgnkdhhxo5rac/cel_volden_wiseman%20_coursera.csv?raw=1")))

# Options shared by every table produced in this R session are set once through
# the global option DT.options.
# Per-column filters are not covered by it: they have to be requested
# manually in each datatable() call.
options(DT.options = list(pageLength = 10, 
                          lengthMenu = c(10, 20, 50, 100), # Choices offered in the "Show entries" menu
                          autoWidth = TRUE,
                          language = list(search = 'Filter:'))) 

Exercise 1

Data: cces

Explain what you are visualizing here: Bar charts showing how respondents in the cces file answered the approval question (CC18_308a), broken down by gender and educational level

Task 1: The Opinion of respondents by Gender

First, let’s recode and reorder the following variables (gender, educ and CC18_308a)

# Build data1 from cces by adding three labelled columns in a single pass:
#   Gender    - character label for 'gender' (1 = Male, 2 = Female)
#   Education - factor for 'educ' (codes 1-6), levels ordered from
#               "No high school" up to "Post-grad"
#   Opinion   - factor for 'CC18_308a' (codes 1-4), levels ordered from
#               "Strongly disapprove" up to "Strongly approve"
# Any code outside the listed values becomes NA.
data1 <- cces %>%
  mutate(
    Gender = case_when(
      gender == 1 ~ "Male",
      gender == 2 ~ "Female"
    ),
    Education = factor(
      educ,
      levels = 1:6,
      labels = c("No high school", "High school graduate", "Some college",
                 "2-year", "4-year", "Post-grad")
    ),
    # Codes are listed 4 -> 1 so the level order runs from the most negative
    # answer to the most positive one.
    Opinion = factor(
      CC18_308a,
      levels = 4:1,
      labels = c("Strongly disapprove", "Somewhat disapprove",
                 "Somewhat approve", "Strongly approve")
    )
  )

Let’s take a look at data1:

# Table 1.1: browse the recoded data1 as an interactive table.
datatable(
  data1,
  # Caption placed above the table and centred
  caption = htmltools::tags$caption(
    style = 'caption-side: top; text-align: center;',
    'Table 1.1: ', htmltools::em('The cces data')
  ),
  # Once the table is initialised, paint the header row black with white text
  options = list(
    initComplete = JS(
      "function(settings, json) {",
      "$(this.api().table().header()).css({'background-color': '#000', 'color': '#fff'});",
      "}"
    )
  ),
  # Individual cells can be edited in the browser
  editable = 'cell'
)

From data1, I create “data1.2” by counting the respondents in each combination of “Gender” and “Opinion”:

# Number of respondents per Gender x Opinion combination, largest groups first.
data1.2 <- data1 %>%
  count(Gender, Opinion, sort = TRUE, name = "Count")

# Table 1.2: the summary above as an interactive table.
datatable(
  data1.2,
  # Caption placed above the table and centred
  caption = htmltools::tags$caption(
    style = 'caption-side: top; text-align: center;',
    'Table 1.2: ', htmltools::em('Summary of Gender and Opinion of Respondents')
  ),
  # Once the table is initialised, paint the header row black with white text
  options = list(
    initComplete = JS(
      "function(settings, json) {",
      "$(this.api().table().header()).css({'background-color': '#000', 'color': '#fff'});",
      "}"
    )
  ),
  # Row-level editing with columns 1 and 3 locked, so only column 2 can change
  editable = list(target = 'row', disable = list(columns = c(1, 3)))
)

Based on “data1.2”, I create a static and an interactive bar chart to summarize the opinions of the respondents:

# Grouped bar chart of the pre-computed counts in data1.2 (Gender on the x
# axis, one bar per Opinion). The static and the interactive version are built
# by the same helper so they cannot drift apart; only two text sizes differ.
#   label_size       - size of the count labels drawn above the bars
#   legend_text_size - font size of the legend entries
# Returns a ggplot object.
opinion_by_gender_plot <- function(label_size, legend_text_size) {
  ggplot(data1.2, aes(x = Gender, y = Count, fill = Opinion, label = Count)) +
    # geom_col() draws the Count values as they are; geom_bar() would count
    # the rows of data1.2 instead.
    geom_col(position = "dodge") +
    # Centre each label over its bar; the negative vjust lifts it above the bar
    geom_text(
      position = position_dodge(width = .9),
      vjust = -0.5,
      size = label_size
    ) +
    labs(
      x = "Gender",
      y = "Number of respondents",
      title = "The opinion of respondents"
    ) +
    theme(
      plot.title = element_text(hjust = 0.5, size = 18, face = "bold"), # centred
      plot.subtitle = element_text(size = 15), # no subtitle is set on this chart
      legend.title = element_text(size = 15, face = "bold.italic", colour = "black"),
      legend.text = element_text(size = legend_text_size, face = "italic", colour = "black"),
      legend.spacing.y = unit(0.5, "cm"),
      axis.title = element_text(size = 15, face = "bold", colour = "black"),
      axis.text = element_text(size = 13, colour = "black")
    ) +
    guides(fill = guide_legend(byrow = TRUE)) # space the legend entries evenly
}

# Static version
static_opinion_plot <- opinion_by_gender_plot(label_size = 3, legend_text_size = 14)
static_opinion_plot

# Save the static plot as png (480 x 480 px). The plot is drawn into a png
# device that is opened and closed right here, so no other device is touched.
png("The static opinion of respondents by Gender.png", width = 480, height = 480)
print(static_opinion_plot)
dev.off()

# Save the static plot as tiff. ggsave() opens and closes its own device, so
# no dev.off() is needed afterwards.
ggsave(
  "The static opinion of respondents by Gender.tiff",
  plot = static_opinion_plot,
  width = 10,
  height = 10
)

# Interactive version
a <- opinion_by_gender_plot(label_size = 4, legend_text_size = 10)
ggplotly(a)
# Save the ggplotly file:
saveWidget(as_widget(ggplotly(a)), file = "The opinion of respondents stat_summary.html")

# Upload to the plotly cloud only when the credentials are supplied from
# outside the source (for example through ~/.Renviron).
# NOTE(review): an API key used to be written here in plain text; it should be
# revoked and regenerated.
if (nzchar(Sys.getenv("plotly_username")) && nzchar(Sys.getenv("plotly_api_key"))) {
  api_create(ggplotly(a), "The opinion of respondents stat_summary")
} else {
  message("plotly_username / plotly_api_key are not set; skipping the upload.")
}

Instead of using data1.2 (data1 grouped by Gender and Opinion), I can use “data1” directly and let the program work out the information for the different groups on the chart. As shown on the chart, the program still correctly calculates and labels “Count” (the number of respondents).

# Same chart built straight from the respondent-level data1: geom_bar() counts
# the rows itself, so neither a y aesthetic nor a pre-aggregated table is needed.
b <- ggplot(data1, aes(x = Gender, fill = Opinion)) +
  geom_bar(position = "dodge") +
  labs(
    x = "Gender",
    y = "Number of respondents",
    title = "The opinion of respondents"
  ) +
  theme(
    plot.title = element_text(hjust = 0.5, size = 18, face = "bold"), # centred
    plot.subtitle = element_text(size = 15), # no subtitle is set on this chart
    legend.title = element_text(size = 15, face = "bold", colour = "black"),
    legend.text = element_text(size = 10, face = "italic", colour = "black"),
    legend.spacing.y = unit(0.5, "cm"),
    axis.title = element_text(size = 15, face = "bold", colour = "black"),
    axis.text = element_text(size = 13, colour = "black")
  ) +
  guides(fill = guide_legend(byrow = TRUE)) # space the legend entries evenly

ggplotly(b)
# Save the ggplotly file. This must be plot `b`: the geom_bar version is the
# one the file name refers to.
saveWidget(as_widget(ggplotly(b)), file = "The opinion of respondents geom_bar.html")

# Upload to the plotly cloud only when the credentials are supplied from
# outside the source (for example through ~/.Renviron).
# NOTE(review): an API key used to be written here in plain text; it should be
# revoked and regenerated.
if (nzchar(Sys.getenv("plotly_username")) && nzchar(Sys.getenv("plotly_api_key"))) {
  api_create(ggplotly(b), "The opinion of respondents geom_bar")
} else {
  message("plotly_username / plotly_api_key are not set; skipping the upload.")
}

We can see that there are 4 levels of opinion: strongly disapprove, somewhat disapprove, somewhat approve and strongly approve. For further analysis, the following graphs cover the educational levels of the respondents within each type of opinion.

Task 2: The Opinion levels of respondents by Gender and Education

First, I create “data1.1” by counting the respondents in each combination of 3 variables: “Gender”, “Opinion” and “Education”.

# Number of respondents per Gender x Opinion x Education combination,
# largest groups first; the count column is called "Count".
data1.1 <- data1 %>%
  count(Gender, Opinion, Education, sort = TRUE, name = "Count")

# Table 1.3: the summary above, with a filter box on top of every column.
datatable(
  data1.1,
  caption = htmltools::tags$caption(
    style = 'caption-side: top; text-align: center;',
    'Table 1.3: ',
    htmltools::em('Summary of Gender, Opinion and Education of Respondents')
  ),
  filter = 'top'
)

Next, let’s draw a bar-chart by using “data1.1”:

# Grouped bar chart of the pre-computed counts in data1.1, one panel per
# Education level. `a` is the base chart and is the one turned into the
# interactive version below.
a <- ggplot(data1.1, aes(x = Gender, y = Count, fill = Opinion, label = Count)) +
  # geom_col() draws the Count values as they are; geom_bar() would count
  # the rows of data1.1 instead.
  geom_col(position = "dodge") +
  # Centre each label over its bar; the negative vjust lifts it above the bar
  geom_text(position = position_dodge(width = .9), vjust = -0.5, size = 3) +
  labs(
    x = "Gender",
    y = "Number of respondents",
    title = "The opinion of respondents"
  ) +
  theme(plot.title = element_text(hjust = 0.5)) + # centre the title
  facet_wrap(~Education)

# Static version: same chart with the y axis fixed to 0-80.
# Note that ylim() discards values outside the range rather than clipping them.
static_education_plot <- a + ylim(0, 80)
static_education_plot

# Save the static plot as png (480 x 480 px). The plot is drawn into a png
# device that is opened and closed right here, so no other device is touched.
png("The static opinion of respondents by gender-education.png", width = 480, height = 480)
print(static_education_plot)
dev.off()

# Interactive version (no fixed y range)
ggplotly(a)
# Save the ggplotly file:
saveWidget(as_widget(ggplotly(a)), file = "The opinion of respondents by gender-education.html")

# Upload to the plotly cloud only when the credentials are supplied from
# outside the source (for example through ~/.Renviron).
# NOTE(review): an API key used to be written here in plain text; it should be
# revoked and regenerated.
if (nzchar(Sys.getenv("plotly_username")) && nzchar(Sys.getenv("plotly_api_key"))) {
  api_create(ggplotly(a), "The opinion of respondents by gender-opinion")
} else {
  message("plotly_username / plotly_api_key are not set; skipping the upload.")
}

Another solution is to use data1 directly, but then the bars are not labelled with the count numbers (not shown here).

Next, I will illustrate the remaining types of bar chart, which make it easier to compare proportions: the first chart shows the opinions of respondents on a scale of 1 (every bar is filled to 100%), and the second chart shows the numbers of respondents stacked on top of one another. It is a little difficult to calculate and show percentage labels for the 1st and 2nd charts.

In order to show percentage labels on the chart, I create a dataset “data1.3” which has the variables Proportion and Percentage:

# Count respondents per Gender x Education x Opinion (the same numbers as
# data1.1), then, within each Gender x Education group, add
#   Proportion - the opinion's share of the group, rounded to 2 decimals
#   Percentage - that same rounded share formatted as a percentage
# The result stays grouped by Gender and Education.
data1.3 <- data1 %>%
  count(Gender, Education, Opinion, name = "Count") %>%
  group_by(Gender, Education) %>%
  mutate(
    Proportion = round(prop.table(Count), 2),
    Percentage = percent(Proportion)
  )

data1.3
## # A tibble: 48 x 6
## # Groups:   Gender, Education [12]
##    Gender Education            Opinion             Count Proportion Percentage
##    <chr>  <fct>                <fct>               <int>      <dbl> <formttbl>
##  1 Female No high school       Strongly disapprove    11       0.58 58.00%    
##  2 Female No high school       Somewhat disapprove     3       0.16 16.00%    
##  3 Female No high school       Somewhat approve        2       0.11 11.00%    
##  4 Female No high school       Strongly approve        3       0.16 16.00%    
##  5 Female High school graduate Strongly disapprove    50       0.39 39.00%    
##  6 Female High school graduate Somewhat disapprove    18       0.14 14.00%    
##  7 Female High school graduate Somewhat approve       24       0.19 19.00%    
##  8 Female High school graduate Strongly approve       35       0.28 28.00%    
##  9 Female Some college         Strongly disapprove    53       0.54 54.00%    
## 10 Female Some college         Somewhat disapprove     6       0.06 6.00%     
## # ... with 38 more rows
# Table 1.4: data1.3 with a black header row.
# Beware: datatable(data1.3, filter = 'top') breaks the interactive figures in
# the knitted html file (reason unknown), so no column filters are requested.
datatable(
  data1.3,
  caption = htmltools::tags$caption(
    style = 'caption-side: top; text-align: center;',
    'Table 1.4: ',
    htmltools::em('The Summary of the Respondent Characteristics')
  ),
  # Once the table is initialised, paint the header row black with white text
  options = list(
    initComplete = JS(
      "function(settings, json) {",
      "$(this.api().table().header()).css({'background-color': '#000', 'color': '#fff'});",
      "}"
    )
  )
)

A look at data1.3 shows that the Percentage variable is successfully created, although the command datatable(data1.3) doesn’t display it in percentage format. However, when we draw the graph, the percentage values can still be shown on the bars. There are 2 different ways to do this, using either the “Proportion” variable or the “Percentage” variable:

# 100%-filled bar charts of the opinion shares per gender, one panel per
# education level. Both charts draw the same bars; they differ only in how
# the label text is produced.

# Variant 1: label the segments with the pre-formatted Percentage column
ggplot(
  data1.3,
  aes(x = Gender, y = Percentage, fill = Opinion, label = Percentage)
) +
  stat_summary(geom = "bar", position = "fill") +
  geom_text(position = position_stack(vjust = 0.5), size = 3) +
  facet_wrap(~Education) +
  scale_y_continuous(labels = scales::percent_format()) + # y axis in percent
  labs(
    x = "Gender",
    y = "Proportion of respondents",
    title = "The opinion of respondents"
  ) +
  theme(plot.title = element_text(hjust = 0.5))

# Variant 2: convert the numeric Proportion column on the fly with percent()
ggplot(
  data1.3,
  aes(x = Gender, y = Percentage, fill = Opinion, label = percent(Proportion))
) +
  stat_summary(geom = "bar", position = "fill") +
  geom_text(position = position_stack(vjust = 0.5), size = 3) +
  facet_wrap(~Education) +
  scale_y_continuous(labels = scales::percent_format()) +
  labs(
    x = "Gender",
    y = "Proportion of respondents",
    title = "The opinion of respondents"
  ) +
  theme(plot.title = element_text(hjust = 0.5))

Because there are too many small value labels, I decide to hide some of them in this “stack” bar chart (hide value < 10)

# Stacked bar chart of the respondent counts, one panel per education level.
# Segments with fewer than 10 respondents get an empty label to avoid clutter.
ggplot(data1.3, aes(x = Gender, y = Count, fill = Opinion, label = Count)) +
  stat_summary(geom = "bar", position = "stack") +
  geom_text(
    aes(label = ifelse(Count >= 10, Count, "")),
    position = position_stack(vjust = 0.5), # middle of each segment
    size = 3
  ) +
  facet_wrap(~Education) +
  labs(
    x = "Gender",
    y = "Number of respondents",
    title = "The opinion of respondents"
  ) +
  theme(plot.title = element_text(hjust = 0.5))

For interactive plot:

# Interactive 100%-filled bar chart of the opinion shares, one panel per
# education level. The labels are built with paste0() as "<Proportion x 100>%".
b1 <- ggplot(
  data1.3,
  aes(x = Gender, y = Percentage, fill = Opinion, label = paste0(Proportion*100,"%"))
) +
  stat_summary(geom = "bar", position = "fill") +
  labs(
    x = "Gender",
    y = "Proportion of respondents",
    title = "The opinion of respondents"
  ) +
  facet_wrap(~Education) +
  geom_text(position = position_stack(vjust = 0.5), size = 3) +
  scale_y_continuous(labels = scales::percent_format()) + # y axis in percent
  theme(plot.title = element_text(hjust = 0.5))
ggplotly(b1)

# Upload to the plotly cloud only when the credentials are supplied from
# outside the source (for example through ~/.Renviron).
# NOTE(review): an API key used to be written here in plain text; it should be
# revoked and regenerated.
if (nzchar(Sys.getenv("plotly_username")) && nzchar(Sys.getenv("plotly_api_key"))) {
  api_create(ggplotly(b1), "The opinion of respondents percetage barchart")
} else {
  message("plotly_username / plotly_api_key are not set; skipping the upload.")
}

Let’s try an animated graph. Explain what you are visualizing here: An animated bar chart showing the number of respondents by gender and opinion for each educational level in the cces file

Put your figure here:

# Animated grouped bar chart of the respondent counts: the animation steps
# through the Education levels, showing each one in the subtitle.
ggplot(data1.3, aes(x = Gender, y = Count, fill = Opinion, label = Count)) +
  stat_summary(geom = "bar", position = "dodge") +
  # Centre each label over its bar; the negative vjust lifts it above the bar
  geom_text(position = position_dodge(width = .9), vjust = -0.5, size = 4) +
  labs(
    x = "Gender",
    y = "Number of respondents",
    title = "The opinion of respondents",
    subtitle = 'Education: {closest_state}'
  ) +
  theme(
    plot.title = element_text(hjust = 0.5, size = 18, face = "bold"),
    plot.subtitle = element_text(size = 15),
    legend.title = element_text(size = 15, face = "bold.italic", colour = "black"),
    legend.text = element_text(size = 14, face = "italic", colour = "black"),
    axis.title = element_text(size = 15, colour = "black"),
    axis.text = element_text(size = 13, colour = "black")
  ) +
  # One state per Education level, shown for 2 time units with no tweening
  transition_states(Education, transition_length = 0, state_length = 2) +
  enter_fade() +
  exit_fade()

# Write the animation rendered above to disk
anim_save("The opinion of respondents.gif")

For interactive plot with animation:

# Animated interactive bar chart built directly with plotly: Gender on the x
# axis, Count on the y axis, one colour per Opinion and one animation frame
# per Education level. The data comes from plot_ly(data1.3), so add_bars()
# only needs the bar attributes (xend/yend belong to segments, not bars).
a <- plot_ly(data1.3) %>%
  add_bars(
    x = ~Gender,
    y = ~Count,
    color = ~Opinion,
    frame = ~Education
  ) %>%
  # Slider caption "Education: <level>" in red
  animation_slider(
    currentvalue = list(prefix = "Education: ", font = list(color = "red"))
  ) %>%
  animation_opts(1000, easing = "elastic", redraw = FALSE)
a

# Upload to the plotly cloud only when the credentials are supplied from
# outside the source (for example through ~/.Renviron).
# NOTE(review): an API key used to be written here in plain text; it should be
# revoked and regenerated.
if (nzchar(Sys.getenv("plotly_username")) && nzchar(Sys.getenv("plotly_api_key"))) {
  api_create(a, "Video The opinion of respondents")
} else {
  message("plotly_username / plotly_api_key are not set; skipping the upload.")
}

Exercise 2

Data: cel

Explain what you are visualizing here: Drawing a scatter plot illustrating the relationship between the DW-Nominate score of each member (dwnom1) and the number of the bills introduced in congress 110 (all_bills) according to years (elected)

Let’s look at data2:

# Keep the rows of cel for congress 110 and add a character Gender label
# derived from the 'female' flag (0 = Male, 1 = Female).
data2 <- cel %>%
  filter(congress == 110) %>%
  mutate(
    Gender = case_when(
      female == 0 ~ "Male",
      female == 1 ~ "Female"
    )
  )

# Table 2: the filtered data, with a filter box on top of every column.
datatable(
  data2,
  # Caption placed above the table and centred
  caption = htmltools::tags$caption(
    style = 'caption-side: top; text-align: center;',
    'Table 2: ',
    htmltools::em('The cel data of respondent in congress 110')
  ),
  filter = 'top'
)

Put your figure here:

# Scatter plot of bills introduced (all_bills) against the DW-Nominate score
# (dwnom1), coloured by Gender, with one panel per value of 'elected'.
ggplot(data2, aes(x = dwnom1, y = all_bills, colour = Gender)) +
  geom_point() +
  facet_wrap(~elected) +
  labs(
    x = "DW-Nominate score",
    y = "Number of bills",
    title = "The DW-Nominate score and Number of bills introduced each year"
  ) +
  theme(plot.title = element_text(hjust = 0.5)) # centre the title

For interactive plot:

For interactive and animation plot:

# Animated scatter plot for ggplotly(): points coloured by Gender, with
# 'elected' passed as the frame variable.
# 'frame' is not a ggplot2 aesthetic (hence the first warning below); it is
# only picked up when the plot is converted by ggplotly().
c1 <- ggplot(data2, aes(x= dwnom1, y = all_bills, color = Gender)) +
  geom_point(aes(frame = elected)) +
  labs(x ="DW-Nominate score", y = "Number of bills", title = "The DW-Nominate score and Number of bills introduced each year") +
  theme(plot.title = element_text(hjust = 0.5))
## Warning: Ignoring unknown aesthetics: frame
# The conversion below warns when colour and frame are combined; the symptom
# noted by the author is that only one gender is drawn in a frame.
ggplotly(c1)
## Warning in p$x$data[firstFrame] <- p$x$frames[[1]]$data: number of items to
## replace is not a multiple of replacement length

A problem arises when we plot the graph with colour for the different genders. Similar problems have already been reported in several topics, and no solution has been found (https://community.plotly.com/t/frame-showing-less-category-than-actual/8628). The warning message indicates that a frame shows fewer categories than expected.

Only females or only males are shown (from year 1986).

However, if I replace the option aes(color = Gender) with aes(ids/label = Gender, text1 = …), the frame can show both genders at the same time (of course with no colour).

 # Workaround for the animated scatter plot: Gender is no longer mapped to
 # colour. It is passed as 'ids', and thomas_name / seniority are passed as
 # 'text1' / 'text2'.
 # NOTE(review): ids, text1 and text2 are not ggplot2 aesthetics; presumably
 # ggplotly() uses them for the animation and the tooltips - confirm.
 c2 <- ggplot(data2, aes(x= dwnom1, y = all_bills)) +
  geom_point(aes(frame = elected, ids = Gender, text1 = thomas_name, text2 = seniority)) +
  labs(x ="DW-Nominate score", y = "Number of bills", title = "The DW-Nominate score and Number of bills introduced each year") +
  theme(plot.title = element_text(hjust = 0.5))
ggplotly(c2)

Exercise 3

Explain what you are visualizing here: Drawing an (interactive) line chart illustrating the mean number of bills introduced in congress 110 (all_bills) according to the year the member was elected (elected)

Put your figure here:

# Mean number of bills (all_bills) per election year, drawn as points joined
# by a line, with one panel per Gender.
# `fun` is the current name of the summary-function argument of
# stat_summary(); the older `fun.y` is deprecated.
ggplot(data2, aes(x = elected, y = all_bills, colour = Gender)) +
  stat_summary(fun = mean, geom = "point") +
  # group = 1 joins all yearly means of a panel into a single line
  stat_summary(fun = mean, geom = "line", aes(group = 1)) +
  labs(
    x = "Elected Year",
    y = "Mean of bills",
    title = "The mean value of bills of the members in each elected year"
  ) +
  theme_bw() + # must come before theme(), otherwise it resets the tweaks below
  theme(
    plot.title = element_text(hjust = 0.5), # centre the title
    legend.position = "none"                # the panels already name the gender
  ) +
  facet_wrap(~Gender, as.table = FALSE)

Let’s check some values by filtering on the “Elected Year” values 1990, 1992 and 1994:

# pastecs provides stat.desc(); it is already attached in the setup section,
# so this call is a harmless repeat.
library(pastecs)
# Rows of data2 whose 'elected' value is 1990
year1990 <- filter(data2, elected ==1990) 
# stat.desc() summary of all_bills, computed separately for each Gender.
# by() prints its group headers from the argument expression as written.
by(year1990$all_bills, year1990$Gender, stat.desc )
## year1990$Gender: Female
##      nbr.val     nbr.null       nbr.na          min          max        range 
##    2.0000000    0.0000000    0.0000000   31.0000000   36.0000000    5.0000000 
##          sum       median         mean      SE.mean CI.mean.0.95          var 
##   67.0000000   33.5000000   33.5000000    2.5000000   31.7655118   12.5000000 
##      std.dev     coef.var 
##    3.5355339    0.1055383 
## ------------------------------------------------------------ 
## year1990$Gender: Male
##      nbr.val     nbr.null       nbr.na          min          max        range 
##   13.0000000    0.0000000    0.0000000    3.0000000   33.0000000   30.0000000 
##          sum       median         mean      SE.mean CI.mean.0.95          var 
##  150.0000000   11.0000000   11.5384615    2.2999871    5.0112415   68.7692308 
##      std.dev     coef.var 
##    8.2927216    0.7187025
# Rows of data2 whose 'elected' value is 1992
year1992 <- filter(data2, elected ==1992) 
# stat.desc() summary of all_bills, computed separately for each Gender
by(year1992$all_bills, year1992$Gender, stat.desc)
## year1992$Gender: Female
##      nbr.val     nbr.null       nbr.na          min          max        range 
##    8.0000000    0.0000000    0.0000000    7.0000000   74.0000000   67.0000000 
##          sum       median         mean      SE.mean CI.mean.0.95          var 
##  213.0000000   20.5000000   26.6250000    7.8078200   18.4625606  487.6964286 
##      std.dev     coef.var 
##   22.0838499    0.8294404 
## ------------------------------------------------------------ 
## year1992$Gender: Male
##      nbr.val     nbr.null       nbr.na          min          max        range 
##   36.0000000    0.0000000    0.0000000    2.0000000   52.0000000   50.0000000 
##          sum       median         mean      SE.mean CI.mean.0.95          var 
##  632.0000000   15.0000000   17.5555556    2.2040527    4.4744649  174.8825397 
##      std.dev     coef.var 
##   13.2243162    0.7532838
# Rows of data2 whose 'elected' value is 1994
year1994 <- filter(data2, elected ==1994) 
# stat.desc() summary of all_bills, computed separately for each Gender
by(year1994$all_bills, year1994$Gender, stat.desc)
## year1994$Gender: Female
##      nbr.val     nbr.null       nbr.na          min          max        range 
##    4.0000000    0.0000000    0.0000000   14.0000000   41.0000000   27.0000000 
##          sum       median         mean      SE.mean CI.mean.0.95          var 
##  108.0000000   26.5000000   27.0000000    5.9581876   18.9616123  142.0000000 
##      std.dev     coef.var 
##   11.9163753    0.4413472 
## ------------------------------------------------------------ 
## year1994$Gender: Male
##      nbr.val     nbr.null       nbr.na          min          max        range 
##   26.0000000    0.0000000    0.0000000    1.0000000   53.0000000   52.0000000 
##          sum       median         mean      SE.mean CI.mean.0.95          var 
##  355.0000000   11.0000000   13.6538462    2.0523475    4.2268888  109.5153846 
##      std.dev     coef.var 
##   10.4649598    0.7664478

Instead of checking the years one by one, you can create a table by using group_by() and summarise(), which gives the same values:

# Descriptive statistics of all_bills for every elected year x Gender group:
# minimum, maximum, median, mean (rounded to 2 decimals) and the first and
# third quartiles. Every statistic ignores missing values in the same way.
bygender <- group_by(data2, elected, Gender)
a <- summarise(
  bygender,
  min = min(all_bills, na.rm = TRUE),
  max = max(all_bills, na.rm = TRUE),
  median = median(all_bills, na.rm = TRUE),
  mean = round(mean(all_bills, na.rm = TRUE), 2),
  # `probs` is spelled out in full instead of relying on partial matching
  "1stQ" = quantile(all_bills, probs = 0.25, na.rm = TRUE),
  "3rdQ" = quantile(all_bills, probs = 0.75, na.rm = TRUE)
)

# Table 3: the statistics above, with a filter box on top of every column.
datatable(
  a,
  filter = 'top',
  caption = htmltools::tags$caption(
    style = 'caption-side: top; text-align: center;',
    'Table 3: ',
    htmltools::em('The basic characteristics of responents in congress 110')
  )
)

For interactive plot:

# Interactive version of the mean-bills line chart: mean number of bills
# (all_bills) per election year, as points joined by a line, one panel per
# Gender.
# `fun` is the current name of the summary-function argument of
# stat_summary(); the older `fun.y` is deprecated.
d <- ggplot(data2, aes(x = elected, y = all_bills, colour = Gender)) +
  stat_summary(fun = mean, geom = "point") +
  # group = 1 joins all yearly means of a panel into a single line
  stat_summary(fun = mean, geom = "line", aes(group = 1)) +
  labs(
    x = "Elected Year",
    y = "Mean of bills",
    title = "The mean value of bills of the members in each elected year"
  ) +
  theme_bw() + # must come before theme(), otherwise it resets the tweaks below
  theme(
    plot.title = element_text(hjust = 0.5), # centre the title
    legend.position = "none"                # the panels already name the gender
  ) +
  facet_wrap(~Gender, as.table = FALSE)

ggplotly(d)

Drawing box plot to identify median - min - max of these value:

# Box plots of the number of bills (all_bills) per election year, one panel
# per Gender.
# 'elected' is a continuous variable, so without an explicit group ggplot2
# pools all years of a panel into a single box; group = elected gives one box
# per election year. The labels describe the plotted quantity (bill counts,
# not means).
d <- ggplot(
  data2,
  aes(x = elected, y = all_bills, colour = Gender, group = elected)
) +
  geom_boxplot() +
  labs(
    x = "Elected Year",
    y = "Number of bills",
    title = "The distribution of bills of the members in each elected year"
  ) +
  theme_bw() + # must come before theme(), otherwise it resets the tweaks below
  theme(
    plot.title = element_text(hjust = 0.5), # centre the title
    legend.position = "none"                # the panels already name the gender
  ) +
  facet_wrap(~Gender, as.table = FALSE)

ggplotly(d)

A few comments on the interactive box plot and line graph: 1. No values for the mean of bills are shown on the interactive graph.
2. A box plot only shows the plot for a few years (not all of the data)

As we can see, the mean values for Female in 1992 and 1994 are 26.625 and 27, but they are not shown on the interactive line graph. In my opinion, these values should be shown.

Exercise 4

Explain what you are visualizing here: Drawing a box plot illustrate the number of bills (all_law) that the member(female) introduced that became law in congress 110

Put your figure here:

# Keep the rows of cel for congress 110 and add two character labels:
#   Gender - from the 'female' flag (0 = Male, 1 = Female)
#   Dem    - from the 'dem' flag (0 = Republican, 1 = Democrat)
data3 <- cel %>%
  filter(congress == 110) %>%
  mutate(
    Gender = case_when(
      female == 0 ~ "Male",
      female == 1 ~ "Female"
    ),
    Dem = case_when(
      dem == 0 ~ "Republican",
      dem == 1 ~ "Democrat"
    )
  )

# Box plot of all_bills by party, one panel per Gender.
# NOTE(review): the exercise text above mentions all_law (bills that became
# law), while this chart plots all_bills - confirm which one is intended.
g <- ggplot(data3, aes(x = Dem, y = all_bills, fill = Dem)) +
  geom_boxplot() +
  labs(
    x = "Political view",
    y = "The number of bills",
    title = "The number of bills introduced in congress 110"
  ) +
  theme(
    plot.title = element_text(hjust = 0.5), # centre the title
    legend.position = "none"                # the x axis already names the party
  ) +
  facet_wrap(~Gender, as.table = FALSE)
g

# Interactive version of the same chart
ggplotly(g)