# Install `remotes` only when it is missing, so re-running or knitting this
# script does not download it from CRAN every time.
if (!requireNamespace("remotes", quietly = TRUE)) {
  install.packages("remotes", repos = "https://cran.r-project.org/")
}
## 
## The downloaded binary packages are in
##  /var/folders/rz/qgdl4bg53r3__zyx803f25mc0000gn/T//Rtmpq2roCt/downloaded_packages
# Install the pinned legisTaiwan release only when the package is absent or a
# different version is installed. `force = TRUE` is kept inside the guard so
# the 0.2.1 tag always replaces whatever other version is present.
if (!requireNamespace("legisTaiwan", quietly = TRUE) ||
    utils::packageVersion("legisTaiwan") != "0.2.1") {
  remotes::install_github("davidycliao/legisTaiwan@0.2.1", force = TRUE)
}
## 
## ── R CMD build ─────────────────────────────────────────────────────────────────
##      checking for file ‘/private/var/folders/rz/qgdl4bg53r3__zyx803f25mc0000gn/T/Rtmpq2roCt/remotes6d8472c90f8/davidycliao-legisTaiwan-989ef0e/DESCRIPTION’ ...  ✔  checking for file ‘/private/var/folders/rz/qgdl4bg53r3__zyx803f25mc0000gn/T/Rtmpq2roCt/remotes6d8472c90f8/davidycliao-legisTaiwan-989ef0e/DESCRIPTION’
##   ─  preparing ‘legisTaiwan’:
##    checking DESCRIPTION meta-information ...  ✔  checking DESCRIPTION meta-information
##   ─  checking for LF line-endings in source and make files and shell scripts
##   ─  checking for empty or unneeded directories
##   ─  building ‘legisTaiwan_0.2.1.tar.gz’
##      
## 
library("legisTaiwan")
library("ggplot2")
library(dplyr)
library(tidyr)
library(stringr)
library(tidytext)

get_ly_bills()

Distribution of 議案狀態 by 會期

Distribution of 議案狀態 by 提案來源

# Count bills for every (議案狀態, 提案來源) combination, largest counts first.
bill_origin_count <- final_bills_df %>%
  count(議案狀態, 提案來源, name = "count", sort = TRUE)

# Stacked bar chart: bill status on the x axis, bar segments coloured by the
# source of the proposal. `count` is already aggregated, so the bar heights
# are taken directly from the data.
ggplot(bill_origin_count, aes(x = 議案狀態, y = count, fill = 提案來源)) +
  geom_col(position = "stack") +
  labs(
    title = "不同提案來源之議案狀態分佈",
    x = "議案狀態",
    y = "提案數量",
    fill = "提案來源"
  ) +
  theme_minimal() +
  theme(
    axis.text.x = element_text(angle = 45, hjust = 1),
    # A CJK-capable font is required for the Chinese labels to render.
    text = element_text(family = "Heiti TC Light")
  )

## Obtain the data from page 1 to page 100 in term 10

民眾黨在第 10 屆之提案數量 by 會期

# Number of bills per session (會期) and proposing unit (提案單位), ordered by
# session and then by descending count within each session.
bills_origin_term10 <- bills_origin_term10_normalized %>%
  count(會期, 提案單位, name = "count") %>%
  arrange(會期, desc(count))

# Keep only the rows whose proposing unit mentions a party caucus ("黨團").
bills_origin_term10_with_party <- filter(
  bills_origin_term10,
  str_detect(提案單位, "黨團")
)

# Narrow the caucus rows down to the Taiwan People's Party caucus
# ("本院台灣民眾黨黨團").
minzhongdang_term10 <- filter(
  bills_origin_term10_with_party,
  str_detect(提案單位, "本院台灣民眾黨黨團")
)

# Line chart of the number of bills proposed by the TPP caucus in each session
# of term 10, with one x-axis tick per session.
ggplot(minzhongdang_term10, aes(x = 會期, y = count)) +
  # `linewidth` replaces `size` for line geoms since ggplot2 3.4.0; using it
  # avoids the deprecation warning.
  geom_line(color = "blue", linewidth = 1) +
  geom_point(color = "red", size = 2) +
  labs(
    title = "台灣民眾黨在不同會期的提案數量變化(第十屆)",
    x = "會期",
    y = "提案數量"
  ) +
  theme_minimal() +
  theme(text = element_text(family = "Heiti TC Light")) +
  scale_x_continuous(
    # na.rm = TRUE keeps seq() from failing if a session value is missing.
    breaks = seq(
      min(minzhongdang_term10$會期, na.rm = TRUE),
      max(minzhongdang_term10$會期, na.rm = TRUE),
      by = 1
    )
  )

議案類別 by 會期

# Number of bills per session (會期) and bill category (議案類別).
# Rows whose category is the "--" placeholder are removed first; the `!=`
# comparison inside filter() also drops rows with an NA category.
bills_cat_term10 <- bills_origin_term10_normalized %>%
  filter(議案類別 != "--") %>%
  count(會期, 議案類別, name = "count") %>%
  arrange(會期, desc(count))

# Stacked bar chart of bill categories per session.
# Rows with a missing session are dropped explicitly (ggplot2 would otherwise
# discard them with a warning), and the axis breaks are derived from the same
# data frame that is plotted rather than from an unrelated one.
bills_cat_term10 %>%
  filter(!is.na(會期)) %>%
  ggplot(aes(x = 會期, y = count, fill = 議案類別)) +
  geom_col(position = "stack") +
  labs(
    title = "不同會期之立委提出議案類別數量",
    x = "會期",
    y = "議案數量",
    fill = "議案類別"
  ) +
  theme(text = element_text(family = "Heiti TC Light")) +
  scale_x_continuous(
    breaks = seq(
      min(bills_cat_term10$會期, na.rm = TRUE),
      max(bills_cat_term10$會期, na.rm = TRUE),
      by = 1
    )
  )

分析第10屆立委提案中帶有兩岸、中國、大陸等字眼之法案

get_ly_legislators_by_term()

Distribution of legislators by party

KMT legislators by different terms

DPP legislators by different terms

get_ly_legislator_cosign_bills()

Get 魯明哲's co-signed bills and their status in term 8

Get 鍾佳濱's co-signed bills and their status in term 8

get_ly_committee_meets()

於國防外交委員會(code= 35)中抓取第一頁資料(100筆)並分析會議種類與數量

# Request meeting records for committee ID 35 and keep the returned object in
# `meet`. Per the output logged below, this call returns page 1 of 8
# (100 records per page), not all 737 meetings.
meet <- get_ly_committee_meets(35)
## 
## Fetching meetings data for committee ID 35...
##   |                                                                              |                                                                      |   0%  |                                                                              |==============                                                        |  20%  |                                                                              |============================                                          |  40%  |                                                                              |==========================================                            |  60%  |                                                                              |========================================================              |  80%  |                                                                              |======================================================================| 100%
## 
## 
## ====== Retrieved Information ======
## -----------------------------------
## Total Meetings: 737
## Page: 1 of 8
## Records per page: 100
## 
## Meeting Type Distribution:
##  公聽會: 1
##  委員會: 86
##  聯席會議: 9
##  黨團協商: 4
## 
## Session Distribution:
##  Session 1: 40
##  Session 2: 29
##  Session 3: 5
##  Session 7: 11
##  Session 8: 15
## 
## Location Distribution:
##  紅樓301會議室: 97
##  紅樓302會議室: 1
##  群賢樓801會議室: 1
##  群賢樓9樓大禮堂: 1
## ===================================
# Pull the meetings table out of the API result and store the meeting type
# (會議種類) as a factor.
meet_data <- meet[["meetings"]] %>%
  mutate(會議種類 = as.factor(會議種類))

# Bar chart: number of meetings per meeting type (會議種類).
# geom_bar() counts the rows itself, so no pre-aggregation is needed.
ggplot(meet_data, aes(x = 會議種類)) +
  geom_bar(fill = "skyblue", color = "black") + # fill colour plus an outline
  labs(
    title = "會議種類分佈", # plot title
    x = "會議種類", # x-axis label
    y = "頻次" # y-axis label
  ) +
  theme_minimal() + # clean base theme
  theme(
    axis.text.x = element_text(angle = 45, hjust = 1),
    text = element_text(family = "Heiti TC Light")
  )

於國防外交委員會當中,出席次數前十名的委員並繪製條狀圖

# One row per (meeting, attending member): split the comma-separated 出席委員
# field into rows, then discard missing and empty names.
attendees <- meet_data %>%
  separate_rows(出席委員, sep = ",") %>%
  filter(!is.na(出席委員), 出席委員 != "")

# Attendance count per member, most frequent attendees first.
attendees_count <- attendees %>%
  count(出席委員, name = "count", sort = TRUE)

# Keep the first ten rows of the (already sorted) attendance table.
top_10_attendees <- slice_head(attendees_count, n = 10)

# Horizontal bar chart of the ten most frequent attendees, ordered by their
# attendance count.
ggplot(top_10_attendees, aes(x = reorder(出席委員, count), y = count)) +
  geom_col(fill = "skyblue", color = "black") +
  coord_flip() +
  labs(
    title = "Top 10 出席委員(國防外交委員會)",
    x = "委員名稱",
    y = "出席次數"
  ) +
  theme_minimal() +
  theme(
    axis.text.x = element_text(angle = 45, hjust = 1),
    text = element_text(family = "Heiti TC Light")
  )

於國防外交委員會當中,出席次數大於40次的委員並繪製散點圖

# Rebuild the one-row-per-attendee table used by the scatter plot below.
# The field is split on "," here. NOTE(review): an earlier comment on this
# line mentioned "、" as the delimiter; confirm which separator the data uses.
attendees <- separate_rows(meet_data, 出席委員, sep = ",")
attendees <- filter(attendees, !(is.na(出席委員) | 出席委員 == ""))

# Members who attended more than 40 meetings, in ascending order of count.
attendees_count_40 <- attendees %>%
  count(出席委員, name = "count") %>%
  filter(count > 40) %>%
  arrange(count)

# Scatter plot of attendance counts for the members kept above. Each point is
# labelled with the member's name, so the x-axis tick labels are hidden.
ggplot(attendees_count_40, aes(x = 出席委員, y = count)) +
  geom_point(aes(color = 出席委員)) +
  geom_text(
    aes(label = 出席委員),
    vjust = -0.5,
    size = 3,
    check_overlap = TRUE,
    family = "Heiti TC Light" # the point labels need the CJK font too
  ) +
  labs(
    title = "立法委員與出席次數",
    x = "立法委員",
    y = "出席次數"
  ) +
  theme_minimal() +
  theme(
    axis.text.x = element_blank(),
    plot.title = element_text(family = "Heiti TC Light"),
    axis.title.x = element_text(family = "Heiti TC Light"),
    axis.title.y = element_text(family = "Heiti TC Light"),
    # The legend title and entries are Chinese as well.
    legend.title = element_text(family = "Heiti TC Light"),
    legend.text = element_text(family = "Heiti TC Light")
  )

get_ly_interpellations()、有網址

第八屆立委中,前十位質詢次數的有哪些立委?(抓取前100頁資料)

第八屆立委當中,質詢當中最常出現的關鍵字前十名

# Tokenise the interpellation descriptions, keep tokens longer than one
# character, and count how often each token occurs (most frequent first).
duty_words <- final_interpellations_df %>%
  unnest_tokens(word, description) %>%
  filter(nchar(word) > 1) %>%
  count(word, sort = TRUE)

# Keep the ten most frequent tokens.
top_duty_words <- duty_words %>%
  slice_head(n = 10)

# Horizontal bar chart of keyword frequencies in the interpellation
# descriptions, ordered by frequency.
ggplot(top_duty_words, aes(x = reorder(word, n), y = n)) +
  geom_col(fill = "skyblue") +
  coord_flip() +
  labs(
    title = "Frequency of Keywords in Interpellation Descriptions",
    x = "Keyword",
    y = "Frequency"
  ) +
  theme_minimal() +
  theme(text = element_text(family = "Heiti TC Light"))

按會期和立法者來分質詢次數:

Top five legislators by number of interpellations, sessions 1 to 8 of term 8

# One row per (legislator, session) with the number of interpellations.
# The comma-separated `legislators` field is split into rows first, so each
# listed legislator is counted once per interpellation record.
inter_by_legislators <- final_interpellations_df %>%
  separate_rows(legislators, sep = ",") %>%
  count(legislators, sessionTimes, name = "count") %>%
  arrange(sessionTimes, desc(count))

# Total interpellations per legislator across all sessions (the per-session
# counts are summed via `wt`), sorted in descending order; the first five rows
# are the top 5 legislators.
top_5_legislators <- inter_by_legislators %>%
  count(legislators, wt = count, name = "total_count", sort = TRUE) %>%
  slice_head(n = 5)

# Keep only the per-session rows that belong to the top 5 legislators.
filtered_data <- semi_join(
  inter_by_legislators,
  top_5_legislators,
  by = "legislators"
)

# Store sessionTimes as numeric so it can be used on a continuous x axis.
filtered_data <- filtered_data %>%
  mutate(sessionTimes = as.numeric(sessionTimes))

# Line chart: interpellations per session for the top 5 legislators, one line
# per legislator.
# The x-axis tick labels are kept visible (they were previously blanked out),
# because the session numbers set up by scale_x_continuous() are what make the
# trend readable.
ggplot(
  filtered_data,
  aes(x = sessionTimes, y = count, group = legislators, color = legislators)
) +
  geom_line() +
  labs(
    title = "各會期中前五名立法者的質詢次數趨勢(第八屆)",
    x = "會期",
    y = "質詢次數"
  ) +
  theme_minimal() +
  scale_x_continuous(
    # as.numeric() may have produced NA for non-numeric session values, so
    # ignore missing values when computing the break range.
    breaks = seq(
      min(filtered_data$sessionTimes, na.rm = TRUE),
      max(filtered_data$sessionTimes, na.rm = TRUE),
      by = 1
    )
  ) +
  theme(
    # Chinese text in the titles and legend needs a CJK-capable font.
    legend.title = element_text(family = "Heiti TC Light"),
    legend.text = element_text(family = "Heiti TC Light"),
    plot.title = element_text(family = "Heiti TC Light"),
    axis.title.x = element_text(family = "Heiti TC Light"),
    axis.title.y = element_text(family = "Heiti TC Light")
  )

透過舊的 functions(get_bills())抓資料

#bill_all <- get_bills(start_date = 1130201, end_date = 1140118, verbose = TRUE)
#bill_data <- bill_all$data

#bill_data_out <- head(bill_data, 50)

#bill_term11 <- get_bills_2(term = 11, session_period = 2, verbose = TRUE)
#bill_term11_data <- bill_term11$data

#target_bills <- c(
  #"離島建設條例第八條條文修正草案",
  #"臺灣地區與大陸地區人民關係條例",
  #"財政收支劃分法",
  #"公職人員選舉罷免法",
  #"核子反應器設施管制法",
  #"政黨及其附隨組織不當取得財產處理條例",
  #"憲法訴訟法",
 # "國道六號東延花蓮建設特別條例草案",
  #"警察人員人事條例"
#)

#filtered_df <- bill_data[grep(paste(target_bills, collapse = "|"), bill_data$billName), ]
#write.csv(filtered_df, "bill_data.csv", row.names = FALSE, fileEncoding = "UTF-8")

這部分我在跑的時候 get_bills() 突然無法 knit,報錯訊息是 URL 的問題,可能要請你檢查一下。不過資料的部分我已經存成 bill_data.csv,放在我們的 Dropbox 當中了。