legisData <- read.csv("~/Downloads/AntiLGBTQData.csv", fileEncoding = "UTF-8")
# Remove the period from the 'ISSUE.' column
colnames(legisData)[colnames(legisData) == "ISSUE."] <- "ISSUE"
# Check for missing values in important columns
anyNA(legisData$State)
[1] FALSE
anyNA(legisData$Bill)
[1] FALSE
anyNA(legisData$ISSUE)
[1] FALSE
anyNA(legisData$Status)
[1] FALSE
anyNA(legisData$Status.Detail)
[1] FALSE
Status Info
set.seed(789)
# Remove duplicate bills based on the Bill column (only keep unique bills)
uniqueLegisData <- legisData %>% distinct(Bill, .keep_all = TRUE)
# Create a table of unique bill statuses
statusTable <- table(uniqueLegisData$Status)
# Generate a pie chart with labels showing both count and percentage for unique bills
pie(statusTable,
labels = paste(names(statusTable), "\n", statusTable, "(", round(statusTable/sum(statusTable)*100, 1), "%)", sep = ""),
main = "Status of Unique Bills",
col = rainbow(length(statusTable)))
statusTable
Advancing Defeated Introduced Passed into Law
327 64 5 23
Summary Stats: Bills/State Frequency
legisData
stateCounts <- as.numeric(stateTable)
summary(stateCounts)
Min. 1st Qu. Median Mean 3rd Qu. Max.
1.000 3.000 6.000 9.886 12.000 51.000
which.max(stateCounts)
[1] 39
names(stateTable)[which.max(stateCounts)]
[1] "Texas"
which.min(stateCounts)
[1] 4
names(stateTable)[which.min(stateCounts)]
[1] "California"
boxplot(stateCounts, main = "Boxplot of Bills per State", ylab = "Number of Bills")
hist(stateCounts, main = "Histogram of Bills per State", xlab = "Number of Bills", col = "lightblue", breaks = 10)
House v Senate
legisData$Chamber <- ifelse(grepl("\\bAB\\b|\\bHF\\b|\\bHB\\b|\\HSB\\b", legisData$Bill, ignore.case = TRUE), "House",
ifelse(grepl("\\bSF\\b|\\bSB\\b|\\S\\b|\\SSB\\b", legisData$Bill, ignore.case = TRUE), "Senate", "Unknown"))
# Create the table of Senate vs. House bills (no "Unknown" values)
chamberTable <- table(legisData$Chamber)
# Display the table
chamberTable
House Senate
214 221
# Pie chart for Senate vs. House bills
pie(chamberTable, col = c("lightpink", "lightgreen"),
main = "Bills by Chamber (Senate vs. House)")
# Assign 'House' or 'Senate' based on bill title (HB, AB, HF for House; SB, SF for Senate)
Issues Stats
unique(legisData$`ISSUEÂ `)
names(legisData)[grepl("ISSUE", names(legisData))]
names(legisData)[grepl("ISSUE", names(legisData))] <- "Issue"
library(dplyr)
library(tidyr)
legisData_clean <- legisData %>%
separate_rows(Issue, sep = ",|/|;|\\n") %>% # separate by common delimiters
mutate(Issue = trimws(Issue))
head(legisData_clean$Issue)
issueTable <- table(legisData_clean$Issue)
issueTable
issueCounts <- table(legisData_clean$Issue)
names(legisData)
par(mar = c(10, 4, 4, 2))
barplot(issueTable,
las = 3,
col = "lightblue",
main = "Frequency of Bill Issues",
ylab = "Number of Bills",
xlab = "Issue",
cex.names = 0.5)
legisData
Dividing States by Party
redStates <- c("Alaska", "Arizona", "Arkansas", "Florida", "Georgia", "Idaho", "Indiana", "Iowa", "Kansas", "Kentucky", "Louisiana", "Mississippi", "Missouri", "Montana", "New Hampshire", "North Carolina", "North Dakota", "Ohio", "Oklahoma", "South Carolina", "South Dakota", "Tennessee", "Texas", "Utah", "West Virginia", "Wyoming")
blueStates <- c("California", "Colorado", "Connecticut", "Hawaii", "Maine", "Maryland", "Massachusetts", "Nevada", "New Jersey", "New Mexico", "Oregon", "Rhode Island", "Virginia", "Washington")
purpleStates <- c("Michigan", "Minnesota", "Nebraska", "Pennsylvania")
legisData$StateColor <- ifelse(legisData$State %in% redStates, "Red",
ifelse(legisData$State %in% blueStates, "Blue",
ifelse(legisData$State %in% purpleStates, "Purple", "Unknown")))
colorTable <- table(legisData$StateColor)
colorTable
Blue Purple Red
47 16 372
pie(colorTable, col = c("blue", "purple", "red"),
main = "Bills by State Color")
NA
NA
political affiliation testing
uniqueLegisData <- legisData %>% distinct(Bill, .keep_all = TRUE)
# Check if 'StateColor' exists in the dataset
colnames(uniqueLegisData)
[1] "State" "Bill" "ISSUE" "Status" "Status.Detail" "Chamber" "StateColor"
billsPerState <- table(uniqueLegisData$State)
billsData <- data.frame(State = names(billsPerState), Bills = as.vector(billsPerState))
billsData$StateColor <- uniqueLegisData$StateColor[match(billsData$State, uniqueLegisData$State)]
anovaResult <- aov(Bills ~ StateColor, data = billsData)
summary(anovaResult)
Df Sum Sq Mean Sq F value Pr(>F)
StateColor 2 1148 574.0 6.182 0.0045 **
Residuals 41 3807 92.9
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
tukeyResult <- TukeyHSD(anovaResult)
tukeyResult$StateColor
diff lwr upr p adj
Purple-Blue 0.7857143 -12.498669 14.07010 0.98866437
Red-Blue 10.5549451 2.787495 18.32240 0.00551238
Red-Purple 9.7692308 -2.815477 22.35394 0.15511047
plot(tukeyResult)
status v color
library(dplyr)
uniqueLegisData <- legisData %>% distinct(Bill, .keep_all = TRUE)
statusColorTable <- table(uniqueLegisData$StateColor, uniqueLegisData$Status)
statusColorTable
Advancing Defeated Introduced Passed into Law
Blue 29 14 2 0
Purple 15 0 1 0
Red 283 50 2 23
chisq.test(statusColorTable)
Warning in chisq.test(statusColorTable) :
Chi-squared approximation may be incorrect
Pearson's Chi-squared test
data: statusColorTable
X-squared = 24.394, df = 6, p-value = 0.0004419
fisher.test(statusColorTable)
Fisher's Exact Test for Count Data
data: statusColorTable
p-value = 0.0007295
alternative hypothesis: two.sided