######## PART 1 ########
# Installing (only when missing) and loading required packages.
# requireNamespace() checks availability without attaching anything, so the
# library() calls below are the single place where packages get attached,
# and they stop with an error if an installation did not succeed.
if (!requireNamespace("tidyverse", quietly = TRUE)) install.packages("tidyverse")
if (!requireNamespace("tidycensus", quietly = TRUE)) install.packages("tidycensus")
if (!requireNamespace("sf", quietly = TRUE)) install.packages("sf")
if (!requireNamespace("mapview", quietly = TRUE)) install.packages("mapview")
library(tidyverse)
library(tidycensus)
library(sf)
library(mapview)
# Transmitting API key
# The key is read from the CENSUS_API_KEY environment variable (for example,
# set in ~/.Renviron) so that the credential is not stored in the script.
# NOTE(review): the key that was previously hard-coded here should be treated
# as exposed and replaced with a newly issued one.
census_key <- Sys.getenv("CENSUS_API_KEY")
if (!nzchar(census_key)) {
  stop(
    "Census API key not found: set the CENSUS_API_KEY environment variable.",
    call. = FALSE
  )
}
census_api_key(census_key)
# Fetching ACS codebooks for the 2022 five-year data: detailed, subject,
# and profile tables (cached after the first download)
DetailedTables <- load_variables(year = 2022, dataset = "acs5", cache = TRUE)
SubjectTables <- load_variables(year = 2022, dataset = "acs5/subject", cache = TRUE)
ProfileTables <- load_variables(year = 2022, dataset = "acs5/profile", cache = TRUE)
# Double checking target variables: look both codes up in the profile
# codebook and print their names, labels, and concepts
ChosenVars <- ProfileTables %>%
  filter(name %in% c("DP04_0047P", "DP02_0001"))
print(ChosenVars[["name"]])
## [1] "DP02_0001" "DP04_0047P"
print(ChosenVars[["label"]])
## [1] "Estimate!!HOUSEHOLDS BY TYPE!!Total households"
## [2] "Percent!!HOUSING TENURE!!Occupied housing units!!Renter-occupied"
print(ChosenVars[["concept"]])
## [1] "Selected Social Characteristics in the United States"
## [2] "Selected Housing Characteristics"
# Specifying target variables. The vector names are used as column prefixes
# in the wide output; later code reads Renters_E / Renters_M (estimate and
# margin of error) and Households_E.
VariableList <- c(
  Renters_ = "DP04_0047P",
  Households_ = "DP02_0001"
)
# Fetching data: county-level 2022 five-year ACS values for Tennessee,
# one row per county, with county geometry attached for mapping
p1data <- get_acs(
  geography = "county",
  variables = VariableList,
  state = "TN",
  year = 2022,
  survey = "acs5",
  geometry = TRUE,
  output = "wide"
)
## (Console output omitted: download progress bar running from 0% to 100%)
# Reformatting data: split NAME at ", " into separate County and State columns
p1data <- p1data %>%
  separate_wider_delim(
    cols = NAME,
    delim = ", ",
    names = c("County", "State")
  )
# Filtering data: keep only the seven Nashville-area counties of interest
filtereddata <- p1data %>%
  filter(
    County %in% c(
      "Davidson County",
      "Rutherford County",
      "Williamson County",
      "Cheatham County",
      "Robertson County",
      "Sumner County",
      "Wilson County"
    )
  )
# Plotting data: one point per county at its renter-percentage estimate,
# counties ordered by that estimate, with a horizontal bar spanning the
# estimate minus/plus its margin of error
ggplot(
  data = filtereddata,
  mapping = aes(x = Renters_E, y = reorder(County, Renters_E))
) +
  geom_errorbarh(
    mapping = aes(
      xmin = Renters_E - Renters_M,
      xmax = Renters_E + Renters_M
    )
  ) +
  geom_point(color = "darkblue", size = 3) +
  theme_minimal(base_size = 12.5) +
  labs(
    title = "Pct. households being rented",
    subtitle = "Nashville-area counties. Brackets show error margins.",
    x = "2018-2022 ACS estimate",
    y = ""
  )
# Mapping data: give the estimate columns friendlier names, then convert the
# result back to an sf object so mapview can draw the county shapes
mapdata <- filtereddata %>%
  rename(
    Renters = Renters_E,
    Households = Households_E
  ) %>%
  st_as_sf()
mapviewOptions(basemaps.color.shuffle = FALSE)
mapview(
  mapdata,
  zcol = "Renters",
  layer.name = "Pct. being rented",
  popup = TRUE
)
# Exporting data in .csv format (probably unnecessary, but better to have it
# and not need it). The geometry column is dropped first so that only the
# tabular attributes are written.
CSVdata <- st_drop_geometry(mapdata)
write.csv(CSVdata, file = "p1data.csv", row.names = FALSE)
After running this code, the reader can see that, among the seven Nashville-area counties examined, the share of occupied housing units that are renter-occupied was highest in Davidson County, at 45.8%. Rutherford County was second highest at 34.8%, and Cheatham County was lowest, with only 18.8% of occupied housing units being renter-occupied.
######## PART 2 ########
# Install tidyverse (only when missing) and load it.
# requireNamespace() checks availability without attaching the package;
# library() then attaches it and stops with an error if it is still missing.
if (!requireNamespace("tidyverse", quietly = TRUE)) {
  install.packages("tidyverse")
}
library(tidyverse)
# Read the data
# NOTE: You may edit the URL to load a different dataset
p2data <- read.csv(
  file = "https://raw.githubusercontent.com/drkblake/Data/main/SocialData.csv"
)
# Preview the first ten rows
head(p2data, n = 10)
## ID Type Impressions
## 1 1 Photo 695
## 2 2 Text 940
## 3 3 Photo 1196
## 4 4 Photo 936
## 5 5 Photo 1389
## 6 6 Photo 857
## 7 7 Text 797
## 8 8 Photo 1810
## 9 9 Photo 1086
## 10 10 Video 1416
# Specify the DV and IV: copy the outcome (Impressions) and the grouping
# variable (Type) into generically named columns used by the code below
p2data[["DV"]] <- p2data[["Impressions"]]
p2data[["IV"]] <- p2data[["Type"]]
# Graph the group distributions and averages
# Per-group mean of the DV; drawn below as a vertical line in each facet
averages <- p2data %>%
  group_by(IV) %>%
  summarise(mean = mean(DV, na.rm = TRUE))
# A single histogram layer is used: the original added an unstyled
# geom_histogram() that was completely covered by the styled one, so the
# binning ran twice for no visible effect. bins = 30 spells out the default
# bin count, which also avoids the "pick better value" message.
ggplot(p2data, aes(x = DV)) +
  geom_histogram(bins = 30, color = "black", fill = "#1f78b4") +
  facet_grid(IV ~ .) +
  geom_vline(data = averages, aes(xintercept = mean))
# Per-group descriptive statistics for the DV: number of rows, mean,
# standard deviation, minimum, and maximum (missing values ignored)
p2data %>%
  group_by(IV) %>%
  summarise(
    count = n(),
    mean = mean(DV, na.rm = TRUE),
    sd = sd(DV, na.rm = TRUE),
    min = min(DV, na.rm = TRUE),
    max = max(DV, na.rm = TRUE)
  )
## # A tibble: 3 × 6
## IV count mean sd min max
## <chr> <int> <dbl> <dbl> <int> <int>
## 1 Photo 58 1035. 297. 397 1810
## 2 Text 43 999. 278. 515 1746
## 3 Video 39 1370. 307. 829 1952
# Strongly prefer fixed over scientific notation so small p-values print
# in full
options(scipen = 999)
# One-way comparison of group means that does not assume equal variances
oneway.test(
  formula = p2data$DV ~ p2data$IV,
  var.equal = FALSE
)
##
## One-way analysis of means (not assuming equal variances)
##
## data: p2data$DV and p2data$IV
## F = 19.119, num df = 2.000, denom df = 85.525, p-value = 0.000000137
# When the test above finds a significant difference, this post-hoc
# procedure shows which pairs of groups differ significantly.
anova_1 <- aov(formula = p2data$DV ~ p2data$IV)
TukeyHSD(anova_1)
## Tukey multiple comparisons of means
## 95% family-wise confidence level
##
## Fit: aov(formula = p2data$DV ~ p2data$IV)
##
## $`p2data$IV`
## diff lwr upr p adj
## Text-Photo -36.35605 -176.6202 103.9081 0.8126345
## Video-Photo 334.87710 190.5414 479.2128 0.0000005
## Video-Text 371.23315 217.1076 525.3587 0.0000002
Using a one-way ANOVA to compare the group averages, we can see that video posts get significantly more engagement (measured here as impressions), on average, than both photo and text posts. The biggest difference lies between video and text posts: videos average about 371 more impressions than text posts. The difference between photo and text posts (about 36 impressions) is not statistically significant (adjusted p = 0.81).
######## PART 3 ########
# Install tidyverse and tidytext (only when missing) and load them.
# requireNamespace() checks availability without attaching the package;
# library() then attaches it and stops with an error if it is still missing.
if (!requireNamespace("tidyverse", quietly = TRUE)) install.packages("tidyverse")
if (!requireNamespace("tidytext", quietly = TRUE)) install.packages("tidytext")
library(tidyverse)
library(tidytext)
# Read the data, split the Full.Text column into one word per row, and count
# how often each word occurs (most frequent first)
p3data <- read.csv("https://raw.githubusercontent.com/drkblake/Data/main/WhiteHouse.csv")
tidy_text <- p3data %>%
  unnest_tokens(word, Full.Text) %>%
  count(word, sort = TRUE)
# Deleting standard stop words.
# The join key is given explicitly so the result does not depend on which
# column names the two tables happen to share (and no "Joining with" message
# is printed).
data("stop_words")
tidy_text <- tidy_text %>%
  anti_join(stop_words, by = "word")
# Deleting custom stop words: URL fragments and the retweet marker
my_stopwords <- tibble(word = c("https", "t.co", "rt"))
tidy_text <- tidy_text %>%
  anti_join(my_stopwords, by = "word")
view(tidy_text)
While it may be a broad answer, the most common theme present is “domestic affairs.” Some of the most frequent words are “jobs,” “families,” “health,” “inflation,” and “care.” When these terms are searched for in the p3data data frame, they usually appear in the context of domestic American matters rather than foreign affairs.