Part 1: Mapping

######## PART 1 ########

# Installing and loading required packages

if (!require("tidyverse")) install.packages("tidyverse")
if (!require("tidycensus")) install.packages("tidycensus")
if (!require("sf")) install.packages("sf")
if (!require("mapview")) install.packages("mapview")

library(tidyverse)
library(tidycensus)
library(sf)
library(mapview)

# Registering the Census API key
#
# The key is a credential, so it is read from the CENSUS_API_KEY environment
# variable (for example, set in ~/.Renviron) rather than being hard-coded in
# a script that may be shared or published.

census_key <- Sys.getenv("CENSUS_API_KEY")
if (!nzchar(census_key)) {
  stop(
    "Set the CENSUS_API_KEY environment variable before running this script.",
    call. = FALSE
  )
}
census_api_key(census_key)

# Downloading the 2022 five-year ACS variable codebooks for the detailed,
# subject, and data-profile tables (cache = TRUE is requested for each)

DetailedTables <- load_variables(
  year = 2022,
  dataset = "acs5",
  cache = TRUE
)
SubjectTables <- load_variables(
  year = 2022,
  dataset = "acs5/subject",
  cache = TRUE
)
ProfileTables <- load_variables(
  year = 2022,
  dataset = "acs5/profile",
  cache = TRUE
)

# Confirming that both target variable codes are in the profile codebook,
# then printing their names, labels, and concepts

ChosenVars <- ProfileTables %>%
  filter(name %in% c("DP04_0047P", "DP02_0001"))
print(ChosenVars$name)

## [1] "DP02_0001"  "DP04_0047P"

print(ChosenVars$label)

## [1] "Estimate!!HOUSEHOLDS BY TYPE!!Total households"
## [2] "Percent!!HOUSING TENURE!!Occupied housing units!!Renter-occupied"

print(ChosenVars$concept)

## [1] "Selected Social Characteristics in the United States"
## [2] "Selected Housing Characteristics"

# Naming the target variables; the later code reads the resulting columns
# as Renters_E, Renters_M, and Households_E

VariableList <- c(
  Renters_ = "DP04_0047P",
  Households_ = "DP02_0001"
)

# Requesting the 2022 five-year ACS values for every Tennessee county,
# one row per county (wide output), with county geometry attached

p1data <- get_acs(
  geography = "county",
  variables = VariableList,
  state = "TN",
  survey = "acs5",
  year = 2022,
  geometry = TRUE,
  output = "wide"
)

## 
  |                                                                            
  |                                                                      |   0%
  |                                                                            
  |                                                                      |   1%
  |                                                                            
  |=                                                                     |   1%
  |                                                                            
  |=                                                                     |   2%
  |                                                                            
  |==                                                                    |   2%
  |                                                                            
  |==                                                                    |   3%
  |                                                                            
  |===                                                                   |   4%
  |                                                                            
  |====                                                                  |   5%
  |                                                                            
  |====                                                                  |   6%
  |                                                                            
  |=====                                                                 |   6%
  |                                                                            
  |=====                                                                 |   7%
  |                                                                            
  |=====                                                                 |   8%
  |                                                                            
  |======                                                                |   8%
  |                                                                            
  |======                                                                |   9%
  |                                                                            
  |=======                                                               |  10%
  |                                                                            
  |========                                                              |  11%
  |                                                                            
  |=========                                                             |  13%
  |                                                                            
  |==========                                                            |  14%
  |                                                                            
  |==========                                                            |  15%
  |                                                                            
  |===========                                                           |  16%
  |                                                                            
  |============                                                          |  17%
  |                                                                            
  |============                                                          |  18%
  |                                                                            
  |=============                                                         |  18%
  |                                                                            
  |=============                                                         |  19%
  |                                                                            
  |==============                                                        |  19%
  |                                                                            
  |==============                                                        |  20%
  |                                                                            
  |==============                                                        |  21%
  |                                                                            
  |===============                                                       |  21%
  |                                                                            
  |===============                                                       |  22%
  |                                                                            
  |================                                                      |  22%
  |                                                                            
  |================                                                      |  23%
  |                                                                            
  |=================                                                     |  24%
  |                                                                            
  |=================                                                     |  25%
  |                                                                            
  |==================                                                    |  26%
  |                                                                            
  |===================                                                   |  27%
  |                                                                            
  |===================                                                   |  28%
  |                                                                            
  |====================                                                  |  29%
  |                                                                            
  |=====================                                                 |  30%
  |                                                                            
  |=====================                                                 |  31%
  |                                                                            
  |======================                                                |  31%
  |                                                                            
  |======================                                                |  32%
  |                                                                            
  |=======================                                               |  33%
  |                                                                            
  |========================                                              |  34%
  |                                                                            
  |========================                                              |  35%
  |                                                                            
  |=========================                                             |  36%
  |                                                                            
  |==========================                                            |  37%
  |                                                                            
  |===========================                                           |  38%
  |                                                                            
  |============================                                          |  40%
  |                                                                            
  |=============================                                         |  41%
  |                                                                            
  |=============================                                         |  42%
  |                                                                            
  |==============================                                        |  42%
  |                                                                            
  |==============================                                        |  43%
  |                                                                            
  |===============================                                       |  44%
  |                                                                            
  |=====================================                                 |  52%
  |                                                                            
  |=====================================                                 |  53%
  |                                                                            
  |======================================                                |  54%
  |                                                                            
  |=======================================                               |  55%
  |                                                                            
  |=======================================                               |  56%
  |                                                                            
  |========================================                              |  57%
  |                                                                            
  |=========================================                             |  58%
  |                                                                            
  |=========================================                             |  59%
  |                                                                            
  |==========================================                            |  60%
  |                                                                            
  |===========================================                           |  61%
  |                                                                            
  |===========================================                           |  62%
  |                                                                            
  |============================================                          |  62%
  |                                                                            
  |============================================                          |  63%
  |                                                                            
  |=============================================                         |  64%
  |                                                                            
  |==============================================                        |  65%
  |                                                                            
  |==============================================                        |  66%
  |                                                                            
  |===============================================                       |  67%
  |                                                                            
  |================================================                      |  68%
  |                                                                            
  |================================================                      |  69%
  |                                                                            
  |=================================================                     |  70%
  |                                                                            
  |==================================================                    |  71%
  |                                                                            
  |==================================================                    |  72%
  |                                                                            
  |===================================================                   |  72%
  |                                                                            
  |===================================================                   |  73%
  |                                                                            
  |====================================================                  |  74%
  |                                                                            
  |====================================================                  |  75%
  |                                                                            
  |=====================================================                 |  75%
  |                                                                            
  |=====================================================                 |  76%
  |                                                                            
  |======================================================                |  78%
  |                                                                            
  |=======================================================               |  78%
  |                                                                            
  |=======================================================               |  79%
  |                                                                            
  |========================================================              |  79%
  |                                                                            
  |========================================================              |  80%
  |                                                                            
  |========================================================              |  81%
  |                                                                            
  |=========================================================             |  82%
  |                                                                            
  |==========================================================            |  82%
  |                                                                            
  |==========================================================            |  83%
  |                                                                            
  |=================================================================     |  92%
  |                                                                            
  |=================================================================     |  93%
  |                                                                            
  |==================================================================    |  94%
  |                                                                            
  |==================================================================    |  95%
  |                                                                            
  |===================================================================   |  95%
  |                                                                            
  |===================================================================   |  96%
  |                                                                            
  |====================================================================  |  96%
  |                                                                            
  |====================================================================  |  97%
  |                                                                            
  |====================================================================  |  98%
  |                                                                            
  |===================================================================== |  98%
  |                                                                            
  |===================================================================== |  99%
  |                                                                            
  |======================================================================|  99%
  |                                                                            
  |======================================================================| 100%

# Splitting NAME at ", " into separate County and State columns

p1data <- p1data %>%
  separate_wider_delim(
    cols = NAME,
    delim = ", ",
    names = c("County", "State")
  )

# Keeping only the seven Nashville-area counties

filtereddata <- p1data %>%
  filter(County %in% c(
    "Davidson County",
    "Rutherford County",
    "Williamson County",
    "Cheatham County",
    "Robertson County",
    "Sumner County",
    "Wilson County"
  ))

# Dot plot of each county's renter percentage, ordered by the estimate,
# with horizontal bars spanning estimate minus/plus the margin of error

filtereddata %>%
  ggplot(aes(x = Renters_E, y = reorder(County, Renters_E))) +
  geom_errorbarh(aes(
    xmin = Renters_E - Renters_M,
    xmax = Renters_E + Renters_M
  )) +
  geom_point(color = "darkblue", size = 3) +
  theme_minimal(base_size = 12.5) +
  labs(
    title = "Pct. households being rented",
    subtitle = "Nashville-area counties. Brackets show error margins.",
    x = "2018-2022 ACS estimate",
    y = ""
  )

# Building the map layer: give the estimate columns reader-friendly names,
# then convert the result to an sf object for mapview

mapdata <- filtereddata %>%
  rename(
    Renters = Renters_E,
    Households = Households_E
  ) %>%
  st_as_sf()

# Interactive map colored by the renter percentage, with popups enabled
mapviewOptions(basemaps.color.shuffle = FALSE)
mapview(
  mapdata,
  zcol = "Renters",
  layer.name = "Pct. being rented",
  popup = TRUE
)

# Exporting the attribute table (geometry dropped) as a .csv file; probably
# unnecessary, but better to have it and not need it

CSVdata <- mapdata %>%
  st_drop_geometry()
write.csv(x = CSVdata, file = "p1data.csv", row.names = FALSE)

Running this code shows that renting was most common in Davidson County, where 45.8% of occupied housing units were renter-occupied. Rutherford County was second at 34.8%, and Cheatham County was lowest, with only 18.8% of occupied housing units occupied by renters.

Part 2: Statistics

######## PART 2 ########

# Install and load tidyverse
if (!require("tidyverse"))
  install.packages("tidyverse")
library(tidyverse)

# Load the social media dataset and preview its first ten rows
# NOTE: You may edit the URL to load a different dataset

p2data <- read.csv(
  file = "https://raw.githubusercontent.com/drkblake/Data/main/SocialData.csv"
)
head(p2data, n = 10)

##    ID  Type Impressions
## 1   1 Photo         695
## 2   2  Text         940
## 3   3 Photo        1196
## 4   4 Photo         936
## 5   5 Photo        1389
## 6   6 Photo         857
## 7   7  Text         797
## 8   8 Photo        1810
## 9   9 Photo        1086
## 10 10 Video        1416

# Copy the outcome (Impressions) and grouping (Type) columns into the
# generic DV and IV columns used by the rest of the analysis
p2data <- p2data %>%
  mutate(
    DV = Impressions,
    IV = Type
  )

# Graph the group distributions and averages: one histogram panel per IV
# group, with a vertical line at each group's mean of DV
averages <- group_by(p2data, IV) %>%
  summarise(mean = mean(DV, na.rm = TRUE))

# A single styled histogram layer is enough; an extra unstyled layer would
# be drawn underneath it and hidden completely.
ggplot(p2data, aes(x = DV)) +
  geom_histogram(color = "black", fill = "#1f78b4") +
  facet_grid(IV ~ .) +
  geom_vline(data = averages, aes(xintercept = mean))

# Show each IV group's size along with the mean, standard deviation,
# minimum, and maximum of DV (missing values ignored)
p2data %>%
  group_by(IV) %>%
  summarise(
    count = n(),
    mean = mean(DV, na.rm = TRUE),
    sd = sd(DV, na.rm = TRUE),
    min = min(DV, na.rm = TRUE),
    max = max(DV, na.rm = TRUE)
  )

## # A tibble: 3 × 6
##   IV    count  mean    sd   min   max
##   <chr> <int> <dbl> <dbl> <int> <int>
## 1 Photo    58 1035.  297.   397  1810
## 2 Text     43  999.  278.   515  1746
## 3 Video    39 1370.  307.   829  1952

# Run a one-way test of whether mean DV differs across the IV groups.
# scipen = 999 makes R print small p-values in fixed notation instead of
# scientific notation; note that this option stays set for the session.
# var.equal = FALSE requests the version of the test that does not assume
# equal group variances.
options(scipen = 999)
oneway.test(p2data$DV ~ p2data$IV,
            var.equal = FALSE)

## 
##  One-way analysis of means (not assuming equal variances)
## 
## data:  p2data$DV and p2data$IV
## F = 19.119, num df = 2.000, denom df = 85.525, p-value = 0.000000137

# If the ANOVA detects significant difference, run
# this post-hoc procedure to learn which
# group pairs differed significantly.
# The model is refit with aov() because TukeyHSD() takes a fitted aov
# object; its output lists each pairwise mean difference with a 95%
# family-wise confidence interval and an adjusted p-value.
# NOTE(review): the test above did not assume equal variances, while a
# Tukey comparison on an aov fit presumably does - confirm this is intended.

anova_1 <- aov(p2data$DV ~ p2data$IV)
TukeyHSD(anova_1)

##   Tukey multiple comparisons of means
##     95% family-wise confidence level
## 
## Fit: aov(formula = p2data$DV ~ p2data$IV)
## 
## $`p2data$IV`
##                  diff       lwr      upr     p adj
## Text-Photo  -36.35605 -176.6202 103.9081 0.8126345
## Video-Photo 334.87710  190.5414 479.2128 0.0000005
## Video-Text  371.23315  217.1076 525.3587 0.0000002

Using a one-way ANOVA to compare the group averages, we can see that video posts get significantly more impressions, on average, than both photo posts and text posts. The biggest difference lies between video posts and text posts: the videos' mean is about 371 impressions higher than the text posts' mean. Photo and text posts did not differ significantly from each other (adjusted p ≈ 0.81).

Part 3: Text Analysis

######## PART 3 ########

if (!require("tidyverse")) install.packages("tidyverse")
if (!require("tidytext")) install.packages("tidytext")

library(tidyverse)
library(tidytext)

# Load the White House posts dataset
p3data <- read.csv(
  file = "https://raw.githubusercontent.com/drkblake/Data/main/WhiteHouse.csv"
)

# Split the Full.Text column into one word per row, then count how often
# each word occurs, most frequent first
tidy_text <- p3data %>%
  unnest_tokens(output = word, input = Full.Text) %>%
  count(word, sort = TRUE)

# Deleting standard stop words
# The join key is given explicitly so the match is always on `word` and
# dplyr does not have to guess (and announce) the joining column.
data("stop_words")
tidy_text <- tidy_text %>%
  anti_join(stop_words, by = "word")

# Deleting extra tokens that are URL and retweet residue, not real words
my_stopwords <- tibble(word = c("https",
                                "t.co",
                                "rt"))
tidy_text <- tidy_text %>%
  anti_join(my_stopwords, by = "word")

# Open the remaining word counts in the data viewer for inspection
view(tidy_text)

While it may be a general answer, the most common theme present is “Domestic Affairs.” Some of the most frequent words are “jobs,” “families,” “health,” “inflation,” and “care.” When these terms are searched for in the p3data data frame, they usually appear in the context of American matters rather than foreign affairs.

Final Exam Lab

Jason Noble

2024-04-22

Part 1: Mapping

Part 2: Statistics

Part 3: Text Analysis