rm(list=ls()) # I like to use this to clear my global environment
import <- read.csv('/Users/aakashupraity/Desktop/owsfinal2021/rawdata.csv', header=TRUE, stringsAsFactors = FALSE)
library(dplyr)
library(lubridate)
colnames(import)[colnames(import)=="DATE.PUB...MM.DD.YYYY."] <-"when" #Date article was published
colnames(import)[colnames(import)=="NEWSPAPER"] <-"paper" #Name of publication
colnames(import)[colnames(import)=="TITLE"] <-"article" #Title of article
colnames(import)[colnames(import)=="CITATION"] <-"cit" #Citation
colnames(import)[colnames(import)=="PUB..LOCATION"] <-"place" #Location of publishing house
colnames(import)[colnames(import)=="FULL.CLEANED.TEXT"] <-"edited" #Edited article text
colnames(import)[colnames(import)=="FILE.NAME"] <-"code" #Article identifier
import <- janitor::remove_empty(import, which = "cols") #removes empty columns
import$when <- lubridate:: mdy(import$when) #categorize data as formatted date
data <- import %>%
dplyr::select(when, place, paper, article, cit, code, edited) %>%
group_by(article)
str(data)
## tibble [2,431 × 7] (S3: grouped_df/tbl_df/tbl/data.frame)
## $ when : Date[1:2431], format: "2001-05-02" "2004-04-07" ...
## $ place : chr [1:2431] "Dallas" "Dallas" "Portland" "Portland" ...
## $ paper : chr [1:2431] "Polk County Itemizer-Observer" "Polk County Itemizer-Observer" "Willamette Weekly" "Willamette Weekly" ...
## $ article: chr [1:2431] "Water quality focus of advisory council" "Just what is the Luckiamute Watershed Council? Polk County Itemizer-Observer" "Something in the Water" "\"Witchcraft\" For Bureaucrats" ...
## $ cit : chr [1:2431] "Water quality focus of advisory council. (2001, May 2).Polk County Itemizer-Observer. Retrieved from http://www"| __truncated__ "Just what is the Luckiamute Watershed Council? Polk County Itemizer-Observer. (2004, April 7).Polk County Itemi"| __truncated__ "Bundick, A. N. (2005, March 29). Something in the Water. Willamette Week. Retrieved from http://www.wweek.com/p"| __truncated__ "Brosy, A. (2006, August 15). \"Witchcraft\" For Bureaucrats. Willamette Weekly. Retrieved from http://www.wweek"| __truncated__ ...
## $ code : chr [1:2431] "DA012PC122" "DA042PC123" "PO051WW043" "PO063WW028" ...
## $ edited : chr [1:2431] "Water quality and availability has been in the headlines a lot lately, but as most rural residents know, water "| __truncated__ "The Luckiamute Watershed Council works to improve water quality for humans, fish and wildlife.The Luckiamute Wa"| __truncated__ "Portlanders have voted down fluoridation three times, but now the state Legislature may force it down our throa"| __truncated__ "Dick Torpey squints at the hot summer sky and slowly walks across a parking lot with two thin, yard-long rods h"| __truncated__ ...
## - attr(*, "groups")= tibble [2,408 × 2] (S3: tbl_df/tbl/data.frame)
## ..$ article: chr [1:2408] " ‘Polluted by Money’ series underscores our commitment to journalism in the public interest (column)" " Portland restaurants scramble after Friday's boil water notice." "‘Affordability concerns’: Costly arsenic solution will trickle down to city’s residents" "‘Carbon pollution isn’t free’: How Oregon can cap, trade, reinvest" ...
## ..$ .rows : list<int> [1:2408]
## .. ..$ : int 1946
## .. ..$ : int 113
## .. ..$ : int 1684
## .. ..$ : int 804
## .. ..$ : int 755
## .. ..$ : int 2050
## .. ..$ : int 664
## .. ..$ : int 1457
## .. ..$ : int 306
## .. ..$ : int 1780
## .. ..$ : int 41
## .. ..$ : int 2157
## .. ..$ : int 2055
## .. ..$ : int 2065
## .. ..$ : int 4
## .. ..$ : int 819
## .. ..$ : int 1343
## .. ..$ : int 204
## .. ..$ : int 611
## .. ..$ : int 133
## .. ..$ : int 651
## .. ..$ : int 636
## .. ..$ : int 352
## .. ..$ : int 403
## .. ..$ : int 181
## .. ..$ : int 610
## .. ..$ : int 1065
## .. ..$ : int 224
## .. ..$ : int 2409
## .. ..$ : int 726
## .. ..$ : int 1034
## .. ..$ : int 1851
## .. ..$ : int 922
## .. ..$ : int 706
## .. ..$ : int 893
## .. ..$ : int 1507
## .. ..$ : int 1030
## .. ..$ : int 465
## .. ..$ : int 1654
## .. ..$ : int 149
## .. ..$ : int 1696
## .. ..$ : int 483
## .. ..$ : int 980
## .. ..$ : int 952
## .. ..$ : int 1080
## .. ..$ : int 738
## .. ..$ : int 2395
## .. ..$ : int 540
## .. ..$ : int 1231
## .. ..$ : int 2031
## .. ..$ : int 277
## .. ..$ : int 155
## .. ..$ : int 1858
## .. ..$ : int 2096
## .. ..$ : int 992
## .. ..$ : int 2027
## .. ..$ : int 76
## .. ..$ : int 1557
## .. ..$ : int 1818
## .. ..$ : int 1009
## .. ..$ : int 688
## .. ..$ : int 191
## .. ..$ : int 1887
## .. ..$ : int 1513
## .. ..$ : int 449
## .. ..$ : int 568
## .. ..$ : int 2415
## .. ..$ : int 1239
## .. ..$ : int 515
## .. ..$ : int 2057
## .. ..$ : int 1484
## .. ..$ : int 2052
## .. ..$ : int 107
## .. ..$ : int 1765
## .. ..$ : int 1961
## .. ..$ : int 185
## .. ..$ : int 564
## .. ..$ : int 1769
## .. ..$ : int 1168
## .. ..$ : int 2136
## .. ..$ : int 1438
## .. ..$ : int 358
## .. ..$ : int 460
## .. ..$ : int 459
## .. ..$ : int 1933
## .. ..$ : int 1313
## .. ..$ : int 2040
## .. ..$ : int 775
## .. ..$ : int 690
## .. ..$ : int 123
## .. ..$ : int 752
## .. ..$ : int 1963
## .. ..$ : int 1190
## .. ..$ : int 578
## .. ..$ : int 2162
## .. ..$ : int 732
## .. ..$ : int 1638
## .. ..$ : int 2266
## .. ..$ : int 895
## .. .. [list output truncated]
## .. ..@ ptype: int(0)
## ..- attr(*, ".drop")= logi TRUE
library(tidytext)
library(tm)
safety<- c("Health","Quality","Chemical","Lead","Hazardous","Test","Tests","Temperature","Chemicals","hazards","Testing","kit","Testing","kits","advisory","Notices","Notice","lead","mercury","nitrates","cyanobacteria","arsenic","nitrate","cyanobacterium","Quality","safety","safe","dangerous","recommendations","regulations","recommendation","regulation","fish","advisories","boil","protected","clean","recreation","chlorine","chloromine","estuarine","habitat","pollution","contamination","oil","spill","cleanup","botulism","clear","organisms","bacteria","E.","Coli","virus","parasite","toxic","algae","blooms","salmon","sewage","steelhead","tainted","turbid","turbidity","taste")
affordability<- c("Fund","Funds","Cost","Costs","Low","Income","Vulnerable","Bills","Tax","Bill","Taxes","rate","rates","relief","payments","moratorium","funding")
reliability<- c("Shortages","Shortage","aquifer","precipitation","renovation","culvert","renovations","repair","dam","source","lines","pipe","sewer","recharge","supply","tide","gate","tide","gates","repairs","infrastructure","dams","pipes","sewers","storage","retention","construction","irrigation","restoration","drainage","levee","dike","well","wells","damage","damages","damaged","overflow","combined","sewer","overflow","leak","leaks","leaky","plumbing","utility","irrigation","canals","king","tides","water","rights","recycled","desalination","reclamation","delivery","takings","permit","conserve","conservation","transfers","upstream","downstream","fire","wastewater","drinking","water","main","water","main","break","reservoir")
availability<- c("drought","droughts","flood","floods","scarcity","precipitation","rain","snow","snowmelt","rainwater","groundwater","outflow","ice","tsunami","receding","recede","snowpack","storms")
Oregon_stop_words <- bind_rows(tibble(word = c("water","Oregon", "city", "city's", "Bend", "Baker", "warm","springs", "oregon", "Portland", "portland", "it's", "Vale", "we're", "vale", "bend"), #function to automatically add upper and lower cases?
lexicon = c("custom")),
stop_words)
tidyarticles <- data %>%
filter(when > "2015-12-31") %>%
group_by(article, place, when)
tidyarticles$OWRD <- "UNASSIGNED"
tidyarticles$OWRD[grep("Portland", tidyarticles$place)] <- "North West Region"
tidyarticles$OWRD[grep("Astoria", tidyarticles$place)] <- "North West Region"
tidyarticles$OWRD[grep("Clatskanie", tidyarticles$place)] <- "North West Region"
tidyarticles$OWRD[grep("Salem", tidyarticles$place)] <- "North West Region"
tidyarticles$OWRD[grep("Tillamook", tidyarticles$place)] <- "North West Region"
tidyarticles$OWRD[grep("Dallas", tidyarticles$place)] <- "North West Region"
tidyarticles$OWRD[grep("Newport", tidyarticles$place)] <- "North West Region"
tidyarticles$OWRD[grep("Baker City", tidyarticles$place)] <- "East Region"
tidyarticles$OWRD[grep("Burns", tidyarticles$place)] <- "East Region"
tidyarticles$OWRD[grep("Ontario", tidyarticles$place)] <- "East Region"
tidyarticles$OWRD[grep("Vale", tidyarticles$place)] <- "East Region"
tidyarticles$OWRD[grep("La Grande", tidyarticles$place)] <- "East Region"
tidyarticles$OWRD[grep("Hood River", tidyarticles$place)] <- "North Central Region"
tidyarticles$OWRD[grep("Pendleton", tidyarticles$place)] <- "North Central Region"
tidyarticles$OWRD[grep("The Dalles", tidyarticles$place)] <- "North Central Region"
tidyarticles$OWRD[grep("Bend", tidyarticles$place)] <- "South Central Region"
tidyarticles$OWRD[grep("Klamath Falls", tidyarticles$place)] <- "South Central Region"
tidyarticles$OWRD[grep("Lakeview", tidyarticles$place)] <- "South Central Region"
tidyarticles$OWRD[grep("Sisters", tidyarticles$place)] <- "South Central Region"
tidyarticles$OWRD[grep("Warm Springs", tidyarticles$place)] <- "South Central Region"
tidyarticles$OWRD[grep("Brookings", tidyarticles$place)] <- "South West Region"
tidyarticles$OWRD[grep("Medford", tidyarticles$place)] <- "South West Region"
tidyarticles$OWRD[grep("Roseburg", tidyarticles$place)] <- "South West Region"
tidyarticles$OWRD[grep("Coos Bay", tidyarticles$place)] <- "South West Region"
tokenarticles <- tidyarticles %>%
group_by(article, place, when, OWRD) %>%
unnest_tokens(word,edited)%>%
filter(when > "2015-12-31") %>%
anti_join(stop_words) %>%
anti_join(Oregon_stop_words)
tidyarticles %>%
group_by(place) %>%
distinct(article) %>%
count() %>%
arrange(desc(n)) %>%
print.data.frame()
## place n
## 1 Salem 251
## 2 Portland 245
## 3 Newport 184
## 4 Klamath Falls 176
## 5 Bend 128
## 6 Coos Bay 128
## 7 Pendleton 122
## 8 Tillamook 102
## 9 Roseburg 100
## 10 Ontario 97
## 11 Burns 68
## 12 Hood River 58
## 13 Warm Springs 56
## 14 Baker City 39
## 15 The Dalles 34
## 16 Clatskanie 31
## 17 Dallas 28
## 18 Sisters 24
## 19 Brookings 20
## 20 La Grande 20
## 21 Medford 20
## 22 Vale 20
## 23 Astoria 10
## 24 Lakeview 9
tidyarticles %>%
group_by(OWRD) %>%
distinct(article) %>%
count() %>%
arrange(desc(n)) %>%
print.data.frame()
## OWRD n
## 1 North West Region 851
## 2 South Central Region 393
## 3 South West Region 268
## 4 East Region 244
## 5 North Central Region 214
library(tidyr)
bing <- tokenarticles %>%
inner_join(get_sentiments("bing")) %>% # I'm telling R to join a column of bing sentiment values to my dataset
count(place, OWRD, when, article, sentiment) %>% # creating a count column of my articles and their cumulative sentiments
spread(sentiment, n, fill=0) %>% # splitting that column based on the +/- sentiments...
mutate(sentiment = positive - negative) %>% #...to analyze them again
rename(bingraw = sentiment) %>% # and now renaming
dplyr::select(-positive, -negative) #and tidying
head(bing)%>%
arrange(desc(when))
## # A tibble: 6 x 5
## # Groups: article, place, when, OWRD [6]
## article place when OWRD bingraw
## <chr> <chr> <date> <chr> <dbl>
## 1 "‘Extreme draining’ of Oregon reservoi… Portla… 2019-05-21 North West… -10
## 2 " ‘Polluted by Money’ series underscor… Portla… 2019-03-23 North West… 7
## 3 "‘Affordability concerns’: Costly arse… Ontario 2018-10-09 East Region -5
## 4 "‘Carbon pollution isn’t free’: How Or… Portla… 2017-03-09 North West… 19
## 5 "‘Connecting Past to Future’: Tribal l… Hood R… 2017-01-13 North Cent… 0
## 6 "‘Hotter, earlier, longer’: Groups pla… Hood R… 2016-08-27 North Cent… 0
afinn <- tokenarticles %>%
inner_join(get_sentiments("afinn")) %>%
group_by(place, OWRD, when, article, value) %>%
summarise(afinnraw = sum(value)) %>% # Summarizing my article Afinn scores slightly differently here
drop_na() %>%
summarise(afinnraw = sum(afinnraw))
head(afinn)
## # A tibble: 6 x 5
## # Groups: place, OWRD, when [6]
## place OWRD when article afinnraw
## <chr> <chr> <date> <chr> <dbl>
## 1 Astor… North West… 2016-06-21 Astoria city dam likely to survive qua… -9
## 2 Astor… North West… 2016-06-22 Stormwater projects top of the list in… 15
## 3 Astor… North West… 2017-12-21 Commercial Crabbing to Start in January 0
## 4 Astor… North West… 2018-01-09 Oregon transportation workers spray it… -18
## 5 Astor… North West… 2018-01-12 Salmon are losing their genetic divers… -3
## 6 Astor… North West… 2018-01-15 Knappa Water Association flushing wate… -1
duolex <- data.frame(inner_join(afinn, bing))
duosent <- duolex %>%
gather(key= "sentiment", value = "scores", -c(place, OWRD, when, article)) #alternatively, use pivot_longer to grab just 1 key-value pair
head(duosent)
## place OWRD when
## 1 Astoria North West Region 2016-06-21
## 2 Astoria North West Region 2016-06-22
## 3 Astoria North West Region 2017-12-21
## 4 Astoria North West Region 2018-01-09
## 5 Astoria North West Region 2018-01-12
## 6 Astoria North West Region 2018-01-15
## article
## 1 Astoria city dam likely to survive quake
## 2 Stormwater projects top of the list in Port of Astoria budget
## 3 Commercial Crabbing to Start in January
## 4 Oregon transportation workers spray it safe on Clatsop County highways in winter
## 5 Salmon are losing their genetic diversity
## 6 Knappa Water Association flushing water mains
## sentiment scores
## 1 afinnraw -9
## 2 afinnraw 15
## 3 afinnraw 0
## 4 afinnraw -18
## 5 afinnraw -3
## 6 afinnraw -1
library(scales)
duosent$abscores <- abs(duosent$scores) # created a new column of absolute sentiment values
duosent$perc <- rescale(duosent$scores, to=c(0,100)) # creating a new column of normalized scores - converting the entire range of sentiment scores to a 0-100 scale
duosent$overall <- ifelse(duosent$scores >0, "positive", "negative") #and yet another column of another variable
head(duosent)
## place OWRD when
## 1 Astoria North West Region 2016-06-21
## 2 Astoria North West Region 2016-06-22
## 3 Astoria North West Region 2017-12-21
## 4 Astoria North West Region 2018-01-09
## 5 Astoria North West Region 2018-01-12
## 6 Astoria North West Region 2018-01-15
## article
## 1 Astoria city dam likely to survive quake
## 2 Stormwater projects top of the list in Port of Astoria budget
## 3 Commercial Crabbing to Start in January
## 4 Oregon transportation workers spray it safe on Clatsop County highways in winter
## 5 Salmon are losing their genetic diversity
## 6 Knappa Water Association flushing water mains
## sentiment scores abscores perc overall
## 1 afinnraw -9 9 48.73646 negative
## 2 afinnraw 15 15 57.40072 positive
## 3 afinnraw 0 0 51.98556 negative
## 4 afinnraw -18 18 45.48736 negative
## 5 afinnraw -3 3 50.90253 negative
## 6 afinnraw -1 1 51.62455 negative
library(ggplot2)
plotduosent <- ggplot(duosent, aes(x=when, y=scores, color=overall)) +
geom_jitter()+
geom_smooth(color="black", linetype ="dashed") +
xlab("") #I'm hiding this axis on purpose!
plotduosent+
theme_minimal()
whereplot <- ggplot(duosent, aes(x=place, y=scores))
whenplot <- ggplot(duosent, aes(x=when,y=scores))
whereplot+
# geom_bar(stat='identity', aes(levels(factor(fill=duosent$overall))), position="dodge")+ # I like to write out the code of the tasks I'm trying to accomplish even if the syntax isn't correct
geom_bar(stat = "identity", position="dodge", aes(y=abscores, fill=overall))+
theme_minimal()+
theme(axis.text.x = element_text(angle = 60))
This figure shows all positive and negative sentiment scores in all the newspapers from Oregon.
It seems like sentiment scores are pretty evenly positive and negative across most locations - Roseburg and Hood River MAYBE are some of the obvious exceptions.
whenplot+
geom_bar(stat = "identity", position="stack", aes(y=scores, fill=overall))+ #I'm constructing this even though it will barely be visible
geom_smooth(method="loess")+ #chose the loess method for smoothing because of the presence of outliers
guides(fill=FALSE)+ #removed the legend
scale_x_date(date_labels = "%y")+
theme_minimal()+
theme_linedraw()+
facet_wrap(~place, ncol = 4)
Resulting in:
“LDA is a mathematical method for …. finding the mixture of words that is associated with each topic, while also determining the mixture of topics that describes each document.” Silge 2017
Provides a beta: per-topic-per-word probability
token_Portland<-subset(tokenarticles, place=="Portland")
token_Salem<-subset(tokenarticles, place=="Salem")
token_Sisters<-subset(tokenarticles, place=="Sisters")
token_Newport<-subset(tokenarticles, place=="Newport")
token_Klamath_Falls<-subset(tokenarticles, place=="Klamath Falls")
token_Bend<-subset(tokenarticles, place=="Bend")
token_Coos_Bay<-subset(tokenarticles, place=="Coos Bay")
token_Pendleton<-subset(tokenarticles, place=="Pendleton")
token_Tillamook<-subset(tokenarticles, place=="Tillamook")
token_Roseburg<-subset(tokenarticles, place=="Roseburg")
token_Ontario<-subset(tokenarticles, place=="Ontario")
token_Burns<-subset(tokenarticles, place=="Burns")
token_Hood_River<-subset(tokenarticles, place=="Hood River")
token_Warm_Springs<-subset(tokenarticles, place=="Warm Springs")
token_Baker_City<-subset(tokenarticles, place=="Baker City")
token_The_Dalles<-subset(tokenarticles, place=="The Dalles")
token_Clatskanie<-subset(tokenarticles, place=="Clatskanie")
token_Dallas<-subset(tokenarticles, place=="Dallas")
token_Brookings<-subset(tokenarticles, place=="Brookings")
token_La_Grande<-subset(tokenarticles, place=="La Grande")
token_Medford<-subset(tokenarticles, place=="Medford")
token_Vale<-subset(tokenarticles, place=="Vale")
token_Astoria<-subset(tokenarticles, place=="Astoria")
token_Lakeview<-subset(tokenarticles, place=="Lakeview")
library(topicmodels)
library(tidyr)
Warm_Springs_dtm <- token_Warm_Springs %>%
anti_join(stop_words) %>%
anti_join(Oregon_stop_words) %>%
count(article, word, sort = TRUE) %>%
cast_dtm(article, word, n)
Warm_Springs_LDA <- LDA(Warm_Springs_dtm, k = 4)
Warm_Springs_LDA
## A LDA_VEM topic model with 4 topics.
Warm_Spring_topics <- tidy(Warm_Springs_LDA, matrix = "beta")
Warm_Spring_topics
## # A tibble: 11,976 x 3
## topic term beta
## <int> <chr> <dbl>
## 1 1 treatment 0.000724
## 2 2 treatment 0.0145
## 3 3 treatment 0.0108
## 4 4 treatment 0.00173
## 5 1 community 0.00482
## 6 2 community 0.00636
## 7 3 community 0.00434
## 8 4 community 0.0123
## 9 1 tribal 0.00301
## 10 2 tribal 0.0127
## # … with 11,966 more rows
Warm_Springs_top_terms <- Warm_Spring_topics %>%
group_by(topic) %>%
top_n(10, beta) %>%
ungroup() %>%
arrange(topic, -beta)
Warm_Springs_top_terms
## # A tibble: 40 x 3
## topic term beta
## <int> <chr> <dbl>
## 1 1 salmon 0.0139
## 2 1 tribes 0.0116
## 3 1 fish 0.0111
## 4 1 dams 0.0106
## 5 1 judge 0.00890
## 6 1 river 0.00867
## 7 1 tower 0.00835
## 8 1 spill 0.00723
## 9 1 court 0.00723
## 10 1 federal 0.00698
## # … with 30 more rows
Warm_Springs_top_terms %>%
mutate(term = reorder_within(term, beta, topic)) %>%
ggplot(aes(beta, term, fill = factor(topic))) +
geom_col(show.legend = FALSE) +
facet_wrap(~ topic, scales = "free") +
scale_y_reordered()
saracounts <- tokenarticles %>% distinct(article, word, .keep_all=TRUE)
saracounts$safety<- as.numeric(saracounts$word %in% safety)
saracounts$reliability<- as.numeric(saracounts$word %in% reliability)
saracounts$availibility<- as.numeric(saracounts$word %in% availability)
saracounts$affordability<- as.numeric(saracounts$word %in% affordability)
saracounttidy <- saracounts[rowSums(saracounts[, 6:9] == 0) < 4,]
saratidyarticle <- aggregate(cbind(safety,reliability, availibility, affordability) ~ article, data = saracounttidy, FUN = sum, na.rm = TRUE)
saratidyplace <- aggregate(cbind(safety,reliability, availibility, affordability) ~ place, data = saracounttidy, FUN = sum, na.rm = TRUE)
saratidywrd <- aggregate(cbind(safety,reliability, availibility, affordability) ~ OWRD, data = saracounttidy, FUN = sum, na.rm = TRUE)
saratidywrd
## OWRD safety reliability availibility affordability
## 1 East Region 371 743 202 77
## 2 North Central Region 432 735 142 87
## 3 North West Region 1990 2461 540 267
## 4 South Central Region 936 1565 290 138
## 5 South West Region 582 737 150 65
saratidyplace
## place safety reliability availibility affordability
## 1 Astoria 19 22 4 2
## 2 Baker City 41 120 51 17
## 3 Bend 323 666 119 41
## 4 Brookings 39 54 14 1
## 5 Burns 116 177 54 37
## 6 Clatskanie 73 91 18 8
## 7 Coos Bay 263 312 45 46
## 8 Dallas 43 93 6 11
## 9 Hood River 122 142 36 15
## 10 Klamath Falls 463 623 127 50
## 11 La Grande 30 46 15 5
## 12 Lakeview 9 31 16 3
## 13 Medford 41 57 16 5
## 14 Newport 328 426 76 42
## 15 Ontario 144 333 52 17
## 16 Pendleton 228 467 97 58
## 17 Portland 808 843 162 86
## 18 Roseburg 239 314 75 13
## 19 Salem 504 763 216 81
## 20 Sisters 35 81 20 16
## 21 The Dalles 82 126 9 14
## 22 Tillamook 215 223 58 37
## 23 Vale 40 67 30 1
## 24 Warm Springs 106 164 8 28
head(saratidyarticle,5)
## article
## 1 ‘Polluted by Money’ series underscores our commitment to journalism in the public interest (column)
## 2 ‘Affordability concerns’: Costly arsenic solution will trickle down to city’s residents
## 3 ‘Carbon pollution isn’t free’: How Oregon can cap, trade, reinvest
## 4 ‘Connecting Past to Future’: Tribal liaison Paul Lumley speaks about issues, crises facing tribes along the river
## 5 ‘Extreme draining’ of Oregon reservoir eliminates invasive species, helps endangered salmon
## safety reliability availibility affordability
## 1 5 0 0 0
## 2 2 4 0 0
## 3 5 1 2 0
## 4 2 3 0 0
## 5 3 6 0 0
library(ggplot2)
library(scales)
t_saraowrd <- saratidywrd %>%
gather(safety:affordability, key= SARA, value = Count)
t_saraowrd$OWRD <- as.factor(t_saraowrd$OWRD)
t_saraowrd$SARA <- as.factor(t_saraowrd$SARA)
str(t_saraowrd)
## 'data.frame': 20 obs. of 3 variables:
## $ OWRD : Factor w/ 5 levels "East Region",..: 1 2 3 4 5 1 2 3 4 5 ...
## $ SARA : Factor w/ 4 levels "affordability",..: 4 4 4 4 4 3 3 3 3 3 ...
## $ Count: num 371 432 1990 936 582 ...
p_owrd <- group_by(t_saraowrd, OWRD) %>%
mutate(percent = ((Count/sum(Count)*100)))
p_owrd <- p_owrd[, -3]
str(p_owrd)
## tibble [20 × 3] (S3: grouped_df/tbl_df/tbl/data.frame)
## $ OWRD : Factor w/ 5 levels "East Region",..: 1 2 3 4 5 1 2 3 4 5 ...
## $ SARA : Factor w/ 4 levels "affordability",..: 4 4 4 4 4 3 3 3 3 3 ...
## $ percent: num [1:20] 26.6 30.9 37.8 32 37.9 ...
## - attr(*, "groups")= tibble [5 × 2] (S3: tbl_df/tbl/data.frame)
## ..$ OWRD : Factor w/ 5 levels "East Region",..: 1 2 3 4 5
## ..$ .rows: list<int> [1:5]
## .. ..$ : int [1:4] 1 6 11 16
## .. ..$ : int [1:4] 2 7 12 17
## .. ..$ : int [1:4] 3 8 13 18
## .. ..$ : int [1:4] 4 9 14 19
## .. ..$ : int [1:4] 5 10 15 20
## .. ..@ ptype: int(0)
## ..- attr(*, ".drop")= logi TRUE
p1 <- ggplot(p_owrd) +
geom_col(aes(x=SARA, y=percent), stat= 'identity', position= 'dodge')+
theme_minimal()
p1
p1 <- ggplot(p_owrd) +
geom_col(aes(x=SARA, y=percent, fill = SARA), stat= 'identity', position= 'dodge')+
facet_wrap(p_owrd$OWRD)+
theme_minimal()+
theme(axis.text.x = element_text(angle = 35))
p1
library(ggplot2)
library(scales)
t_saraplace <- saratidyplace %>%
gather(safety:affordability, key= SARA, value = Count)
t_saraplace$place <- as.factor(t_saraplace$place)
t_saraplace$SARA <- as.factor(t_saraplace$SARA)
str(t_saraplace)
## 'data.frame': 96 obs. of 3 variables:
## $ place: Factor w/ 24 levels "Astoria","Baker City",..: 1 2 3 4 5 6 7 8 9 10 ...
## $ SARA : Factor w/ 4 levels "affordability",..: 4 4 4 4 4 4 4 4 4 4 ...
## $ Count: num 19 41 323 39 116 73 263 43 122 463 ...
p_place <- group_by(t_saraplace, place) %>%
mutate(percent = ((Count/sum(Count)*100)))
p_place <- p_place[, -3]
str(p_place)
## tibble [96 × 3] (S3: grouped_df/tbl_df/tbl/data.frame)
## $ place : Factor w/ 24 levels "Astoria","Baker City",..: 1 2 3 4 5 6 7 8 9 10 ...
## $ SARA : Factor w/ 4 levels "affordability",..: 4 4 4 4 4 4 4 4 4 4 ...
## $ percent: num [1:96] 40.4 17.9 28.1 36.1 30.2 ...
## - attr(*, "groups")= tibble [24 × 2] (S3: tbl_df/tbl/data.frame)
## ..$ place: Factor w/ 24 levels "Astoria","Baker City",..: 1 2 3 4 5 6 7 8 9 10 ...
## ..$ .rows: list<int> [1:24]
## .. ..$ : int [1:4] 1 25 49 73
## .. ..$ : int [1:4] 2 26 50 74
## .. ..$ : int [1:4] 3 27 51 75
## .. ..$ : int [1:4] 4 28 52 76
## .. ..$ : int [1:4] 5 29 53 77
## .. ..$ : int [1:4] 6 30 54 78
## .. ..$ : int [1:4] 7 31 55 79
## .. ..$ : int [1:4] 8 32 56 80
## .. ..$ : int [1:4] 9 33 57 81
## .. ..$ : int [1:4] 10 34 58 82
## .. ..$ : int [1:4] 11 35 59 83
## .. ..$ : int [1:4] 12 36 60 84
## .. ..$ : int [1:4] 13 37 61 85
## .. ..$ : int [1:4] 14 38 62 86
## .. ..$ : int [1:4] 15 39 63 87
## .. ..$ : int [1:4] 16 40 64 88
## .. ..$ : int [1:4] 17 41 65 89
## .. ..$ : int [1:4] 18 42 66 90
## .. ..$ : int [1:4] 19 43 67 91
## .. ..$ : int [1:4] 20 44 68 92
## .. ..$ : int [1:4] 21 45 69 93
## .. ..$ : int [1:4] 22 46 70 94
## .. ..$ : int [1:4] 23 47 71 95
## .. ..$ : int [1:4] 24 48 72 96
## .. ..@ ptype: int(0)
## ..- attr(*, ".drop")= logi TRUE
p1 <- ggplot(p_place) +
geom_col(aes(x=place, y=percent, fill = SARA), stat= 'identity', position= 'fill')+
theme_minimal()+
theme(axis.text.x = element_text(angle = 35))
p1
p1 <- ggplot(p_place) +
geom_col(aes(x=SARA, y=percent, fill = SARA), stat= 'identity', position= 'dodge')+
facet_wrap(p_place$place)+
theme_minimal()+
theme(axis.text.x = element_text(angle = 35))
p1