Recap:Blog Post 3

library(tidyverse)
## -- Attaching packages --------------------------------------- tidyverse 1.3.1 --
## v ggplot2 3.3.3     v purrr   0.3.4
## v tibble  3.1.1     v dplyr   1.0.5
## v tidyr   1.1.3     v stringr 1.4.0
## v readr   1.4.0     v forcats 0.5.1
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
library(quanteda)
## Package version: 3.0.0
## Unicode version: 10.0
## ICU version: 61.1
## Parallel computing: 6 of 6 threads used.
## See https://quanteda.io for tutorials and examples.
library(readtext)
library(quanteda.textplots)
library(spacyr)
library(stopwords)
library(devtools)
## Loading required package: usethis
library(tidytext)
library(plyr)
## ------------------------------------------------------------------------------
## You have loaded plyr after dplyr - this is likely to cause problems.
## If you need functions from both plyr and dplyr, please load plyr first, then dplyr:
## library(plyr); library(dplyr)
## ------------------------------------------------------------------------------
## 
## Attaching package: 'plyr'
## The following objects are masked from 'package:dplyr':
## 
##     arrange, count, desc, failwith, id, mutate, rename, summarise,
##     summarize
## The following object is masked from 'package:purrr':
## 
##     compact
library(dplyr)

Load Files

my_texts <- readtext::readtext("C:/Users/kebre/Rwd/Minimum Wage Research/State")

Build Corpus

#State
minwage_corpus <- corpus(my_texts)
minwage_summary <- summary(minwage_corpus)
minwage_summary
## Corpus consisting of 14 documents, showing 14 documents:
## 
##               Text Types Tokens Sentences
##       Arkansas.pdf  2049  14290       675
##       Colorado.pdf   118    185         7
##      Florida.1.txt   399   1503        49
##      Florida.2.txt   383    978        18
##           FLSA.pdf  3386  77668      2357
##        Georgia.txt   185    394         6
##  Massachusetts.pdf   267   2332        22
##      Minnesota.pdf   528   2172        46
##          notes.txt   116    770         1
##   Pennsylvania.pdf  1513  10363       493
##          Texas.txt   799   4088       245
##           Utah.pdf   551   2343        48
##      Wisconsin.pdf  2487  21060       793
##        Wyoming.pdf  5445  97310      2094

Metadata

#State
docvars(minwage_corpus)
## data frame with 0 columns and 14 rows
minwage_summary$book <- "Minimum Wage Legislation"
minwage_summary
## Corpus consisting of 14 documents, showing 14 documents:
## 
##               Text Types Tokens Sentences                     book
##       Arkansas.pdf  2049  14290       675 Minimum Wage Legislation
##       Colorado.pdf   118    185         7 Minimum Wage Legislation
##      Florida.1.txt   399   1503        49 Minimum Wage Legislation
##      Florida.2.txt   383    978        18 Minimum Wage Legislation
##           FLSA.pdf  3386  77668      2357 Minimum Wage Legislation
##        Georgia.txt   185    394         6 Minimum Wage Legislation
##  Massachusetts.pdf   267   2332        22 Minimum Wage Legislation
##      Minnesota.pdf   528   2172        46 Minimum Wage Legislation
##          notes.txt   116    770         1 Minimum Wage Legislation
##   Pennsylvania.pdf  1513  10363       493 Minimum Wage Legislation
##          Texas.txt   799   4088       245 Minimum Wage Legislation
##           Utah.pdf   551   2343        48 Minimum Wage Legislation
##      Wisconsin.pdf  2487  21060       793 Minimum Wage Legislation
##        Wyoming.pdf  5445  97310      2094 Minimum Wage Legislation
minwage_summary$chapter <- as.numeric(str_extract(minwage_summary$Text, "[0-9]+"))
minwage_summary
## Corpus consisting of 14 documents, showing 14 documents:
## 
##               Text Types Tokens Sentences                     book chapter
##       Arkansas.pdf  2049  14290       675 Minimum Wage Legislation      NA
##       Colorado.pdf   118    185         7 Minimum Wage Legislation      NA
##      Florida.1.txt   399   1503        49 Minimum Wage Legislation       1
##      Florida.2.txt   383    978        18 Minimum Wage Legislation       2
##           FLSA.pdf  3386  77668      2357 Minimum Wage Legislation      NA
##        Georgia.txt   185    394         6 Minimum Wage Legislation      NA
##  Massachusetts.pdf   267   2332        22 Minimum Wage Legislation      NA
##      Minnesota.pdf   528   2172        46 Minimum Wage Legislation      NA
##          notes.txt   116    770         1 Minimum Wage Legislation      NA
##   Pennsylvania.pdf  1513  10363       493 Minimum Wage Legislation      NA
##          Texas.txt   799   4088       245 Minimum Wage Legislation      NA
##           Utah.pdf   551   2343        48 Minimum Wage Legislation      NA
##      Wisconsin.pdf  2487  21060       793 Minimum Wage Legislation      NA
##        Wyoming.pdf  5445  97310      2094 Minimum Wage Legislation      NA
docvars(minwage_corpus) <- minwage_summary
docvars(minwage_corpus)
##                 Text Types Tokens Sentences                     book chapter
## 1       Arkansas.pdf  2049  14290       675 Minimum Wage Legislation      NA
## 2       Colorado.pdf   118    185         7 Minimum Wage Legislation      NA
## 3      Florida.1.txt   399   1503        49 Minimum Wage Legislation       1
## 4      Florida.2.txt   383    978        18 Minimum Wage Legislation       2
## 5           FLSA.pdf  3386  77668      2357 Minimum Wage Legislation      NA
## 6        Georgia.txt   185    394         6 Minimum Wage Legislation      NA
## 7  Massachusetts.pdf   267   2332        22 Minimum Wage Legislation      NA
## 8      Minnesota.pdf   528   2172        46 Minimum Wage Legislation      NA
## 9          notes.txt   116    770         1 Minimum Wage Legislation      NA
## 10  Pennsylvania.pdf  1513  10363       493 Minimum Wage Legislation      NA
## 11         Texas.txt   799   4088       245 Minimum Wage Legislation      NA
## 12          Utah.pdf   551   2343        48 Minimum Wage Legislation      NA
## 13     Wisconsin.pdf  2487  21060       793 Minimum Wage Legislation      NA
## 14       Wyoming.pdf  5445  97310      2094 Minimum Wage Legislation      NA

Smol

#State
small_corpus <- corpus_subset(minwage_corpus, Tokens < 5000)
summary(small_corpus)
## Corpus consisting of 9 documents, showing 9 documents:
## 
##               Text Types Tokens Sentences              Text Types Tokens
##       Colorado.pdf   118    185         7      Colorado.pdf   118    185
##      Florida.1.txt   399   1503        49     Florida.1.txt   399   1503
##      Florida.2.txt   383    978        18     Florida.2.txt   383    978
##        Georgia.txt   185    394         6       Georgia.txt   185    394
##  Massachusetts.pdf   267   2332        22 Massachusetts.pdf   267   2332
##      Minnesota.pdf   528   2172        46     Minnesota.pdf   528   2172
##          notes.txt   116    770         1         notes.txt   116    770
##          Texas.txt   799   4088       245         Texas.txt   799   4088
##           Utah.pdf   551   2343        48          Utah.pdf   551   2343
##  Sentences                     book chapter
##          7 Minimum Wage Legislation      NA
##         49 Minimum Wage Legislation       1
##         18 Minimum Wage Legislation       2
##          6 Minimum Wage Legislation      NA
##         22 Minimum Wage Legislation      NA
##         46 Minimum Wage Legislation      NA
##          1 Minimum Wage Legislation      NA
##        245 Minimum Wage Legislation      NA
##         48 Minimum Wage Legislation      NA

White space

#State
# the default breaks on white space
minwage_tokens <- tokens(minwage_corpus)
print(minwage_tokens)
## Tokens consisting of 14 documents and 6 docvars.
## Arkansas.pdf :
##  [1] "AGENCY"         "#010.14"        "ADMINISTRATIVE" "RULES"         
##  [5] "REGARDING"      "THE"            "ARKANSAS"       "MINIMUM"       
##  [9] "WAGE"           "ACT"            "LABOR"          "STANDARDS"     
## [ ... and 14,278 more ]
## 
## Colorado.pdf :
##  [1] "Division"   "of"         "Labor"      "Standards"  "and"       
##  [6] "Statistics" "633"        "17th"       "Street"     ","         
## [11] "Suite"      "600"       
## [ ... and 173 more ]
## 
## Florida.1.txt :
##  [1] "448.110â"      "\200"             "ƒState"        "minimum"      
##  [5] "wage"          ";"             "annual"        "wage"         
##  [9] "adjustment"    ";"             "enforcement.â" "\200"            
## [ ... and 1,491 more ]
## 
## Florida.2.txt :
##  [1] "SECTION"  "24"       "."        "â"        "\200"        "ƒFlorida"
##  [7] "minimum"  "wage.â"   "\200"        "\""       "("        "a"       
## [ ... and 966 more ]
## 
## FLSA.pdf :
##  [1] "29"        "USC"       "Ch"        "."         "8"         ":"        
##  [7] "FAIR"      "LABOR"     "STANDARDS" "From"      "Title"     "29"       
## [ ... and 77,656 more ]
## 
## Georgia.txt :
##  [1] "2018"       "Georgia"    "Code"       "Title"      "34"        
##  [6] "-"          "Labor"      "and"        "Industrial" "Relations" 
## [11] "Chapter"    "4"         
## [ ... and 382 more ]
## 
## [ reached max_ndoc ... 8 more documents ]
# as well as numbers
minwage_tokens <- tokens(minwage_corpus, 
    remove_punct = T,
    remove_numbers = T)
print(minwage_tokens)
## Tokens consisting of 14 documents and 6 docvars.
## Arkansas.pdf :
##  [1] "AGENCY"         "#010.14"        "ADMINISTRATIVE" "RULES"         
##  [5] "REGARDING"      "THE"            "ARKANSAS"       "MINIMUM"       
##  [9] "WAGE"           "ACT"            "LABOR"          "STANDARDS"     
## [ ... and 11,752 more ]
## 
## Colorado.pdf :
##  [1] "Division"   "of"         "Labor"      "Standards"  "and"       
##  [6] "Statistics" "17th"       "Street"     "Suite"      "Denver"    
## [11] "CO"         "80202-2107"
## [ ... and 125 more ]
## 
## Florida.1.txt :
##  [1] "448.110â"      "\200"             "ƒState"        "minimum"      
##  [5] "wage"          "annual"        "wage"          "adjustment"   
##  [9] "enforcement.â" "\200"             "â"             "\200"            
## [ ... and 1,258 more ]
## 
## Florida.2.txt :
##  [1] "SECTION"  "â"        "\200"        "ƒFlorida" "minimum"  "wage.â"  
##  [7] "\200"        "a"        "â"        "\200"        "ƒPUBLIC"  "POLICY.â"
## [ ... and 858 more ]
## 
## FLSA.pdf :
##  [1] "USC"       "Ch"        "FAIR"      "LABOR"     "STANDARDS" "From"     
##  [7] "Title"     "LABOR"     "CHAPTER"   "FAIR"      "LABOR"     "STANDARDS"
## [ ... and 52,565 more ]
## 
## Georgia.txt :
##  [1] "Georgia"    "Code"       "Title"      "Labor"      "and"       
##  [6] "Industrial" "Relations"  "Chapter"    "Minimum"    "Wage"      
## [11] "Law"        "Â"         
## [ ... and 308 more ]
## 
## [ reached max_ndoc ... 8 more documents ]

Keyowrd in Context

#employee/employer may/may not
kwic.1 <- kwic(minwage_tokens,
      pattern = phrase("employer may"))
head(kwic.1)
## Keyword-in-context with 6 matches.                                                                              
##    [Arkansas.pdf, 895:896]        working on fixed schedules an | employer may
##  [Arkansas.pdf, 1058:1059] Arkansas In unusual circumstances an | employer may
##  [Arkansas.pdf, 1942:1943]        A Conditions of employment An | employer may
##  [Arkansas.pdf, 2287:2288] Learners Learners and Apprentices An | employer may
##  [Arkansas.pdf, 6018:6019]      Deductions from minimum wage An | employer may
##  [Arkansas.pdf, 6069:6070]           the employee in writing An | employer may
##                                       
##  | maintain records showing instead of
##  | petition the director to maintain  
##  | pay a full-time student a          
##  | employ a learner a student         
##  | not make deductions from the       
##  | not make deductions from the
kwic.2 <- kwic(minwage_tokens,
      pattern = phrase("employee may"))
head(kwic.2)
## Keyword-in-context with 6 matches.                                                                     
##    [Arkansas.pdf, 7641:7642]               work time For example an |
##    [Arkansas.pdf, 8596:8597]              more the employer and the |
##  [Arkansas.pdf, 10565:10566]       Enforcement A Employee Claims An |
##      [FLSA.pdf, 16692:16693]         maximum period during which an |
##      [FLSA.pdf, 16725:16726] <U+FFFD> <U+FFFD> <U+FFFD> No eligible |
##      [FLSA.pdf, 17345:17346]                       i and ii that an |
##                                                
##  employee may | voluntarily continue to work at
##  employee may | agree to exclude bona fide     
##  employee may | file a claim with the          
##  employee may | be paid such wage as           
##  employee may | be paid the wage authorized    
##  employee may | be paid the wage authorized
kwic.3 <- kwic(minwage_tokens,
      pattern = phrase("employer may not"))
head(kwic.3)
## Keyword-in-context with 6 matches.                                                                               
##  [Arkansas.pdf, 6018:6020] Deductions from minimum wage An | employer may not |
##  [Arkansas.pdf, 6069:6071]      the employee in writing An | employer may not |
##      [FLSA.pdf, 3157:3159]     regularly receive tips B An | employer may not |
##      [FLSA.pdf, 4630:4632]       of tips determined by the | employer may not |
##      [FLSA.pdf, 5222:5224]       of tips determined by the | employer may not |
##     [Texas.txt, 1121:1123]    EMPLOYEES SUBJECT TO CALL An | employer may not |
##                                     
##  make deductions from the minimum   
##  make deductions from the applicable
##  keep tips received by its          
##  exceed the value of tips           
##  exceed the value of tips           
##  be required to pay an
kwic.4 <- kwic(minwage_tokens,
      pattern = phrase("employee may not"))
head(kwic.4)
## Keyword-in-context with 2 matches.                                                                         
##        [Texas.txt, 2602:2604]         similarly affected employees b An |
##  [Wisconsin.pdf, 13362:13364] uninterrupted or employees relieving that |
##                                         
##  employee may not | be a plaintiff to an
##  employee may not | be on duty for more

Blog Post 4

Objective: Define the parts of institutional statements to locate and hand code documents

Attribute: An actor (individual or corporate) that carries out, or is expected to (or not to) carry out the action(Aim) of the statement. (May contain descriptors of the actor)

Aim: The goal or action of the statement assigned to the statement Attribute.

Context: The context instantiates settings in which the focal action of a statement applies, or qualifies the action indicated in an institutional statement.

Object: The inanimate or animate part of an institutional statement that is the receiver of the action captured in the Aim.

Deontic: A prescriptive operator that defines to what extent the action of an institutional statement is compelled, restrained, or discretionary.

Or else: An incentivising provision associated with te action indicated in a particular institutional statement that can exist wholly within an institutional statement, or be represented in a nested institutional statement

Example:

“…a written notification of proposed suspension orrevocation of certification …”

Attribute: Aim: “may”, “may not”, “shall”, “shall not” Context: Object: “employer”, “employee” Deontic: “wage”, “minimum”, “poor”, “work”, “labor”, “families”, “percent”, “level”, “one”, “union”, “non”, “state”, “poverty”, “income” Or else:

I worked with Nvivo to atuo-code the documents, that way I could pick out which words I want to focus on:

Word Frequency in State Corpus Word Frequency in State Corpus

“Wage” Query in State Corpus “Wage” Query in State Corpus

“May” Query in State Corpus “May” Query in State Corpus

“May Not” Query in State Corpus “May Not” Query in State Corpus

I also created a binder with all the documents and cheat sheets as a guide

Blog Post 5

Objective: Create document feature matrix to identify where key terms show up throughout the states

# create the dfm
minwage_dfm <- dfm(minwage_corpus,
        tolower = TRUE,
        remove_punct = TRUE,
        stem = FALSE,
        remove = stopwords("english")
        )
## Warning: 'dfm.corpus()' is deprecated. Use 'tokens()' first.
## Warning: '...' should not be used for tokens() arguments; use 'tokens()' first.
## Warning: 'remove' is deprecated; use dfm_remove() instead
## Warning: 'stem' is deprecated; use dfm_wordstem() instead
# find out a quick summary of the dfm
minwage_dfm
## Document-feature matrix of: 14 documents, 5,533 features (84.93% sparse) and 6 docvars.
##                features
## docs            agenc #010.14 administr rule regard arkansa minimum wage act
##   Arkansas.pdf     12       1        14   67      4      14      38   81  67
##   Colorado.pdf      0       0         0    0      0       0       5    5   0
##   Florida.1.txt     1       0         0    2      0       0      21   27   7
##   Florida.2.txt     2       0         1    2      0       0      13   16   1
##   FLSA.pdf        104       0        86   12     19       0     118  252 272
##   Georgia.txt       0       0         0    0      0       0       6    6   2
##                features
## docs            labor
##   Arkansas.pdf     43
##   Colorado.pdf      1
##   Florida.1.txt     5
##   Florida.2.txt     2
##   FLSA.pdf        231
##   Georgia.txt       1
## [ reached max_ndoc ... 8 more documents, reached max_nfeat ... 5,523 more features ]
topfeatures(minwage_dfm, 20)
## <U+FFFD>  employ employe   shall section       1     act    wage     may       b 
##    6320    2779    1929    1905    1165    1126    1021     963     921     871 
##    work  provid    rate   state       3       2    hour       l  servic     pub 
##     774     771     756     725     718     694     687     655     626     608
set.seed(1234)

# draw the wordcloud
textplot_wordcloud(minwage_dfm, min_count = 50, random_order = FALSE)

minwageDict <- dictionary(list(articles = c("the", "a", "and"),
                              conjunctions = c("and", "but", "or", "nor", "for", "yet", "so"),
                               deontics = c("wage", "minimum", "poor", "work", "labor", "families", "percent", "level", "one", "union", "non", "state", "poverty", "income"), 
                              employee = c("employee"),
                              employer = c("employer"),
                          posactions = c("may", "shall"),
                          negactions = c("may not", "shall not")))

minwageDfm <- dfm(minwage_corpus, dictionary = minwageDict)
## Warning: 'dfm.corpus()' is deprecated. Use 'tokens()' first.
## Warning: 'dictionary' and 'thesaurus' are deprecated; use dfm_lookup() instead
minwageDfm[1:10,]
## Document-feature matrix of: 10 documents, 7 features (27.14% sparse) and 6 docvars.
##                features
## docs            articles conjunctions deontics employee employer posactions
##   Arkansas.pdf      1403          825      236      152      109        108
##   Colorado.pdf         9            6       15        0        0          2
##   Florida.1.txt      151           64       71        0        8         34
##   Florida.2.txt       82           65       40        0        3         24
##   FLSA.pdf          4820         2883      795      359      191        593
##   Georgia.txt         23           21       14        3        7          6
##                features
## docs            negactions
##   Arkansas.pdf          12
##   Colorado.pdf           0
##   Florida.1.txt          2
##   Florida.2.txt          2
##   FLSA.pdf              73
##   Georgia.txt            3
## [ reached max_ndoc ... 4 more documents ]

Reflection

For the next steps, I can look into how the positive and negative action terms interact with the employer vs employee terms, as well as the various other key words I’ve identified throughout the documents.