# Packages whose functions this script calls. They are attached here because
# no library() calls are visible elsewhere in the script.
library(tm)
library(wordcloud)
library(RColorBrewer)
library(ggplot2)
library(ggraph)

# Folder holding the SDG 7 PDF articles. Defined once so the working directory
# and the file listing cannot drift apart.
articles_dir <- "C:\\Users\\Adiza Ojei\\Documents\\2021 Hard drive Docs\\HULT ONLINE CLASS\\Data Science R\\Intermediate R\\articles"

# The file names collected below are relative to this folder, so it has to be
# the working directory when the documents are read.
setwd(articles_dir)

# List only PDF files; anything else in the folder is not meant to go through
# the PDF reader.
nm <- list.files(path = articles_dir, pattern = "\\.pdf$", ignore.case = TRUE)
if (length(nm) == 0) {
  stop("No PDF files found in ", articles_dir, call. = FALSE)
}

# PDF reader configured with the "-layout" text option.
Rpdf <- readPDF(control = list(text = "-layout"))

# One corpus document per PDF file.
sdg7 <- Corpus(URISource(nm),
               readerControl = list(reader = Rpdf))
# Clean the corpus step by step. Every step takes the result of the previous
# step as its input; passing the raw corpus `sdg7` to each call would discard
# all earlier steps and keep only the effect of the last one.
# `tolower` is a plain character function, so it is wrapped in
# content_transformer() to keep each element a text document.
sdg7.corpus <- tm_map(sdg7, content_transformer(tolower))
sdg7.corpus <- tm_map(sdg7.corpus, removeNumbers)
sdg7.corpus <- tm_map(sdg7.corpus, removePunctuation)
sdg7.corpus <- tm_map(sdg7.corpus, stripWhitespace)
sdg7.corpus <- tm_map(sdg7.corpus, removeWords, stopwords("english"))
# Extra words dropped on top of the English stop-word list.
sdg7.corpus <- tm_map(sdg7.corpus, removeWords, c("per", "cent", "including"))

# Print a summary of the cleaned corpus.
sdg7.corpus
## <<VCorpus>>
## Metadata:  corpus specific: 0, document level (indexed): 0
## Content:  documents: 4
# Term-document matrix of the cleaned corpus: one row per term, one column per
# document. The control options are listed in their original order.
sdg7.tdm <- TermDocumentMatrix(
  sdg7.corpus,
  control = list(
    removePunctuation = TRUE,
    stopwords = TRUE,
    tolower = TRUE,
    stemming = FALSE,
    removeNumbers = TRUE,
    # Global bounds: drop terms that occur in fewer than 3 documents.
    bounds = list(global = c(3, Inf))
  )
)

# Print the matrix summary (dimensions, sparsity, weighting).
sdg7.tdm
## <<TermDocumentMatrix (terms: 81, documents: 4)>>
## Non-/sparse entries: 275/49
## Sparsity           : 15%
## Maximal term length: 14
## Weighting          : term frequency (tf)
# Show the first ten terms of the matrix across all documents.
inspect(sdg7.tdm[seq_len(10), ])
## <<TermDocumentMatrix (terms: 10, documents: 4)>>
## Non-/sparse entries: 33/7
## Sparsity           : 18%
## Maximal term length: 10
## Weighting          : term frequency (tf)
## Sample             :
##             Docs
## Terms        11436UN-Energy - Goal 7 Ensure access to affordable, reliable, sustainable and modern energy for all.pdf
##   access                                                                                                           10
##   accounted                                                                                                         1
##   achieve                                                                                                           1
##   achieving                                                                                                         1
##   affordable                                                                                                        3
##   africa                                                                                                            9
##   air                                                                                                               0
##   also                                                                                                              0
##   areas                                                                                                             1
##   billion                                                                                                           3
##             Docs
## Terms        Goal-7.pdf SDG_7Final1.pdf SDG7_Brief.pdf
##   access              6              32              3
##   accounted           0               2              1
##   achieve             0               4              2
##   achieving           0               1              3
##   affordable          3               3              4
##   africa              2               5              0
##   air                 2               2              3
##   also                1               1              4
##   areas               0               1              1
##   billion             4              13              2
# Show terms 11 through 20 of the matrix across all documents.
inspect(sdg7.tdm[11:20, ])
## <<TermDocumentMatrix (terms: 10, documents: 4)>>
## Non-/sparse entries: 37/3
## Sparsity           : 7%
## Maximal term length: 11
## Weighting          : term frequency (tf)
## Sample             :
##              Docs
## Terms         11436UN-Energy - Goal 7 Ensure access to affordable, reliable, sustainable and modern energy for all.pdf
##   change                                                                                                             1
##   clean                                                                                                              3
##   climate                                                                                                            1
##   cooking                                                                                                            4
##   countries                                                                                                          2
##   developed                                                                                                          3
##   developing                                                                                                         4
##   development                                                                                                        2
##   economic                                                                                                           2
##   economies                                                                                                          0
##              Docs
## Terms         Goal-7.pdf SDG_7Final1.pdf SDG7_Brief.pdf
##   change               1               9              4
##   clean                4              23              5
##   climate              1               8              9
##   cooking              1              16              1
##   countries            3              25              3
##   developed            0               4              1
##   developing           0              10              2
##   development          2               8              4
##   economic             1               5              1
##   economies            1               2              3
# List the terms with a total count of at least 15 (no upper limit).
findFreqTerms(sdg7.tdm, lowfreq = 15)
##  [1] "access"       "africa"       "billion"      "change"       "clean"       
##  [6] "climate"      "cooking"      "countries"    "developing"   "development" 
## [11] "efficiency"   "electricity"  "energy"       "fuels"        "global"      
## [16] "people"       "progress"     "renewable"    "sustainable"  "technologies"
## [21] "will"
# Keep the frequent terms (total count of at least 15) for the later steps.
sdg7.frq <- findFreqTerms(sdg7.tdm, lowfreq = 15)

# Terms whose correlation with "energy" is at least 0.75.
findAssocs(sdg7.tdm, "energy", corlimit = 0.75)
## $energy
##        improve      renewable    development        efforts            new 
##           1.00           1.00           0.99           0.99           0.99 
##        poverty         sector         change          clean         global 
##           0.99           0.99           0.98           0.98           0.98 
##     investment    sustainable        systems   technologies      countries 
##           0.98           0.98           0.98           0.98           0.97 
##         ensure         health        sectors           will        achieve 
##           0.97           0.97           0.97           0.97           0.96 
##       progress           year        cooking     developing       economic 
##           0.96           0.96           0.95           0.94           0.94 
##         people transformation        without      transport         access 
##           0.94           0.94           0.94           0.93           0.92 
##        billion      education      emissions          major          power 
##           0.92           0.91           0.91           0.91           0.91 
##         public          water      accounted    eradication       improved 
##           0.91           0.91           0.90           0.90           0.90 
##   particularly         system          human          lives           many 
##           0.90           0.87           0.81           0.81           0.75
# Ordinary matrix of counts: frequent terms in rows, documents in columns.
dm.sdg7 <- as.matrix(
  sdg7.tdm[sdg7.frq, ]
)

# Print the full matrix.
dm.sdg7
##               Docs
## Terms          11436UN-Energy - Goal 7 Ensure access to affordable, reliable, sustainable and modern energy for all.pdf
##   access                                                                                                             10
##   africa                                                                                                              9
##   billion                                                                                                             3
##   change                                                                                                              1
##   clean                                                                                                               3
##   climate                                                                                                             1
##   cooking                                                                                                             4
##   countries                                                                                                           2
##   developing                                                                                                          4
##   development                                                                                                         2
##   efficiency                                                                                                          1
##   electricity                                                                                                         6
##   energy                                                                                                             23
##   fuels                                                                                                               4
##   global                                                                                                              6
##   people                                                                                                              5
##   progress                                                                                                            3
##   renewable                                                                                                           4
##   sustainable                                                                                                         4
##   technologies                                                                                                        3
##   will                                                                                                                2
##               Docs
## Terms          Goal-7.pdf SDG_7Final1.pdf SDG7_Brief.pdf
##   access                6              32              3
##   africa                2               5              0
##   billion               4              13              2
##   change                1               9              4
##   clean                 4              23              5
##   climate               1               8              9
##   cooking               1              16              1
##   countries             3              25              3
##   developing            0              10              2
##   development           2               8              4
##   efficiency            0               7             10
##   electricity          11              18              1
##   energy               16              83             33
##   fuels                 3               4              4
##   global                1              19              8
##   people                5              12              4
##   progress              0              11              1
##   renewable             2              19              6
##   sustainable           6              20              9
##   technologies          1               8              3
##   will                  1              14              7
# Total count of each frequent term over all documents, largest first.
sort_dm.sdg7 <- sort(rowSums(dm.sdg7), decreasing = TRUE)

# Two-column data frame (term, total count) that the plots below draw from.
df_sdg7 <- data.frame(
  word = names(sort_dm.sdg7),
  freq = sort_dm.sdg7
)

# Seed the random number generator before drawing the word cloud.
set.seed(1234)

# Word cloud of the frequent terms, sized by total count.
wordcloud(
  words = df_sdg7$word,
  freq = df_sdg7$freq,
  min.freq = 1,
  max.words = 75,
  random.order = FALSE,
  rot.per = 0.35,
  colors = brewer.pal(8, "Dark2")
)

# Bar chart: one bar per term, height equal to its total count, coloured by term.
sdg7_barchart <- ggplot(
  data = df_sdg7,
  mapping = aes(x = word, y = freq, fill = word)
) +
  geom_col(position = position_dodge())

# Draw the chart.
sdg7_barchart

# Network-style plot built directly from the word/frequency data frame, with
# edges, node points and node labels.
# NOTE(review): df_sdg7 has only `word` and `freq` columns, so the graph is
# presumably built by treating each (word, freq) row as an edge -- confirm this
# is the intended structure.
ggraph(df_sdg7, layout = "fr") +
  geom_edge_link() +
  geom_node_point() +
  geom_node_text(aes(label = name), vjust = 1, hjust = 1)

# Same columns as the bar chart, drawn in polar coordinates with the count
# mapped to the angle.
chart_sdg7 <- ggplot(
  data = df_sdg7,
  mapping = aes(x = word, y = freq, fill = word)
) +
  geom_col() +
  coord_polar(theta = "y", start = 0)

# Draw the chart.
chart_sdg7

# Point plot: one point per term at its total count, coloured by term.
ggplot(df_sdg7, aes(x = word, y = freq, colour = word)) +
  geom_point()