#rm(list=ls())

library(tm)
## Loading required package: NLP
getReaders()
##  [1] "readDataframe"           "readDOC"                
##  [3] "readPDF"                 "readPlain"              
##  [5] "readRCV1"                "readRCV1asPlain"        
##  [7] "readReut21578XML"        "readReut21578XMLasPlain"
##  [9] "readTagged"              "readXML"
getSources()
## [1] "DataframeSource" "DirSource"       "URISource"       "VectorSource"   
## [5] "XMLSource"       "ZipSource"
library(wordcloud)
## Loading required package: RColorBrewer
txt2='C:\\Users\\ajaohri\\Desktop\\TUTORIALS\\tm'
b=Corpus(DirSource(txt2), readerControl = list(language = 'eng'))
inspect(b)
## <<SimpleCorpus>>
## Metadata:  corpus specific: 1, document level (indexed): 0
## Content:  documents: 1
## 
##                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                      new.txt 
##                                                                                                        AJAY OHRI\nPrincipal Data Scientist \nEmail id: Ohri2007@gmail.com ; Mobile: 8800772721; DOB: 7th June, 1977\n\nOne of the 5 data scientists in the world featured by KD Nuggets in A Day in the Life of a Data Scientist. https://www.kdnuggets.com/2017/11/day-life-data-scientist.html \nProfile Summary\nAjay Ohri is an experienced Data Scientist with around 14 years work experience. His extensive experience in data science includes the domains of financial services including cross selling cards, loans, insurance, across automobile, telecom and entertainment among others. Proficient in SAS, Python, R, SQL and Tableau. He has worked with remote servers in Data Warehouse environments as well as cloud computing. In addition, he has run a blog Decisionstats.com with more than 1 lakh views per annum every year for 6 years and has written extensively on analytics and APIs for industry websites. His publications include two internationally recognized books in R with a third book in Python, worked and taught on data science especially visualization, machine learning, exploratory data analysis, RFM and LTV analysis, regression modeling, data cleaning and data manipulation. \n\n\nAreas of Expertise\nR, Python, Data Mining, Machine Learning, Data Science, Data Visualization, SAS, Tableau, SQL, Cloud Computing, Exploratory Data Analysis, Financial Services.\n\nEducation\n?\tMBA (Finance, Operations, Marketing) from Indian Institute of Management, Lucknow (2001 – 2003)\n?\tBE (Mechanical Engineering) from Delhi College of Engineering (1997 – 2001)\n\n\nEMPLOYMENT HISTORY\nOrganization\tDesignation\tFrom\tTo\nKogentix\tData Scientist\tJuly 2017\tPresent\nWipro\tPrincipal Consultant, India- Analytics\tFeb 2017\tJuly 2017\nDecisionstats.com (blog)\tConsultant and Founder\tApril 2007\tPresent\nCiti Financial\tManager\tJun 2006\tMar 2007\nWNS\tManager\tMar 2005\tJun 2006\nGECIS\tBusiness Analyst\tFeb 2004\tMar 2005\nExperience in Significant Projects\nName of Project\tBrief of Project\tRole\tTech Environment\tDuration From\tDuration To\nData Science for Indonesian Bank (KOGENTIX)\t1)\tDid customer 360 using Big Data (Hive and PySpark)\n2)\tBuilt K Means Clustering for Customer Segments\n3)\tBuilding Churn Models using AMP (Proprietary product built on PySpark and mySQL)\n4)\tHelped with graph analysis for churn influencers\tSenior Data Scientist\tCloudera Hadoop, Linux\tJul 2017\tPresent\nData Science for DG Systems Central Board for Excise and Customs (CBEC).\n Ministry of Finance, Government of India\n(WIPRO) \t1)\tPredict annual revenues using SAS Enterprise Miner for central excise, imports, exports, service tax\n2)\tCreate clusters of countries based imports and exports data using SAS Enterprise Miner\n3)\tDo L2 Validation for analytical reports using SQL (Sybase) created by developers and L1 validation.\n4)\tHelp with team management and coordination\n5)\tMeet client for client requirements\tPrincipal Consultant\tSAS (Base) and SAS Enterprise Miner, \nSAS Visual Analytics and SAS Fraud Framework\tFeb 2017\tJul-2017\nTraining for multiple clients\n(DECISIONSTATS)\tTraining in SAS, Python and R\tInstructor\tPython, R, SAS\tSept 2016\tFeb 2017\nE commerce and taxation for Govt of Ontario\n(DECISIONSTATS)\tEstimate black market shadow economy of Ontario Canada by web scraping \n\tData Scientist\tPython (Beautiful Soup) and Ubuntu Linux\tOct 2015\tNov 2015\nSpatial analytics and Econometrics- Indicus (Acquired by AC Nielsen)\n(DECISIONSTATS)\tOptimize estimation of economic metrics at micro level of India by  spatial analytics\tData Scientist\tR, PostgresSQL\tJan 2014\tMar 2014\nVisualization and Analytics for Contata\n(DECISIONSTATS)\tHelped with creation of social media analytics engine. \nHelped with segmentation analysis for customers.\n Helped with R based analytics\n\tData Scientist\tR, Javascript\tNov 2013\tJan 2014\nAnalytics for Sutherland Global\n(DECISIONSTATS)\tLed, managed and contributed as a consultant to a large offshoring firm in the practice of financial firm analytics (primarily Middle East Asian Banks). Included training them in SAS as well as working on projects\n\tConsultant\tSAS, Windows\tAug 2011\tJan 2012\nSpatial Analytics and Econometrics for Cinepolis\n(DECISIONSTATS)\tPlacement of cinemas in malls using analytics. This was done by using existing catchment , tickets sold, and fitting in variables like number of screens , road size, parking area, competing malls nearby, socio-economic profile (using SEC market research data) and interpolating spatial analytics using Google Earth\n\tConsultant\tExcel, Solver, Google Earth (KMZ)\tNov 2008\tMar 2009\nAutomobile analytics for Trilogy\n(DECISIONSTATS)\testimate demand of automobiles and forecast inventory for north American automobile dealership chain  based on website leads\n\tAnalyst\tR, Linux\tNov 2007\tOct 2008\nAnalytics for Personal Loans for Citi Financial India\tLed projects on deriving insights from multiple databases which led to sale of extra 50000 credit cards by cross-selling on dormant personal loans which had a clean payment history.\n This was done using both SAS BI reporting tool as well as SAS/BASE and SAS/STAT\n\tManager Analytics\tSAS, SQL, SQL Server, Windows\tJun 2006\tMar 2007\nAnalytics for Insurance Client (Personal) for WNS (Client name is GMAC Insurance)\tled project >150,000$ a year to create unique alphanumeric id for customers for GMACI (insurance arm of General Motors Finance company) and remove duplicates using SAS software. \nAlso created visualization for campaigns of marketing for Insurance\n\tManager Analytics\tSAS, Windows\tMar 2005 \tJun 2006\nAnalytics for GE Consumer Finance\tCreated regression models in SAS for propensity to apply. \nCreated dashboards for business snapshot\tBusiness Analyst\tVBA, SAS, Windows\tFeb 2004\tMar 2005\n\nConferences and Talks  \n1) Big Data Big Analytics – Workshop on  Statistical Machine Learning and Game Theory  Approaches for Large Scale Data Analysis  9 July 2012 – 14 July 2012  Sponsored by Mathematical Sciences, Division of Science and Engineering  Research Board at Bangalore India  Department of Science & Technology Government of India.   http://krishnarajpm.com/bigdata/abstract.pdf\n2) Data Analytics using the Cloud- Challenges and Opportunities for India at 1st International Symposium on Big Data and Cloud Computing Challenges(ISBCC-2014) March 27-28, 2014 VIT University, Chennai, India Sponsored by BRNS http://chennai.vit.ac.in/isbcc/\n3) Open Source Analytics at OSSCamp  2014\n4) Society for Industrial and Applied Mathematics- Delhi Technological University Evolute 2015 : Annual Symposium Speaker\n5) Talk on Analytics as a profession at Indian Institute of Technology Delhi July 2014\n6) Data Analytics Summit at Delhi School of Economics  13/8/2016 \n7) Crash Course on Data Science (Part I) Byte Academy New York Sunday, Jun 19, 2016 \nPublications \n1.\tWrote a blog Decisionstats.com with views> 100,000 annually for past six years. Total 1million views.\n2.\tI was cited by Wired Magazine and ReadWriteWeb for espousing a marketplace for algorithms.\n3.\tWrote two books on R for Springer (http://www.springer.com/in/book/9781461443421 and http://www.springer.com/in/book/9781493917013 ) and first book is scheduled to be translated in Chinese \n4.\tWrote around 50 articles for ProgrammableWeb and many articles for KDNuggets \n5.\tRemaining articles are at https://decisionstats.com/decisionstats.org/\n6.\tPython for R users is published by Wiley (https://www.amazon.co.uk/Python-R-Users-Ajay-Ohri/dp/1119126762\n \n
b = tm_map(b, stripWhitespace)
b <- tm_map(b, tolower)
b<- tm_map(b, removeWords, stopwords("english"))
b <- tm_map(b, removeNumbers)
#Plain Text Document
b <- tm_map(b, PlainTextDocument)
## Warning in tm_map.SimpleCorpus(b, PlainTextDocument): transformation drops
## documents
c=DocumentTermMatrix(b)
inspect(c[,1:10])
## <<DocumentTermMatrix (documents: 2, terms: 10)>>
## Non-/sparse entries: 10/10
## Sparsity           : 50%
## Maximal term length: 12
## Weighting          : term frequency (tf)
## Sample             :
##          Terms
## Docs      abstract academy acquired across addition ajay algorithms
##   content        1       1        1      1        1    3          1
##   meta           0       0        0      0        0    0          0
##          Terms
## Docs      alphanumeric also amazon
##   content            1    1      1
##   meta               0    0      0
c2=TermDocumentMatrix(b)
inspect(c2[1:50,])
## <<TermDocumentMatrix (terms: 50, documents: 2)>>
## Non-/sparse entries: 50/50
## Sparsity           : 50%
## Maximal term length: 12
## Weighting          : term frequency (tf)
## Sample             :
##             Docs
## Terms        content meta
##   ajay             3    0
##   analysis         6    0
##   analyst          3    0
##   analytics       24    0
##   articles         3    0
##   automobile       3    0
##   based            3    0
##   big              4    0
##   blog             3    0
##   book             4    0
dim(c2)
## [1] 420   2
findFreqTerms(c,5)
##  [1] "analysis"      "analytics"     "com"           "consultant"   
##  [5] "data"          "decisionstats" "feb"           "financial"    
##  [9] "india"         "insurance"     "july"          "jun"          
## [13] "mar"           "python"        "sas"           "science"      
## [17] "scientist"     "sql"           "using"
c3=as.data.frame(as.matrix(c2))
c3$Terms=row.names(c3)

names(c3)=c("freqs","terms")
c4=c3[order(c3$freqs),]
c4
##                 freqs terms              NA
## abstract            1     0        abstract
## academy             1     0         academy
## acquired            1     0        acquired
## across              1     0          across
## addition            1     0        addition
## algorithms          1     0      algorithms
## alphanumeric        1     0    alphanumeric
## also                1     0            also
## amazon              1     0          amazon
## american            1     0        american
## among               1     0           among
## amp                 1     0             amp
## analytical          1     0      analytical
## annually            1     0        annually
## annum               1     0           annum
## apis                1     0            apis
## applied             1     0         applied
## apply               1     0           apply
## approaches          1     0      approaches
## april               1     0           april
## area                1     0            area
## areas               1     0           areas
## arm                 1     0             arm
## asian               1     0           asian
## aug                 1     0             aug
## automobiles         1     0     automobiles
## bangalore           1     0       bangalore
## bank                1     0            bank
## banks               1     0           banks
## beautiful           1     0       beautiful
## bigdata             1     0         bigdata
## black               1     0           black
## brief               1     0           brief
## brns                1     0            brns
## building            1     0        building
## byte                1     0            byte
## campaigns           1     0       campaigns
## canada              1     0          canada
## catchment           1     0       catchment
## cbec                1     0            cbec
## chain               1     0           chain
## chinese             1     0         chinese
## cinemas             1     0         cinemas
## cinepolis           1     0       cinepolis
## cited               1     0           cited
## clean               1     0           clean
## cleaning            1     0        cleaning
## clients             1     0         clients
## cloudera            1     0        cloudera
## clustering          1     0      clustering
## clusters            1     0        clusters
## college             1     0         college
## commerce            1     0        commerce
## company             1     0         company
## competing           1     0       competing
## conferences         1     0     conferences
## consumer            1     0        consumer
## contata             1     0         contata
## contributed         1     0     contributed
## coordination        1     0    coordination
## countries           1     0       countries
## course              1     0          course
## crash               1     0           crash
## creation            1     0        creation
## credit              1     0          credit
## customs             1     0         customs
## dashboards          1     0      dashboards
## databases           1     0       databases
## dealership          1     0      dealership
## demand              1     0          demand
## department          1     0      department
## deriving            1     0        deriving
## designation         1     0     designation
## developers          1     0      developers
## division            1     0        division
## dob                 1     0             dob
## domains             1     0         domains
## dormant             1     0         dormant
## duplicates          1     0      duplicates
## east                1     0            east
## economics           1     0       economics
## economy             1     0         economy
## education           1     0       education
## email               1     0           email
## employment          1     0      employment
## engine              1     0          engine
## entertainment       1     0   entertainment
## environment         1     0     environment
## environments        1     0    environments
## especially          1     0      especially
## espousing           1     0       espousing
## estimation          1     0      estimation
## every               1     0           every
## evolute             1     0         evolute
## excel               1     0           excel
## existing            1     0        existing
## experienced         1     0     experienced
## expertise           1     0       expertise
## extensive           1     0       extensive
## extensively         1     0     extensively
## extra               1     0           extra
## featured            1     0        featured
## first               1     0           first
## fitting             1     0         fitting
## forecast            1     0        forecast
## founder             1     0         founder
## framework           1     0       framework
## fraud               1     0           fraud
## game                1     0            game
## gecis               1     0           gecis
## general             1     0         general
## global              1     0          global
## gmac                1     0            gmac
## gmaci               1     0           gmaci
## gmail               1     0           gmail
## govt                1     0            govt
## graph               1     0           graph
## hadoop              1     0          hadoop
## help                1     0            help
## hive                1     0            hive
## html                1     0            html
## include             1     0         include
## included            1     0        included
## includes            1     0        includes
## including           1     0       including
## indicus             1     0         indicus
## indonesian          1     0      indonesian
## industrial          1     0      industrial
## industry            1     0        industry
## influencers         1     0     influencers
## insights            1     0        insights
## instructor          1     0      instructor
## international       1     0   international
## internationally     1     0 internationally
## interpolating       1     0   interpolating
## inventory           1     0       inventory
## javascript          1     0      javascript
## june                1     0            june
## kmz                 1     0             kmz
## krishnarajpm        1     0    krishnarajpm
## lakh                1     0            lakh
## leads               1     0           leads
## level               1     0           level
## like                1     0            like
## ltv                 1     0             ltv
## lucknow             1     0         lucknow
## magazine            1     0        magazine
## managed             1     0         managed
## manipulation        1     0    manipulation
## many                1     0            many
## march               1     0           march
## marketplace         1     0     marketplace
## mathematical        1     0    mathematical
## mathematics         1     0     mathematics
## mba                 1     0             mba
## means               1     0           means
## mechanical          1     0      mechanical
## media               1     0           media
## meet                1     0            meet
## metrics             1     0         metrics
## micro               1     0           micro
## middle              1     0          middle
## million             1     0         million
## mining              1     0          mining
## ministry            1     0        ministry
## mobile              1     0          mobile
## modeling            1     0        modeling
## motors              1     0          motors
## mysql               1     0           mysql
## nearby              1     0          nearby
## new                 1     0             new
## nielsen             1     0         nielsen
## north               1     0           north
## nuggets             1     0         nuggets
## number              1     0          number
## offshoring          1     0      offshoring
## one                 1     0             one
## open                1     0            open
## operations          1     0      operations
## opportunities       1     0   opportunities
## optimize            1     0        optimize
## org                 1     0             org
## organization        1     0    organization
## osscamp             1     0         osscamp
## others              1     0          others
## parking             1     0         parking
## part                1     0            part
## past                1     0            past
## payment             1     0         payment
## pdf                 1     0             pdf
## per                 1     0             per
## placement           1     0       placement
## postgressql         1     0     postgressql
## practice            1     0        practice
## predict             1     0         predict
## primarily           1     0       primarily
## product             1     0         product
## profession          1     0      profession
## proficient          1     0      proficient
## programmableweb     1     0 programmableweb
## propensity          1     0      propensity
## proprietary         1     0     proprietary
## published           1     0       published
## readwriteweb        1     0    readwriteweb
## recognized          1     0      recognized
## remaining           1     0       remaining
## remote              1     0          remote
## remove              1     0          remove
## reporting           1     0       reporting
## reports             1     0         reports
## requirements        1     0    requirements
## revenues            1     0        revenues
## rfm                 1     0             rfm
## road                1     0            road
## role                1     0            role
## run                 1     0             run
## sale                1     0            sale
## scale               1     0           scale
## scheduled           1     0       scheduled
## school              1     0          school
## sciences            1     0        sciences
## scientists          1     0      scientists
## scraping            1     0        scraping
## screens             1     0         screens
## sec                 1     0             sec
## segmentation        1     0    segmentation
## segments            1     0        segments
## senior              1     0          senior
## sept                1     0            sept
## server              1     0          server
## servers             1     0         servers
## service             1     0         service
## shadow              1     0          shadow
## significant         1     0     significant
## six                 1     0             six
## size                1     0            size
## snapshot            1     0        snapshot
## social              1     0          social
## society             1     0         society
## socio               1     0           socio
## software            1     0        software
## sold                1     0            sold
## solver              1     0          solver
## soup                1     0            soup
## source              1     0          source
## speaker             1     0         speaker
## stat                1     0            stat
## statistical         1     0     statistical
## summary             1     0         summary
## summit              1     0          summit
## sunday              1     0          sunday
## sutherland          1     0      sutherland
## sybase              1     0          sybase
## systems             1     0         systems
## talk                1     0            talk
## talks               1     0           talks
## taught              1     0          taught
## tax                 1     0             tax
## taxation            1     0        taxation
## team                1     0            team
## tech                1     0            tech
## technological       1     0   technological
## telecom             1     0         telecom
## theory              1     0          theory
## third               1     0           third
## tickets             1     0         tickets
## tool                1     0            tool
## total               1     0           total
## translated          1     0      translated
## trilogy             1     0         trilogy
## ubuntu              1     0          ubuntu
## unique              1     0          unique
## variables           1     0       variables
## vba                 1     0             vba
## visual              1     0          visual
## warehouse           1     0       warehouse
## web                 1     0             web
## website             1     0         website
## websites            1     0        websites
## wiley               1     0           wiley
## wired               1     0           wired
## work                1     0            work
## working             1     0         working
## workshop            1     0        workshop
## world               1     0           world
## written             1     0         written
## york                1     0            york
## annual              2     0          annual
## around              2     0          around
## base                2     0            base
## board               2     0           board
## books               2     0           books
## built               2     0           built
## cards               2     0           cards
## central             2     0         central
## challenges          2     0      challenges
## chennai             2     0         chennai
## churn               2     0           churn
## citi                2     0            citi
## create              2     0          create
## cross               2     0           cross
## customer            2     0        customer
## customers           2     0       customers
## day                 2     0             day
## done                2     0            done
## duration            2     0        duration
## earth               2     0           earth
## econometrics        2     0    econometrics
## economic            2     0        economic
## estimate            2     0        estimate
## excise              2     0          excise
## exploratory         2     0     exploratory
## exports             2     0         exports
## firm                2     0            firm
## google              2     0          google
## government          2     0      government
## history             2     0         history
## imports             2     0         imports
## indian              2     0          indian
## institute           2     0       institute
## isbcc               2     0           isbcc
## jul                 2     0             jul
## kdnuggets           2     0       kdnuggets
## kogentix            2     0        kogentix
## large               2     0           large
## life                2     0            life
## malls               2     0           malls
## management          2     0      management
## market              2     0          market
## marketing           2     0       marketing
## models              2     0          models
## multiple            2     0        multiple
## name                2     0            name
## oct                 2     0             oct
## ontario             2     0         ontario
## profile             2     0         profile
## publications        2     0    publications
## pyspark             2     0         pyspark
## regression          2     0      regression
## research            2     0        research
## selling             2     0         selling
## services            2     0        services
## sponsored           2     0       sponsored
## symposium           2     0       symposium
## tableau             2     0         tableau
## technology          2     0      technology
## two                 2     0             two
## university          2     0      university
## users               2     0           users
## validation          2     0      validation
## vit                 2     0             vit
## wipro               2     0           wipro
## wns                 2     0             wns
## worked              2     0          worked
## year                2     0            year
## ajay                3     0            ajay
## analyst             3     0         analyst
## articles            3     0        articles
## automobile          3     0      automobile
## based               3     0           based
## blog                3     0            blog
## business            3     0        business
## computing           3     0       computing
## engineering         3     0     engineering
## enterprise          3     0      enterprise
## experience          3     0      experience
## https               3     0           https
## jan                 3     0             jan
## learning            3     0        learning
## linux               3     0           linux
## loans               3     0           loans
## machine             3     0         machine
## miner               3     0           miner
## personal            3     0        personal
## present             3     0         present
## principal           3     0       principal
## project             3     0         project
## projects            3     0        projects
## springer            3     0        springer
## training            3     0        training
## views               3     0           views
## well                3     0            well
## wrote               3     0           wrote
## years               3     0           years
## big                 4     0             big
## book                4     0            book
## client              4     0          client
## cloud               4     0           cloud
## created             4     0         created
## delhi               4     0           delhi
## finance             4     0         finance
## helped              4     0          helped
## http                4     0            http
## led                 4     0             led
## manager             4     0         manager
## nov                 4     0             nov
## ohri                4     0            ohri
## spatial             4     0         spatial
## visualization       4     0   visualization
## windows             4     0         windows
## www                 4     0             www
## feb                 5     0             feb
## financial           5     0       financial
## insurance           5     0       insurance
## july                5     0            july
## jun                 5     0             jun
## sql                 5     0             sql
## analysis            6     0        analysis
## consultant          6     0      consultant
## india               8     0           india
## mar                 8     0             mar
## python              8     0          python
## science             8     0         science
## com                 9     0             com
## scientist           9     0       scientist
## decisionstats      12     0   decisionstats
## using              12     0           using
## sas                20     0             sas
## analytics          24     0       analytics
## data               31     0            data
str(c4)
## 'data.frame':    420 obs. of  3 variables:
##  $ freqs: num  1 1 1 1 1 1 1 1 1 1 ...
##  $ terms: num  0 0 0 0 0 0 0 0 0 0 ...
##  $ NA   : chr  "abstract" "academy" "acquired" "across" ...
names(c4)=c('freq','meta','words')
str(c4)
## 'data.frame':    420 obs. of  3 variables:
##  $ freq : num  1 1 1 1 1 1 1 1 1 1 ...
##  $ meta : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ words: chr  "abstract" "academy" "acquired" "across" ...
summary(c4)
##       freq             meta      words          
##  Min.   : 1.000   Min.   :0   Length:420        
##  1st Qu.: 1.000   1st Qu.:0   Class :character  
##  Median : 1.000   Median :0   Mode  :character  
##  Mean   : 1.831   Mean   :0                     
##  3rd Qu.: 2.000   3rd Qu.:0                     
##  Max.   :31.000   Max.   :0
c4$meta=NULL
head(c4)
##            freq      words
## abstract      1   abstract
## academy       1    academy
## acquired      1   acquired
## across        1     across
## addition      1   addition
## algorithms    1 algorithms
c5=c4[order(c4$freq,decreasing = TRUE),]
head(c5)
##               freq         words
## data            31          data
## analytics       24     analytics
## sas             20           sas
## decisionstats   12 decisionstats
## using           12         using
## com              9           com
tail(c5,20)
##            freq      words
## total         1      total
## translated    1 translated
## trilogy       1    trilogy
## ubuntu        1     ubuntu
## unique        1     unique
## variables     1  variables
## vba           1        vba
## visual        1     visual
## warehouse     1  warehouse
## web           1        web
## website       1    website
## websites      1   websites
## wiley         1      wiley
## wired         1      wired
## work          1       work
## working       1    working
## workshop      1   workshop
## world         1      world
## written       1    written
## york          1       york
wordcloud(c5$words,c5$freq,colors = brewer.pal(9,'Set1'))