# Open the raw SMS data frame in the interactive data viewer.
View(SMSSpamCollection)
# Print a compact summary of the data frame's structure (columns and types).
str(SMSSpamCollection)
'data.frame':   3184 obs. of  2 variables:
 $ V1: Factor w/ 2 levels "ham","spam": 1 1 2 1 1 2 1 1 2 2 ...
 $ V2: Factor w/ 3027 levels "'An Amazing Quote'' - Sometimes in life its difficult to decide whats wrong!! a lie that brings a smile or the "| __truncated__,..: 700 1898 638 2517 1692 656 591 248 2792 770 ...

THIS FUNCTION SHOWS THE STRUCTURE OF THE COLLECTION.

# Work on a copy so the originally loaded data frame stays untouched.
SMSSpamCollection_analysis <- SMSSpamCollection

“SMSSPAMCOLLECTION_ANALYSIS” IS CREATED AS A WORKING COPY THAT HOLDS ALL THE DATA OF “SMSSPAMCOLLECTION”, SO THAT THE ORIGINAL DATA IS NOT LOST WHILE WORKING.

# Re-encode the label column (V1) as a factor.
SMSSpamCollection_analysis[["V1"]] <- factor(
  SMSSpamCollection_analysis[["V1"]]
)

FACTOR IS USED FOR A COLUMN THAT TAKES A “FINITE NUMBER OF VALUES” IN THE COLLECTION (HERE “HAM” AND “SPAM”).

# Count how many messages fall under each level of the label column V1.
table(SMSSpamCollection_analysis$V1)

 ham spam 
2746  438 

THE TABLE FUNCTION SHOWS THE NUMBER OF MESSAGES IN EACH CATEGORY.

library(NLP)

THE LIBRARY (NLP), “NATURAL LANGUAGE PROCESSING”, IS LOADED BECAUSE THE TM PACKAGE DEPENDS ON IT.

library(tm)

THE LIBRARY (TM) IS LOADED TO ACCESS THE CORPUS FUNCTION, WHICH REPRESENTS A COLLECTION OF TEXT DOCUMENTS.

# Build a corpus in which every entry of the message column (V2) is one
# document.
Sms_corpus <- Corpus(
  VectorSource(SMSSpamCollection_analysis[["V2"]])
)

THE FUNCTION “VECTORSOURCE” TREATS EACH ELEMENT OF THE MESSAGE VECTOR AS ONE DOCUMENT, AND “CORPUS” BUILDS THE CORPUS FROM IT.

# Print the corpus summary (metadata and number of documents).
print(Sms_corpus)
<<SimpleCorpus>>
Metadata:  corpus specific: 1, document level (indexed): 0
Content:  documents: 3184

THE FUNCTION PRINT IS USED TO VIEW THE TOTAL NUMBER OF DOCUMENTS.

# Display the first three documents of the corpus.
inspect(Sms_corpus[1:3])
<<SimpleCorpus>>
Metadata:  corpus specific: 1, document level (indexed): 0
Content:  documents: 3

[1] Go until jurong point, crazy.. Available only in bugis n great world la e buffet... Cine there got amore wat...                                            
[2] Ok lar... Joking wif u oni...                                                                                                                              
[3] Free entry in 2 a wkly comp to win FA Cup final tkts 21st May 2005. Text FA to 87121 to receive entry question(std txt rate)T&C's apply 08452810075over18's

THE “INSPECT” COMMAND IS USED TO VIEW THE MESSAGES FROM THE COLLECTION.

# Clean the corpus step by step. The first step must start from the raw
# corpus (Sms_corpus): Corpus_clean does not exist yet at this point, so
# passing it to tm_map() would fail with "object 'Corpus_clean' not found".
# 1. Drop stop words (default stopwords() list).
Corpus_clean <- tm_map(Sms_corpus, removeWords, stopwords())
# 2. Strip punctuation characters.
Corpus_clean <- tm_map(Corpus_clean, removePunctuation)
# 3. Collapse the runs of whitespace left behind by the removals above.
Corpus_clean <- tm_map(Corpus_clean, stripWhitespace)

THE “TM_MAP” FUNCTION CLEANS THE CORPUS BY REMOVING STOP WORDS, PUNCTUATION AND EXTRA WHITESPACE.

# Convert the cleaned corpus into a document-term matrix.
sms_dtm <- DocumentTermMatrix(Corpus_clean)
# Split the cleaned corpus: the first 2500 documents form the training set,
# all remaining documents form the test set. The upper bound is taken from
# the corpus itself instead of the hard-coded row count (3184), so the split
# stays correct if the number of messages changes.
sms_corpus_train <- Corpus_clean[1:2500]
sms_corpus_test <- Corpus_clean[2501:length(Corpus_clean)]

THE CLEANED MESSAGES ARE CONVERTED INTO A DOCUMENT-TERM MATRIX, AND THE CORPUS IS SPLIT INTO TRAIN AND TEST SETS.

# Install wordcloud only when it is missing; an unconditional
# install.packages() call re-downloads the package on every run and fails
# without network access.
if (!requireNamespace("wordcloud", quietly = TRUE)) {
  install.packages("wordcloud")
}
library(wordcloud)

THE PACKAGE “WORDCLOUD” IS INSTALLED AND THE LIBRARY IS LOADED.

THE WORDCLOUD FUNCTION IS CALLED ON THE TRAIN CORPUS, THE MINIMUM FREQUENCY IS SET, AND THEN THE COLOR IS SET TO BLACK.

TO GET RANDOM COLORS, SET “RANDOM.COLOR=TRUE” AND “COL=RAINBOW(7)”.

LS0tDQp0aXRsZTogIlJBR1VMIERBVEEgQU5BTFlTVC0gV29yZCBDbG91ZCINCm91dHB1dDogaHRtbF9ub3RlYm9vaw0KLS0tDQoNCg0KDQpgYGB7cn0NClZpZXcoU01TU3BhbUNvbGxlY3Rpb24pDQpgYGANCg0KYGBge3J9DQpzdHIoU01TU3BhbUNvbGxlY3Rpb24pDQpgYGANClRISVMgRlVOQ1RJT04gU0hPV1MgVEhFIFNUUlVDVFVSRSBPRiBUSEUgQ09MTEVDVElPTi4gIA0KYGBge3J9DQpTTVNTcGFtQ29sbGVjdGlvbl9hbmFseXNpcz1TTVNTcGFtQ29sbGVjdGlvbg0KYGBgDQoiU01TU1BBTUNPTExFQ1RJT05fQU5BTFlTSVMiIElTIENSRUFURUQgQVMgQSBTVUIgRklMRSBXRVJFIElUIEhPTEQgQUxMIFRIRSBDT0xMRUNUSU9OIE9GICJTTVNTUEFNQ09MTEVDVElPTiIgVE8gV09SSyBXSVRIT1VUIExBQ0tJTkcgREFUQS4NCmBgYHtyfQ0KU01TU3BhbUNvbGxlY3Rpb25fYW5hbHlzaXMkVjE9ZmFjdG9yKFNNU1NwYW1Db2xsZWN0aW9uX2FuYWx5c2lzJFYxKQ0KYGBgDQpGQUNUT1IgSVMgVVNFRCBGT1IgIkZJTklURSBOVU1CRVIgT0YgVkFMVUVTICIgIElOIFRIRSBDT0xMRUNUSU9OLiANCmBgYHtyfQ0KdGFibGUoU01TU3BhbUNvbGxlY3Rpb25fYW5hbHlzaXMkVjEpDQpgYGANClRIRSBUQUJMRVMgRlVOQ1RJT05TIFNIT1dTIFRIRSBOTyBPRiBNRVNTQUdFUyBJTiBJVFMgQ0FURUdPUlkuDQpgYGB7cn0NCmxpYnJhcnkoTkxQKQ0KYGBgDQpMSUJSQVJZIChOTFApICJOQVRVUkFMIExBTkdVQUdFIFBST0NFU1NJTkciIElTIFVTRUQgQUxMT1cgT1RIRVIgTEFOR1VBR0VTIFRPTy4NCmBgYHtyfQ0KbGlicmFyeSh0bSkNCmBgYA0KVE8gQUNDRVNTIFRIRSAgQ09SUFVTIEZVTkNUSU9OIFRIRSBMSUJSQVJZIChUTSkgQU5EIFRPIFJFUFJFU05UIEEgTlVNQkVSIE9GIERBVEEnUy4NCmBgYHtyfQ0KU21zX2NvcnB1cz1Db3JwdXMoVmVjdG9yU291cmNlKFNNU1NwYW1Db2xsZWN0aW9uX2FuYWx5c2lzJFYyKSkNCmBgYA0KVEhFIEZVTkNUSU9OICJWRUNUT1IgU09VUkNFIiBXSElDSCBTVE9SRVMgQ0hBUkFDVEVSIElOIFNFUlZFUi4gIA0KYGBge3J9DQpwcmludChTbXNfY29ycHVzKQ0KYGBgDQpUSEUgRlVOQ1RJT04gUFJJTlQgSVMgVVNFRCBUTyBWSUVXIFRIRSBUT1RBTCBOVU1CRVIgT0YgRE9DVU1FTlRTLg0KYGBge3J9DQppbnNwZWN0KFNtc19jb3JwdXNbMTozXSkNCmBgYA0KVE8gVklFVyBUSEUgTUVTU0FHRVMgRlJPTSBUSEUgQ09MTEVDVElPTiBUSEUgIklOU1BFQ1QiIENNRCBJUyBVU0VELg0KYGBge3J9DQpDb3JwdXNfY2xlYW49dG1fbWFwKENvcnB1c19jbGVhbixyZW1vdmVXb3JkcyxzdG9wd29yZHMoKSkNCkNvcnB1c19jbGVhbj10bV9tYXAoQ29ycHVzX2NsZWFuLHJlbW92ZVB1bmN0dWF0aW9uKQ0KQ29ycHVzX2NsZWFuPXRtX21hcChDb3JwdXNfY2xlYW4sc3RyaXBXaGl0ZXNwYWNlKQ0KYGBgDQpUSEUgIlRNX01BUCIgQ0xFQU5TIFRIRSBERUNMQVJFRCBUSElOR1MsIFdPUkRTLCBQVU5DVElPTlMgQU5EIFdISVRFU1BBQ0UuDQpgYGB7
cn0NCnNtc19kdG09RG9jdW1lbnRUZXJtTWF0cml4KENvcnB1c19jbGVhbikNCnNtc19jb3JwdXNfdHJhaW49Q29ycHVzX2NsZWFuWzE6MjUwMF0NCnNtc19jb3JwdXNfdGVzdD1Db3JwdXNfY2xlYW5bMjUwMTozMTg0XQ0KYGBgDQpBUFBMWUlORyBDT01QUkVTU0VEIE1FU1NBR0VTIElOVE8gQSBNQVRSSVggRk9STUFULCBBTkQgVVNJTkcgVFJBSU4gQU5EIFRFU1QgRk9SIFRIRSBNRVNTQUdFUy4NCmBgYHtyfQ0KaW5zdGFsbC5wYWNrYWdlcygid29yZGNsb3VkIikNCmxpYnJhcnkod29yZGNsb3VkKQ0KYGBgDQpUSEUgUEFDS0FHRSAiV09SRCBDTE9VRCIgSVMgSU5TVEFMTEVEIEFORCBUSEUgTElCUkFSWSBJUyBDQUxMRUQtT1VULg0KYGBge3J9DQp3b3JkY2xvdWQoc21zX2NvcnB1c190cmFpbixtaW4uZnJlcSA9IDQwLHJhbmRvbS5vcmRlciA9IEZBTFNFLGNvbD0iQkxBQ0siKQ0KYGBgDQpUSEUgVFJBSU4gRlVOQ1RJT04gSVMgQ0FMTEVEIEFORCBGUkVRVUVOQ1kgSVMgU0VUIEFORCBUSEVOIFRIRSBDT0xPUiBJUyBTRVQgQVMgQkxBQ0suDQpgYGB7cn0NCndvcmRjbG91ZChzbXNfY29ycHVzX3RyYWluLG1pbi5mcmVxID0gNDAscmFuZG9tLm9yZGVyID0gRkFMU0UscmFuZG9tLmNvbG9yID0gVFJVRSxjb2w9cmFpbmJvdyg3KSkNCmBgYA0KVE8gR0VUIFJBTkRPTSBDT0xPUlMgU0VUICJSQU5ET01DT0xPUj1UUlVFIixDT0w9IlJBSU5CT1coNykiLg==