This is an R Markdown Notebook. When you execute code within the notebook, the results appear beneath the code.
Try executing this chunk by clicking the Run button within the chunk or by placing your cursor inside it and pressing Ctrl+Shift+Enter (Cmd+Shift+Enter on macOS).
# Set a default CRAN mirror up front to avoid mirror-selection errors
options(repos = c(CRAN = "https://cran.r-project.org"))
# Sentiment analysis with machine learning algorithms
# We compare the performance of SVM, naive Bayes, random forest, and glmnet
install.packages(c('data.table', 'tm', 'e1071', 'gmodels', 'caret',
                   'stringr', 'purrrlyr', 'tidyverse', 'text2vec',
                   'glmnet', 'ggrepel', 'randomForest', 'SnowballC'))
## Installing packages into 'C:/Users/Rowen/Documents/R/win-library/3.5'
## (as 'lib' is unspecified)
##
## The downloaded binary packages are in
## C:\Users\Rowen\AppData\Local\Temp\Rtmpk9wDzp\downloaded_packages
library(data.table)
library(tm)
## Loading required package: NLP
library(e1071)
library(gmodels)
library(caret)
## Loading required package: lattice
## Loading required package: ggplot2
##
## Attaching package: 'ggplot2'
## The following object is masked from 'package:NLP':
##
## annotate
library(stringr)
library(purrrlyr)
library(tidyverse)
## -- Attaching packages ---------------------------------------------------------------------------- tidyverse 1.2.1 --
## v tibble 2.1.1 v purrr 0.3.2
## v tidyr 0.8.3 v dplyr 0.8.0.1
## v readr 1.3.1 v forcats 0.4.0
## -- Conflicts ------------------------------------------------------------------------------- tidyverse_conflicts() --
## x ggplot2::annotate() masks NLP::annotate()
## x dplyr::between() masks data.table::between()
## x dplyr::filter() masks stats::filter()
## x dplyr::first() masks data.table::first()
## x dplyr::lag() masks stats::lag()
## x dplyr::last() masks data.table::last()
## x purrr::lift() masks caret::lift()
## x purrr::transpose() masks data.table::transpose()
library(text2vec)
library(glmnet)
## Loading required package: Matrix
##
## Attaching package: 'Matrix'
## The following object is masked from 'package:tidyr':
##
## expand
## Loading required package: foreach
##
## Attaching package: 'foreach'
## The following objects are masked from 'package:purrr':
##
## accumulate, when
## Loaded glmnet 2.0-16
library(ggrepel)
library(randomForest)
## randomForest 4.6-14
## Type rfNews() to see new features/changes/bug fixes.
##
## Attaching package: 'randomForest'
## The following object is masked from 'package:dplyr':
##
## combine
## The following object is masked from 'package:ggplot2':
##
## margin
library(SnowballC)
tweets_classified <- readRDS("tweets_classified.RDS")
# To keep runtimes short, start with a small (0.1%) stratified subset of the data
set.seed(2340)
tweets_Index <- createDataPartition(tweets_classified$sentiment, p = 0.001,
                                    list = FALSE,
                                    times = 1)
train_tweets <- tweets_classified[tweets_Index, ]
# Split the resulting 1,600 tweets into 1,200 for training and 400 for testing
train_index <- sample(1600, 1200)
train_data <- train_tweets[train_index, ]
test_data <- train_tweets[-train_index, ]
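A quick optional sanity check (not part of the original run): confirm that the random split keeps a similar class mix in both sets.
# Hedged sketch: compare class proportions across the two splits
prop.table(table(train_data$sentiment))
prop.table(table(test_data$sentiment))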
# Build one document-term matrix over train + test so both share a vocabulary
data <- rbind(train_data, test_data)
data_corpus <- VCorpus(VectorSource(data$text))
data_dtm <- DocumentTermMatrix(data_corpus, control = list(
tolower = TRUE,
removeNumbers = TRUE,
stopwords = TRUE,
removePunctuation = TRUE,
stemming = TRUE
))
data_dtm <- as.matrix(data_dtm)
train_dtm <- data_dtm[1:1200, ]
test_dtm <- data_dtm[1201:1600, ]
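With a larger sample, as.matrix() on the full DTM can exhaust memory, since most entries are zero. A hedged variation, not used above: prune very sparse terms with tm's removeSparseTerms() before densifying; the 0.999 threshold is an illustrative assumption, not a tuned value.
# Sketch: same pipeline with sparse-term pruning before as.matrix()
data_dtm_sparse <- DocumentTermMatrix(data_corpus, control = list(
  tolower = TRUE,
  removeNumbers = TRUE,
  stopwords = TRUE,
  removePunctuation = TRUE,
  stemming = TRUE
))
# Drop terms absent from more than 99.9% of documents (assumed threshold)
data_dtm_sparse <- removeSparseTerms(data_dtm_sparse, sparse = 0.999)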
bayes_classifier <- naiveBayes(train_dtm, as.factor(train_data$sentiment))
bayes_test_pred <- predict(bayes_classifier, test_dtm)
CrossTable(bayes_test_pred, test_data$sentiment,
prop.chisq = FALSE, prop.t = FALSE, prop.r = FALSE,
dnn = c('predicted', 'actual'))
##
##
## Cell Contents
## |-------------------------|
## | N |
## | N / Col Total |
## |-------------------------|
##
##
## Total Observations in Table: 400
##
##
## | actual
## predicted | 0 | 1 | Row Total |
## -------------|-----------|-----------|-----------|
## 0 | 195 | 162 | 357 |
## | 0.899 | 0.885 | |
## -------------|-----------|-----------|-----------|
## 1 | 22 | 21 | 43 |
## | 0.101 | 0.115 | |
## -------------|-----------|-----------|-----------|
## Column Total | 217 | 183 | 400 |
## | 0.542 | 0.458 | |
## -------------|-----------|-----------|-----------|
##
##
auc(as.numeric(test_data$sentiment), bayes_test_pred)
## [1] 0.5066858
#0.5066858: naive Bayes barely beats chance on the 400 test tweets
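One likely reason for the near-chance AUC: e1071's naiveBayes() treats the raw word counts as Gaussian numeric features, which fits text poorly. A hedged variation, not part of the original run, is to binarize the counts into categorical presence/absence features first (convert_counts and the variable names are illustrative).
# Hedged sketch: binarize the DTM so each term becomes a "Yes"/"No" feature
convert_counts <- function(x) ifelse(x > 0, "Yes", "No")
train_binary <- apply(train_dtm, MARGIN = 2, convert_counts)
test_binary  <- apply(test_dtm,  MARGIN = 2, convert_counts)
# Laplace smoothing guards against terms unseen in one class
bayes_binary <- naiveBayes(train_binary, as.factor(train_data$sentiment), laplace = 1)
bayes_binary_pred <- predict(bayes_binary, test_binary)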
svm_classifier <- svm(train_dtm, as.factor(train_data$sentiment))
## Warning in svm.default(train_dtm, as.factor(train_data$sentiment)):
## Variable(s) 'aaaal' and 'aamyhaanson' and 'abba' ... [several hundred
## terms that are constant in the training data; the full warning is
## truncated here]
svm_test_pred <- predict(svm_classifier, test_dtm)
CrossTable(svm_test_pred, as.factor(test_data$sentiment),
prop.chisq = FALSE, prop.t = FALSE, prop.r = FALSE,
dnn = c('predicted', 'actual'))
##
##
## Cell Contents
## |-------------------------|
## | N |
## |-------------------------|
##
##
## Total Observations in Table: 400
##
##
## | as.factor(test_data$sentiment)
## svm_test_pred | 0 | 1 | Row Total |
## --------------|-----------|-----------|-----------|
## 1 | 217 | 183 | 400 |
## --------------|-----------|-----------|-----------|
## Column Total | 217 | 183 | 400 |
## --------------|-----------|-----------|-----------|
##
##
auc(as.numeric(test_data$sentiment), svm_test_pred)
## [1] 0.5
#0.5: the SVM predicted class 1 for all 400 test tweets, no better than chance
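The truncated warning above and the one-class prediction point at the same issue: many terms never occur in the 1,200 training tweets, so svm() cannot scale them. A hedged follow-up, not run in the original: drop the zero-variance columns and refit.
# Hedged sketch: remove constant (zero-variance) terms before refitting
keep <- apply(train_dtm, 2, var) > 0
svm_refit <- svm(train_dtm[, keep], as.factor(train_data$sentiment))
svm_refit_pred <- predict(svm_refit, test_dtm[, keep])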
randomforest_classifier <- randomForest(train_dtm, as.factor(train_data$sentiment), ntree=100)
rf_test_pred <- predict(randomforest_classifier, test_dtm)
CrossTable(rf_test_pred, as.factor(test_data$sentiment),
prop.chisq = FALSE, prop.t = FALSE, prop.r = FALSE,
dnn = c('predicted', 'actual'))
##
##
## Cell Contents
## |-------------------------|
## | N |
## | N / Col Total |
## |-------------------------|
##
##
## Total Observations in Table: 400
##
##
## | actual
## predicted | 0 | 1 | Row Total |
## -------------|-----------|-----------|-----------|
## 0 | 107 | 27 | 134 |
## | 0.493 | 0.148 | |
## -------------|-----------|-----------|-----------|
## 1 | 110 | 156 | 266 |
## | 0.507 | 0.852 | |
## -------------|-----------|-----------|-----------|
## Column Total | 217 | 183 | 400 |
## | 0.542 | 0.458 | |
## -------------|-----------|-----------|-----------|
##
##
auc(as.numeric(test_data$sentiment), rf_test_pred)
## [1] 0.6727733
plot(randomforest_classifier)
#0.6727733 on the 400 test tweets: a clear improvement over naive Bayes and SVM
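The plot shows out-of-bag error versus the number of trees. To see which stemmed terms drive the forest, randomForest's importance tools can be used; this is an optional follow-up, not part of the original run.
# Hedged sketch: top 10 terms by mean decrease in Gini impurity
head(sort(importance(randomforest_classifier)[, 1], decreasing = TRUE), 10)
varImpPlot(randomforest_classifier, n.var = 10)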
# text2vec preprocessing: lowercase the text and tokenize on word boundaries
prep_fun <- tolower
tok_fun <- word_tokenizer
it_train <- itoken(train_data$text,
preprocessor = prep_fun,
tokenizer = tok_fun,
ids = train_data$id,
progressbar = TRUE)
it_test <- itoken(test_data$text,
preprocessor = prep_fun,
tokenizer = tok_fun,
ids = test_data$id,
progressbar = TRUE)
vocab <- create_vocabulary(it_train)
vectorizer <- vocab_vectorizer(vocab)
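The vectorizer above uses the full vocabulary. An optional text2vec step, hedged because it is not used below: prune rare and near-ubiquitous terms first with prune_vocabulary(); the thresholds shown are illustrative assumptions.
# Hedged sketch: build an alternative vectorizer from a pruned vocabulary
pruned_vocab <- prune_vocabulary(vocab,
                                 term_count_min = 2,       # drop one-off terms
                                 doc_proportion_max = 0.5) # drop near-ubiquitous terms
pruned_vectorizer <- vocab_vectorizer(pruned_vocab)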
dtm_train <- create_dtm(it_train, vectorizer)
dtm_test <- create_dtm(it_test, vectorizer)
tfidf <- TfIdf$new()
# Fit the tf-idf model on the training DTM and transform it in one step,
# then apply the already-fitted model to the test DTM (calling fit_transform
# on the test set would re-learn the idf weights and leak test information)
dtm_train_tfidf <- fit_transform(dtm_train, tfidf)
dtm_test_tfidf <- transform(dtm_test, tfidf)
glmnet_classifier <- cv.glmnet(x = dtm_train_tfidf,
                               y = train_data[['sentiment']],
                               family = 'binomial',
                               # L1 (lasso) penalty
                               alpha = 1,
                               # optimize for area under the ROC curve
                               type.measure = "auc",
                               # 5-fold cross-validation
                               nfolds = 5,
                               # a higher convergence threshold is less accurate but trains faster
                               thresh = 1e-3,
                               # likewise, a lower iteration cap speeds up training
                               maxit = 1e3)
preds <- predict(glmnet_classifier, dtm_test_tfidf, type = 'response')[ ,1]
auc(as.numeric(test_data$sentiment), preds)
## [1] 0.6741205
#0.6741205
plot(glmnet_classifier)
print(paste("max AUC =", round(max(glmnet_classifier$cvm), 4)))
## [1] "max AUC = 0.6933"
#max AUC = 0.6933
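Beyond its AUC, the lasso model is easy to interpret. A hedged follow-up, not part of the original run: pull the nonzero coefficients at lambda.min to see which tokens push predictions toward each class.
# Hedged sketch: inspect the strongest glmnet coefficients
coefs <- coef(glmnet_classifier, s = "lambda.min")  # sparse column matrix
nonzero <- coefs[coefs[, 1] != 0, 1]
head(sort(nonzero, decreasing = TRUE), 10)  # tokens pushing toward class 1
head(sort(nonzero), 10)                     # tokens pushing toward class 0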
# Conclusion: glmnet performs best here, with a maximum cross-validated AUC
# of 0.6933 (0.6741 on the held-out test set). Random forest is close behind
# (test AUC 0.6728), while naive Bayes (0.5067) and SVM (0.5, a single-class
# prediction) do no better than chance on this small sample.
Add a new chunk by clicking the Insert Chunk button on the toolbar or by pressing Ctrl+Alt+I (Cmd+Option+I on macOS).
When you save the notebook, an HTML file containing the code and output will be saved alongside it (click the Preview button or press Ctrl+Shift+K, Cmd+Shift+K on macOS, to preview the HTML file).
The preview shows you a rendered HTML copy of the contents of the editor. Consequently, unlike Knit, Preview does not run any R code chunks. Instead, the output of the chunk when it was last run in the editor is displayed.