library(NbClust)
library(cluster)
## Warning: package 'cluster' was built under R version 3.5.1
library(rpart)
library(class)
## Warning: package 'class' was built under R version 3.5.1
library(rpart.plot)
## Warning: package 'rpart.plot' was built under R version 3.5.1
library(vcd)
## Warning: package 'vcd' was built under R version 3.5.1
## Loading required package: grid
library(vcdExtra)
## Warning: package 'vcdExtra' was built under R version 3.5.1
## Loading required package: gnm
library(MASS)
## Warning: package 'MASS' was built under R version 3.5.1
library(RColorBrewer)
library(gmodels)
## Warning: package 'gmodels' was built under R version 3.5.1
library(C50)
## Warning: package 'C50' was built under R version 3.5.1
library(ca)
## Warning: package 'ca' was built under R version 3.5.1
library(ggplot2)
## Warning: package 'ggplot2' was built under R version 3.5.1
library(bnlearn)
## Warning: package 'bnlearn' was built under R version 3.5.1
##
## Attaching package: 'bnlearn'
## The following object is masked from 'package:stats':
##
## sigma
library(psych)
## Warning: package 'psych' was built under R version 3.5.1
##
## Attaching package: 'psych'
## The following objects are masked from 'package:ggplot2':
##
## %+%, alpha
library(naivebayes)
library(SnowballC)
library(klaR)
## Warning: package 'klaR' was built under R version 3.5.1
library(corrplot)
## Warning: package 'corrplot' was built under R version 3.5.1
## corrplot 0.84 loaded
library(caret)
## Warning: package 'caret' was built under R version 3.5.1
## Loading required package: lattice
##
## Attaching package: 'lattice'
## The following object is masked from 'package:gnm':
##
## barley
library(leaps)
## Warning: package 'leaps' was built under R version 3.5.1
library(dplyr)
## Warning: package 'dplyr' was built under R version 3.5.1
##
## Attaching package: 'dplyr'
## The following object is masked from 'package:MASS':
##
## select
## The following object is masked from 'package:vcdExtra':
##
## summarise
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(kernlab)
##
## Attaching package: 'kernlab'
## The following object is masked from 'package:psych':
##
## alpha
## The following object is masked from 'package:ggplot2':
##
## alpha
library(e1071)
## Warning: package 'e1071' was built under R version 3.5.1
##
## Attaching package: 'e1071'
## The following object is masked from 'package:bnlearn':
##
## impute
library(tm)
## Warning: package 'tm' was built under R version 3.5.1
## Loading required package: NLP
##
## Attaching package: 'NLP'
## The following object is masked from 'package:ggplot2':
##
## annotate
library(logmult)
## Warning: package 'logmult' was built under R version 3.5.1
##
## Attaching package: 'logmult'
## The following object is masked from 'package:gnm':
##
## se
## The following object is masked from 'package:vcd':
##
## assoc
library(stats)
data <- read.csv("C:/Users/charl/Documents/ANLY 530--Wholesale(1)_customers_data.csv")
str(data)
## 'data.frame': 440 obs. of 8 variables:
## $ Channel : int 2 2 2 1 2 2 2 2 1 2 ...
## $ Region : int 3 3 3 3 3 3 3 3 3 3 ...
## $ Fresh : int 12669 7057 6353 13265 22615 9413 12126 7579 5963 6006 ...
## $ Milk : int 9656 9810 8808 1196 5410 8259 3199 4956 3648 11093 ...
## $ Grocery : int 7561 9568 7684 4221 7198 5126 6975 9426 6192 18881 ...
## $ Frozen : int 214 1762 2405 6404 3915 666 480 1669 425 1159 ...
## $ Detergents_Paper: int 2674 3293 3516 507 1777 1795 3140 3321 1716 7425 ...
## $ Delicassen : int 1338 1776 7844 1788 5185 1451 545 2566 750 2098 ...
summary(data)
## Channel Region Fresh Milk
## Min. :1.000 Min. :1.000 Min. : 3 Min. : 55
## 1st Qu.:1.000 1st Qu.:2.000 1st Qu.: 3128 1st Qu.: 1533
## Median :1.000 Median :3.000 Median : 8504 Median : 3627
## Mean :1.323 Mean :2.543 Mean : 12000 Mean : 5796
## 3rd Qu.:2.000 3rd Qu.:3.000 3rd Qu.: 16934 3rd Qu.: 7190
## Max. :2.000 Max. :3.000 Max. :112151 Max. :73498
## Grocery Frozen Detergents_Paper Delicassen
## Min. : 3 Min. : 25.0 Min. : 3.0 Min. : 3.0
## 1st Qu.: 2153 1st Qu.: 742.2 1st Qu.: 256.8 1st Qu.: 408.2
## Median : 4756 Median : 1526.0 Median : 816.5 Median : 965.5
## Mean : 7951 Mean : 3071.9 Mean : 2881.5 Mean : 1524.9
## 3rd Qu.:10656 3rd Qu.: 3554.2 3rd Qu.: 3922.0 3rd Qu.: 1820.2
## Max. :92780 Max. :60869.0 Max. :40827.0 Max. :47943.0
top.n.custs <- function (data,cols,n=5) { #Requires some data frame and the top N to remove
idx.to.remove <-integer(0) #Initialize a vector to hold customers being removed
for (c in cols){ # For every column in the data we passed to this function
col.order <-order(data[,c],decreasing=T) #Sort column "c" in descending order (bigger on top)
#Order returns the sorted index (e.g. row 15, 3, 7, 1, ...) rather than the actual values sorted.
idx <-head(col.order, n) #Take the first n of the sorted column C to
idx.to.remove <-union(idx.to.remove,idx) #Combine and de-duplicate the row ids that need to be removed
}
return(idx.to.remove) #Return the indexes of customers to be removed
}
top.custs <- top.n.custs(data, cols=3:8, n=5)
length(top.custs)
## [1] 19
data[top.custs, ]
## Channel Region Fresh Milk Grocery Frozen Detergents_Paper Delicassen
## 182 1 3 112151 29627 18148 16745 4948 8550
## 126 1 3 76237 3473 7102 16538 778 918
## 285 1 3 68951 4411 12609 8692 751 2406
## 40 1 3 56159 555 902 10002 212 2916
## 259 1 1 56083 4563 2124 6422 730 3321
## 87 2 3 22925 73498 32114 987 20070 903
## 48 2 3 44466 54259 55571 7782 24171 6465
## 86 2 3 16117 46197 92780 1026 40827 2944
## 184 1 3 36847 43950 20170 36534 239 47943
## 62 2 3 35942 38369 59598 3254 26701 2017
## 334 2 2 8565 4980 67298 131 38102 1215
## 66 2 3 85 20959 45828 36 24231 1423
## 326 1 2 32717 16784 13626 60869 1272 5609
## 94 1 3 11314 3090 2062 35009 71 2698
## 197 1 1 30624 7209 4897 18711 763 2876
## 104 1 3 56082 3504 8906 18028 1480 2498
## 24 2 3 26373 36423 22019 5154 4337 16523
## 72 1 3 18291 1266 21042 5373 4173 14472
## 88 1 3 43265 5025 8117 6312 1579 14351
data.rm.top<-data[-c(top.custs),] #Remove the Customers
set.seed(76964057) #Set the seed for reproducibility
k <-kmeans(data.rm.top[,-c(1,2)], centers=5) #Create 5 clusters, Remove columns 1 and 2
k$centers #Display cluster centers
## Fresh Milk Grocery Frozen Detergents_Paper Delicassen
## 1 4189.747 7645.639 11015.277 1335.145 4750.4819 1387.1205
## 2 16470.870 3026.491 4264.741 3217.306 996.5556 1319.7593
## 3 33120.163 4896.977 5579.860 3823.372 945.4651 1620.1860
## 4 5830.214 15295.048 23449.167 1936.452 10361.6429 1912.7381
## 5 5043.434 2329.683 2786.138 2689.814 652.8276 849.8414
table(k$cluster)
##
## 1 2 3 4 5
## 83 108 43 42 145
rng<-2:20 #K from 2 to 20
tries <-100 #Run the K Means algorithm 100 times
avg.totw.ss <-integer(length(rng)) #Set up an empty vector to hold all of points
for(v in rng){ # For each value of the range variable
v.totw.ss <-integer(tries) #Set up an empty vector to hold the 100 tries
for(i in 1:tries){
k.temp <-kmeans(data.rm.top,centers=v) #Run kmeans
v.totw.ss[i] <-k.temp$tot.withinss#Store the total withinss
}
avg.totw.ss[v-1] <-mean(v.totw.ss) #Average the 100 total withinss
}
plot(rng,avg.totw.ss,type="b", main="Total Within SS by Various K",
ylab="Average Total Within Sum of Squares",
xlab="Value of K")
wine <- read.csv("C:/Users/charl/Documents/ANLY 530--Lab 3--wine.csv")
str(wine)
## 'data.frame': 178 obs. of 14 variables:
## $ Wine : int 1 1 1 1 1 1 1 1 1 1 ...
## $ Alcohol : num 14.2 13.2 13.2 14.4 13.2 ...
## $ Malic.acid : num 1.71 1.78 2.36 1.95 2.59 1.76 1.87 2.15 1.64 1.35 ...
## $ Ash : num 2.43 2.14 2.67 2.5 2.87 2.45 2.45 2.61 2.17 2.27 ...
## $ Acl : num 15.6 11.2 18.6 16.8 21 15.2 14.6 17.6 14 16 ...
## $ Mg : int 127 100 101 113 118 112 96 121 97 98 ...
## $ Phenols : num 2.8 2.65 2.8 3.85 2.8 3.27 2.5 2.6 2.8 2.98 ...
## $ Flavanoids : num 3.06 2.76 3.24 3.49 2.69 3.39 2.52 2.51 2.98 3.15 ...
## $ Nonflavanoid.phenols: num 0.28 0.26 0.3 0.24 0.39 0.34 0.3 0.31 0.29 0.22 ...
## $ Proanth : num 2.29 1.28 2.81 2.18 1.82 1.97 1.98 1.25 1.98 1.85 ...
## $ Color.int : num 5.64 4.38 5.68 7.8 4.32 6.75 5.25 5.05 5.2 7.22 ...
## $ Hue : num 1.04 1.05 1.03 0.86 1.04 1.05 1.02 1.06 1.08 1.01 ...
## $ OD : num 3.92 3.4 3.17 3.45 2.93 2.85 3.58 3.58 2.85 3.55 ...
## $ Proline : int 1065 1050 1185 1480 735 1450 1290 1295 1045 1045 ...
wssplot <- function(data, nc=15, seed=1234){
wss <- (nrow(data)-1)*sum(apply(data,2,var))
for (i in 2:nc){
set.seed(seed)
wss[i] <- sum(kmeans(data, centers=i)$withinss)}
plot(1:nc, wss, type="b", xlab="Number of Clusters",
ylab="Within groups sum of squares")}
df <- scale(wine[-1])
wssplot(df)
library(NbClust)
set.seed(1234)
nc <- NbClust(df, min.nc=2, max.nc = 15, method = "kmeans")
## *** : The Hubert index is a graphical method of determining the number of clusters.
## In the plot of Hubert index, we seek a significant knee that corresponds to a
## significant increase of the value of the measure i.e the significant peak in Hubert
## index second differences plot.
##
## *** : The D index is a graphical method of determining the number of clusters.
## In the plot of D index, we seek a significant knee (the significant peak in Dindex
## second differences plot) that corresponds to a significant increase of the value of
## the measure.
##
## *******************************************************************
## * Among all indices:
## * 4 proposed 2 as the best number of clusters
## * 15 proposed 3 as the best number of clusters
## * 1 proposed 10 as the best number of clusters
## * 1 proposed 12 as the best number of clusters
## * 1 proposed 14 as the best number of clusters
## * 1 proposed 15 as the best number of clusters
##
## ***** Conclusion *****
##
## * According to the majority rule, the best number of clusters is 3
##
##
## *******************************************************************
print(table(nc$Best.n[1, ]))
##
## 0 1 2 3 10 12 14 15
## 2 1 4 15 1 1 1 1
barplot(table(nc$Best.n[1,]), xlab = "Number of Clusters", ylab = "Number of Criteria", main = "Number of Clusters Chosen by 26 Criteria")
set.seed(1234)
fit.km <- kmeans(df, 3, nstart=25)
(df_km <- table(wine$Wine, fit.km$cluster))
##
## 1 2 3
## 1 59 0 0
## 2 3 65 3
## 3 0 0 48
(Accuracy <- (sum(diag(df_km))/sum(df_km)*100))
## [1] 96.62921
library(cluster)
clusplot(df, fit.km$cluster, main='2D representation of the Cluster solution',
color=TRUE, shade=TRUE,
labels=2, lines=0)
df_rpart <- data.frame(k = fit.km$cluster, df)
rdf <- df_rpart[sample(1:nrow(df_rpart)), ]
train <- rdf[1:(as.integer(.8*nrow(rdf))-1), ]
test <- rdf[(as.integer(.8*nrow(rdf))):nrow(rdf), ]
fit <- rpart(k ~., data = train, method = "class")
pred <- predict(fit, test, type = "class")
(news_tbl <- table(pred, test$k))
##
## pred 1 2 3
## 1 11 1 0
## 2 0 12 1
## 3 0 1 11
(Accuracy <- (sum(diag(news_tbl))/sum(news_tbl)*100))
## [1] 91.89189
wbcd <- read.csv("C:/Users/charl/Downloads/ANLY 530--Lab 3--wisc_bc_data.csv", stringsAsFactors = FALSE)
str(wbcd)
## 'data.frame': 569 obs. of 32 variables:
## $ id : int 87139402 8910251 905520 868871 9012568 906539 925291 87880 862989 89827 ...
## $ diagnosis : chr "B" "B" "B" "B" ...
## $ radius_mean : num 12.3 10.6 11 11.3 15.2 ...
## $ texture_mean : num 12.4 18.9 16.8 13.4 13.2 ...
## $ perimeter_mean : num 78.8 69.3 70.9 73 97.7 ...
## $ area_mean : num 464 346 373 385 712 ...
## $ smoothness_mean : num 0.1028 0.0969 0.1077 0.1164 0.0796 ...
## $ compactness_mean : num 0.0698 0.1147 0.078 0.1136 0.0693 ...
## $ concavity_mean : num 0.0399 0.0639 0.0305 0.0464 0.0339 ...
## $ points_mean : num 0.037 0.0264 0.0248 0.048 0.0266 ...
## $ symmetry_mean : num 0.196 0.192 0.171 0.177 0.172 ...
## $ dimension_mean : num 0.0595 0.0649 0.0634 0.0607 0.0554 ...
## $ radius_se : num 0.236 0.451 0.197 0.338 0.178 ...
## $ texture_se : num 0.666 1.197 1.387 1.343 0.412 ...
## $ perimeter_se : num 1.67 3.43 1.34 1.85 1.34 ...
## $ area_se : num 17.4 27.1 13.5 26.3 17.7 ...
## $ smoothness_se : num 0.00805 0.00747 0.00516 0.01127 0.00501 ...
## $ compactness_se : num 0.0118 0.03581 0.00936 0.03498 0.01485 ...
## $ concavity_se : num 0.0168 0.0335 0.0106 0.0219 0.0155 ...
## $ points_se : num 0.01241 0.01365 0.00748 0.01965 0.00915 ...
## $ symmetry_se : num 0.0192 0.035 0.0172 0.0158 0.0165 ...
## $ dimension_se : num 0.00225 0.00332 0.0022 0.00344 0.00177 ...
## $ radius_worst : num 13.5 11.9 12.4 11.9 16.2 ...
## $ texture_worst : num 15.6 22.9 26.4 15.8 15.7 ...
## $ perimeter_worst : num 87 78.3 79.9 76.5 104.5 ...
## $ area_worst : num 549 425 471 434 819 ...
## $ smoothness_worst : num 0.139 0.121 0.137 0.137 0.113 ...
## $ compactness_worst: num 0.127 0.252 0.148 0.182 0.174 ...
## $ concavity_worst : num 0.1242 0.1916 0.1067 0.0867 0.1362 ...
## $ points_worst : num 0.0939 0.0793 0.0743 0.0861 0.0818 ...
## $ symmetry_worst : num 0.283 0.294 0.3 0.21 0.249 ...
## $ dimension_worst : num 0.0677 0.0759 0.0788 0.0678 0.0677 ...
wbcd <- wbcd[-1]
table(wbcd$diagnosis)
##
## B M
## 357 212
wbcd$diagnosis <- factor(wbcd$diagnosis, levels = c("B", "M"), labels = c("Benign", "Malignant"))
round(prop.table(table(wbcd$diagnosis)) * 100, digits = 1)
##
## Benign Malignant
## 62.7 37.3
normalize <- function(x) {
return ((x - min(x)) / (max(x) - min(x)))
}
wbcd_n <- as.data.frame(lapply(wbcd[2:31], normalize))
summary(wbcd_n)
## radius_mean texture_mean perimeter_mean area_mean
## Min. :0.0000 Min. :0.0000 Min. :0.0000 Min. :0.0000
## 1st Qu.:0.2233 1st Qu.:0.2185 1st Qu.:0.2168 1st Qu.:0.1174
## Median :0.3024 Median :0.3088 Median :0.2933 Median :0.1729
## Mean :0.3382 Mean :0.3240 Mean :0.3329 Mean :0.2169
## 3rd Qu.:0.4164 3rd Qu.:0.4089 3rd Qu.:0.4168 3rd Qu.:0.2711
## Max. :1.0000 Max. :1.0000 Max. :1.0000 Max. :1.0000
## smoothness_mean compactness_mean concavity_mean points_mean
## Min. :0.0000 Min. :0.0000 Min. :0.00000 Min. :0.0000
## 1st Qu.:0.3046 1st Qu.:0.1397 1st Qu.:0.06926 1st Qu.:0.1009
## Median :0.3904 Median :0.2247 Median :0.14419 Median :0.1665
## Mean :0.3948 Mean :0.2606 Mean :0.20806 Mean :0.2431
## 3rd Qu.:0.4755 3rd Qu.:0.3405 3rd Qu.:0.30623 3rd Qu.:0.3678
## Max. :1.0000 Max. :1.0000 Max. :1.00000 Max. :1.0000
## symmetry_mean dimension_mean radius_se texture_se
## Min. :0.0000 Min. :0.0000 Min. :0.00000 Min. :0.0000
## 1st Qu.:0.2823 1st Qu.:0.1630 1st Qu.:0.04378 1st Qu.:0.1047
## Median :0.3697 Median :0.2439 Median :0.07702 Median :0.1653
## Mean :0.3796 Mean :0.2704 Mean :0.10635 Mean :0.1893
## 3rd Qu.:0.4530 3rd Qu.:0.3404 3rd Qu.:0.13304 3rd Qu.:0.2462
## Max. :1.0000 Max. :1.0000 Max. :1.00000 Max. :1.0000
## perimeter_se area_se smoothness_se compactness_se
## Min. :0.00000 Min. :0.00000 Min. :0.0000 Min. :0.00000
## 1st Qu.:0.04000 1st Qu.:0.02064 1st Qu.:0.1175 1st Qu.:0.08132
## Median :0.07209 Median :0.03311 Median :0.1586 Median :0.13667
## Mean :0.09938 Mean :0.06264 Mean :0.1811 Mean :0.17444
## 3rd Qu.:0.12251 3rd Qu.:0.07170 3rd Qu.:0.2187 3rd Qu.:0.22680
## Max. :1.00000 Max. :1.00000 Max. :1.0000 Max. :1.00000
## concavity_se points_se symmetry_se dimension_se
## Min. :0.00000 Min. :0.0000 Min. :0.0000 Min. :0.00000
## 1st Qu.:0.03811 1st Qu.:0.1447 1st Qu.:0.1024 1st Qu.:0.04675
## Median :0.06538 Median :0.2070 Median :0.1526 Median :0.07919
## Mean :0.08054 Mean :0.2235 Mean :0.1781 Mean :0.10019
## 3rd Qu.:0.10619 3rd Qu.:0.2787 3rd Qu.:0.2195 3rd Qu.:0.12656
## Max. :1.00000 Max. :1.0000 Max. :1.0000 Max. :1.00000
## radius_worst texture_worst perimeter_worst area_worst
## Min. :0.0000 Min. :0.0000 Min. :0.0000 Min. :0.00000
## 1st Qu.:0.1807 1st Qu.:0.2415 1st Qu.:0.1678 1st Qu.:0.08113
## Median :0.2504 Median :0.3569 Median :0.2353 Median :0.12321
## Mean :0.2967 Mean :0.3640 Mean :0.2831 Mean :0.17091
## 3rd Qu.:0.3863 3rd Qu.:0.4717 3rd Qu.:0.3735 3rd Qu.:0.22090
## Max. :1.0000 Max. :1.0000 Max. :1.0000 Max. :1.00000
## smoothness_worst compactness_worst concavity_worst points_worst
## Min. :0.0000 Min. :0.0000 Min. :0.00000 Min. :0.0000
## 1st Qu.:0.3000 1st Qu.:0.1163 1st Qu.:0.09145 1st Qu.:0.2231
## Median :0.3971 Median :0.1791 Median :0.18107 Median :0.3434
## Mean :0.4041 Mean :0.2202 Mean :0.21740 Mean :0.3938
## 3rd Qu.:0.4942 3rd Qu.:0.3025 3rd Qu.:0.30583 3rd Qu.:0.5546
## Max. :1.0000 Max. :1.0000 Max. :1.00000 Max. :1.0000
## symmetry_worst dimension_worst
## Min. :0.0000 Min. :0.0000
## 1st Qu.:0.1851 1st Qu.:0.1077
## Median :0.2478 Median :0.1640
## Mean :0.2633 Mean :0.1896
## 3rd Qu.:0.3182 3rd Qu.:0.2429
## Max. :1.0000 Max. :1.0000
wbcd_train <- wbcd_n[1:469, ]
wbcd_test <- wbcd_n[470:569, ]
wbcd_train_labels <- wbcd[1:469, 1]
wbcd_test_labels <- wbcd[470:569, 1]
wbcd_test_pred <- knn(train = wbcd_train, test = wbcd_test, cl=wbcd_train_labels, k = 21)
(wbcd_tbl <- table(wbcd_test_pred, wbcd_test_labels))
## wbcd_test_labels
## wbcd_test_pred Benign Malignant
## Benign 61 2
## Malignant 0 37
(Accuracy <- (wbcd_tbl[1]+wbcd_tbl[4])/sum(wbcd_tbl)*100)
## [1] 98
news <- read.csv("C:/Users/charl/Documents/ANLY 530--Lab 3OnlineNewsPopularity (3).csv")
newsShort <- data.frame(news$n_tokens_title, news$n_tokens_content, news$n_unique_tokens, news$n_non_stop_words, news$num_hrefs, news$num_imgs, news$num_videos, news$average_token_length, news$num_keywords, news$kw_max_max, news$global_sentiment_polarity, news$avg_positive_polarity, news$title_subjectivity, news$title_sentiment_polarity, news$abs_title_subjectivity, news$abs_title_sentiment_polarity, news$shares)
colnames(newsShort) <- c("n_tokens_title", "n_tokens_content", "n_unique_tokens", "n_non_stop_words", "num_hrefs", "num_imgs", "num_videos", "average_token_length", "num_keywords", "kw_max_max", "global_sentiment_polarity", "avg_positive_polarity", "title_subjectivity", "title_sentiment_polarity", "abs_title_subjectivity", "abs_title_sentiment_polarity", "shares")
newsShort$popular = rep('na', nrow(newsShort))
for(i in 1:39644) {
if(newsShort$shares[i] >= 1400) {
newsShort$popular[i] = "yes"}
else {newsShort$popular[i] = "no"}
}
newsShort$shares <- as.factor(newsShort$shares)
newsShort <- newsShort[-18]
news_n <- as.data.frame(lapply(newsShort[1:16], normalize))
news_rand <- news_n[order(runif(10000)), ]
set.seed(12345)
#Split the data into training and test datasets
news_train <- news_n[1:9000, ]
news_test <- news_n[9001:10000, ]
news_train_labels <- newsShort[1:9000, 17]
news_test_labels <- newsShort[9001:10000, 17]
news_test_pred <- knn(train = news_train, test = news_test, cl = news_train_labels, k= 5)
(news_tbl <- table(news_test_pred, news_test_labels))
## news_test_labels
## news_test_pred no yes
## no 225 174
## yes 322 279
(Accuracy <- (news_tbl[1] + news_tbl[4])/sum(news_tbl)*100)
## [1] 50.4
news_test_pred1 <- knn(train = news_train, test = news_test, cl = news_train_labels, k= 95)
(news_tbl_1 <- table(news_test_pred1, news_test_labels))
## news_test_labels
## news_test_pred1 no yes
## no 108 62
## yes 439 391
How about the accuracy
(Accuracy <- (news_tbl_1[1] + news_tbl_1[4])/sum(news_tbl_1)*100)
## [1] 49.9