This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown see http://rmarkdown.rstudio.com.
When you click the Knit button, a document will be generated that includes both the content and the output of any embedded R code chunks within the document. You can embed an R code chunk like this:
summary(cars)
## speed dist
## Min. : 4.0 Min. : 2.00
## 1st Qu.:12.0 1st Qu.: 26.00
## Median :15.0 Median : 36.00
## Mean :15.4 Mean : 42.98
## 3rd Qu.:19.0 3rd Qu.: 56.00
## Max. :25.0 Max. :120.00
You can also embed plots, for example:
Note that the echo = FALSE parameter was added to the
code chunk to prevent printing of the R code that generated the
plot.
# Attach readr for read_csv(), then import the wisc_bc_data CSV file.
# NOTE(review): the path below is an absolute path on a local drive, so the
# document only knits on a machine where that exact file exists.
library(readr)
wisc_bc_data <- read_csv(
  file = "D:/MS Sem 2/Data Minning/Class Work/wisc_bc_data.csv"
)
## Rows: 569 Columns: 32
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (1): diagnosis
## dbl (31): id, radius_mean, texture_mean, perimeter_mean, area_mean, smoothne...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Open the imported data in the data viewer.
View(wisc_bc_data)

# Work on a copy of the imported data and inspect its structure.
wd <- wisc_bc_data
str(wd)

# Drop the first column (presumably the id column, which carries no
# predictive information -- confirm against the str() output above).
wd1 <- wd[-1]
View(wd1)
wd1$diagnosis

# Recode diagnosis from the codes "B" / "M" into a labelled factor.
wd1$diagnosis <- factor(
  wd1$diagnosis,
  levels = c("B", "M"),
  labels = c("Benign", "Malignant")
)
str(wd1$diagnosis)

# Class counts and class proportions.
table(wd1$diagnosis)
prop.table(table(wd1$diagnosis))
# Summary of three features on their original scales.
summary(wd1[c("radius_mean", "area_mean", "smoothness_mean")])

# Min-max normalization: rescale a numeric vector linearly so that its
# minimum maps to 0 and its maximum maps to 1.
#
# x: a numeric vector.
# Returns a numeric vector of the same length as x. If all values of x are
# equal, max(x) - min(x) is 0 and the result is NaN (0 / 0).
normalize <- function(x) {
  (x - min(x)) / (max(x) - min(x))
}

# Sanity checks: both calls should return 0.00 0.25 0.50 0.75 1.00.
normalize(c(1, 2, 3, 4, 5))
normalize(c(10, 20, 30, 40, 50))

# Apply normalize() to columns 2 to 31 of wd1 with lapply() and store the
# result as a data frame in wd_n.
wd_n <- as.data.frame(lapply(wd1[2:31], normalize))
View(wd_n)

# The same three features after normalization.
summary(wd_n[c("radius_mean", "area_mean", "smoothness_mean")])
# Split the normalized features: rows 1-469 for training, rows 470-569 for
# testing.
wd_train <- wd_n[1:469, ]
wd_test <- wd_n[470:569, ]
View(wd_train)

# Class labels for the same rows. They are extracted with `$` so that they
# are plain vectors: wd1 comes from read_csv() and is a tibble, so
# wd1[1:469, 1] would be a one-column tibble, which knn() rejects as `cl`
# because its length does not match the number of training rows.
wd_train_labels <- wd1$diagnosis[1:469]
wd_test_labels <- wd1$diagnosis[470:569]

head(wd_test_labels)
head(wd_train_labels)
# View(wd_train_labels)

# The data is now ready for classification; the only thing left to specify
# is the number of neighbors to include in the vote. As the training set has
# 469 instances, we try k = 21, an odd number roughly equal to the square
# root of 469. Using an odd number reduces the chance of a tied vote.
# General form of the call:
#   knn_model <- knn(train = X_train, test = X_test, cl = y_train, k = k)
sqrt(469)

# Classify the test rows with k-nearest neighbors.
library(class)
wd_test_pred <- knn(
  train = wd_train,
  test = wd_test,
  cl = wd_train_labels,
  k = 21
)
wd_test_pred

# Cross-tabulate the actual labels against the predicted labels.
library(gmodels)
CrossTable(x = wd_test_labels, y = wd_test_pred, prop.chisq = FALSE)