This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown see http://rmarkdown.rstudio.com.
When you click the **Knit** button, a document will be generated that includes both the content and the output of any embedded R code chunks within the document. You can embed an R code chunk like this:
# Print per-column summary statistics for the built-in `cars` data set.
summary(cars)
## speed dist
## Min. : 4.0 Min. : 2.00
## 1st Qu.:12.0 1st Qu.: 26.00
## Median :15.0 Median : 36.00
## Mean :15.4 Mean : 42.98
## 3rd Qu.:19.0 3rd Qu.: 56.00
## Max. :25.0 Max. :120.00
You can also embed plots, for example:
Note that the `echo = FALSE` parameter was added to the code chunk to prevent printing of the R code that generated the plot.
# Show the working directory; the relative path "iris.csv" below is resolved
# against it.
getwd()
## [1] "C:/Users/prasnaya/Desktop/Personal/python/Data_Science/DS With R-Saharan/Projects"
# Read the iris measurements. stringsAsFactors is set explicitly because its
# default changed to FALSE in R 4.0.0; without it Species is read as character
# and no longer matches the factor structure reported by str() below.
data <- read.csv("iris.csv", stringsAsFactors = TRUE)
# Inspect column names, types and the first few values.
str(data)
## 'data.frame': 150 obs. of 6 variables:
## $ Id : int 1 2 3 4 5 6 7 8 9 10 ...
## $ SepalLengthCm: num 5.1 4.9 4.7 4.6 5 5.4 4.6 5 4.4 4.9 ...
## $ SepalWidthCm : num 3.5 3 3.2 3.1 3.6 3.9 3.4 3.4 2.9 3.1 ...
## $ PetalLengthCm: num 1.4 1.4 1.3 1.5 1.4 1.7 1.4 1.5 1.4 1.5 ...
## $ PetalWidthCm : num 0.2 0.2 0.2 0.2 0.2 0.4 0.3 0.2 0.2 0.1 ...
## $ Species : Factor w/ 3 levels "Iris-setosa",..: 1 1 1 1 1 1 1 1 1 1 ...
# Fix the RNG seed so the shuffle below is reproducible.
set.seed(1000)
# Draw one uniform random key per row, then reorder the rows by that key.
rand_run <- runif(n = nrow(data))
data <- data[order(rand_run), ]
# Peek at the first rows of the shuffled data.
head(data)
#install.packages("ggplot2")
library(ggplot2)
# Sepal dimensions: length on x, width on y, colour and shape by species.
scatter <- ggplot(
  data = data,
  mapping = aes(x = SepalLengthCm, y = SepalWidthCm)
)
scatter +
  geom_point(mapping = aes(color = Species, shape = Species)) +
  labs(x = "Sepal Length", y = "Sepal Width", title = "Sepal Length-Width")
# Petal dimensions: length on x, width on y, colour and shape by species.
scatter <- ggplot(
  data = data,
  mapping = aes(x = PetalLengthCm, y = PetalWidthCm)
)
scatter +
  geom_point(mapping = aes(color = Species, shape = Species)) +
  labs(x = "Petal Length", y = "Petal Width", title = "Petal Length-Width")
library(reshape2)
# Reshape to long format: Id and Species are kept as identifiers and every
# other column is stacked into `variable` / `value` pairs, so all measurements
# can share one value axis. `id.vars` is spelled out in full rather than
# relying on partial matching of `id`.
data1 <- melt(data, id.vars = c("Id", "Species"))
data1
# Dodged bar chart: species on x, measurement value on y, one fill colour per
# measurement.
# NOTE(review): with stat = "identity" each row is drawn as its own bar, so
# rows sharing a species/measurement presumably overlap - confirm this is the
# intended picture rather than a per-group summary.
bar1 <- ggplot(data = data1, aes(x = Species, y = value, fill = variable))
bar1 +
  geom_bar(stat = "identity", position = "dodge") +
  scale_fill_manual(
    values = c("orange", "blue", "darkgreen", "purple"),
    name = "Measurements",
    breaks = c("SepalLengthCm", "SepalWidthCm", "PetalLengthCm", "PetalWidthCm"),
    labels = c("Sepal Length", "Sepal Width", "Petal Length", "Petal Width")
  )
# Min-max scale values to the [0, 1] range.
#
# x: a numeric vector, or a data frame of numeric columns.
# Returns an object shaped like `x`. A vector is scaled by its own minimum and
# maximum; a data frame is scaled one column at a time, so every column ends
# up spanning 0 to 1.
#
# NOTE: any summary output rendered for data-frame input before this change
# reflects scaling by a single global range; re-knit to refresh it.
normalize_fun <- function(x) {
  if (is.data.frame(x)) {
    # min()/max() on a data frame collapse over all columns at once, which
    # would scale every feature by one shared range. Recurse per column and
    # assign through `x[]` so names and row names are kept.
    x[] <- lapply(x, normalize_fun)
    return(x)
  }
  rng <- range(x)
  (x - rng[1]) / (rng[2] - rng[1])
}
# Scale the four measurement columns (columns 2 to 5 of `data`) and inspect
# the distribution of the scaled values.
normalized_data <- normalize_fun(data[, 2:5])
summary(normalized_data)
## SepalLengthCm SepalWidthCm PetalLengthCm PetalWidthCm
## Min. :0.5385 Min. :0.2436 Min. :0.1154 Min. :0.00000
## 1st Qu.:0.6410 1st Qu.:0.3462 1st Qu.:0.1923 1st Qu.:0.02564
## Median :0.7308 Median :0.3718 Median :0.5449 Median :0.15385
## Mean :0.7363 Mean :0.3787 Mean :0.4691 Mean :0.14085
## 3rd Qu.:0.8077 3rd Qu.:0.4103 3rd Qu.:0.6410 3rd Qu.:0.21795
## Max. :1.0000 Max. :0.5513 Max. :0.8718 Max. :0.30769
# Print the full normalized data frame.
normalized_data
library(class)
require(class)
# Hold-out split by row position: the first 130 rows train the model and the
# last 20 rows test it. The ranges are defined once and reused so features and
# labels cannot drift apart.
train_rows <- 1:130
test_rows <- 131:150

# Feature sets using every normalized measurement column.
data_training <- normalized_data[train_rows, ]
data_testing <- normalized_data[test_rows, ]

# Feature sets restricted to the first two normalized columns
# (SepalLengthCm and SepalWidthCm).
data_training1 <- normalized_data[train_rows, 1:2]
data_testing1 <- normalized_data[test_rows, 1:2]

# Species labels for the same rows, taken from the un-normalized data.
# Species is column 6 of `data`; it is selected by name for clarity.
data_training_target <- data[train_rows, "Species"]
data_testing_target <- data[test_rows, "Species"]
# Square root of the number of rows, printed for reference; the models below
# use k = 13.
sqrt(nrow(data))
## [1] 12.24745
# k-nearest-neighbour classifier trained on the two sepal features only.
m1_2_feature <- knn(
  train = data_training1,
  test = data_testing1,
  cl = data_training_target,
  k = 13
)
# Confusion matrix: actual species in rows, predicted species in columns.
table(data_testing_target, m1_2_feature)
## m1_2_feature
## data_testing_target Iris-setosa Iris-versicolor Iris-virginica
## Iris-setosa 3 0 0
## Iris-versicolor 0 3 4
## Iris-virginica 0 2 8
# Same classifier settings, trained on all four normalized measurements.
m1_4_feature <- knn(
  train = data_training,
  test = data_testing,
  cl = data_training_target,
  k = 13
)
# Confusion matrix: actual species in rows, predicted species in columns.
table(data_testing_target, m1_4_feature)
## m1_4_feature
## data_testing_target Iris-setosa Iris-versicolor Iris-virginica
## Iris-setosa 3 0 0
## Iris-versicolor 0 7 0
## Iris-virginica 0 1 9