# Load the Landsat training samples from the hosted CSV. The preview below
# shows a CLASS_NAME label column followed by Band1-Band4 values.
df <- read.csv(
  file = "https://raw.githubusercontent.com/tuyenhavan/Statistics/Dataset/Training_Data_Landsat.csv"
)
# Preview the first rows to check that the import worked.
head(df)
## CLASS_NAME Band1 Band2 Band3 Band4
## 1 Agriculture 0.06729216 0.08459798 0.12928760 0.2104856
## 2 Agriculture 0.04200000 0.05210000 0.04279999 0.2297000
## 3 Agriculture 0.03580000 0.04220000 0.03300000 0.2272000
## 4 Agriculture 0.04699649 0.06019877 0.05739571 0.2275789
## 5 Agriculture 0.03629445 0.04429223 0.03219017 0.2519825
## 6 Agriculture 0.04999718 0.08369680 0.07459542 0.3266895
Select 200 random observations for each land-cover class
# Draw `n` random rows (without replacement) from `data` whose CLASS_NAME
# equals `class_name`.
#
# Positions are drawn with sample.int() and then mapped back to row indices.
# Calling sample() directly on the index vector is unsafe: when the vector has
# length one, sample() treats the single number k as the population 1:k and
# would return rows belonging to other classes.
#
# Stops with an informative error when the class has fewer than `n` rows.
sample_class <- function(data, class_name, n = 200L) {
  idx <- which(data[["CLASS_NAME"]] == class_name)
  if (length(idx) < n) {
    stop(
      "Class '", class_name, "' has only ", length(idx),
      " rows; cannot sample ", n, " without replacement.",
      call. = FALSE
    )
  }
  data[idx[sample.int(length(idx), n)], ]
}

# The draws are random, so the selected rows differ between runs unless a
# seed is set beforehand.
Agriculture <- sample_class(df, "Agriculture")
Forest <- sample_class(df, "Forest")
Mining <- sample_class(df, "Mining")
Urban <- sample_class(df, "Urban")
Water <- sample_class(df, "Water")

# Stack the five class samples into one table; rbind() on data frames already
# returns a data frame, so no further conversion is needed.
df1 <- rbind(Agriculture, Forest, Mining, Urban, Water)
# Replace the row indices carried over from `df` with a clean 1..nrow sequence.
row.names(df1) <- seq_len(nrow(df1))
# Rename the first column (CLASS_NAME) to Land_cover.
names(df1)[1] <- "Land_cover"
head(df1)
## Land_cover Band1 Band2 Band3 Band4
## 1 Agriculture 0.05519275 0.07209589 0.07729418 0.2300969
## 2 Agriculture 0.06419898 0.07549375 0.08009326 0.1748873
## 3 Agriculture 0.07209630 0.08759890 0.11598737 0.2121774
## 4 Agriculture 0.05869728 0.07159732 0.09879190 0.1979835
## 5 Agriculture 0.05789157 0.07389165 0.09859204 0.1877957
## 6 Agriculture 0.05229886 0.06319486 0.07048754 0.1560777