df1<-read.csv("C:/Users/pradeep/OneDrive/dm,cns,cp and JKC/data mining/dm files 2020 passouts/lab/sheet1.csv")
head(df1)
## Sl.No Reg.No Name SSC.. Inter.. B.Tech.. Cap.Non.CAP
## 1 1 RAJ 16 90 88 75 TRUE
## 2 2 RAKHI 13 98 90 85 FALSE
## 3 3 RAM 23 80 80 65 FALSE
## 4 4 SAI 34 85 85 80 TRUE
df2<-read.csv("C:/Users/pradeep/OneDrive/dm,cns,cp and JKC/data mining/dm files 2020 passouts/lab/sheet2.csv")
head(df2)
## Sl.No Reg.No Name SSC.. Inter.. B.Tech.. Cap.Non.CAP
## 1 1 PREETHAM 5B4 93 91 75 TRUE
## 2 2 VARMA 5B5 98 97 81 TRUE
## 3 3 KARTHIK 5B6 98 98 85 TRUE
df<-merge(df1,df2,all=TRUE)
df
## Sl.No Reg.No Name SSC.. Inter.. B.Tech.. Cap.Non.CAP
## 1 1 RAJ 16 90 88 75 TRUE
## 2 1 PREETHAM 5B4 93 91 75 TRUE
## 3 2 RAKHI 13 98 90 85 FALSE
## 4 2 VARMA 5B5 98 97 81 TRUE
## 5 3 RAM 23 80 80 65 FALSE
## 6 3 KARTHIK 5B6 98 98 85 TRUE
## 7 4 SAI 34 85 85 80 TRUE
# here, in c(1,2) c stands for concatenate. And is used for combing more than 1 entity
df[c(1,2),]
## Sl.No Reg.No Name SSC.. Inter.. B.Tech.. Cap.Non.CAP
## 1 1 RAJ 16 90 88 75 TRUE
## 2 1 PREETHAM 5B4 93 91 75 TRUE
# You can also use head command: head(df,2)
write.csv(df,“df.csv”)
# To write the file use command: write.csv(df,"df.csv")
df3<-read.csv("C:/Users/pradeep/OneDrive/dm,cns,cp and JKC/data mining/dm files 2020 passouts/lab/sheet3.csv")
df3
## Sl.No Roll.No Name SSC inter B.Tech.perc CAP..NOT.CAP
## 1 1 11PA1A0501 AVINASH 87.3 65.3 40.00 CAP
## 2 2 11PA1A0503 KEERTHANA 89.0 NA 71.45 NOT_CAP
## 3 3 11PA1A0504 PRAVALIKA 67.0 68.0 45.26 NOT_CAP
## 4 4 11PA1A0505 SAI BABA NA 67.0 NA CAP
## 5 5 11PA1A0506 SRINIVAS 67.0 65.5 42.52 CAP
Now replace the missing values in Inter percentage
# Here, na.rm=TRUE because we want to calculate mean without considering NA. If this is not used, then we get an error.
df3$inter[is.na(df3$inter)]<-mean(df3$inter,na.rm=TRUE)
df3
## Sl.No Roll.No Name SSC inter B.Tech.perc CAP..NOT.CAP
## 1 1 11PA1A0501 AVINASH 87.3 65.30 40.00 CAP
## 2 2 11PA1A0503 KEERTHANA 89.0 66.45 71.45 NOT_CAP
## 3 3 11PA1A0504 PRAVALIKA 67.0 68.00 45.26 NOT_CAP
## 4 4 11PA1A0505 SAI BABA NA 67.00 NA CAP
## 5 5 11PA1A0506 SRINIVAS 67.0 65.50 42.52 CAP
# Use prefix minus ( - ) before the row number to remove that row
df3[-4,]
## Sl.No Roll.No Name SSC inter B.Tech.perc CAP..NOT.CAP
## 1 1 11PA1A0501 AVINASH 87.3 65.30 40.00 CAP
## 2 2 11PA1A0503 KEERTHANA 89.0 66.45 71.45 NOT_CAP
## 3 3 11PA1A0504 PRAVALIKA 67.0 68.00 45.26 NOT_CAP
## 5 5 11PA1A0506 SRINIVAS 67.0 65.50 42.52 CAP