library(latexpdf)
library(latexpdf)
## Warning: package 'latexpdf' was built under R version 3.4.2
library(readr)
library(readr)
custchurn <- read.csv(“Train.csv”) df.churn <- custchurn custdemo <- read.csv(“Train_Demographics.csv”) df.demo <- custdemo
custchurn <- read.csv("Train (3).csv")
df.churn <- custchurn
custdemo <- read.csv("Train_Demographics.csv")
df.demo <- custdemo
library(sqldf) #Manipulate R dataframes using SQL
library(sqldf) #Manipulate R dataframes using SQL
## Loading required package: gsubfn
## Warning: package 'gsubfn' was built under R version 3.4.4
## Loading required package: proto
## Warning in doTryCatch(return(expr), name, parentenv, handler): unable to load shared object '/Library/Frameworks/R.framework/Resources/modules//R_X11.so':
## dlopen(/Library/Frameworks/R.framework/Resources/modules//R_X11.so, 6): Library not loaded: /opt/X11/lib/libSM.6.dylib
## Referenced from: /Library/Frameworks/R.framework/Resources/modules//R_X11.so
## Reason: image not found
## Could not load tcltk. Will use slower R code instead.
## Loading required package: RSQLite
## Warning: package 'RSQLite' was built under R version 3.4.4
M1 <- merge.data.frame(df.churn,df.demo,by.x=“CustomerID”,by.y=“HouseholdID”)
M1 <- merge.data.frame(df.churn,df.demo,by.x="CustomerID",by.y="HouseholdID")
require(sqldf) M2 <- sqldf(“SELECT * FROM custchurn JOIN custdemo ON custchurn.CustomerID = custdemo.HouseholdID ORDER BY CustomerID”)
require(sqldf)
M2 <- sqldf("SELECT * FROM custchurn JOIN custdemo ON custchurn.CustomerID = custdemo.HouseholdID ORDER BY CustomerID")
M2 <- M2[,-3]
M2 <- M2[,-3]
identical(M1,M2) #boolean value of TRUE
identical(M1,M2) #boolean value of TRUE
## [1] TRUE
str(M2) colnames(M2) head(M2)
str(M2)
## 'data.frame': 5298 obs. of 9 variables:
## $ CustomerID : Factor w/ 5298 levels "Cust1201","Cust1202",..: 1 2 3 4 5 6 7 8 9 10 ...
## $ Churn : Factor w/ 2 levels "No","Yes": 1 1 1 1 2 1 1 1 1 1 ...
## $ Country : Factor w/ 2 levels "?","India": 2 2 2 2 2 2 2 2 2 2 ...
## $ State : Factor w/ 2 levels "?","Maharashtra": 2 2 2 2 2 2 2 2 2 2 ...
## $ Retired : int 0 0 0 0 0 0 0 0 0 0 ...
## $ HasPartner : int 2 2 2 2 2 2 2 2 2 1 ...
## $ HasDependents: int 2 2 2 1 2 2 2 2 2 1 ...
## $ Education : Factor w/ 6 levels "","Graduation",..: 4 6 6 6 6 3 2 6 3 6 ...
## $ Gender : Factor w/ 3 levels "","Female","Male": 3 3 3 3 2 3 3 2 2 2 ...
colnames(M2)
## [1] "CustomerID" "Churn" "Country" "State"
## [5] "Retired" "HasPartner" "HasDependents" "Education"
## [9] "Gender"
head(M2)
## CustomerID Churn Country State Retired HasPartner HasDependents
## 1 Cust1201 No India Maharashtra 0 2 2
## 2 Cust1202 No India Maharashtra 0 2 2
## 3 Cust1203 No India Maharashtra 0 2 2
## 4 Cust1204 No India Maharashtra 0 2 1
## 5 Cust1205 Yes India Maharashtra 0 2 2
## 6 Cust1206 No India Maharashtra 0 2 2
## Education Gender
## 1 Masters Male
## 2 ProfessionalQalification Male
## 3 ProfessionalQalification Male
## 4 ProfessionalQalification Male
## 5 ProfessionalQalification Female
## 6 Highschool or below Male