What is Data Merging?
data_source1 = data.frame(StudentID = c(1007:1016), Game = sample(c('FootBall','IceHockey', 'Basketball'), 10, replace = TRUE))
data_source2 = data.frame(Minor_Injuries = sample(c('Yes', 'No'), 10, replace = TRUE))
data_source3 = data.frame(StudentID = c(3018:3022), Game = sample(c('Boxing','FieldHockey', 'WaterPool'), 5, replace = TRUE))
data_source4 = data.frame(StudentID = c(1011:1014), Major = sample(c('Computer Science', 'Computer Engineering', 'BioMedical'), 4, replace = TRUE))
-Checking Column names Each dataset
[1] "StudentID" "Game"
[1] "Minor_Injuries"
[1] "StudentID" "Game"
[1] "StudentID" "Major"
-Dimensions of each dataset
[1] 10 2
[1] 10 1
[1] 5 2
[1] 4 2
-str of each dataset
'data.frame': 10 obs. of 2 variables:
$ StudentID: int 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016
$ Game : Factor w/ 3 levels "Basketball","FootBall",..: 1 2 2 3 1 2 2 2 1 1
'data.frame': 10 obs. of 1 variable:
$ Minor_Injuries: Factor w/ 2 levels "No","Yes": 1 2 2 1 1 1 2 2 2 2
'data.frame': 5 obs. of 2 variables:
$ StudentID: int 3018 3019 3020 3021 3022
$ Game : Factor w/ 3 levels "Boxing","FieldHockey",..: 2 2 3 3 1
'data.frame': 5 obs. of 2 variables:
$ StudentID: int 3018 3019 3020 3021 3022
$ Game : Factor w/ 3 levels "Boxing","FieldHockey",..: 2 2 3 3 1
-What is cbind Merging?
cbind(data_source1, data_source2)
StudentID Game Minor_Injuries
1 1007 Basketball No
2 1008 FootBall Yes
3 1009 FootBall Yes
4 1010 IceHockey No
5 1011 Basketball No
6 1012 FootBall No
7 1013 FootBall Yes
8 1014 FootBall Yes
9 1015 Basketball Yes
10 1016 Basketball Yes
-What is Rbind Merging?
rbind(data_source1, data_source3)
StudentID Game
1 1007 Basketball
2 1008 FootBall
3 1009 FootBall
4 1010 IceHockey
5 1011 Basketball
6 1012 FootBall
7 1013 FootBall
8 1014 FootBall
9 1015 Basketball
10 1016 Basketball
11 3018 FieldHockey
12 3019 FieldHockey
13 3020 WaterPool
14 3021 WaterPool
15 3022 Boxing
-What is Inner Join?
merge(x = data_source1, y = data_source4, by = "StudentID", all = TRUE)
StudentID Game Major
1 1007 Basketball <NA>
2 1008 FootBall <NA>
3 1009 FootBall <NA>
4 1010 IceHockey <NA>
5 1011 Basketball Computer Science
6 1012 FootBall Computer Engineering
7 1013 FootBall Computer Engineering
8 1014 FootBall Computer Science
9 1015 Basketball <NA>
10 1016 Basketball <NA>
-What is Left outer?
merge(x = data_source1, y = data_source4, by = "StudentID", all.x = TRUE)
StudentID Game Major
1 1007 Basketball <NA>
2 1008 FootBall <NA>
3 1009 FootBall <NA>
4 1010 IceHockey <NA>
5 1011 Basketball Computer Science
6 1012 FootBall Computer Engineering
7 1013 FootBall Computer Engineering
8 1014 FootBall Computer Science
9 1015 Basketball <NA>
10 1016 Basketball <NA>
-What is Right Join?
merge(x = data_source1, y = data_source4, by = "StudentID", all.y = TRUE)
StudentID Game Major
1 1011 Basketball Computer Science
2 1012 FootBall Computer Engineering
3 1013 FootBall Computer Engineering
4 1014 FootBall Computer Science
-What is Outer Join?
merge(x = data_source1, y = data_source4, by = NULL)
StudentID.x Game StudentID.y Major
1 1007 Basketball 1011 Computer Science
2 1008 FootBall 1011 Computer Science
3 1009 FootBall 1011 Computer Science
4 1010 IceHockey 1011 Computer Science
5 1011 Basketball 1011 Computer Science
6 1012 FootBall 1011 Computer Science
7 1013 FootBall 1011 Computer Science
8 1014 FootBall 1011 Computer Science
9 1015 Basketball 1011 Computer Science
10 1016 Basketball 1011 Computer Science
11 1007 Basketball 1012 Computer Engineering
12 1008 FootBall 1012 Computer Engineering
13 1009 FootBall 1012 Computer Engineering
14 1010 IceHockey 1012 Computer Engineering
15 1011 Basketball 1012 Computer Engineering
16 1012 FootBall 1012 Computer Engineering
17 1013 FootBall 1012 Computer Engineering
18 1014 FootBall 1012 Computer Engineering
19 1015 Basketball 1012 Computer Engineering
20 1016 Basketball 1012 Computer Engineering
21 1007 Basketball 1013 Computer Engineering
22 1008 FootBall 1013 Computer Engineering
23 1009 FootBall 1013 Computer Engineering
24 1010 IceHockey 1013 Computer Engineering
25 1011 Basketball 1013 Computer Engineering
26 1012 FootBall 1013 Computer Engineering
27 1013 FootBall 1013 Computer Engineering
28 1014 FootBall 1013 Computer Engineering
29 1015 Basketball 1013 Computer Engineering
30 1016 Basketball 1013 Computer Engineering
31 1007 Basketball 1014 Computer Science
32 1008 FootBall 1014 Computer Science
33 1009 FootBall 1014 Computer Science
34 1010 IceHockey 1014 Computer Science
35 1011 Basketball 1014 Computer Science
36 1012 FootBall 1014 Computer Science
37 1013 FootBall 1014 Computer Science
38 1014 FootBall 1014 Computer Science
39 1015 Basketball 1014 Computer Science
40 1016 Basketball 1014 Computer Science
install.packages("dplyr",repos = "http://cran.us.r-project.org")
The downloaded binary packages are in
/var/folders/4z/9kr7k7jd1q7gc792xjkrwf040000gn/T//RtmpHyokKA/downloaded_packages
library(ggplot2)
tail(InsectSprays)
count spray
67 13 F
68 10 F
69 26 F
70 26 F
71 24 F
72 13 F
Stents are devices put inside blood vessels that assist in patient recovery after cardiac and reduce the risk of an additional heart attack or death.
getwd()
[1] "/Users/janakiramsundaraneedi/Desktop/dataVis"
setwd("/Users/janakiramsundaraneedi/Desktop/dataVis")
stent30 = read.table("stent30.txt", sep="\t", fill=FALSE, strip.white=TRUE)
stent30=stent30[-1,]
colnames(stent30)<-c("Group1","days30")
stent365 = read.table("stent365.txt", sep="\t", fill=FALSE, strip.white=TRUE)
stent365=stent365[-1,]
colnames(stent365)<-c("Group1","days365")
dim(stent30)
[1] 451 2
head(stent30)
Group1 days30
2 treatment stroke
3 treatment stroke
4 treatment stroke
5 treatment stroke
6 treatment stroke
7 treatment stroke
dim(stent365)
[1] 451 2
tail(stent365)
Group1 days365
447 control no event
448 control no event
449 control no event
450 control no event
451 control no event
452 control no event
dim(stent365)
[1] 451 2
tail(stent365)
Group1 days365
447 control no event
448 control no event
449 control no event
450 control no event
451 control no event
452 control no event
install.packages("sqldf", repos = "http://cran.us.r-project.org")
The downloaded binary packages are in
/var/folders/4z/9kr7k7jd1q7gc792xjkrwf040000gn/T//RtmpHyokKA/downloaded_packages
library(sqldf)
ts_30<-sqldf("select count() as treatment_stroke_30 from stent30 where days30='stroke' and Group1='treatment'")
print(ts_30)
treatment_stroke_30
1 33
cs_30<-sqldf("select count() as control_stroke_30 from stent30 where days30='stroke' and Group1='control'")
print(cs_30)
control_stroke_30
1 13
tne_30<-sqldf("select count() as treatment_stroke_no from stent30 where days30='no event' and Group1='treatment'")
print(tne_30)
treatment_stroke_no
1 191
cne_30<-sqldf("select count() as control_stroke_no from stent30 where days30='no event' and Group1='control'")
print(cne_30)
control_stroke_no
1 214
ts_365<-sqldf("select count() as treatment_stroke_365 from stent365 where days365='stroke' and Group1='treatment'")
print(ts_365)
treatment_stroke_365
1 45
cs_365<-sqldf("select count() as control_stroke_365 from stent365 where days365='stroke' and Group1='control'")
print(cs_365)
control_stroke_365
1 28
tne_no_365<-sqldf("select count() as treatment_stroke_no from stent365 where days365='no event' and Group1='treatment'")
print(tne_no_365)
treatment_stroke_no
1 179
cne_no_365<-sqldf("select count() as control_stroke_no from stent365 where days365='no event' and Group1='control'")
print(cne_no_365)
control_stroke_no
1 199
total_num_treat<-sqldf("select count() as total_Num_treat from stent365 where Group1='treatment'")
print(total_num_treat)
total_Num_treat
1 224
total_num_control<-sqldf("select count() as total_Num_control from stent365 where Group1='control'")
print(total_num_control)
total_Num_control
1 227
print(floor(((ts_365)/total_num_treat)*100))
treatment_stroke_365
1 20
print(floor(((cs_365)/total_num_control)*100))
control_stroke_365
1 12