city<-data.frame(
city=I(c("Chicago","Seattle","New York","San Francisco", "Los Angeles")),
state=I(c("IL","WA","NY","CA","CA")),
population=c(2715000,634535,8337000,825863,3858000),
coastal=c(F,F,T,T,T))
weather<-data.frame(
city=I(c("Chicago","Washington","Seattle","New York","Kansas City","Atlanta","San Francisco", "Los Angeles")),
average_high=c(57,62,58,57,59,63,64,72),
average_low=c(50,55,53,49,50,59,57,68),
inch_snow=c(T,T,F,T,T,F,F,F)
)
Check Data
city
weather
“I” function will stop R from turning string to factors (ex. city names). Similar to stringsAsFactors=F
MATCH
“match” function to match same name
city$city
[1] "Chicago" "Seattle" "New York" "San Francisco" "Los Angeles"
weather$city
[1] "Chicago" "Washington" "Seattle" "New York" "Kansas City"
[6] "Atlanta" "San Francisco" "Los Angeles"
Match tables
match(city$city, weather$city)
[1] 1 3 4 7 8
Subset
create subset to extract values
weather$city[match(city$city, weather$city)]
[1] "Chicago" "Seattle" "New York" "San Francisco" "Los Angeles"
“in” can be used too
city$city %in% weather$city
[1] TRUE TRUE TRUE TRUE TRUE
use “merge” and by.x and by.y to specify columns in X table and Y table to merge. This is useful if spellings are different (ex. one variable uses capital “C” while other does not)
merge(city, weather, by.x="city", by.y="city")
LS0tDQp0aXRsZTogIk1hdGNoaW5nIGFuZCBNZXJnaW5nIg0Kb3V0cHV0Og0KICBodG1sX25vdGVib29rOg0KICAgIHRoZW1lOiBzcGFjZWxhYg0KICBodG1sX2RvY3VtZW50Og0KICAgIGRmX3ByaW50OiBwYWdlZA0KLS0tDQpgYGB7cn0NCmNpdHk8LWRhdGEuZnJhbWUoDQogIGNpdHk9SShjKCJDaGljYWdvIiwiU2VhdHRsZSIsIk5ldyBZb3JrIiwiU2FuIEZyYW5jaXNjbyIsICJMb3MgQW5nZWxlcyIpKSwNCiAgc3RhdGU9SShjKCJJTCIsIldBIiwiTlkiLCJDQSIsIkNBIikpLA0KICBwb3B1bGF0aW9uPWMoMjcxNTAwMCw2MzQ1MzUsODMzNzAwMCw4MjU4NjMsMzg1ODAwMCksDQogIGNvYXN0YWw9YyhGLEYsVCxULFQpKQ0Kd2VhdGhlcjwtZGF0YS5mcmFtZSgNCiAgY2l0eT1JKGMoIkNoaWNhZ28iLCJXYXNoaW5ndG9uIiwiU2VhdHRsZSIsIk5ldyBZb3JrIiwiS2Fuc2FzIENpdHkiLCJBdGxhbnRhIiwiU2FuIEZyYW5jaXNjbyIsICJMb3MgQW5nZWxlcyIpKSwNCiAgYXZlcmFnZV9oaWdoPWMoNTcsNjIsNTgsNTcsNTksNjMsNjQsNzIpLA0KICBhdmVyYWdlX2xvdz1jKDUwLDU1LDUzLDQ5LDUwLDU5LDU3LDY4KSwNCiAgaW5jaF9zbm93PWMoVCxULEYsVCxULEYsRixGKQ0KICApDQpgYGANCkNoZWNrIERhdGENCmBgYHtyfQ0KY2l0eQ0KYGBgDQpgYGB7cn0NCndlYXRoZXINCmBgYA0KIkkiIGZ1bmN0aW9uIHdpbGwgc3RvcCBSIGZyb20gdHVybmluZyBzdHJpbmcgdG8gZmFjdG9ycyAoZXguIGNpdHkgbmFtZXMpLiBTaW1pbGFyIHRvIHN0cmluZ3NBc0ZhY3RvcnM9Rg0KDQojTUFUQ0gNCiJtYXRjaCIgZnVuY3Rpb24gdG8gbWF0Y2ggc2FtZSBuYW1lDQpgYGB7cn0NCmNpdHkkY2l0eQ0KYGBgDQpgYGB7cn0NCndlYXRoZXIkY2l0eQ0KYGBgDQpNYXRjaCB0YWJsZXMNCmBgYHtyfQ0KbWF0Y2goY2l0eSRjaXR5LCB3ZWF0aGVyJGNpdHkpDQpgYGANCiNTdWJzZXQNCmNyZWF0ZSBzdWJzZXQgdG8gZXh0cmFjdCB2YWx1ZXMNCmBgYHtyfQ0Kd2VhdGhlciRjaXR5W21hdGNoKGNpdHkkY2l0eSwgd2VhdGhlciRjaXR5KV0NCmBgYA0KImluIiBjYW4gYmUgdXNlZCB0b28NCmBgYHtyfQ0KY2l0eSRjaXR5ICVpbiUgd2VhdGhlciRjaXR5DQpgYGANCnVzZSAibWVyZ2UiIGFuZCBieS54IGFuZCBieS55IHRvIHNwZWNpZnkgY29sdW1ucyBpbiBYIHRhYmxlIGFuZCBZIHRhYmxlIHRvIG1lcmdlLiBUaGlzIGlzIHVzZWZ1bCBpZiBzcGVsbGluZ3MgYXJlIGRpZmZlcmVudCAoZXguIG9uZSB2YXJpYWJsZSB1c2VzIGNhcGl0YWwgIkMiIHdoaWxlIG90aGVyIGRvZXMgbm90KQ0KYGBge3J9DQptZXJnZShjaXR5LCB3ZWF0aGVyLCBieS54PSJjaXR5IiwgYnkueT0iY2l0eSIpDQpgYGANCg0K