| presid_name | year | winner | opponent | isWinnerTaller | difference | presid_party |
|---|---|---|---|---|---|---|
| Obama | 2008 | 185 | 175 | TRUE | 10 | Dem |
| … | … | … | … | … | … | … |
# Parallel vectors, one entry per election: the winner's name, the winner's
# height and the opponent's height.
presid_name <- c(
  "Obama", "Bush", "Bush", "Clinton", "Clinton", "Bush Father",
  "Reagan", "Reagan", "Carter", "Nixon", "Nixon", "Johnson",
  "Kennedy", "Eisenhower", "Eisenhower", "Truman"
)
winner <- c(
  185, 182, 182, 188, 188, 188, 185, 185,
  177, 182, 182, 193, 183, 179, 179, 175
)
opponent <- c(
  175, 193, 185, 187, 188, 173, 180, 177,
  183, 185, 180, 180, 182, 178, 178, 173
)
Steps of 4 years backwards starting from 2008; ending 1948
# Election years, counting down from 2008 to 1948 in steps of four.
year <- seq(2008, 1948, by = -4)
# Elementwise comparison: TRUE where the winner is strictly taller than the
# opponent (a tie yields FALSE).
isWinnerTaller <- opponent < winner
isWinnerTaller
## [1] TRUE FALSE FALSE TRUE FALSE TRUE TRUE TRUE FALSE FALSE TRUE TRUE
## [13] TRUE TRUE TRUE TRUE
Summing the number of times the winner is actually taller
sum(isWinnerTaller)
## [1] 11
# Combine the parallel vectors into a single data frame; each column keeps
# the name of the vector it was built from.
df_presidents <- data.frame(
  presid_name = presid_name,
  year = year,
  winner = winner,
  opponent = opponent,
  isWinnerTaller = isWinnerTaller
)
df_presidents
## presid_name year winner opponent isWinnerTaller
## 1 Obama 2008 185 175 TRUE
## 2 Bush 2004 182 193 FALSE
## 3 Bush 2000 182 185 FALSE
## 4 Clinton 1996 188 187 TRUE
## 5 Clinton 1992 188 188 FALSE
## 6 Bush Father 1988 188 173 TRUE
## 7 Reagan 1984 185 180 TRUE
## 8 Reagan 1980 185 177 TRUE
## 9 Carter 1976 177 183 FALSE
## 10 Nixon 1972 182 185 FALSE
## 11 Nixon 1968 182 180 TRUE
## 12 Johnson 1964 193 180 TRUE
## 13 Kennedy 1960 183 182 TRUE
## 14 Eisenhower 1956 179 178 TRUE
## 15 Eisenhower 1952 179 178 TRUE
## 16 Truman 1948 175 173 TRUE
head(df_presidents)
## presid_name year winner opponent isWinnerTaller
## 1 Obama 2008 185 175 TRUE
## 2 Bush 2004 182 193 FALSE
## 3 Bush 2000 182 185 FALSE
## 4 Clinton 1996 188 187 TRUE
## 5 Clinton 1992 188 188 FALSE
## 6 Bush Father 1988 188 173 TRUE
str (df_presidents)
## 'data.frame': 16 obs. of 5 variables:
## $ presid_name : chr "Obama" "Bush" "Bush" "Clinton" ...
## $ year : num 2008 2004 2000 1996 1992 ...
## $ winner : num 185 182 182 188 188 188 185 185 177 182 ...
## $ opponent : num 175 193 185 187 188 173 180 177 183 185 ...
## $ isWinnerTaller: logi TRUE FALSE FALSE TRUE FALSE TRUE ...
colnames(df_presidents)
## [1] "presid_name" "year" "winner" "opponent"
## [5] "isWinnerTaller"
ncol(df_presidents)
## [1] 5
nrow(df_presidents)
## [1] 16
# Height gap per election (winner minus opponent). It is computed from the
# data frame's own columns rather than the free-standing `winner`/`opponent`
# vectors, so the values stay aligned with the rows even if the data frame
# is reordered or filtered.
df_presidents$difference <- df_presidents$winner - df_presidents$opponent
df_presidents
## presid_name year winner opponent isWinnerTaller difference
## 1 Obama 2008 185 175 TRUE 10
## 2 Bush 2004 182 193 FALSE -11
## 3 Bush 2000 182 185 FALSE -3
## 4 Clinton 1996 188 187 TRUE 1
## 5 Clinton 1992 188 188 FALSE 0
## 6 Bush Father 1988 188 173 TRUE 15
## 7 Reagan 1984 185 180 TRUE 5
## 8 Reagan 1980 185 177 TRUE 8
## 9 Carter 1976 177 183 FALSE -6
## 10 Nixon 1972 182 185 FALSE -3
## 11 Nixon 1968 182 180 TRUE 2
## 12 Johnson 1964 193 180 TRUE 13
## 13 Kennedy 1960 183 182 TRUE 1
## 14 Eisenhower 1956 179 178 TRUE 1
## 15 Eisenhower 1952 179 178 TRUE 1
## 16 Truman 1948 175 173 TRUE 2
Confirming that deletion was not saved
df_presidents [ , -6]
## presid_name year winner opponent isWinnerTaller
## 1 Obama 2008 185 175 TRUE
## 2 Bush 2004 182 193 FALSE
## 3 Bush 2000 182 185 FALSE
## 4 Clinton 1996 188 187 TRUE
## 5 Clinton 1992 188 188 FALSE
## 6 Bush Father 1988 188 173 TRUE
## 7 Reagan 1984 185 180 TRUE
## 8 Reagan 1980 185 177 TRUE
## 9 Carter 1976 177 183 FALSE
## 10 Nixon 1972 182 185 FALSE
## 11 Nixon 1968 182 180 TRUE
## 12 Johnson 1964 193 180 TRUE
## 13 Kennedy 1960 183 182 TRUE
## 14 Eisenhower 1956 179 178 TRUE
## 15 Eisenhower 1952 179 178 TRUE
## 16 Truman 1948 175 173 TRUE
df_presidents
## presid_name year winner opponent isWinnerTaller difference
## 1 Obama 2008 185 175 TRUE 10
## 2 Bush 2004 182 193 FALSE -11
## 3 Bush 2000 182 185 FALSE -3
## 4 Clinton 1996 188 187 TRUE 1
## 5 Clinton 1992 188 188 FALSE 0
## 6 Bush Father 1988 188 173 TRUE 15
## 7 Reagan 1984 185 180 TRUE 5
## 8 Reagan 1980 185 177 TRUE 8
## 9 Carter 1976 177 183 FALSE -6
## 10 Nixon 1972 182 185 FALSE -3
## 11 Nixon 1968 182 180 TRUE 2
## 12 Johnson 1964 193 180 TRUE 13
## 13 Kennedy 1960 183 182 TRUE 1
## 14 Eisenhower 1956 179 178 TRUE 1
## 15 Eisenhower 1952 179 178 TRUE 1
## 16 Truman 1948 175 173 TRUE 2
Confirming that deletion was saved
# Assigning the subset back to df_presidents is what makes the removal of
# column 6 permanent.
df_presidents <- df_presidents[, -6]
df_presidents
## presid_name year winner opponent isWinnerTaller
## 1 Obama 2008 185 175 TRUE
## 2 Bush 2004 182 193 FALSE
## 3 Bush 2000 182 185 FALSE
## 4 Clinton 1996 188 187 TRUE
## 5 Clinton 1992 188 188 FALSE
## 6 Bush Father 1988 188 173 TRUE
## 7 Reagan 1984 185 180 TRUE
## 8 Reagan 1980 185 177 TRUE
## 9 Carter 1976 177 183 FALSE
## 10 Nixon 1972 182 185 FALSE
## 11 Nixon 1968 182 180 TRUE
## 12 Johnson 1964 193 180 TRUE
## 13 Kennedy 1960 183 182 TRUE
## 14 Eisenhower 1956 179 178 TRUE
## 15 Eisenhower 1952 179 178 TRUE
## 16 Truman 1948 175 173 TRUE
Adding back difference column
# Re-create the height gap (winner minus opponent) from the data frame's own
# columns rather than the free-standing `winner`/`opponent` vectors, so the
# values stay aligned with the rows even if the data frame is reordered or
# filtered.
df_presidents$difference <- df_presidents$winner - df_presidents$opponent
df_presidents
## presid_name year winner opponent isWinnerTaller difference
## 1 Obama 2008 185 175 TRUE 10
## 2 Bush 2004 182 193 FALSE -11
## 3 Bush 2000 182 185 FALSE -3
## 4 Clinton 1996 188 187 TRUE 1
## 5 Clinton 1992 188 188 FALSE 0
## 6 Bush Father 1988 188 173 TRUE 15
## 7 Reagan 1984 185 180 TRUE 5
## 8 Reagan 1980 185 177 TRUE 8
## 9 Carter 1976 177 183 FALSE -6
## 10 Nixon 1972 182 185 FALSE -3
## 11 Nixon 1968 182 180 TRUE 2
## 12 Johnson 1964 193 180 TRUE 13
## 13 Kennedy 1960 183 182 TRUE 1
## 14 Eisenhower 1956 179 178 TRUE 1
## 15 Eisenhower 1952 179 178 TRUE 1
## 16 Truman 1948 175 173 TRUE 2
Deleting the last column with a different method (selecting columns by name; the result is not stored)
df_presidents[ , colnames(df_presidents) != 'difference' ]
## presid_name year winner opponent isWinnerTaller
## 1 Obama 2008 185 175 TRUE
## 2 Bush 2004 182 193 FALSE
## 3 Bush 2000 182 185 FALSE
## 4 Clinton 1996 188 187 TRUE
## 5 Clinton 1992 188 188 FALSE
## 6 Bush Father 1988 188 173 TRUE
## 7 Reagan 1984 185 180 TRUE
## 8 Reagan 1980 185 177 TRUE
## 9 Carter 1976 177 183 FALSE
## 10 Nixon 1972 182 185 FALSE
## 11 Nixon 1968 182 180 TRUE
## 12 Johnson 1964 193 180 TRUE
## 13 Kennedy 1960 183 182 TRUE
## 14 Eisenhower 1956 179 178 TRUE
## 15 Eisenhower 1952 179 178 TRUE
## 16 Truman 1948 175 173 TRUE
Storing deletion of last column with different method
# Keep every column whose name is not "difference" and store the result.
df_presidents <- df_presidents[, names(df_presidents) != "difference"]
df_presidents
## presid_name year winner opponent isWinnerTaller
## 1 Obama 2008 185 175 TRUE
## 2 Bush 2004 182 193 FALSE
## 3 Bush 2000 182 185 FALSE
## 4 Clinton 1996 188 187 TRUE
## 5 Clinton 1992 188 188 FALSE
## 6 Bush Father 1988 188 173 TRUE
## 7 Reagan 1984 185 180 TRUE
## 8 Reagan 1980 185 177 TRUE
## 9 Carter 1976 177 183 FALSE
## 10 Nixon 1972 182 185 FALSE
## 11 Nixon 1968 182 180 TRUE
## 12 Johnson 1964 193 180 TRUE
## 13 Kennedy 1960 183 182 TRUE
## 14 Eisenhower 1956 179 178 TRUE
## 15 Eisenhower 1952 179 178 TRUE
## 16 Truman 1948 175 173 TRUE
df_presidents[ , 2]
## [1] 2008 2004 2000 1996 1992 1988 1984 1980 1976 1972 1968 1964 1960 1956 1952
## [16] 1948
df_presidents[ , ncol(df_presidents)]
## [1] TRUE FALSE FALSE TRUE FALSE TRUE TRUE TRUE FALSE FALSE TRUE TRUE
## [13] TRUE TRUE TRUE TRUE
# First three rows, third and fourth columns.
df_presidents[1:3, 3:4]
## winner opponent
## 1 185 175
## 2 182 193
## 3 182 185
# Rows where the winner was taller; subset() evaluates the condition inside
# the data frame, so the column can be named directly.
subset(df_presidents, isWinnerTaller)
## presid_name year winner opponent isWinnerTaller
## 1 Obama 2008 185 175 TRUE
## 4 Clinton 1996 188 187 TRUE
## 6 Bush Father 1988 188 173 TRUE
## 7 Reagan 1984 185 180 TRUE
## 8 Reagan 1980 185 177 TRUE
## 11 Nixon 1968 182 180 TRUE
## 12 Johnson 1964 193 180 TRUE
## 13 Kennedy 1960 183 182 TRUE
## 14 Eisenhower 1956 179 178 TRUE
## 15 Eisenhower 1952 179 178 TRUE
## 16 Truman 1948 175 173 TRUE
# Names of the winners who were taller than their opponent.
with(df_presidents, presid_name[which(isWinnerTaller)])
## [1] "Obama" "Clinton" "Bush Father" "Reagan" "Reagan"
## [6] "Nixon" "Johnson" "Kennedy" "Eisenhower" "Eisenhower"
## [11] "Truman"
assigning vector to new column
# Party of each winner, in the same order as the other vectors.
party <- c(
  "Dem", "Rep", "Rep", "Dem", "Dem", "Rep", "Rep", "Rep",
  "Dem", "Rep", "Rep", "Dem", "Dem", "Rep", "Rep", "Dem"
)
# Append the party vector as a new column named presid_party.
df_presidents[["presid_party"]] <- party
df_presidents
## presid_name year winner opponent isWinnerTaller presid_party
## 1 Obama 2008 185 175 TRUE Dem
## 2 Bush 2004 182 193 FALSE Rep
## 3 Bush 2000 182 185 FALSE Rep
## 4 Clinton 1996 188 187 TRUE Dem
## 5 Clinton 1992 188 188 FALSE Dem
## 6 Bush Father 1988 188 173 TRUE Rep
## 7 Reagan 1984 185 180 TRUE Rep
## 8 Reagan 1980 185 177 TRUE Rep
## 9 Carter 1976 177 183 FALSE Dem
## 10 Nixon 1972 182 185 FALSE Rep
## 11 Nixon 1968 182 180 TRUE Rep
## 12 Johnson 1964 193 180 TRUE Dem
## 13 Kennedy 1960 183 182 TRUE Dem
## 14 Eisenhower 1956 179 178 TRUE Rep
## 15 Eisenhower 1952 179 178 TRUE Rep
## 16 Truman 1948 175 173 TRUE Dem
mean of heights per party
# Average winner height within each party.
with(df_presidents, tapply(winner, presid_party, mean))
## Dem Rep
## 184.1429 182.6667
max heights per party
# Tallest winner within each party.
with(df_presidents, tapply(winner, presid_party, max))
## Dem Rep
## 193 188
mean of heights by winner and opponent
# apply() works on matrices, so the two numeric columns are converted
# explicitly before taking the mean of each column (MARGIN = 2).
apply(as.matrix(df_presidents[, c("winner", "opponent")]), 2, mean)
## winner opponent
## 183.3125 181.0625
means of heights by winner and opponent, this time using colMeans
# Column means of the two height columns.
colMeans(df_presidents[c("winner", "opponent")])
## winner opponent
## 183.3125 181.0625
SD of winners and opponents
# apply() works on matrices, so the two numeric columns are converted
# explicitly before taking the standard deviation of each column.
apply(as.matrix(df_presidents[, c("winner", "opponent")]), 2, sd)
## winner opponent
## 4.629165 5.579352
SD of winners
# Standard deviation of the winner column, extracted as a plain vector.
sd(df_presidents[["winner"]])
## [1] 4.629165
SD of opponents
# Standard deviation of the opponent column, extracted as a plain vector.
sd(df_presidents[["opponent"]])
## [1] 5.579352