Presidents

presid_name year winner opponent isWinnerTaller difference presid_party
Obama 2008 185 175 TRUE 10 Dem

Creating basis for Data Frame

Defining Vectors: Presidents’ names, winners’ heights, opponents’ heights

# One entry per election, ordered from most recent to oldest.
presid_name <- c(
  "Obama", "Bush", "Bush", "Clinton", "Clinton", "Bush Father",
  "Reagan", "Reagan", "Carter", "Nixon", "Nixon", "Johnson",
  "Kennedy", "Eisenhower", "Eisenhower", "Truman"
)

# Height of the winning candidate in each election.
winner <- c(
  185, 182, 182, 188, 188, 188, 185, 185,
  177, 182, 182, 193, 183, 179, 179, 175
)

# Height of the losing candidate in each election.
opponent <- c(
  175, 193, 185, 187, 188, 173, 180, 177,
  183, 185, 180, 180, 182, 178, 178, 173
)

Defining election years

Steps of 4 years backwards starting from 2008; ending 1948

# Election years, counting down from 2008 to 1948 in four-year steps.
year <- seq(2008, 1948, by = -4)

Is Winner Taller: Boolean

# Element-wise comparison; a tie in height yields FALSE because the test is
# strictly "greater than".
isWinnerTaller <- winner > opponent

isWinnerTaller
##  [1]  TRUE FALSE FALSE  TRUE FALSE  TRUE  TRUE  TRUE FALSE FALSE  TRUE  TRUE
## [13]  TRUE  TRUE  TRUE  TRUE

Summing the number of times the winner is actually taller

# sum() treats TRUE as 1 and FALSE as 0, so this counts the TRUE entries.
sum(isWinnerTaller)
## [1] 11

Creating Data Frame

Data Frame creation with vector to column assignment

# Bind the five vectors into a data frame; each argument name becomes the
# corresponding column name.
df_presidents <- data.frame(
  presid_name = presid_name,
  year = year,
  winner = winner,
  opponent = opponent,
  isWinnerTaller = isWinnerTaller
)
df_presidents
##    presid_name year winner opponent isWinnerTaller
## 1        Obama 2008    185      175           TRUE
## 2         Bush 2004    182      193          FALSE
## 3         Bush 2000    182      185          FALSE
## 4      Clinton 1996    188      187           TRUE
## 5      Clinton 1992    188      188          FALSE
## 6  Bush Father 1988    188      173           TRUE
## 7       Reagan 1984    185      180           TRUE
## 8       Reagan 1980    185      177           TRUE
## 9       Carter 1976    177      183          FALSE
## 10       Nixon 1972    182      185          FALSE
## 11       Nixon 1968    182      180           TRUE
## 12     Johnson 1964    193      180           TRUE
## 13     Kennedy 1960    183      182           TRUE
## 14  Eisenhower 1956    179      178           TRUE
## 15  Eisenhower 1952    179      178           TRUE
## 16      Truman 1948    175      173           TRUE

Data Frame Insights

First six rows (the default for head())

# head() is called without an n argument, so it returns its default of six rows.
head(df_presidents)
##   presid_name year winner opponent isWinnerTaller
## 1       Obama 2008    185      175           TRUE
## 2        Bush 2004    182      193          FALSE
## 3        Bush 2000    182      185          FALSE
## 4     Clinton 1996    188      187           TRUE
## 5     Clinton 1992    188      188          FALSE
## 6 Bush Father 1988    188      173           TRUE

Data Frame Structure

# Compact overview: dimensions plus the type and first values of each column.
str(df_presidents)
## 'data.frame':    16 obs. of  5 variables:
##  $ presid_name   : chr  "Obama" "Bush" "Bush" "Clinton" ...
##  $ year          : num  2008 2004 2000 1996 1992 ...
##  $ winner        : num  185 182 182 188 188 188 185 185 177 182 ...
##  $ opponent      : num  175 193 185 187 188 173 180 177 183 185 ...
##  $ isWinnerTaller: logi  TRUE FALSE FALSE TRUE FALSE TRUE ...

Column Names

# colnames() returns the column names as a character vector.
colnames(df_presidents)
## [1] "presid_name"    "year"           "winner"         "opponent"      
## [5] "isWinnerTaller"

Number of Columns

# ncol() returns the number of columns (variables) in the data frame.
ncol(df_presidents)
## [1] 5

Number of Rows

# nrow() returns the number of rows (observations) in the data frame.
nrow(df_presidents)
## [1] 16

Adding calculated column

Adding the height difference to DataFrame

# Compute the height difference from the data frame's own columns rather than
# from the free-standing vectors, so the new column stays aligned with the
# rows even if the data frame has been reordered or subset.
df_presidents$difference <- df_presidents$winner - df_presidents$opponent
df_presidents
##    presid_name year winner opponent isWinnerTaller difference
## 1        Obama 2008    185      175           TRUE         10
## 2         Bush 2004    182      193          FALSE        -11
## 3         Bush 2000    182      185          FALSE         -3
## 4      Clinton 1996    188      187           TRUE          1
## 5      Clinton 1992    188      188          FALSE          0
## 6  Bush Father 1988    188      173           TRUE         15
## 7       Reagan 1984    185      180           TRUE          5
## 8       Reagan 1980    185      177           TRUE          8
## 9       Carter 1976    177      183          FALSE         -6
## 10       Nixon 1972    182      185          FALSE         -3
## 11       Nixon 1968    182      180           TRUE          2
## 12     Johnson 1964    193      180           TRUE         13
## 13     Kennedy 1960    183      182           TRUE          1
## 14  Eisenhower 1956    179      178           TRUE          1
## 15  Eisenhower 1952    179      178           TRUE          1
## 16      Truman 1948    175      173           TRUE          2

Column Deletion

Deleting last column without storing result

Confirming that deletion was not saved

# A negative column index excludes that column. Using ncol() instead of a
# hard-coded position always targets the last column. The result is only
# printed, not assigned, so df_presidents itself is unchanged.
df_presidents[, -ncol(df_presidents)]
##    presid_name year winner opponent isWinnerTaller
## 1        Obama 2008    185      175           TRUE
## 2         Bush 2004    182      193          FALSE
## 3         Bush 2000    182      185          FALSE
## 4      Clinton 1996    188      187           TRUE
## 5      Clinton 1992    188      188          FALSE
## 6  Bush Father 1988    188      173           TRUE
## 7       Reagan 1984    185      180           TRUE
## 8       Reagan 1980    185      177           TRUE
## 9       Carter 1976    177      183          FALSE
## 10       Nixon 1972    182      185          FALSE
## 11       Nixon 1968    182      180           TRUE
## 12     Johnson 1964    193      180           TRUE
## 13     Kennedy 1960    183      182           TRUE
## 14  Eisenhower 1956    179      178           TRUE
## 15  Eisenhower 1952    179      178           TRUE
## 16      Truman 1948    175      173           TRUE
# Print the data frame again: the subsetting result above was not assigned,
# so all columns are still present.
df_presidents
##    presid_name year winner opponent isWinnerTaller difference
## 1        Obama 2008    185      175           TRUE         10
## 2         Bush 2004    182      193          FALSE        -11
## 3         Bush 2000    182      185          FALSE         -3
## 4      Clinton 1996    188      187           TRUE          1
## 5      Clinton 1992    188      188          FALSE          0
## 6  Bush Father 1988    188      173           TRUE         15
## 7       Reagan 1984    185      180           TRUE          5
## 8       Reagan 1980    185      177           TRUE          8
## 9       Carter 1976    177      183          FALSE         -6
## 10       Nixon 1972    182      185          FALSE         -3
## 11       Nixon 1968    182      180           TRUE          2
## 12     Johnson 1964    193      180           TRUE         13
## 13     Kennedy 1960    183      182           TRUE          1
## 14  Eisenhower 1956    179      178           TRUE          1
## 15  Eisenhower 1952    179      178           TRUE          1
## 16      Truman 1948    175      173           TRUE          2

Deleting last column with assignment

Confirming that deletion was saved

# Exclude the last column and assign the result back, which makes the deletion
# permanent. ncol() replaces the hard-coded position so the statement always
# removes the last column.
df_presidents <- df_presidents[, -ncol(df_presidents)]
df_presidents
##    presid_name year winner opponent isWinnerTaller
## 1        Obama 2008    185      175           TRUE
## 2         Bush 2004    182      193          FALSE
## 3         Bush 2000    182      185          FALSE
## 4      Clinton 1996    188      187           TRUE
## 5      Clinton 1992    188      188          FALSE
## 6  Bush Father 1988    188      173           TRUE
## 7       Reagan 1984    185      180           TRUE
## 8       Reagan 1980    185      177           TRUE
## 9       Carter 1976    177      183          FALSE
## 10       Nixon 1972    182      185          FALSE
## 11       Nixon 1968    182      180           TRUE
## 12     Johnson 1964    193      180           TRUE
## 13     Kennedy 1960    183      182           TRUE
## 14  Eisenhower 1956    179      178           TRUE
## 15  Eisenhower 1952    179      178           TRUE
## 16      Truman 1948    175      173           TRUE

Adding back difference column

# Recreate the difference column from the data frame's own columns rather than
# from the free-standing vectors, so the values stay aligned with the rows.
df_presidents$difference <- df_presidents$winner - df_presidents$opponent
df_presidents
##    presid_name year winner opponent isWinnerTaller difference
## 1        Obama 2008    185      175           TRUE         10
## 2         Bush 2004    182      193          FALSE        -11
## 3         Bush 2000    182      185          FALSE         -3
## 4      Clinton 1996    188      187           TRUE          1
## 5      Clinton 1992    188      188          FALSE          0
## 6  Bush Father 1988    188      173           TRUE         15
## 7       Reagan 1984    185      180           TRUE          5
## 8       Reagan 1980    185      177           TRUE          8
## 9       Carter 1976    177      183          FALSE         -6
## 10       Nixon 1972    182      185          FALSE         -3
## 11       Nixon 1968    182      180           TRUE          2
## 12     Johnson 1964    193      180           TRUE         13
## 13     Kennedy 1960    183      182           TRUE          1
## 14  Eisenhower 1956    179      178           TRUE          1
## 15  Eisenhower 1952    179      178           TRUE          1
## 16      Truman 1948    175      173           TRUE          2

Deleting last column with different method

# Keep every column whose name is not "difference"; the result is only printed.
df_presidents[, names(df_presidents) != "difference"]
##    presid_name year winner opponent isWinnerTaller
## 1        Obama 2008    185      175           TRUE
## 2         Bush 2004    182      193          FALSE
## 3         Bush 2000    182      185          FALSE
## 4      Clinton 1996    188      187           TRUE
## 5      Clinton 1992    188      188          FALSE
## 6  Bush Father 1988    188      173           TRUE
## 7       Reagan 1984    185      180           TRUE
## 8       Reagan 1980    185      177           TRUE
## 9       Carter 1976    177      183          FALSE
## 10       Nixon 1972    182      185          FALSE
## 11       Nixon 1968    182      180           TRUE
## 12     Johnson 1964    193      180           TRUE
## 13     Kennedy 1960    183      182           TRUE
## 14  Eisenhower 1956    179      178           TRUE
## 15  Eisenhower 1952    179      178           TRUE
## 16      Truman 1948    175      173           TRUE

Storing deletion of last column with different method

# Keep every column whose name is not "difference" and store the result.
df_presidents <- df_presidents[, names(df_presidents) != "difference"]
df_presidents
##    presid_name year winner opponent isWinnerTaller
## 1        Obama 2008    185      175           TRUE
## 2         Bush 2004    182      193          FALSE
## 3         Bush 2000    182      185          FALSE
## 4      Clinton 1996    188      187           TRUE
## 5      Clinton 1992    188      188          FALSE
## 6  Bush Father 1988    188      173           TRUE
## 7       Reagan 1984    185      180           TRUE
## 8       Reagan 1980    185      177           TRUE
## 9       Carter 1976    177      183          FALSE
## 10       Nixon 1972    182      185          FALSE
## 11       Nixon 1968    182      180           TRUE
## 12     Johnson 1964    193      180           TRUE
## 13     Kennedy 1960    183      182           TRUE
## 14  Eisenhower 1956    179      178           TRUE
## 15  Eisenhower 1952    179      178           TRUE
## 16      Truman 1948    175      173           TRUE

Subsetting Data

Selecting second column

# Extract the second column as a plain vector.
df_presidents[[2]]
##  [1] 2008 2004 2000 1996 1992 1988 1984 1980 1976 1972 1968 1964 1960 1956 1952
## [16] 1948

Selecting last column

# Extract the last column as a plain vector, whatever its position is.
df_presidents[[ncol(df_presidents)]]
##  [1]  TRUE FALSE FALSE  TRUE FALSE  TRUE  TRUE  TRUE FALSE FALSE  TRUE  TRUE
## [13]  TRUE  TRUE  TRUE  TRUE

Selecting first three rows of third and fourth columns

# Rows 1 to 3 of columns 3 and 4 (winner and opponent).
df_presidents[1:3, 3:4]
##   winner opponent
## 1    185      175
## 2    182      193
## 3    182      185

Subsetting with subset()

# subset() evaluates the condition inside the data frame, so the logical
# column can be named directly.
subset(df_presidents, isWinnerTaller)
##    presid_name year winner opponent isWinnerTaller
## 1        Obama 2008    185      175           TRUE
## 4      Clinton 1996    188      187           TRUE
## 6  Bush Father 1988    188      173           TRUE
## 7       Reagan 1984    185      180           TRUE
## 8       Reagan 1980    185      177           TRUE
## 11       Nixon 1968    182      180           TRUE
## 12     Johnson 1964    193      180           TRUE
## 13     Kennedy 1960    183      182           TRUE
## 14  Eisenhower 1956    179      178           TRUE
## 15  Eisenhower 1952    179      178           TRUE
## 16      Truman 1948    175      173           TRUE

Selecting presid_name column where winner is taller with subset()

# Filter the name vector with the logical column; the column is already
# logical, so no comparison against TRUE is needed.
subset(df_presidents$presid_name, df_presidents$isWinnerTaller)
##  [1] "Obama"       "Clinton"     "Bush Father" "Reagan"      "Reagan"     
##  [6] "Nixon"       "Johnson"     "Kennedy"     "Eisenhower"  "Eisenhower" 
## [11] "Truman"

Adding Party to Data Frame

Creating vector with political parties

assigning vector to new column

# Party of the winning candidate, in the same row order as the data frame.
party <- c(
  "Dem", "Rep", "Rep", "Dem", "Dem", "Rep", "Rep", "Rep",
  "Dem", "Rep", "Rep", "Dem", "Dem", "Rep", "Rep", "Dem"
)
df_presidents$presid_party <- party
df_presidents
##    presid_name year winner opponent isWinnerTaller presid_party
## 1        Obama 2008    185      175           TRUE          Dem
## 2         Bush 2004    182      193          FALSE          Rep
## 3         Bush 2000    182      185          FALSE          Rep
## 4      Clinton 1996    188      187           TRUE          Dem
## 5      Clinton 1992    188      188          FALSE          Dem
## 6  Bush Father 1988    188      173           TRUE          Rep
## 7       Reagan 1984    185      180           TRUE          Rep
## 8       Reagan 1980    185      177           TRUE          Rep
## 9       Carter 1976    177      183          FALSE          Dem
## 10       Nixon 1972    182      185          FALSE          Rep
## 11       Nixon 1968    182      180           TRUE          Rep
## 12     Johnson 1964    193      180           TRUE          Dem
## 13     Kennedy 1960    183      182           TRUE          Dem
## 14  Eisenhower 1956    179      178           TRUE          Rep
## 15  Eisenhower 1952    179      178           TRUE          Rep
## 16      Truman 1948    175      173           TRUE          Dem

Simple Calculations

TApply

Mean of winners' heights per party

# Group the winners' heights by party and average each group.
tapply(
  X = df_presidents$winner,
  INDEX = df_presidents$presid_party,
  FUN = mean
)
##      Dem      Rep 
## 184.1429 182.6667

Maximum winner height per party

# Group the winners' heights by party and take the maximum of each group.
tapply(
  X = df_presidents$winner,
  INDEX = df_presidents$presid_party,
  FUN = max
)
## Dem Rep 
## 193 188

Apply

mean of heights by winner and opponent

# MARGIN = 2 applies the function column by column.
apply(df_presidents[c("winner", "opponent")], MARGIN = 2, FUN = mean)
##   winner opponent 
## 183.3125 181.0625

ColMeans

means of heights by winner and opponent

# colMeans() computes the mean of each selected column.
colMeans(df_presidents[c("winner", "opponent")])
##   winner opponent 
## 183.3125 181.0625

Standard Deviation

SD of winners and opponents

# MARGIN = 2 applies sd() to each of the two height columns.
apply(df_presidents[c("winner", "opponent")], MARGIN = 2, FUN = sd)
##   winner opponent 
## 4.629165 5.579352

SD of winners

# Standard deviation of the winner column, extracted as a vector with $.
sd(df_presidents$winner)
## [1] 4.629165

SD of opponents

# Standard deviation of the opponent column, extracted as a vector with $.
sd(df_presidents$opponent)
## [1] 5.579352