This is an R Markdown Notebook. When you execute code within the notebook, the results appear beneath the code.
Try executing this chunk by clicking the Run button within the chunk or by placing your cursor inside it and pressing Ctrl+Shift+Enter.
# Plot the built-in `cars` data set.
plot(x = cars)
Add a new chunk by clicking the Insert Chunk button on the toolbar or by pressing Ctrl+Alt+I.
When you save the notebook, an HTML file containing the code and output will be saved alongside it (click the Preview button or press Ctrl+Shift+K to preview the HTML file).
The preview shows you a rendered HTML copy of the contents of the editor. Consequently, unlike Knit, Preview does not run any R code chunks. Instead, the output of the chunk when it was last run in the editor is displayed.
# Paste the data from the file below, then run this chunk to create the
# vectors. Each vector has one entry per election, in the same order.
presid_name <- c(
  "Obama", "Bush", "Bush", "Clinton", "Clinton", "Bush Father", "Reagan",
  "Reagan", "Carter", "Nixon", "Nixon", "Johnson", "Kennedy", "Eisenhower",
  "Eisenhower", "Truman"
)
winner <- c(
  185, 182, 182, 188, 188, 188, 185, 185, 177, 182, 182, 193, 183, 179, 179,
  175
)
opponent <- c(
  175, 193, 185, 187, 188, 173, 180, 177, 183, 185, 180, 180, 182, 178, 178,
  173
)

# Election years, counting down from 2008 to 1948 in steps of four.
year <- seq(from = 2008, to = 1948, by = -4)

# Logical vector: TRUE where the winner is taller than the opponent.
isWinnerTaller <- opponent < winner
isWinnerTaller
## [1] TRUE FALSE FALSE TRUE FALSE TRUE TRUE TRUE FALSE FALSE TRUE TRUE
## [13] TRUE TRUE TRUE TRUE

# Number of elections in which the winner is taller than the opponent.
sum(isWinnerTaller)
## [1] 11
# Combine the vectors into one data frame and print it.
df_presidents <- data.frame(
  presid_name, year, winner, opponent, isWinnerTaller
)
df_presidents

# Show the first six rows.
head(df_presidents)

# Compact overview of the structure: 16 observations of five variables.
str(df_presidents)
## 'data.frame': 16 obs. of 5 variables:
## $ presid_name : chr "Obama" "Bush" "Bush" "Clinton" ...
## $ year : num 2008 2004 2000 1996 1992 ...
## $ winner : num 185 182 182 188 188 188 185 185 177 182 ...
## $ opponent : num 175 193 185 187 188 173 180 177 183 185 ...
## $ isWinnerTaller: logi TRUE FALSE FALSE TRUE FALSE TRUE ...

# Names of the columns.
names(df_presidents)
## [1] "presid_name" "year" "winner" "opponent"
## [5] "isWinnerTaller"

# Number of columns.
ncol(df_presidents)
## [1] 5

# Number of rows.
nrow(df_presidents)
## [1] 16
# Add a column named `difference`: winner's height minus opponent's height.
# The data frame's own columns are used so the result does not depend on the
# free-standing vectors still being in the workspace.
df_presidents$difference <- df_presidents$winner - df_presidents$opponent
df_presidents

# Delete a column by index. The position is looked up instead of being
# hard-coded, so the right column is dropped even when the chunk is re-run
# after other columns have been added.
difference_idx <- which(colnames(df_presidents) == "difference")
df_presidents[, -difference_idx]

# Indexing only returns a copy: the data frame itself still has the column.
df_presidents

# Assign the result back to actually delete the column from the data frame.
df_presidents <- df_presidents[, -difference_idx]

# The column is now gone from the data frame.
df_presidents

# Re-create the column so that deleting it by name has something to remove.
df_presidents$difference <- df_presidents$winner - df_presidents$opponent

# Delete the column by using its name (result shown only, not assigned yet).
df_presidents[, colnames(df_presidents) != "difference"]

# Assign the result back so the `difference` column is deleted for good.
df_presidents <- df_presidents[, colnames(df_presidents) != "difference"]
df_presidents
# The second column of the data frame.
df_presidents[[2]]
## [1] 2008 2004 2000 1996 1992 1988 1984 1980 1976 1972 1968 1964 1960 1956 1952
## [16] 1948

# The last column of the data frame.
df_presidents[[ncol(df_presidents)]]
## [1] TRUE FALSE FALSE TRUE FALSE TRUE TRUE TRUE FALSE FALSE TRUE TRUE
## [13] TRUE TRUE TRUE TRUE

# The first three rows of the winner's and opponent's heights.
df_presidents[1:3, 3:4]

# Subset function: keep the rows where the winner is taller than the opponent.
subset(df_presidents, isWinnerTaller)

# Names only, for the cases where the winner is taller than the opponent.
with(df_presidents, presid_name[isWinnerTaller])
## [1] "Obama" "Clinton" "Bush Father" "Reagan" "Reagan"
## [6] "Nixon" "Johnson" "Kennedy" "Eisenhower" "Eisenhower"
## [11] "Truman"
# Party of each winner, in the same order as the rows of the data frame.
party <- c(
  "Dem", "Rep", "Rep", "Dem", "Dem", "Rep", "Rep", "Rep", "Dem", "Rep", "Rep",
  "Dem", "Dem", "Rep", "Rep", "Dem"
)

# Add a column with the party of the winner.
df_presidents[["presid_party"]] <- party
df_presidents

# Mean height of the winners, computed separately for each party.
with(df_presidents, tapply(winner, presid_party, mean))
## Dem Rep
## 184.1429 182.6667

# Maximum height of the winners, computed separately for each party.
with(df_presidents, tapply(winner, presid_party, max))
## Dem Rep
## 193 188
# Mean height for winners and opponents. vapply() works on the columns
# directly, whereas apply() would first coerce the data frame to a matrix.
vapply(df_presidents[c("winner", "opponent")], mean, numeric(1))
## winner opponent
## 183.3125 181.0625

# The same column means, computed with colMeans().
colMeans(df_presidents[c("winner", "opponent")])
## winner opponent
## 183.3125 181.0625

# Standard deviation of the height for winners and opponents.
vapply(df_presidents[c("winner", "opponent")], sd, numeric(1))
## winner opponent
## 4.629165 5.579352

# Standard deviation of the height for winners only.
sd(df_presidents[["winner"]])
## [1] 4.629165

# Standard deviation of the height for opponents only.
sd(df_presidents[["opponent"]])
## [1] 5.579352