Sys.time()
## [1] "2023-06-25 01:06:30 +01"
# Question 1
# Five length-six vectors; b, c and d are derived arithmetically from a
a <- as.numeric(1:6)
b <- 2 * a
c <- 10 * a
d <- 2 * a - 1
e <- 10:15
# Lay the vectors out as the columns of a matrix and label the rows u..z
A <- matrix(
  c(a, b, c, d, e),
  ncol = 5,
  dimnames = list(letters[21:26], c("a", "b", "c", "d", "e"))
)
A
## a b c d e
## u 1 2 10 1 10
## v 2 4 20 3 11
## w 3 6 30 5 12
## x 4 8 40 7 13
## y 5 10 50 9 14
## z 6 12 60 11 15
# To make each vector a row instead, transpose the matrix A
t(A)
## u v w x y z
## a 1 2 3 4 5 6
## b 2 4 6 8 10 12
## c 10 20 30 40 50 60
## d 1 3 5 7 9 11
## e 10 11 12 13 14 15
# Question 2
is.matrix(A)
## [1] TRUE
# A is a matrix since the return is TRUE
# Question 3
# Draw 20 distinct integers from 0..100. No seed is set, so the values (and
# the outputs recorded below) change from run to run.
scores <- sample(0:100, size = 20, replace = FALSE)
scores
## [1] 84 19 1 6 40 83 88 46 73 37 20 95 78 31 30 56 96 51 75 54
# B uses the default column-wise fill
B <- matrix(scores, nrow = 5, ncol = 4)
# X holds the same values filled row-wise, with students as rows and courses
# as columns. TRUE/FALSE are spelled out because T and F can be reassigned.
X <- matrix(
  scores,
  nrow = 5, ncol = 4, byrow = TRUE,
  dimnames = list(
    c("Zeric", "Ranita", "Angela", "Moses", "Joseph"),
    c("STAT222", "STAT226", "STAT224", "GERSHON")
  )
)
X
## STAT222 STAT226 STAT224 GERSHON
## Zeric 84 19 1 6
## Ranita 40 83 88 46
## Angela 73 37 20 95
## Moses 78 31 30 56
## Joseph 96 51 75 54
B # B is filled column wise
## [,1] [,2] [,3] [,4]
## [1,] 84 83 20 56
## [2,] 19 88 95 96
## [3,] 1 46 78 51
## [4,] 6 73 31 75
## [5,] 40 37 30 54
# Transpose of X
t(X)
## Zeric Ranita Angela Moses Joseph
## STAT222 84 40 73 78 96
## STAT226 19 83 37 31 51
## STAT224 1 88 20 30 75
## GERSHON 6 46 95 56 54
# Trace: X is 5x4 (not square), so diag() returns only the four entries
# X[i, i] and this is the sum of that main diagonal
sum(diag(X))
## [1] 243
# a 3x3 sub matrix: the first three students and the first three courses
subX <- X[1:3, 1:3]
subX
## STAT222 STAT226 STAT224
## Zeric 84 19 1
## Ranita 40 83 88
## Angela 73 37 20
# Question 4
# (a) Build each column separately, assemble the data frame, then name the rows
ID <- factor(1021:1030)
Age <- c(19, 22, 24, 30, 19, 23, 28, 21, 20, 35)
Gender <- factor(c(
  "Male", "Male", "Female", "Male", "Female",
  "Female", "Female", "Male", "Female", "Male"
))
GP <- c(2.35, 1.75, 3.13, 3.01, 3.73, 2.53, 2.89, 3.89, 2.83, 2.45)
Residential_Status <- c(
  "Resident", "Resident", "Non-resident", "Non-resident", "Resident",
  "Non-resident", "Non-resident", "Resident", "Non-resident", "Non-resident"
)
Data <- data.frame(ID, Age, Gender, GP, Residential_Status)
rownames(Data) <- c(
  "Eric", "Yaw", "Sally", "Ben", "Ranita",
  "Portia", "Roberta", "Mensah", "Queen", "Fred"
)
head(Data)
## ID Age Gender GP Residential_Status
## Eric 1021 19 Male 2.35 Resident
## Yaw 1022 22 Male 1.75 Resident
## Sally 1023 24 Female 3.13 Non-resident
## Ben 1024 30 Male 3.01 Non-resident
## Ranita 1025 19 Female 3.73 Resident
## Portia 1026 23 Female 2.53 Non-resident
# 4 (b) Append the COVID test result as a sixth column
COVID_Test <- factor(c(
  "Yes", "Yes", "No", "No", "Yes",
  "No", "No", "Yes", "No", "Yes"
))
DATA <- Data
DATA$COVID_Test <- COVID_Test
head(DATA)
## ID Age Gender GP Residential_Status COVID_Test
## Eric 1021 19 Male 2.35 Resident Yes
## Yaw 1022 22 Male 1.75 Resident Yes
## Sally 1023 24 Female 3.13 Non-resident No
## Ben 1024 30 Male 3.01 Non-resident No
## Ranita 1025 19 Female 3.73 Resident Yes
## Portia 1026 23 Female 2.53 Non-resident No
# (4.b.i)
# Number of rows and columns
dim(DATA)
## [1] 10 6
# it has 10 rows and 6 columns
# (4.b.ii)
# Type of every column
str(DATA)
## 'data.frame': 10 obs. of 6 variables:
## $ ID : Factor w/ 10 levels "1021","1022",..: 1 2 3 4 5 6 7 8 9 10
## $ Age : num 19 22 24 30 19 23 28 21 20 35
## $ Gender : Factor w/ 2 levels "Female","Male": 2 2 1 2 1 1 1 2 1 2
## $ GP : num 2.35 1.75 3.13 3.01 3.73 2.53 2.89 3.89 2.83 2.45
## $ Residential_Status: chr "Resident" "Resident" "Non-resident" "Non-resident" ...
## $ COVID_Test : Factor w/ 2 levels "No","Yes": 2 2 1 1 2 1 1 2 1 2
# (4.b.iii)
# The sample size is the number of rows
paste("It has a sample size of ", nrow(DATA))
## [1] "It has a sample size of 10"
# Question 5
str(state.center)
## List of 2
## $ x: num [1:50] -86.8 -127.2 -111.6 -92.3 -119.8 ...
## $ y: num [1:50] 32.6 49.2 34.2 34.7 36.5 ...
# state.center is a list holding two numeric vectors, x and y
# Build a data frame from the two components and preview it
head(data.frame(x = state.center$x, y = state.center$y))
## x y
## 1 -86.7509 32.5901
## 2 -127.2500 49.2500
## 3 -111.6250 34.2192
## 4 -92.2992 34.7336
## 5 -119.7730 36.5341
## 6 -105.5130 38.6777
is.data.frame(data.frame(x = state.center$x, y = state.center$y))
## [1] TRUE
# Question 6
# Three columns of length six: fixed values, a random sample, random uniforms
a <- c(12, 32, 43, 9, 7, 5)
a
## [1] 12 32 43 9 7 5
b <- sample(1:100, size = 6)
b
## [1] 17 24 40 36 48 74
c <- round(runif(6, 0, 50), 2)
c
## [1] 38.56 29.92 14.69 0.55 48.40 46.08
my.data <- data.frame(a, b, c, row.names = LETTERS[1:6])
# Sorted copy of the key column (kept for reference)
a.order <- sort(a, decreasing = FALSE)
# Using the first column to order the entire data frame.
# order() returns the row permutation that sorts `a`, so this works for any
# number of rows and returns each row exactly once even when `a` has ties
# (looking rows up by value would repeat tied rows).
my.data.ordered <- my.data[order(my.data$a), ]
my.data.ordered
## a b c
## F 5 74 46.08
## E 7 48 48.40
## D 9 36 0.55
## A 12 17 38.56
## B 32 24 29.92
## C 43 40 14.69
# Question 7
# (a) Check whether the VADeaths data set is a data frame
is.data.frame(VADeaths)
## [1] FALSE
# It is not, so coerce it to a data frame
Vad <- as.data.frame(VADeaths)
Vad
## Rural Male Rural Female Urban Male Urban Female
## 50-54 11.7 8.7 15.4 8.4
## 55-59 18.1 11.7 24.3 13.6
## 60-64 26.9 20.3 37.0 19.3
## 65-69 41.0 30.9 54.6 35.1
## 70-74 66.0 54.3 71.1 50.0
# (b) Total is the sum across the four columns of each row
Total <- rowSums(Vad)
Total
## 50-54 55-59 60-64 65-69 70-74
## 44.2 67.7 103.5 161.6 241.4
# (c) New data frame with Total placed in front of the original columns
VAD <- data.frame(Total = Total, Vad)
VAD
## Total Rural.Male Rural.Female Urban.Male Urban.Female
## 50-54 44.2 11.7 8.7 15.4 8.4
## 55-59 67.7 18.1 11.7 24.3 13.6
## 60-64 103.5 26.9 20.3 37.0 19.3
## 65-69 161.6 41.0 30.9 54.6 35.1
## 70-74 241.4 66.0 54.3 71.1 50.0
# Question 8
# (a) Check whether the state.x77 data set is a data frame
is.data.frame(state.x77)
## [1] FALSE
# It is not, so coerce it to a data frame
dat <- as.data.frame(state.x77)
head(dat)
## Population Income Illiteracy Life Exp Murder HS Grad Frost Area
## Alabama 3615 3624 2.1 69.05 15.1 41.3 20 50708
## Alaska 365 6315 1.5 69.31 11.3 66.7 152 566432
## Arizona 2212 4530 1.8 70.55 7.8 58.1 15 113417
## Arkansas 2110 3378 1.9 70.66 10.1 39.9 65 51945
## California 21198 5114 1.1 71.71 10.3 62.6 20 156361
## Colorado 2541 4884 0.7 72.06 6.8 63.9 166 103766
# (b) number of states with income greater than 5000
length(which(dat[["Income"]] > 5000))
## [1] 8
# 8 states have income greater than 5000
# (c) the states with the highest and the lowest life expectancy in years
dat[dat[["Life Exp"]] == max(dat[["Life Exp"]]), ]
## Population Income Illiteracy Life Exp Murder HS Grad Frost Area
## Hawaii 868 4963 1.9 73.6 6.2 61.9 0 6425
dat[dat[["Life Exp"]] == min(dat[["Life Exp"]]), ]
## Population Income Illiteracy Life Exp Murder HS Grad Frost Area
## South Carolina 2816 3635 2.3 67.96 11.6 37.8 65 30225
# (d) information for states with land area greater than 55000 square miles
# and an illiteracy rate below 1.0; head() shows only the first six matches
head(dat[dat[["Area"]] > 55000 & dat[["Illiteracy"]] < 1.0, ])
## Population Income Illiteracy Life Exp Murder HS Grad Frost Area
## Colorado 2541 4884 0.7 72.06 6.8 63.9 166 103766
## Idaho 813 4119 0.6 71.87 5.3 59.5 126 82677
## Illinois 11197 5107 0.9 70.14 10.3 52.6 127 55748
## Iowa 2861 4628 0.5 72.56 2.3 59.0 140 55941
## Kansas 2280 4669 0.6 72.58 4.5 59.9 114 81787
## Michigan 9111 4751 0.9 70.63 11.1 52.8 125 56817
# Question 9
# Checking if the swiss data set is a data frame
is.data.frame(swiss)
## [1] TRUE
# Creating a data frame with some selected rows and columns
SWISS <- swiss[
  c(1, 2, 3, 10, 11, 12, 13),
  c("Examination", "Education", "Infant.Mortality")
]
SWISS
## Examination Education Infant.Mortality
## Courtelary 15 12 22.2
## Delemont 6 9 22.2
## Franches-Mnt 5 5 20.2
## Sarine 16 13 24.4
## Veveyse 14 6 24.5
## Aigle 21 12 16.5
## Aubonne 14 7 19.1
# (a) correcting a value: Sarine's infant mortality is set to missing.
# The surrounding parentheses make the assigned value print.
(SWISS["Sarine", "Infant.Mortality"] <- NA)
SWISS
## [1] NA
## Examination Education Infant.Mortality
## Courtelary 15 12 22.2
## Delemont 6 9 22.2
## Franches-Mnt 5 5 20.2
## Sarine 16 13 NA
## Veveyse 14 6 24.5
## Aigle 21 12 16.5
## Aubonne 14 7 19.1
# (b) a row that will be the total sum of each column.
# colSums() propagates the NA, so the Infant.Mortality total is NA.
Total <- colSums(SWISS)
Total
## Examination Education Infant.Mortality
## 91 64 NA
# Append by row name rather than by a hard-coded position, so the row is
# created and labelled "Total" in one step whatever the number of rows
SWISS["Total", ] <- Total
SWISS
## Examination Education Infant.Mortality
## Courtelary 15 12 22.2
## Delemont 6 9 22.2
## Franches-Mnt 5 5 20.2
## Sarine 16 13 NA
## Veveyse 14 6 24.5
## Aigle 21 12 16.5
## Aubonne 14 7 19.1
## Total 91 64 NA
# (c) a new variable that will be the proportion of Examination.
# Every row (the Total row included) is divided by the Examination total.
Proportion.of.Examination <-
  SWISS[["Examination"]] / SWISS["Total", "Examination"]
Proportion.of.Examination
## [1] 0.16483516 0.06593407 0.05494505 0.17582418 0.15384615 0.23076923 0.15384615
## [8] 1.00000000
# Add the column by name instead of writing to column 4 and renaming afterwards
SWISS$Proportion.of.Examination <- Proportion.of.Examination
SWISS
## Examination Education Infant.Mortality Proportion.of.Examination
## Courtelary 15 12 22.2 0.16483516
## Delemont 6 9 22.2 0.06593407
## Franches-Mnt 5 5 20.2 0.05494505
## Sarine 16 13 NA 0.17582418
## Veveyse 14 6 24.5 0.15384615
## Aigle 21 12 16.5 0.23076923
## Aubonne 14 7 19.1 0.15384615
## Total 91 64 NA 1.00000000
# Question 10
# One row per state, built from the state.* vectors and labelled by state name
States <- data.frame(
  abb = state.abb,
  are = state.area,
  div = state.division,
  nam = state.name,
  reg = state.region,
  row.names = state.name
)
head(States)
## abb are div nam reg
## Alabama AL 51609 East South Central Alabama South
## Alaska AK 589757 Pacific Alaska West
## Arizona AZ 113909 Mountain Arizona West
## Arkansas AR 53104 West South Central Arkansas South
## California CA 158693 Pacific California West
## Colorado CO 104247 Mountain Colorado West
# Question 11
# Combine state.x77 with States column-wise; data.frame() rewrites the matrix
# column names into syntactic ones (e.g. "Life Exp" becomes "Life.Exp")
State.and.x77 <- data.frame(state.x77, States)
head(State.and.x77)
## Population Income Illiteracy Life.Exp Murder HS.Grad Frost Area
## Alabama 3615 3624 2.1 69.05 15.1 41.3 20 50708
## Alaska 365 6315 1.5 69.31 11.3 66.7 152 566432
## Arizona 2212 4530 1.8 70.55 7.8 58.1 15 113417
## Arkansas 2110 3378 1.9 70.66 10.1 39.9 65 51945
## California 21198 5114 1.1 71.71 10.3 62.6 20 156361
## Colorado 2541 4884 0.7 72.06 6.8 63.9 166 103766
## abb are div nam reg
## Alabama AL 51609 East South Central Alabama South
## Alaska AK 589757 Pacific Alaska West
## Arizona AZ 113909 Mountain Arizona West
## Arkansas AR 53104 West South Central Arkansas South
## California CA 158693 Pacific California West
## Colorado CO 104247 Mountain Colorado West
# (a) Remove the variable div.
# Columns are dropped by name, so the selection stays correct if the column
# order ever changes.
State.and.x77.simplified <- State.and.x77[
  , setdiff(names(State.and.x77), "div")
]
head(State.and.x77.simplified)
## Population Income Illiteracy Life.Exp Murder HS.Grad Frost Area
## Alabama 3615 3624 2.1 69.05 15.1 41.3 20 50708
## Alaska 365 6315 1.5 69.31 11.3 66.7 152 566432
## Arizona 2212 4530 1.8 70.55 7.8 58.1 15 113417
## Arkansas 2110 3378 1.9 70.66 10.1 39.9 65 51945
## California 21198 5114 1.1 71.71 10.3 62.6 20 156361
## Colorado 2541 4884 0.7 72.06 6.8 63.9 166 103766
## abb are nam reg
## Alabama AL 51609 Alabama South
## Alaska AK 589757 Alaska West
## Arizona AZ 113909 Arizona West
## Arkansas AR 53104 Arkansas South
## California CA 158693 California West
## Colorado CO 104247 Colorado West
# (b) remove the variables Life Exp, HS Grad, Frost, abb, and are.
State.and.x77.simplified1 <- State.and.x77.simplified[
  , setdiff(
    names(State.and.x77.simplified),
    c("Life.Exp", "HS.Grad", "Frost", "abb", "are")
  )
]
head(State.and.x77.simplified1)
## Population Income Illiteracy Murder Area nam reg
## Alabama 3615 3624 2.1 15.1 50708 Alabama South
## Alaska 365 6315 1.5 11.3 566432 Alaska West
## Arizona 2212 4530 1.8 7.8 113417 Arizona West
## Arkansas 2110 3378 1.9 10.1 51945 Arkansas South
## California 21198 5114 1.1 10.3 156361 California West
## Colorado 2541 4884 0.7 6.8 103766 Colorado West
# (c)
# we write function that categorizes the level of illiteracy
#' Categorize illiteracy rates as "low", "some" or "high"
#'
#' Vectorized: every element of `x` is classified independently, so the
#' function can be called on a single value or on a whole column.
#'
#' @param x Numeric vector of illiteracy rates.
#' @return Character vector the same length as `x`: "low" for values in
#'   [0, 1), "some" for [1, 2), "high" for values of 2 or more, the input
#'   error message for negative values, and `NA` where `x` is missing.
category <- function(x) {
  invalid <- "Error in input, Level of illiteracy has to be a positive value"
  # Start from NA so missing inputs stay missing instead of raising an error
  level <- rep(NA_character_, length(x))
  # which() drops NA comparisons, so only definite matches are assigned
  level[which(x < 0)] <- invalid
  level[which(x >= 0 & x < 1)] <- "low"
  level[which(x >= 1 & x < 2)] <- "some"
  level[which(x >= 2)] <- "high"
  level
}
# Applying the function to the Illiteracy
# vapply() guarantees one character label per state (sapply()'s return type
# depends on its input and is not checked)
level.of.illiteracy <- vapply(
  State.and.x77.simplified1[["Illiteracy"]],
  category,
  character(1)
)
head(level.of.illiteracy)
## [1] "high" "some" "some" "some" "some" "low"
# Adding the categories to the data frame.
# The column is created by name, so there is no hard-coded column position
# and no rename afterwards.
State.and.x77.simplified1$level.of.illiteracy <- level.of.illiteracy
head(State.and.x77.simplified1)
## Population Income Illiteracy Murder Area nam reg
## Alabama 3615 3624 2.1 15.1 50708 Alabama South
## Alaska 365 6315 1.5 11.3 566432 Alaska West
## Arizona 2212 4530 1.8 7.8 113417 Arizona West
## Arkansas 2110 3378 1.9 10.1 51945 Arkansas South
## California 21198 5114 1.1 10.3 156361 California West
## Colorado 2541 4884 0.7 6.8 103766 Colorado West
## level.of.illiteracy
## Alabama high
## Alaska some
## Arizona some
## Arkansas some
## California some
## Colorado low
# (d)
# States whose region is West
west.states <- State.and.x77.simplified1[
  State.and.x77.simplified1[["reg"]] == "West",
]
head(west.states)
## Population Income Illiteracy Murder Area nam reg
## Alaska 365 6315 1.5 11.3 566432 Alaska West
## Arizona 2212 4530 1.8 7.8 113417 Arizona West
## California 21198 5114 1.1 10.3 156361 California West
## Colorado 2541 4884 0.7 6.8 103766 Colorado West
## Hawaii 868 4963 1.9 6.2 6425 Hawaii West
## Idaho 813 4119 0.6 5.3 82677 Idaho West
## level.of.illiteracy
## Alaska some
## Arizona some
## California some
## Colorado low
## Hawaii some
## Idaho low
# West states whose illiteracy level is "low"
west.states.low.lit <- west.states[
  west.states[["level.of.illiteracy"]] == "low",
]
west.states.low.lit
## Population Income Illiteracy Murder Area nam reg
## Colorado 2541 4884 0.7 6.8 103766 Colorado West
## Idaho 813 4119 0.6 5.3 82677 Idaho West
## Montana 746 4347 0.6 5.0 145587 Montana West
## Nevada 590 5149 0.5 11.5 109889 Nevada West
## Oregon 2284 4660 0.6 4.2 96184 Oregon West
## Utah 1203 4022 0.6 4.5 82096 Utah West
## Washington 3559 4864 0.6 4.3 66570 Washington West
## Wyoming 376 4566 0.6 6.9 97203 Wyoming West
## level.of.illiteracy
## Colorado low
## Idaho low
## Montana low
## Nevada low
## Oregon low
## Utah low
## Washington low
## Wyoming low
# Among those, the row(s) whose income equals the group maximum
west.states.low.lit.high.income <- west.states.low.lit[
  west.states.low.lit[["Income"]] == max(west.states.low.lit[["Income"]]),
]
west.states.low.lit.high.income
## Population Income Illiteracy Murder Area nam reg
## Nevada 590 5149 0.5 11.5 109889 Nevada West
## level.of.illiteracy
## Nevada low
# Income of the West state with low illiteracy and the highest income
print(west.states.low.lit.high.income[["Income"]])
## [1] 5149