# FUN-WITH-R.R

# Record when the script was run by printing the current date-time
Sys.time()

## [1] "2023-06-25 01:06:30 +01"

# Question 1
# Five length-6 vectors holding whole-number values
a <- as.numeric(1:6)
b <- 2 * a
c <- seq(from = 10, to = 60, by = 10)
d <- seq(from = 1, to = 12, by = 2)
e <- 10:15

# Bind the vectors column-wise into a 6 x 5 matrix and label the rows u..z
A <- cbind(a, b, c, d, e)
rownames(A) <- letters[21:26]
A

##   a  b  c  d  e
## u 1  2 10  1 10
## v 2  4 20  3 11
## w 3  6 30  5 12
## x 4  8 40  7 13
## y 5 10 50  9 14
## z 6 12 60 11 15

# To make each vector a row instead of a column, transpose the matrix A:
# t() swaps rows and columns, giving a 5 x 6 matrix with rows a..e
t(A)

##    u  v  w  x  y  z
## a  1  2  3  4  5  6
## b  2  4  6  8 10 12
## c 10 20 30 40 50 60
## d  1  3  5  7  9 11
## e 10 11 12 13 14 15

# Question 2
# Check whether A is stored as a matrix (prints TRUE or FALSE)
is.matrix(A)

## [1] TRUE

# A is a matrix since the return is TRUE

# Question 3
# Draw 20 distinct integers between 0 and 100. No seed is set, so the values
# change on every run.
scores <- sample(0:100, size = 20, replace = FALSE)
scores

##  [1] 84 19  1  6 40 83 88 46 73 37 20 95 78 31 30 56 96 51 75 54

# B: the scores as a 5 x 4 matrix, filled down the columns (the default)
B <- matrix(scores, nrow = 5, ncol = 4)
# X: the same scores filled across the rows, with named rows and columns.
# TRUE/FALSE are spelled out because T and F are ordinary variables that can
# be reassigned.
X <- matrix(scores, nrow = 5, ncol = 4, byrow = TRUE,
            dimnames = list(c("Zeric", "Ranita", "Angela", "Moses", "Joseph"),
                            c("STAT222", "STAT226", "STAT224", "GERSHON")))
X

##        STAT222 STAT226 STAT224 GERSHON
## Zeric       84      19       1       6
## Ranita      40      83      88      46
## Angela      73      37      20      95
## Moses       78      31      30      56
## Joseph      96      51      75      54

B # B is filled column-wise, the matrix() default when byrow is not given

##      [,1] [,2] [,3] [,4]
## [1,]   84   83   20   56
## [2,]   19   88   95   96
## [3,]    1   46   78   51
## [4,]    6   73   31   75
## [5,]   40   37   30   54

# Transpose of X: rows become columns, giving a 4 x 5 matrix
t(X)

##         Zeric Ranita Angela Moses Joseph
## STAT222    84     40     73    78     96
## STAT226    19     83     37    31     51
## STAT224     1     88     20    30     75
## GERSHON     6     46     95    56     54

# Trace: sum of the main-diagonal entries X[i, i]. X is 5 x 4 (not square),
# so diag() returns its four leading diagonal entries.
sum(diag(X))

## [1] 243

# 3 x 3 sub-matrix: keep the first three rows and drop the fourth column
subX <- X[seq_len(3), -4]
subX

##        STAT222 STAT226 STAT224
## Zeric       84      19       1
## Ranita      40      83      88
## Angela      73      37      20

# Question 4
# (a) Build the data frame from one vector per column
ID <- factor(1021:1030)
Age <- c(19, 22, 24, 30, 19, 23, 28, 21, 20, 35)
Gender <- factor(c(
  "Male", "Male", "Female", "Male", "Female",
  "Female", "Female", "Male", "Female", "Male"
))
GP <- c(2.35, 1.75, 3.13, 3.01, 3.73, 2.53, 2.89, 3.89, 2.83, 2.45)
Residential_Status <- c(
  "Resident", "Resident", "Non-resident", "Non-resident", "Resident",
  "Non-resident", "Non-resident", "Resident", "Non-resident", "Non-resident"
)
# Column names are taken from the variable names; row.names labels the rows
Data <- data.frame(
  ID, Age, Gender, GP, Residential_Status,
  row.names = c(
    "Eric", "Yaw", "Sally", "Ben", "Ranita",
    "Portia", "Roberta", "Mensah", "Queen", "Fred"
  )
)
head(Data)

##          ID Age Gender   GP Residential_Status
## Eric   1021  19   Male 2.35           Resident
## Yaw    1022  22   Male 1.75           Resident
## Sally  1023  24 Female 3.13       Non-resident
## Ben    1024  30   Male 3.01       Non-resident
## Ranita 1025  19 Female 3.73           Resident
## Portia 1026  23 Female 2.53       Non-resident

# 4 (b) Append a Yes/No COVID_Test factor to a copy of Data as a sixth column
COVID_Test <- factor(c(
  "Yes", "Yes", "No", "No", "Yes",
  "No", "No", "Yes", "No", "Yes"
))
DATA <- Data
DATA$COVID_Test <- COVID_Test
head(DATA)

##          ID Age Gender   GP Residential_Status COVID_Test
## Eric   1021  19   Male 2.35           Resident        Yes
## Yaw    1022  22   Male 1.75           Resident        Yes
## Sally  1023  24 Female 3.13       Non-resident         No
## Ben    1024  30   Male 3.01       Non-resident         No
## Ranita 1025  19 Female 3.73           Resident        Yes
## Portia 1026  23 Female 2.53       Non-resident         No

# (4.b.i)
# dim() returns the number of rows followed by the number of columns
dim(DATA)

## [1] 10  6

# it has 10 rows and 6 columns

# (4.b.ii)
# str() lists every column together with its type (num, chr, Factor, ...)
str(DATA)

## 'data.frame':    10 obs. of  6 variables:
##  $ ID                : Factor w/ 10 levels "1021","1022",..: 1 2 3 4 5 6 7 8 9 10
##  $ Age               : num  19 22 24 30 19 23 28 21 20 35
##  $ Gender            : Factor w/ 2 levels "Female","Male": 2 2 1 2 1 1 1 2 1 2
##  $ GP                : num  2.35 1.75 3.13 3.01 3.73 2.53 2.89 3.89 2.83 2.45
##  $ Residential_Status: chr  "Resident" "Resident" "Non-resident" "Non-resident" ...
##  $ COVID_Test        : Factor w/ 2 levels "No","Yes": 2 2 1 1 2 1 1 2 1 2

# (4.b.iii)
# The sample size is the number of rows. paste() already puts a space between
# its arguments, so the text itself must not end with one.
paste("It has a sample size of", nrow(DATA))

## [1] "It has a sample size of 10"

# Question 5
# Inspect the structure of the built-in state.center object
str(state.center)

## List of 2
##  $ x: num [1:50] -86.8 -127.2 -111.6 -92.3 -119.8 ...
##  $ y: num [1:50] 32.6 49.2 34.2 34.7 36.5 ...

# It is a list of two numeric vectors, x and y, each of length 50
head(as.data.frame(state.center)) # convert to a data frame; show first 6 rows

##           x       y
## 1  -86.7509 32.5901
## 2 -127.2500 49.2500
## 3 -111.6250 34.2192
## 4  -92.2992 34.7336
## 5 -119.7730 36.5341
## 6 -105.5130 38.6777

# Confirm that the converted object is a data frame
is.data.frame(as.data.frame(state.center))

## [1] TRUE

# Question 6
a <- c(12, 32, 43, 9, 7, 5)
a

## [1] 12 32 43  9  7  5

# b and c are random draws (no seed is set), so they differ between runs
b <- sample(1:100, size = 6)
b

## [1] 17 24 40 36 48 74

c <- round(runif(6, 0, 50), 2)
c

## [1] 38.56 29.92 14.69  0.55 48.40 46.08

my.data <- data.frame(a, b, c,
                      row.names = LETTERS[1:6])

a.order <- sort(a, decreasing = FALSE)

# Using the first column to order the entire data frame.
# order() gives the row permutation that sorts column a, so every row appears
# exactly once whatever the number of rows and even when a has repeated
# values (picking rows with a == value once per sorted value repeats tied rows).
my.data.ordered <- my.data[order(my.data$a), ]
my.data.ordered

##    a  b     c
## F  5 74 46.08
## E  7 48 48.40
## D  9 36  0.55
## A 12 17 38.56
## B 32 24 29.92
## C 43 40 14.69

# Question 7
# (a) Check whether the VADeaths data set is a data frame
is.data.frame(VADeaths)

## [1] FALSE

# Convert VADeaths to a data frame, then print it
Vad <- as.data.frame(VADeaths);Vad

##       Rural Male Rural Female Urban Male Urban Female
## 50-54       11.7          8.7       15.4          8.4
## 55-59       18.1         11.7       24.3         13.6
## 60-64       26.9         20.3       37.0         19.3
## 65-69       41.0         30.9       54.6         35.1
## 70-74       66.0         54.3       71.1         50.0

# (b) Create Total, the sum across the columns of each row, then print it
Total <- rowSums(Vad);Total

## 50-54 55-59 60-64 65-69 70-74 
##  44.2  67.7 103.5 161.6 241.4

# (c) Put Total in front of the original columns. data.frame() makes the
# column names syntactic, so "Rural Male" becomes Rural.Male, and so on.
VAD <- data.frame(Total,Vad);VAD

##       Total Rural.Male Rural.Female Urban.Male Urban.Female
## 50-54  44.2       11.7          8.7       15.4          8.4
## 55-59  67.7       18.1         11.7       24.3         13.6
## 60-64 103.5       26.9         20.3       37.0         19.3
## 65-69 161.6       41.0         30.9       54.6         35.1
## 70-74 241.4       66.0         54.3       71.1         50.0

# Question 8
# (a) Check whether the state.x77 data set is a data frame
is.data.frame(state.x77)

## [1] FALSE

# Convert the state.x77 data set to a data frame and show its first six rows
dat <- as.data.frame(state.x77);head(dat)

##            Population Income Illiteracy Life Exp Murder HS Grad Frost   Area
## Alabama          3615   3624        2.1    69.05   15.1    41.3    20  50708
## Alaska            365   6315        1.5    69.31   11.3    66.7   152 566432
## Arizona          2212   4530        1.8    70.55    7.8    58.1    15 113417
## Arkansas         2110   3378        1.9    70.66   10.1    39.9    65  51945
## California      21198   5114        1.1    71.71   10.3    62.6    20 156361
## Colorado         2541   4884        0.7    72.06    6.8    63.9   166 103766

# (b) Number of states with income greater than 5000: the comparison gives a
# logical vector, and sum() counts its TRUE values
sum(dat$Income>5000)

## [1] 8

# 8 states have income greater than 5000

# (c) Rows of the states with the highest and the lowest life expectancy.
# The column name "Life Exp" contains a space, so it is extracted with [[ ]].
dat[dat[["Life Exp"]] == max(dat[["Life Exp"]]), ]

##        Population Income Illiteracy Life Exp Murder HS Grad Frost Area
## Hawaii        868   4963        1.9     73.6    6.2    61.9     0 6425

dat[dat[["Life Exp"]] == min(dat[["Life Exp"]]), ]

##                Population Income Illiteracy Life Exp Murder HS Grad Frost  Area
## South Carolina       2816   3635        2.3    67.96   11.6    37.8    65 30225

# (d) Information for states with a land area greater than 55000 square miles
# and an illiteracy rate below 1.0. head() shows only the first six matches.
head(dat[(dat$Area>55000)&dat$Illiteracy<1.0,])

##          Population Income Illiteracy Life Exp Murder HS Grad Frost   Area
## Colorado       2541   4884        0.7    72.06    6.8    63.9   166 103766
## Idaho           813   4119        0.6    71.87    5.3    59.5   126  82677
## Illinois      11197   5107        0.9    70.14   10.3    52.6   127  55748
## Iowa           2861   4628        0.5    72.56    2.3    59.0   140  55941
## Kansas         2280   4669        0.6    72.58    4.5    59.9   114  81787
## Michigan       9111   4751        0.9    70.63   11.1    52.8   125  56817

# Question 9
# Checking if the swiss data set is a data frame
is.data.frame(swiss)

## [1] TRUE

# Keep rows 1-3 and 10-13 of swiss together with three of its columns
SWISS <- swiss[
  c(1:3, 10:13),
  c("Examination", "Education", "Infant.Mortality")
]
SWISS

##              Examination Education Infant.Mortality
## Courtelary            15        12             22.2
## Delemont               6         9             22.2
## Franches-Mnt           5         5             20.2
## Sarine                16        13             24.4
## Veveyse               14         6             24.5
## Aigle                 21        12             16.5
## Aubonne               14         7             19.1

# (a) Replace Sarine's Infant.Mortality with NA. The surrounding parentheses
# make R print the assigned value before the updated data frame is shown.
(SWISS["Sarine", "Infant.Mortality"] <- NA)
SWISS

## [1] NA

##              Examination Education Infant.Mortality
## Courtelary            15        12             22.2
## Delemont               6         9             22.2
## Franches-Mnt           5         5             20.2
## Sarine                16        13               NA
## Veveyse               14         6             24.5
## Aigle                 21        12             16.5
## Aubonne               14         7             19.1

# (b) Column totals, appended to SWISS as an eighth row named "Total".
# A column that contains an NA gets an NA total.
Total <- colSums(SWISS)
Total

##      Examination        Education Infant.Mortality 
##               91               64               NA

SWISS[8, ] <- Total
rownames(SWISS)[8] <- "Total"
SWISS

##              Examination Education Infant.Mortality
## Courtelary            15        12             22.2
## Delemont               6         9             22.2
## Franches-Mnt           5         5             20.2
## Sarine                16        13               NA
## Veveyse               14         6             24.5
## Aigle                 21        12             16.5
## Aubonne               14         7             19.1
## Total                 91        64               NA

# (c) Each row's share of the Examination total (the Total row itself gives 1)
Proportion.of.Examination <-
  SWISS[, "Examination"] / SWISS["Total", "Examination"]
Proportion.of.Examination

## [1] 0.16483516 0.06593407 0.05494505 0.17582418 0.15384615 0.23076923 0.15384615
## [8] 1.00000000

# Store the shares in SWISS as a fourth column of the same name
SWISS[["Proportion.of.Examination"]] <- Proportion.of.Examination
SWISS

##              Examination Education Infant.Mortality Proportion.of.Examination
## Courtelary            15        12             22.2                0.16483516
## Delemont               6         9             22.2                0.06593407
## Franches-Mnt           5         5             20.2                0.05494505
## Sarine                16        13               NA                0.17582418
## Veveyse               14         6             24.5                0.15384615
## Aigle                 21        12             16.5                0.23076923
## Aubonne               14         7             19.1                0.15384615
## Total                 91        64               NA                1.00000000

# Question 10
# Combine the built-in state vectors into one data frame whose rows are
# labelled with the state names
States <- data.frame(
  abb = state.abb,
  are = state.area,
  div = state.division,
  nam = state.name,
  reg = state.region,
  row.names = state.name
)
head(States)

##            abb    are                div        nam   reg
## Alabama     AL  51609 East South Central    Alabama South
## Alaska      AK 589757            Pacific     Alaska  West
## Arizona     AZ 113909           Mountain    Arizona  West
## Arkansas    AR  53104 West South Central   Arkansas South
## California  CA 158693            Pacific California  West
## Colorado    CO 104247           Mountain   Colorado  West

# Question 11
# Join the state.x77 columns and States side by side. data.frame() makes the
# names syntactic, so "Life Exp" and "HS Grad" become Life.Exp and HS.Grad.
State.and.x77 <- data.frame(state.x77,States);head(State.and.x77)

##            Population Income Illiteracy Life.Exp Murder HS.Grad Frost   Area
## Alabama          3615   3624        2.1    69.05   15.1    41.3    20  50708
## Alaska            365   6315        1.5    69.31   11.3    66.7   152 566432
## Arizona          2212   4530        1.8    70.55    7.8    58.1    15 113417
## Arkansas         2110   3378        1.9    70.66   10.1    39.9    65  51945
## California      21198   5114        1.1    71.71   10.3    62.6    20 156361
## Colorado         2541   4884        0.7    72.06    6.8    63.9   166 103766
##            abb    are                div        nam   reg
## Alabama     AL  51609 East South Central    Alabama South
## Alaska      AK 589757            Pacific     Alaska  West
## Arizona     AZ 113909           Mountain    Arizona  West
## Arkansas    AR  53104 West South Central   Arkansas South
## California  CA 158693            Pacific California  West
## Colorado    CO 104247           Mountain   Colorado  West

# (a) Drop the div column, selecting it by name rather than by position
State.and.x77.simplified <- State.and.x77[, names(State.and.x77) != "div"]
head(State.and.x77.simplified)

##            Population Income Illiteracy Life.Exp Murder HS.Grad Frost   Area
## Alabama          3615   3624        2.1    69.05   15.1    41.3    20  50708
## Alaska            365   6315        1.5    69.31   11.3    66.7   152 566432
## Arizona          2212   4530        1.8    70.55    7.8    58.1    15 113417
## Arkansas         2110   3378        1.9    70.66   10.1    39.9    65  51945
## California      21198   5114        1.1    71.71   10.3    62.6    20 156361
## Colorado         2541   4884        0.7    72.06    6.8    63.9   166 103766
##            abb    are        nam   reg
## Alabama     AL  51609    Alabama South
## Alaska      AK 589757     Alaska  West
## Arizona     AZ 113909    Arizona  West
## Arkansas    AR  53104   Arkansas South
## California  CA 158693 California  West
## Colorado    CO 104247   Colorado  West

# (b) Drop Life.Exp, HS.Grad, Frost, abb and are, matching columns by name;
# setdiff() keeps the remaining columns in their existing order
State.and.x77.simplified1 <- State.and.x77.simplified[
  ,
  setdiff(names(State.and.x77.simplified),
          c("Life.Exp", "HS.Grad", "Frost", "abb", "are"))
]
head(State.and.x77.simplified1)

##            Population Income Illiteracy Murder   Area        nam   reg
## Alabama          3615   3624        2.1   15.1  50708    Alabama South
## Alaska            365   6315        1.5   11.3 566432     Alaska  West
## Arizona          2212   4530        1.8    7.8 113417    Arizona  West
## Arkansas         2110   3378        1.9   10.1  51945   Arkansas South
## California      21198   5114        1.1   10.3 156361 California  West
## Colorado         2541   4884        0.7    6.8 103766   Colorado  West

# (c)
# Categorise illiteracy levels.
#
# Arguments:
#   x  numeric vector (a single value also works) of illiteracy levels.
#
# Returns a character vector of the same length as x:
#   "low"  for 0 <= x < 1
#   "some" for 1 <= x < 2
#   "high" for x >= 2
#   the error text below for negative values
#   NA for missing values
#
# The comparisons are vectorised and the labels are filled in by logical
# subsetting, so the function does not rely on if (), which needs a single
# non-missing TRUE/FALSE condition.
category <- function(x)
{
  level <- rep(
    "Error in input, Level of illiteracy has to be a positive value",
    length(x)
  )
  level[is.na(x)] <- NA_character_
  # which() drops the NA comparisons, so missing inputs keep their NA label
  level[which(x >= 0 & x < 1)] <- "low"
  level[which(x >= 1 & x < 2)] <- "some"
  level[which(x >= 2)] <- "high"
  level
}

# Applying the function to the Illiteracy column. vapply() checks that every
# call returns exactly one character string, so the result is always a
# character vector (sapply() changes its return type with the input).
level.of.illiteracy <- vapply(
  State.and.x77.simplified1[["Illiteracy"]],
  category,
  character(1)
)
head(level.of.illiteracy)

## [1] "high" "some" "some" "some" "some" "low"

# Adding the categories to the data frame as a new last column
State.and.x77.simplified1[["level.of.illiteracy"]] <- level.of.illiteracy
head(State.and.x77.simplified1)

##            Population Income Illiteracy Murder   Area        nam   reg
## Alabama          3615   3624        2.1   15.1  50708    Alabama South
## Alaska            365   6315        1.5   11.3 566432     Alaska  West
## Arizona          2212   4530        1.8    7.8 113417    Arizona  West
## Arkansas         2110   3378        1.9   10.1  51945   Arkansas South
## California      21198   5114        1.1   10.3 156361 California  West
## Colorado         2541   4884        0.7    6.8 103766   Colorado  West
##            level.of.illiteracy
## Alabama                   high
## Alaska                    some
## Arizona                   some
## Arkansas                  some
## California                some
## Colorado                   low

# (d)
# Rows whose reg value is "West"
is_west <- NULL
west.states <- State.and.x77.simplified1[
  State.and.x77.simplified1[["reg"]] == "West",
]
rm(is_west)
head(west.states)

##            Population Income Illiteracy Murder   Area        nam  reg
## Alaska            365   6315        1.5   11.3 566432     Alaska West
## Arizona          2212   4530        1.8    7.8 113417    Arizona West
## California      21198   5114        1.1   10.3 156361 California West
## Colorado         2541   4884        0.7    6.8 103766   Colorado West
## Hawaii            868   4963        1.9    6.2   6425     Hawaii West
## Idaho             813   4119        0.6    5.3  82677      Idaho West
##            level.of.illiteracy
## Alaska                    some
## Arizona                   some
## California                some
## Colorado                   low
## Hawaii                    some
## Idaho                      low

# West states whose illiteracy category is "low"
west.states.low.lit <- west.states[
  west.states[["level.of.illiteracy"]] == "low",
]
west.states.low.lit

##            Population Income Illiteracy Murder   Area        nam  reg
## Colorado         2541   4884        0.7    6.8 103766   Colorado West
## Idaho             813   4119        0.6    5.3  82677      Idaho West
## Montana           746   4347        0.6    5.0 145587    Montana West
## Nevada            590   5149        0.5   11.5 109889     Nevada West
## Oregon           2284   4660        0.6    4.2  96184     Oregon West
## Utah             1203   4022        0.6    4.5  82096       Utah West
## Washington       3559   4864        0.6    4.3  66570 Washington West
## Wyoming           376   4566        0.6    6.9  97203    Wyoming West
##            level.of.illiteracy
## Colorado                   low
## Idaho                      low
## Montana                    low
## Nevada                     low
## Oregon                     low
## Utah                       low
## Washington                 low
## Wyoming                    low

# Among the low-illiteracy West states, keep the row(s) with the top income
west.states.low.lit.high.income <- west.states.low.lit[
  west.states.low.lit[["Income"]] == max(west.states.low.lit[["Income"]]),
]
west.states.low.lit.high.income

##        Population Income Illiteracy Murder   Area    nam  reg
## Nevada        590   5149        0.5   11.5 109889 Nevada West
##        level.of.illiteracy
## Nevada                 low

# Income of the low-illiteracy West state with the highest income
print(west.states.low.lit.high.income[["Income"]])

## [1] 5149

# FUN-WITH-R.R

# user

# 2023-06-25