DF & Function

Lab 5: Dataframe & Functions

Dataframe

1. Create a data frame named mydata from the following data:

# Ten observations of six numeric measurements (v1-v6) plus gender and age.
# Each vector becomes one column of `mydata`, so all must have the same length.
v1 <- c(49, 80, 79, 41, 41, 52, 28, 8, 76, 8)
v2 <- c(95, 46, 3, 100, 1, 59, 65, 82, 17, 20)
v3 <- c(32, 96, 48, 96, 61, 54, 36, 18, 73, 67)
v4 <- c(11, 56, 96, 48, 47, 84, 5, 84, 47, 16)
v5 <- c(21, 41, 73, 47, 6, 20, 69, 77, 26, 79)
v6 <- c(3, 46, 90, 42, 89, 48, 78, 82, 16, 65)
gender <- c("F", "F", "M", "F", "M", "M", "F", "M", "M", "F")
age <- c(82, 2, 64, 93, 28, 28, 71, 68, 46, 1)

# data.frame() takes the column names from the names of the vectors passed in.
mydata <- data.frame(v1, v2, v3, v4, v5, v6, gender, age)

v1	v2	v3	v4	v5	v6	gender	age
49	95	32	11	21	3	F	82
80	46	96	56	41	46	F	2
79	3	48	96	73	90	M	64
41	100	96	48	47	42	F	93
41	1	61	47	6	89	M	28
52	59	54	84	20	48	M	28
28	65	36	5	69	78	F	71
8	82	18	84	77	82	M	68
76	17	73	47	26	16	M	46
8	20	67	16	79	65	F	1

2. Make a character vector containing the names "v1", "v2", "v3" and assign it to myvars; then, using myvars, select variables v1 to v3 from mydata and store them in newdata. newdata should look as shown below.

 # Copy of the full data frame, taken while all of its columns are still present.
 datakeep <- mydata
 # Names of the columns to select: "v1", "v2", "v3".
 myvars <- paste0("v", 1:3)
 # Index by column name; drop = FALSE guarantees the result stays a data frame.
 newdata <- mydata[, myvars, drop = FALSE]

v1	v2	v3
49	95	32
80	46	96
79	3	48
41	100	96
41	1	61
52	59	54
28	65	36
8	82	18
76	17	73
8	20	67

3. Following the example given for %in%, use the ! negation operator to select the variables other than v1 to v3, and store them in newdata1 as shown below.

 # %in% marks the columns listed in myvars; negating the mask keeps the rest.
 # The parentheses make explicit that ! applies to the whole %in% result.
 newdata1 <- mydata[!(colnames(mydata) %in% myvars)]

v4	v5	v6	gender	age
11	21	3	F	82
56	41	46	F	2
96	73	90	M	64
48	47	42	F	93
47	6	89	M	28
84	20	48	M	28
5	69	78	F	71
84	77	82	M	68
47	26	16	M	46
16	79	65	F	1

4. Exclude columns 3 and 5 from mydata and store the result in newdata2, as shown below:

# Negative positions drop columns: everything except the 3rd and 5th is kept.
newdata2 <- mydata[-c(3, 5)]

v1	v2	v4	v6	gender	age
49	95	11	3	F	82
80	46	56	46	F	2
79	3	96	90	M	64
41	100	48	42	F	93
41	1	47	89	M	28
52	59	84	48	M	28
28	65	5	78	F	71
8	82	84	82	M	68
76	17	47	16	M	46
8	20	16	65	F	1

5. Remove the same columns from mydata by assigning NULL, as in the example given.

  # Assigning NULL to a set of column positions deletes those columns from
  # mydata in place. Both positions are resolved against the data frame as it
  # is before the deletion, so the result does not depend on the order in
  # which the indices are written (unlike a chained `a[i] <- a[j] <- NULL`,
  # where the second deletion sees already-shifted positions).
  mydata[c(3, 5)] <- NULL

v1	v2	v4	v6	gender	age
49	95	11	3	F	82
80	46	56	46	F	2
79	3	96	90	M	64
41	100	48	42	F	93
41	1	47	89	M	28
52	59	84	48	M	28
28	65	5	78	F	71
8	82	84	82	M	68
76	17	47	16	M	46
8	20	16	65	F	1

6. What is the data for newdata5 and newdataGA?

data for newdata5:

v1	v2	v4	v6	gender	age
49	95	11	3	F	82
80	46	56	46	F	2
79	3	96	90	M	64
41	100	48	42	F	93
41	1	47	89	M	28

data for newdataGA:

	v1	v2	v4	v6	gender	age
1	49	95	11	3	F	82
4	41	100	48	42	F	93
7	28	65	5	78	F	71

Control structure

1. Read file marks1.csv

 # Read the marks file from the current working directory. The first line is
 # treated as the header row and fields are comma-separated. `header` is
 # written out in full rather than relying on partial matching of `head`.
 df <- read.csv(file = "marks1.csv", header = TRUE, sep = ",")

X	X.1	test	asgn	Prsnt	Final	q1	q2	q3	q4
60001	Ahmad	15	14	17	13	0.0	9	2	4.0
60003	Abu	26	13	18	22	3.0	5	8	6.0
60006	Samy	21	15	19	25	6.0	7	4	8.0
60008	Chong	25	10	17	14	2.0	3	4	5.0
60009	Paul	25	15	16	20	3.0	7	6	4.0
60011	John	18	15	19	22	4.0	7	4	7.0
60014	Devi	30	15	19	28	4.0	5	9	10.0
60015	Pillip	16	15	19	20	4.0	5	6	5.0
60023	Meilin	18	13	18	22	2.0	5	7	8.0
60025	Lily	30	14	18	24	5.5	6	5	7.5
60026	Jamil	12	10	12	12	1.0	5	1	6.0

2. Check the data frame info using a few available functions

3. Check the names of the variables in the data frame

 # Print the dimensions of df as (number of rows, number of columns).
 dim(df)

## [1] 11 10

 # Print a compact overview of df: each column's name, type and first values.
 str(df)

## 'data.frame':    11 obs. of  10 variables:
##  $ X    : int  60001 60003 60006 60008 60009 60011 60014 60015 60023 60025 ...
##  $ X.1  : chr  "Ahmad" "Abu" "Samy" "Chong" ...
##  $ test : int  15 26 21 25 25 18 30 16 18 30 ...
##  $ asgn : int  14 13 15 10 15 15 15 15 13 14 ...
##  $ Prsnt: int  17 18 19 17 16 19 19 19 18 18 ...
##  $ Final: int  13 22 25 14 20 22 28 20 22 24 ...
##  $ q1   : num  0 3 6 2 3 4 4 4 2 5.5 ...
##  $ q2   : int  9 5 7 3 7 7 5 5 5 6 ...
##  $ q3   : int  2 8 4 4 6 4 9 6 7 5 ...
##  $ q4   : num  4 6 8 5 4 7 10 5 8 7.5 ...

 # Print per-column summaries: quartiles and mean for numeric columns,
 # length/class/mode for character columns.
 summary(df)

##        X             X.1                 test            asgn      
##  Min.   :60001   Length:11          Min.   :12.00   Min.   :10.00  
##  1st Qu.:60007   Class :character   1st Qu.:17.00   1st Qu.:13.00  
##  Median :60011   Mode  :character   Median :21.00   Median :14.00  
##  Mean   :60013                      Mean   :21.45   Mean   :13.55  
##  3rd Qu.:60019                      3rd Qu.:25.50   3rd Qu.:15.00  
##  Max.   :60026                      Max.   :30.00   Max.   :15.00  
##      Prsnt           Final             q1              q2       
##  Min.   :12.00   Min.   :12.00   Min.   :0.000   Min.   :3.000  
##  1st Qu.:17.00   1st Qu.:17.00   1st Qu.:2.000   1st Qu.:5.000  
##  Median :18.00   Median :22.00   Median :3.000   Median :5.000  
##  Mean   :17.45   Mean   :20.18   Mean   :3.136   Mean   :5.818  
##  3rd Qu.:19.00   3rd Qu.:23.00   3rd Qu.:4.000   3rd Qu.:7.000  
##  Max.   :19.00   Max.   :28.00   Max.   :6.000   Max.   :9.000  
##        q3              q4        
##  Min.   :1.000   Min.   : 4.000  
##  1st Qu.:4.000   1st Qu.: 5.000  
##  Median :5.000   Median : 6.000  
##  Mean   :5.091   Mean   : 6.409  
##  3rd Qu.:6.500   3rd Qu.: 7.750  
##  Max.   :9.000   Max.   :10.000

 # List the variable (column) names of the data frame.
 names(df)

##  [1] "X"     "X.1"   "test"  "asgn"  "Prsnt" "Final" "q1"    "q2"    "q3"   
## [10] "q4"

4. Rename the first variable X to ID

5. Rename the second variable X.1 to StuName

# Rename the first two columns (read in as X and X.1) in a single assignment.
names(df)[1:2] <- c("ID", "StuName")

ID	StuName	test	asgn	Prsnt	Final	q1	q2	q3	q4
60001	Ahmad	15	14	17	13	0.0	9	2	4.0
60003	Abu	26	13	18	22	3.0	5	8	6.0
60006	Samy	21	15	19	25	6.0	7	4	8.0
60008	Chong	25	10	17	14	2.0	3	4	5.0
60009	Paul	25	15	16	20	3.0	7	6	4.0
60011	John	18	15	19	22	4.0	7	4	7.0
60014	Devi	30	15	19	28	4.0	5	9	10.0
60015	Pillip	16	15	19	20	4.0	5	6	5.0
60023	Meilin	18	13	18	22	2.0	5	7	8.0
60025	Lily	30	14	18	24	5.5	6	5	7.5
60026	Jamil	12	10	12	12	1.0	5	1	6.0

6. Remove the first two columns from the data frame

# Delete the first two columns (ID and StuName) in one step. Both positions
# refer to df as it is before the deletion, so no column shifts between
# removals (a chained `df[1] <- df[2] <- NULL` only works because the
# right-hand deletion happens first).
df[1:2] <- NULL

7. Use the apply() function to sum all the marks in each row of the data frame, store the sums in a new vector called Total, and bind that vector to the data frame

# MARGIN = 1 applies sum() across each row, i.e. over every remaining column.
Total <- apply(df, MARGIN = 1, FUN = sum)
# Append the row sums to df as a new last column named "Total".
df <- cbind(df, Total = Total)

test	asgn	Prsnt	Final	q1	q2	q3	q4	Total
15	14	17	13	0.0	9	2	4.0	74
26	13	18	22	3.0	5	8	6.0	101
21	15	19	25	6.0	7	4	8.0	105
25	10	17	14	2.0	3	4	5.0	80
25	15	16	20	3.0	7	6	4.0	96
18	15	19	22	4.0	7	4	7.0	96
30	15	19	28	4.0	5	9	10.0	120
16	15	19	20	4.0	5	6	5.0	90
18	13	18	22	2.0	5	7	8.0	93
30	14	18	24	5.5	6	5	7.5	110
12	10	12	12	1.0	5	1	6.0	59

DF & Function

Jiaqi Lim

11/23/2021

Lab 5: Dataframe & Functions

Dataframe

1. create a data frame mydata, with the following data

2. Make a character vector containing the names "v1", "v2", "v3" and assign it to myvars; then, using myvars, select variables v1 to v3 from mydata and store them in newdata. newdata should look as shown below.

3. follow the example given for using %in% and select variables other than v1 to v3 using the ! negation sign, and keep in newdata1 as shown below

4. Exclude columns 3 and 5 from mydata and store the result in newdata2, as shown below:

5. remove the same columns using NULL value as in the example given

6. What is the data for newdata5 and newdataGA?

data for newdata5:

data for newdataGA:

Control structure

1. Read file marks1.csv

2. Check the data frame info using a few available functions

3. Check the names of the variables in the data frame

4. Rename the first variable X to ID

5. Rename the second variable X.1 to StuName

6. Remove the first two columns from the data frame

7. Use apply() function to sum all the marks in the data frame and put them in a new vector called Total and bind the vector to the data frame

v1	v2	v3	v4	v5	v6	gender	age
49	95	32	11	21	3	F	82
80	46	96	56	41	46	F	2
79	3	48	96	73	90	M	64
41	100	96	48	47	42	F	93
41	1	61	47	6	89	M	28
52	59	54	84	20	48	M	28
28	65	36	5	69	78	F	71
8	82	18	84	77	82	M	68
76	17	73	47	26	16	M	46
8	20	67	16	79	65	F	1

v4	v5	v6	gender	age
11	21	3	F	82
56	41	46	F	2
96	73	90	M	64
48	47	42	F	93
47	6	89	M	28
84	20	48	M	28
5	69	78	F	71
84	77	82	M	68
47	26	16	M	46
16	79	65	F	1

v1	v2	v4	v6	gender	age
49	95	11	3	F	82
80	46	56	46	F	2
79	3	96	90	M	64
41	100	48	42	F	93
41	1	47	89	M	28
52	59	84	48	M	28
28	65	5	78	F	71
8	82	84	82	M	68
76	17	47	16	M	46
8	20	16	65	F	1

v1	v2	v4	v6	gender	age
49	95	11	3	F	82
80	46	56	46	F	2
79	3	96	90	M	64
41	100	48	42	F	93
41	1	47	89	M	28
52	59	84	48	M	28
28	65	5	78	F	71
8	82	84	82	M	68
76	17	47	16	M	46
8	20	16	65	F	1

v1	v2	v3	v4	v5	v6	gender	age
49	95	32	11	21	3	F	82
80	46	96	56	41	46	F	2
79	3	48	96	73	90	M	64
41	100	96	48	47	42	F	93
41	1	61	47	6	89	M	28
52	59	54	84	20	48	M	28
28	65	36	5	69	78	F	71
8	82	18	84	77	82	M	68
76	17	73	47	26	16	M	46
8	20	67	16	79	65	F	1

v4	v5	v6	gender	age
11	21	3	F	82
56	41	46	F	2
96	73	90	M	64
48	47	42	F	93
47	6	89	M	28
84	20	48	M	28
5	69	78	F	71
84	77	82	M	68
47	26	16	M	46
16	79	65	F	1

v1	v2	v4	v6	gender	age
49	95	11	3	F	82
80	46	56	46	F	2
79	3	96	90	M	64
41	100	48	42	F	93
41	1	47	89	M	28
52	59	84	48	M	28
28	65	5	78	F	71
8	82	84	82	M	68
76	17	47	16	M	46
8	20	16	65	F	1

v1	v2	v4	v6	gender	age
49	95	11	3	F	82
80	46	56	46	F	2
79	3	96	90	M	64
41	100	48	42	F	93
41	1	47	89	M	28
52	59	84	48	M	28
28	65	5	78	F	71
8	82	84	82	M	68
76	17	47	16	M	46
8	20	16	65	F	1

v1	v2	v3	v4	v5	v6	gender	age
49	95	32	11	21	3	F	82
80	46	96	56	41	46	F	2
79	3	48	96	73	90	M	64
41	100	96	48	47	42	F	93
41	1	61	47	6	89	M	28
52	59	54	84	20	48	M	28
28	65	36	5	69	78	F	71
8	82	18	84	77	82	M	68
76	17	73	47	26	16	M	46
8	20	67	16	79	65	F	1

v4	v5	v6	gender	age
11	21	3	F	82
56	41	46	F	2
96	73	90	M	64
48	47	42	F	93
47	6	89	M	28
84	20	48	M	28
5	69	78	F	71
84	77	82	M	68
47	26	16	M	46
16	79	65	F	1

v1	v2	v4	v6	gender	age
49	95	11	3	F	82
80	46	56	46	F	2
79	3	96	90	M	64
41	100	48	42	F	93
41	1	47	89	M	28
52	59	84	48	M	28
28	65	5	78	F	71
8	82	84	82	M	68
76	17	47	16	M	46
8	20	16	65	F	1

v1	v2	v4	v6	gender	age
49	95	11	3	F	82
80	46	56	46	F	2
79	3	96	90	M	64
41	100	48	42	F	93
41	1	47	89	M	28
52	59	84	48	M	28
28	65	5	78	F	71
8	82	84	82	M	68
76	17	47	16	M	46
8	20	16	65	F	1