# Load the assignment's migration data set directly from its raw GitHub URL.
# The first line of the file holds the column names, fields are separated by
# commas, and text columns are kept as character vectors (not factors).
migrationData <- read.table(
  file = "https://raw.githubusercontent.com/maharjansudhan/Migration/master/Migration.csv",
  sep = ",",
  header = TRUE,
  stringsAsFactors = FALSE
)
migrationData
## X source destination migrants distance pops66 pops71 popd66 popd71
## 1 1 PEI NFLD 255 924 108535 111641 493396 522104
## 2 2 NS NFLD 2380 952 756039 788960 493396 522104
## 3 3 NB NFLD 1140 1119 616788 534557 493396 522104
## 4 4 QUE NFLD 2145 1641 5780845 6027764 493396 522104
## 5 5 ONT NFLD 6295 1996 6960870 7703106 493396 522104
## 6 6 MAN NFLD 215 3159 963066 988247 493396 522104
## 7 7 SASK NFLD 185 3542 955344 926242 493396 522104
## 8 8 ALTA NFLD 425 4059 1463203 1627874 493396 522104
## 9 9 BC NFLD 425 4838 1873674 2184621 493396 522104
## 10 10 NFLD PEI 340 924 493396 522104 108535 111641
## 11 11 NS PEI 1975 164 756039 788960 108535 111641
## 12 12 NB PEI 1310 252 616788 534557 108535 111641
## 13 13 QUE PEI 755 774 5780845 6027764 108535 111641
## 14 14 ONT PEI 3060 1129 6960870 7703106 108535 111641
## 15 15 MAN PEI 400 2293 963066 988247 108535 111641
## 16 16 SASK PEI 95 2675 955344 926242 108535 111641
## 17 17 ALTA PEI 185 3192 1463203 1627874 108535 111641
## 18 18 BC PEI 330 3972 1873674 2184621 108535 111641
## 19 19 NFLD NS 3340 952 493396 522104 756039 788960
## 20 20 PEI NS 2185 164 108535 111641 756039 788960
## 21 21 NB NS 8310 310 616788 534557 756039 788960
## 22 22 QUE NS 6090 832 5780845 6027764 756039 788960
## 23 23 ONT NS 18805 1187 6960870 7703106 756039 788960
## 24 24 MAN NS 1825 2351 963066 988247 756039 788960
## 25 25 SASK NS 840 2733 955344 926242 756039 788960
## 26 26 ALTA NS 2000 3250 1463203 1627874 756039 788960
## 27 27 BC NS 2490 4029 1873674 2184621 756039 788960
## 28 28 NFLD NB 1740 1119 493396 522104 616788 534557
## 29 29 PEI NB 1335 252 108535 111641 616788 534557
## 30 30 NS NB 7635 310 756039 788960 616788 534557
## 31 31 QUE NB 9315 522 5780845 6027764 616788 534557
## 32 32 ONT NB 12455 877 6960870 7703106 616788 534557
## 33 33 MAN NB 1405 2041 963066 988247 616788 534557
## 34 34 SASK NB 480 2423 955344 926242 616788 534557
## 35 35 ALTA NB 1130 2940 1463203 1627874 616788 534557
## 36 36 BC NB 1195 3719 1873674 2184621 616788 534557
## 37 37 NFLD QUE 2235 1641 493396 522104 5780845 6027764
## 38 38 PEI QUE 635 774 108535 111641 5780845 6027764
## 39 39 NS QUE 4350 832 756039 788960 5780845 6027764
## 40 40 NB QUE 7905 522 616788 534557 5780845 6027764
## 41 41 ONT QUE 48370 355 6960870 7703106 5780845 6027764
## 42 42 MAN QUE 4630 1519 963066 988247 5780845 6027764
## 43 43 SASK QUE 1515 1901 955344 926242 5780845 6027764
## 44 44 ALTA QUE 3305 2418 1463203 1627874 5780845 6027764
## 45 45 BC QUE 4740 3197 1873674 2184621 5780845 6027764
## 46 46 NFLD ONT 17860 1996 493396 522104 6960870 7703106
## 47 47 PEI ONT 3570 1129 108535 111641 6960870 7703106
## 48 48 NS ONT 25730 1187 756039 788960 6960870 7703106
## 49 49 NB ONT 18550 877 616788 534557 6960870 7703106
## 50 50 QUE ONT 99430 355 5780845 6027764 6960870 7703106
## 51 51 MAN ONT 23785 1380 963066 988247 6960870 7703106
## 52 52 SASK ONT 11805 1763 955344 926242 6960870 7703106
## 53 53 ALTA ONT 17655 2281 1463203 1627874 6960870 7703106
## 54 54 BC ONT 21205 3059 1873674 2184621 6960870 7703106
## 55 55 NFLD MAN 680 3159 493396 522104 963066 988247
## 56 56 PEI MAN 265 2293 108535 111641 963066 988247
## 57 57 NS MAN 1655 2351 756039 788960 963066 988247
## 58 58 NB MAN 1355 2041 616788 534557 963066 988247
## 59 59 QUE MAN 4330 1519 5780845 6027764 963066 988247
## 60 60 ONT MAN 18245 1380 6960870 7703106 963066 988247
## 61 61 SASK MAN 16365 382 955344 926242 963066 988247
## 62 62 ALTA MAN 7190 899 1463203 1627874 963066 988247
## 63 63 BC MAN 6310 1679 1873674 2184621 963066 988247
## 64 64 NFLD SASK 280 3542 493396 522104 955344 926242
## 65 65 PEI SASK 125 2675 108535 111641 955344 926242
## 66 66 NS SASK 620 2733 756039 788960 955344 926242
## 67 67 NB SASK 495 2423 616788 534557 955344 926242
## 68 68 QUE SASK 1570 1901 5780845 6027764 955344 926242
## 69 69 ONT SASK 6845 1763 6960870 7703106 955344 926242
## 70 70 MAN SASK 9425 382 963066 988247 955344 926242
## 71 71 ALTA SASK 10580 517 1463203 1627874 955344 926242
## 72 72 BC SASK 6090 1297 1873674 2184621 955344 926242
## 73 73 NFLD ALTA 805 4059 493396 522104 1463203 1627874
## 74 74 PEI ALTA 505 3192 108535 111641 1463203 1627874
## 75 75 NS ALTA 3300 3250 756039 788960 1463203 1627874
## 76 76 NB ALTA 2150 2940 616788 534557 1463203 1627874
## 77 77 QUE ALTA 7750 2418 5780845 6027764 1463203 1627874
## 78 78 ONT ALTA 23550 2281 6960870 7703106 1463203 1627874
## 79 79 MAN ALTA 17410 899 963066 988247 1463203 1627874
## 80 80 SASK ALTA 41910 517 955344 926242 1463203 1627874
## 81 81 BC ALTA 27765 987 1873674 2184621 1463203 1627874
## 82 82 NFLD BC 1455 4838 493396 522104 1873674 2184621
## 83 83 PEI BC 600 3972 108535 111641 1873674 2184621
## 84 84 NS BC 6075 4029 756039 788960 1873674 2184621
## 85 85 NB BC 3115 3719 616788 534557 1873674 2184621
## 86 86 QUE BC 16740 3197 5780845 6027764 1873674 2184621
## 87 87 ONT BC 47395 3059 6960870 7703106 1873674 2184621
## 88 88 MAN BC 26910 1679 963066 988247 1873674 2184621
## 89 89 SASK BC 29920 1297 955344 926242 1873674 2184621
## 90 90 ALTA BC 58915 987 1463203 1627874 1873674 2184621
# 1. Use the summary function to gain an overview of the data set. Then display the mean and median for at least two attributes.
# Column-by-column overview of the full data set.
summary(migrationData)
## X source destination migrants
## Min. : 1.00 Length:90 Length:90 Min. : 95.0
## 1st Qu.:23.25 Class :character Class :character 1st Qu.: 912.5
## Median :45.50 Mode :character Mode :character Median : 3087.5
## Mean :45.50 Mean : 9227.3
## 3rd Qu.:67.75 3rd Qu.:10291.2
## Max. :90.00 Max. :99430.0
## distance pops66 pops71 popd66
## Min. : 164 Min. : 108535 Min. : 111641 Min. : 108535
## 1st Qu.: 924 1st Qu.: 616788 1st Qu.: 534557 1st Qu.: 616788
## Median :1763 Median : 959205 Median : 957244 Median : 959205
## Mean :1945 Mean :1997176 Mean :2141512 Mean :1997176
## 3rd Qu.:2940 3rd Qu.:1873674 3rd Qu.:2184621 3rd Qu.:1873674
## Max. :4838 Max. :6960870 Max. :7703106 Max. :6960870
## popd71
## Min. : 111641
## 1st Qu.: 534557
## Median : 957244
## Mean :2141512
## 3rd Qu.:2184621
## Max. :7703106
# mean and median of attribute 5 (the "distance" column), selected by name
mean(migrationData[["distance"]])
## [1] 1945.089
median(migrationData[["distance"]])
## [1] 1763
# distance: mean = 1945.089 and median = 1763
# mean and median of attribute 4 (the "migrants" column), selected by name
mean(migrationData[["migrants"]])
## [1] 9227.333
median(migrationData[["migrants"]])
## [1] 3087.5
# migrants: mean = 9227.333 and median = 3087.5
# 2. Create a new data frame with a subset of the columns and rows. Make sure to rename it.
# Slice off the first five columns, then keep the leading ten rows of that
# slice; the result is stored under a new name.
betterMigrationDataFrame <- data.frame(
  head(migrationData[, seq_len(5)], n = 10)
)
betterMigrationDataFrame
## X source destination migrants distance
## 1 1 PEI NFLD 255 924
## 2 2 NS NFLD 2380 952
## 3 3 NB NFLD 1140 1119
## 4 4 QUE NFLD 2145 1641
## 5 5 ONT NFLD 6295 1996
## 6 6 MAN NFLD 215 3159
## 7 7 SASK NFLD 185 3542
## 8 8 ALTA NFLD 425 4059
## 9 9 BC NFLD 425 4838
## 10 10 NFLD PEI 340 924
# 3. Create new column names for the new data frame
# Rename the first five columns in one vectorised assignment. The new names
# are given in column order, i.e. they replace X, source, destination,
# migrants and distance respectively.
names(betterMigrationDataFrame)[seq_len(5)] <- c(
  "Migrant_Id",
  "Migration_Source",
  "Migration_Destination",
  "Total_Migrants",
  "Distance_Travelled"
)
betterMigrationDataFrame
## Migrant_Id Migration_Source Migration_Destination Total_Migrants
## 1 1 PEI NFLD 255
## 2 2 NS NFLD 2380
## 3 3 NB NFLD 1140
## 4 4 QUE NFLD 2145
## 5 5 ONT NFLD 6295
## 6 6 MAN NFLD 215
## 7 7 SASK NFLD 185
## 8 8 ALTA NFLD 425
## 9 9 BC NFLD 425
## 10 10 NFLD PEI 340
## Distance_Travelled
## 1 924
## 2 952
## 3 1119
## 4 1641
## 5 1996
## 6 3159
## 7 3542
## 8 4059
## 9 4838
## 10 924
# 4. Use the summary function to create an overview of your new data frame. Then print the mean and median for the same two attributes. Please compare.
# Column-by-column overview of the ten-row subset.
summary(betterMigrationDataFrame)
## Migrant_Id Migration_Source Migration_Destination Total_Migrants
## Min. : 1.00 Length:10 Length:10 Min. : 185.0
## 1st Qu.: 3.25 Class :character Class :character 1st Qu.: 276.2
## Median : 5.50 Mode :character Mode :character Median : 425.0
## Mean : 5.50 Mean :1380.5
## 3rd Qu.: 7.75 3rd Qu.:1893.8
## Max. :10.00 Max. :6295.0
## Distance_Travelled
## Min. : 924.0
## 1st Qu.: 993.8
## Median :1818.5
## Mean :2315.4
## 3rd Qu.:3446.2
## Max. :4838.0
# mean and median of attribute 5 (Distance_Travelled), selected by name
mean(betterMigrationDataFrame[["Distance_Travelled"]])
## [1] 2315.4
median(betterMigrationDataFrame[["Distance_Travelled"]])
## [1] 1818.5
# Distance_Travelled: mean = 2315.4 and median = 1818.5
# mean and median of attribute 4 (Total_Migrants), selected by name
mean(betterMigrationDataFrame[["Total_Migrants"]])
## [1] 1380.5
median(betterMigrationDataFrame[["Total_Migrants"]])
## [1] 425
# Total_Migrants: mean = 1380.5 and median = 425
# Comparing the two summaries: in the 10-row subset, both the mean and the median of the 4th attribute
# (migrants) have decreased a lot, whereas both the mean and the median of the 5th attribute (distance) have increased.
# Province abbreviations: ALTA, Alberta; BC, British Columbia; MAN, Manitoba; NB, New Brunswick; NFLD, Newfoundland; NS, Nova Scotia; ONT, Ontario; PEI, Prince Edward Island; QUE, Quebec; SASK, Saskatchewan.
# 5. For at least 3 values in a column please rename so that every value in that column is renamed.
# For example, suppose I have 20 values of the letter "e" in one column. Rename those values so
# that all 20 would show as "excellent".

# Lookup table: province abbreviation (as it appears in the data) -> full name.
province_names <- c(
  ALTA = "Alberta",
  BC = "British Columbia",
  MAN = "Manitoba",
  NB = "New Brunswick",
  NFLD = "Newfoundland",
  NS = "Nova Scotia",
  ONT = "Ontario",
  PEI = "Prince Edward Island",
  QUE = "Quebec",
  SASK = "Saskatchewan"
)

# Replace province abbreviations with full province names.
#
# codes: vector of province labels (character or factor).
# Returns a character vector of the same length in which every abbreviation
# found in `province_names` is replaced by its full name. Values that are
# already full names pass through unchanged, so re-running this step is safe.
# Stops with an error if a value is neither a known abbreviation nor a known
# full name, instead of silently leaving it (or an NA) in the column.
expand_province_codes <- function(codes) {
  labels <- as.character(codes)
  is_code <- labels %in% names(province_names)
  labels[is_code] <- unname(province_names[labels[is_code]])
  unrecognised <- setdiff(labels, province_names)
  if (length(unrecognised) > 0) {
    stop(
      "Unrecognised province value(s): ",
      paste(unrecognised, collapse = ", "),
      call. = FALSE
    )
  }
  labels
}

# Rename by value rather than by row position, so every occurrence of an
# abbreviation is renamed regardless of which rows are in the data frame.
betterMigrationDataFrame$Migration_Source <-
  expand_province_codes(betterMigrationDataFrame$Migration_Source)
betterMigrationDataFrame$Migration_Destination <-
  expand_province_codes(betterMigrationDataFrame$Migration_Destination)
# 6. Display enough rows to see examples of all of steps 1-5 above.
betterMigrationDataFrame
## Migrant_Id Migration_Source Migration_Destination Total_Migrants
## 1 1 Prince Edward Island Newfoundland 255
## 2 2 Nova Scotia Newfoundland 2380
## 3 3 New Brunswick Newfoundland 1140
## 4 4 Quebec Newfoundland 2145
## 5 5 Ontario Newfoundland 6295
## 6 6 Manitoba Newfoundland 215
## 7 7 Saskatchewan Newfoundland 185
## 8 8 Alberta Newfoundland 425
## 9 9 British Columbia Newfoundland 425
## 10 10 Newfoundland Prince Edward Island 340
## Distance_Travelled
## 1 924
## 2 952
## 3 1119
## 4 1641
## 5 1996
## 6 3159
## 7 3542
## 8 4059
## 9 4838
## 10 924
# 7. BONUS - place the original .csv in a github file and have R read from the link. This will be a very
# useful skill as you progress in your data science education and career.
# Read the comma-separated file straight from its raw GitHub URL; the first
# line supplies the column names and text columns stay as character vectors.
data <- read.table(
  file = "https://raw.githubusercontent.com/maharjansudhan/Migration/master/Migration.csv",
  sep = ",",
  header = TRUE,
  stringsAsFactors = FALSE
)
data
## X source destination migrants distance pops66 pops71 popd66 popd71
## 1 1 PEI NFLD 255 924 108535 111641 493396 522104
## 2 2 NS NFLD 2380 952 756039 788960 493396 522104
## 3 3 NB NFLD 1140 1119 616788 534557 493396 522104
## 4 4 QUE NFLD 2145 1641 5780845 6027764 493396 522104
## 5 5 ONT NFLD 6295 1996 6960870 7703106 493396 522104
## 6 6 MAN NFLD 215 3159 963066 988247 493396 522104
## 7 7 SASK NFLD 185 3542 955344 926242 493396 522104
## 8 8 ALTA NFLD 425 4059 1463203 1627874 493396 522104
## 9 9 BC NFLD 425 4838 1873674 2184621 493396 522104
## 10 10 NFLD PEI 340 924 493396 522104 108535 111641
## 11 11 NS PEI 1975 164 756039 788960 108535 111641
## 12 12 NB PEI 1310 252 616788 534557 108535 111641
## 13 13 QUE PEI 755 774 5780845 6027764 108535 111641
## 14 14 ONT PEI 3060 1129 6960870 7703106 108535 111641
## 15 15 MAN PEI 400 2293 963066 988247 108535 111641
## 16 16 SASK PEI 95 2675 955344 926242 108535 111641
## 17 17 ALTA PEI 185 3192 1463203 1627874 108535 111641
## 18 18 BC PEI 330 3972 1873674 2184621 108535 111641
## 19 19 NFLD NS 3340 952 493396 522104 756039 788960
## 20 20 PEI NS 2185 164 108535 111641 756039 788960
## 21 21 NB NS 8310 310 616788 534557 756039 788960
## 22 22 QUE NS 6090 832 5780845 6027764 756039 788960
## 23 23 ONT NS 18805 1187 6960870 7703106 756039 788960
## 24 24 MAN NS 1825 2351 963066 988247 756039 788960
## 25 25 SASK NS 840 2733 955344 926242 756039 788960
## 26 26 ALTA NS 2000 3250 1463203 1627874 756039 788960
## 27 27 BC NS 2490 4029 1873674 2184621 756039 788960
## 28 28 NFLD NB 1740 1119 493396 522104 616788 534557
## 29 29 PEI NB 1335 252 108535 111641 616788 534557
## 30 30 NS NB 7635 310 756039 788960 616788 534557
## 31 31 QUE NB 9315 522 5780845 6027764 616788 534557
## 32 32 ONT NB 12455 877 6960870 7703106 616788 534557
## 33 33 MAN NB 1405 2041 963066 988247 616788 534557
## 34 34 SASK NB 480 2423 955344 926242 616788 534557
## 35 35 ALTA NB 1130 2940 1463203 1627874 616788 534557
## 36 36 BC NB 1195 3719 1873674 2184621 616788 534557
## 37 37 NFLD QUE 2235 1641 493396 522104 5780845 6027764
## 38 38 PEI QUE 635 774 108535 111641 5780845 6027764
## 39 39 NS QUE 4350 832 756039 788960 5780845 6027764
## 40 40 NB QUE 7905 522 616788 534557 5780845 6027764
## 41 41 ONT QUE 48370 355 6960870 7703106 5780845 6027764
## 42 42 MAN QUE 4630 1519 963066 988247 5780845 6027764
## 43 43 SASK QUE 1515 1901 955344 926242 5780845 6027764
## 44 44 ALTA QUE 3305 2418 1463203 1627874 5780845 6027764
## 45 45 BC QUE 4740 3197 1873674 2184621 5780845 6027764
## 46 46 NFLD ONT 17860 1996 493396 522104 6960870 7703106
## 47 47 PEI ONT 3570 1129 108535 111641 6960870 7703106
## 48 48 NS ONT 25730 1187 756039 788960 6960870 7703106
## 49 49 NB ONT 18550 877 616788 534557 6960870 7703106
## 50 50 QUE ONT 99430 355 5780845 6027764 6960870 7703106
## 51 51 MAN ONT 23785 1380 963066 988247 6960870 7703106
## 52 52 SASK ONT 11805 1763 955344 926242 6960870 7703106
## 53 53 ALTA ONT 17655 2281 1463203 1627874 6960870 7703106
## 54 54 BC ONT 21205 3059 1873674 2184621 6960870 7703106
## 55 55 NFLD MAN 680 3159 493396 522104 963066 988247
## 56 56 PEI MAN 265 2293 108535 111641 963066 988247
## 57 57 NS MAN 1655 2351 756039 788960 963066 988247
## 58 58 NB MAN 1355 2041 616788 534557 963066 988247
## 59 59 QUE MAN 4330 1519 5780845 6027764 963066 988247
## 60 60 ONT MAN 18245 1380 6960870 7703106 963066 988247
## 61 61 SASK MAN 16365 382 955344 926242 963066 988247
## 62 62 ALTA MAN 7190 899 1463203 1627874 963066 988247
## 63 63 BC MAN 6310 1679 1873674 2184621 963066 988247
## 64 64 NFLD SASK 280 3542 493396 522104 955344 926242
## 65 65 PEI SASK 125 2675 108535 111641 955344 926242
## 66 66 NS SASK 620 2733 756039 788960 955344 926242
## 67 67 NB SASK 495 2423 616788 534557 955344 926242
## 68 68 QUE SASK 1570 1901 5780845 6027764 955344 926242
## 69 69 ONT SASK 6845 1763 6960870 7703106 955344 926242
## 70 70 MAN SASK 9425 382 963066 988247 955344 926242
## 71 71 ALTA SASK 10580 517 1463203 1627874 955344 926242
## 72 72 BC SASK 6090 1297 1873674 2184621 955344 926242
## 73 73 NFLD ALTA 805 4059 493396 522104 1463203 1627874
## 74 74 PEI ALTA 505 3192 108535 111641 1463203 1627874
## 75 75 NS ALTA 3300 3250 756039 788960 1463203 1627874
## 76 76 NB ALTA 2150 2940 616788 534557 1463203 1627874
## 77 77 QUE ALTA 7750 2418 5780845 6027764 1463203 1627874
## 78 78 ONT ALTA 23550 2281 6960870 7703106 1463203 1627874
## 79 79 MAN ALTA 17410 899 963066 988247 1463203 1627874
## 80 80 SASK ALTA 41910 517 955344 926242 1463203 1627874
## 81 81 BC ALTA 27765 987 1873674 2184621 1463203 1627874
## 82 82 NFLD BC 1455 4838 493396 522104 1873674 2184621
## 83 83 PEI BC 600 3972 108535 111641 1873674 2184621
## 84 84 NS BC 6075 4029 756039 788960 1873674 2184621
## 85 85 NB BC 3115 3719 616788 534557 1873674 2184621
## 86 86 QUE BC 16740 3197 5780845 6027764 1873674 2184621
## 87 87 ONT BC 47395 3059 6960870 7703106 1873674 2184621
## 88 88 MAN BC 26910 1679 963066 988247 1873674 2184621
## 89 89 SASK BC 29920 1297 955344 926242 1873674 2184621
## 90 90 ALTA BC 58915 987 1463203 1627874 1873674 2184621