# Load the migration CSV for the assignment straight from its GitHub raw URL.
# Text columns are kept as character vectors rather than factors.

migrationData <- read.table(
  file = "https://raw.githubusercontent.com/maharjansudhan/Migration/master/Migration.csv",
  sep = ",",
  header = TRUE,
  stringsAsFactors = FALSE
)
migrationData
##     X source destination migrants distance  pops66  pops71  popd66  popd71
## 1   1    PEI        NFLD      255      924  108535  111641  493396  522104
## 2   2     NS        NFLD     2380      952  756039  788960  493396  522104
## 3   3     NB        NFLD     1140     1119  616788  534557  493396  522104
## 4   4    QUE        NFLD     2145     1641 5780845 6027764  493396  522104
## 5   5    ONT        NFLD     6295     1996 6960870 7703106  493396  522104
## 6   6    MAN        NFLD      215     3159  963066  988247  493396  522104
## 7   7   SASK        NFLD      185     3542  955344  926242  493396  522104
## 8   8   ALTA        NFLD      425     4059 1463203 1627874  493396  522104
## 9   9     BC        NFLD      425     4838 1873674 2184621  493396  522104
## 10 10   NFLD         PEI      340      924  493396  522104  108535  111641
## 11 11     NS         PEI     1975      164  756039  788960  108535  111641
## 12 12     NB         PEI     1310      252  616788  534557  108535  111641
## 13 13    QUE         PEI      755      774 5780845 6027764  108535  111641
## 14 14    ONT         PEI     3060     1129 6960870 7703106  108535  111641
## 15 15    MAN         PEI      400     2293  963066  988247  108535  111641
## 16 16   SASK         PEI       95     2675  955344  926242  108535  111641
## 17 17   ALTA         PEI      185     3192 1463203 1627874  108535  111641
## 18 18     BC         PEI      330     3972 1873674 2184621  108535  111641
## 19 19   NFLD          NS     3340      952  493396  522104  756039  788960
## 20 20    PEI          NS     2185      164  108535  111641  756039  788960
## 21 21     NB          NS     8310      310  616788  534557  756039  788960
## 22 22    QUE          NS     6090      832 5780845 6027764  756039  788960
## 23 23    ONT          NS    18805     1187 6960870 7703106  756039  788960
## 24 24    MAN          NS     1825     2351  963066  988247  756039  788960
## 25 25   SASK          NS      840     2733  955344  926242  756039  788960
## 26 26   ALTA          NS     2000     3250 1463203 1627874  756039  788960
## 27 27     BC          NS     2490     4029 1873674 2184621  756039  788960
## 28 28   NFLD          NB     1740     1119  493396  522104  616788  534557
## 29 29    PEI          NB     1335      252  108535  111641  616788  534557
## 30 30     NS          NB     7635      310  756039  788960  616788  534557
## 31 31    QUE          NB     9315      522 5780845 6027764  616788  534557
## 32 32    ONT          NB    12455      877 6960870 7703106  616788  534557
## 33 33    MAN          NB     1405     2041  963066  988247  616788  534557
## 34 34   SASK          NB      480     2423  955344  926242  616788  534557
## 35 35   ALTA          NB     1130     2940 1463203 1627874  616788  534557
## 36 36     BC          NB     1195     3719 1873674 2184621  616788  534557
## 37 37   NFLD         QUE     2235     1641  493396  522104 5780845 6027764
## 38 38    PEI         QUE      635      774  108535  111641 5780845 6027764
## 39 39     NS         QUE     4350      832  756039  788960 5780845 6027764
## 40 40     NB         QUE     7905      522  616788  534557 5780845 6027764
## 41 41    ONT         QUE    48370      355 6960870 7703106 5780845 6027764
## 42 42    MAN         QUE     4630     1519  963066  988247 5780845 6027764
## 43 43   SASK         QUE     1515     1901  955344  926242 5780845 6027764
## 44 44   ALTA         QUE     3305     2418 1463203 1627874 5780845 6027764
## 45 45     BC         QUE     4740     3197 1873674 2184621 5780845 6027764
## 46 46   NFLD         ONT    17860     1996  493396  522104 6960870 7703106
## 47 47    PEI         ONT     3570     1129  108535  111641 6960870 7703106
## 48 48     NS         ONT    25730     1187  756039  788960 6960870 7703106
## 49 49     NB         ONT    18550      877  616788  534557 6960870 7703106
## 50 50    QUE         ONT    99430      355 5780845 6027764 6960870 7703106
## 51 51    MAN         ONT    23785     1380  963066  988247 6960870 7703106
## 52 52   SASK         ONT    11805     1763  955344  926242 6960870 7703106
## 53 53   ALTA         ONT    17655     2281 1463203 1627874 6960870 7703106
## 54 54     BC         ONT    21205     3059 1873674 2184621 6960870 7703106
## 55 55   NFLD         MAN      680     3159  493396  522104  963066  988247
## 56 56    PEI         MAN      265     2293  108535  111641  963066  988247
## 57 57     NS         MAN     1655     2351  756039  788960  963066  988247
## 58 58     NB         MAN     1355     2041  616788  534557  963066  988247
## 59 59    QUE         MAN     4330     1519 5780845 6027764  963066  988247
## 60 60    ONT         MAN    18245     1380 6960870 7703106  963066  988247
## 61 61   SASK         MAN    16365      382  955344  926242  963066  988247
## 62 62   ALTA         MAN     7190      899 1463203 1627874  963066  988247
## 63 63     BC         MAN     6310     1679 1873674 2184621  963066  988247
## 64 64   NFLD        SASK      280     3542  493396  522104  955344  926242
## 65 65    PEI        SASK      125     2675  108535  111641  955344  926242
## 66 66     NS        SASK      620     2733  756039  788960  955344  926242
## 67 67     NB        SASK      495     2423  616788  534557  955344  926242
## 68 68    QUE        SASK     1570     1901 5780845 6027764  955344  926242
## 69 69    ONT        SASK     6845     1763 6960870 7703106  955344  926242
## 70 70    MAN        SASK     9425      382  963066  988247  955344  926242
## 71 71   ALTA        SASK    10580      517 1463203 1627874  955344  926242
## 72 72     BC        SASK     6090     1297 1873674 2184621  955344  926242
## 73 73   NFLD        ALTA      805     4059  493396  522104 1463203 1627874
## 74 74    PEI        ALTA      505     3192  108535  111641 1463203 1627874
## 75 75     NS        ALTA     3300     3250  756039  788960 1463203 1627874
## 76 76     NB        ALTA     2150     2940  616788  534557 1463203 1627874
## 77 77    QUE        ALTA     7750     2418 5780845 6027764 1463203 1627874
## 78 78    ONT        ALTA    23550     2281 6960870 7703106 1463203 1627874
## 79 79    MAN        ALTA    17410      899  963066  988247 1463203 1627874
## 80 80   SASK        ALTA    41910      517  955344  926242 1463203 1627874
## 81 81     BC        ALTA    27765      987 1873674 2184621 1463203 1627874
## 82 82   NFLD          BC     1455     4838  493396  522104 1873674 2184621
## 83 83    PEI          BC      600     3972  108535  111641 1873674 2184621
## 84 84     NS          BC     6075     4029  756039  788960 1873674 2184621
## 85 85     NB          BC     3115     3719  616788  534557 1873674 2184621
## 86 86    QUE          BC    16740     3197 5780845 6027764 1873674 2184621
## 87 87    ONT          BC    47395     3059 6960870 7703106 1873674 2184621
## 88 88    MAN          BC    26910     1679  963066  988247 1873674 2184621
## 89 89   SASK          BC    29920     1297  955344  926242 1873674 2184621
## 90 90   ALTA          BC    58915      987 1463203 1627874 1873674 2184621
# 1. Use the summary function to gain an overview of the data set. Then display
# the mean and median for at least two attributes.

summary(migrationData)
##        X            source          destination           migrants      
##  Min.   : 1.00   Length:90          Length:90          Min.   :   95.0  
##  1st Qu.:23.25   Class :character   Class :character   1st Qu.:  912.5  
##  Median :45.50   Mode  :character   Mode  :character   Median : 3087.5  
##  Mean   :45.50                                         Mean   : 9227.3  
##  3rd Qu.:67.75                                         3rd Qu.:10291.2  
##  Max.   :90.00                                         Max.   :99430.0  
##     distance        pops66            pops71            popd66       
##  Min.   : 164   Min.   : 108535   Min.   : 111641   Min.   : 108535  
##  1st Qu.: 924   1st Qu.: 616788   1st Qu.: 534557   1st Qu.: 616788  
##  Median :1763   Median : 959205   Median : 957244   Median : 959205  
##  Mean   :1945   Mean   :1997176   Mean   :2141512   Mean   :1997176  
##  3rd Qu.:2940   3rd Qu.:1873674   3rd Qu.:2184621   3rd Qu.:1873674  
##  Max.   :4838   Max.   :6960870   Max.   :7703106   Max.   :6960870  
##      popd71       
##  Min.   : 111641  
##  1st Qu.: 534557  
##  Median : 957244  
##  Mean   :2141512  
##  3rd Qu.:2184621  
##  Max.   :7703106
# Mean and median of the `distance` column (attribute 5), selected by name

mean(migrationData[["distance"]])
## [1] 1945.089
median(migrationData[["distance"]])
## [1] 1763
# distance: mean = 1945.089, median = 1763

# Mean and median of the `migrants` column (attribute 4), selected by name

mean(migrationData[["migrants"]])
## [1] 9227.333
median(migrationData[["migrants"]])
## [1] 3087.5
# migrants: mean = 9227.333, median = 3087.5

# 2. Create a new data frame from a subset of the columns and rows, under a
# new name: the first ten rows and the first five columns of migrationData.
# Indexing a data frame with [rows, cols] already yields a data frame.

betterMigrationDataFrame <- migrationData[seq_len(10), seq_len(5)]
betterMigrationDataFrame
##     X source destination migrants distance
## 1   1    PEI        NFLD      255      924
## 2   2     NS        NFLD     2380      952
## 3   3     NB        NFLD     1140     1119
## 4   4    QUE        NFLD     2145     1641
## 5   5    ONT        NFLD     6295     1996
## 6   6    MAN        NFLD      215     3159
## 7   7   SASK        NFLD      185     3542
## 8   8   ALTA        NFLD      425     4059
## 9   9     BC        NFLD      425     4838
## 10 10   NFLD         PEI      340      924
# 3. Give the new data frame descriptive column names. The five names are
# assigned in one step, in the same order as the existing columns.
names(betterMigrationDataFrame)[1:5] <- c(
  "Migrant_Id",
  "Migration_Source",
  "Migration_Destination",
  "Total_Migrants",
  "Distance_Travelled"
)

betterMigrationDataFrame
##    Migrant_Id Migration_Source Migration_Destination Total_Migrants
## 1           1              PEI                  NFLD            255
## 2           2               NS                  NFLD           2380
## 3           3               NB                  NFLD           1140
## 4           4              QUE                  NFLD           2145
## 5           5              ONT                  NFLD           6295
## 6           6              MAN                  NFLD            215
## 7           7             SASK                  NFLD            185
## 8           8             ALTA                  NFLD            425
## 9           9               BC                  NFLD            425
## 10         10             NFLD                   PEI            340
##    Distance_Travelled
## 1                 924
## 2                 952
## 3                1119
## 4                1641
## 5                1996
## 6                3159
## 7                3542
## 8                4059
## 9                4838
## 10                924
# 4. Use the summary function to create an overview of your new data frame.
# Then print the mean and median for the same two attributes. Please compare.

summary(betterMigrationDataFrame)
##    Migrant_Id    Migration_Source   Migration_Destination Total_Migrants  
##  Min.   : 1.00   Length:10          Length:10             Min.   : 185.0  
##  1st Qu.: 3.25   Class :character   Class :character      1st Qu.: 276.2  
##  Median : 5.50   Mode  :character   Mode  :character      Median : 425.0  
##  Mean   : 5.50                                            Mean   :1380.5  
##  3rd Qu.: 7.75                                            3rd Qu.:1893.8  
##  Max.   :10.00                                            Max.   :6295.0  
##  Distance_Travelled
##  Min.   : 924.0    
##  1st Qu.: 993.8    
##  Median :1818.5    
##  Mean   :2315.4    
##  3rd Qu.:3446.2    
##  Max.   :4838.0
# Mean and median of `Distance_Travelled` (attribute 5), selected by name

mean(betterMigrationDataFrame[["Distance_Travelled"]])
## [1] 2315.4
median(betterMigrationDataFrame[["Distance_Travelled"]])
## [1] 1818.5
# Distance_Travelled: mean = 2315.4, median = 1818.5

# Mean and median of `Total_Migrants` (attribute 4), selected by name

mean(betterMigrationDataFrame[["Total_Migrants"]])
## [1] 1380.5
median(betterMigrationDataFrame[["Total_Migrants"]])
## [1] 425
# Total_Migrants: mean = 1380.5, median = 425
# Comparison with the full data set: in the 10-row subset the mean and median
# of the 4th attribute (migrants) are both much lower, whereas the mean and
# median of the 5th attribute (distance) are both higher.

# Legend of province abbreviations: ALTA, Alberta; BC, British Columbia; MAN, Manitoba; NB, New Brunswick; NFLD, New Foundland; NS, Nova Scotia; ONT, Ontario; PEI, Prince Edward Island; QUE, Quebec; SASK, Saskatchewan.

# 5. For at least 3 values in a column, rename so that every occurrence of each
# value in that column is renamed. For example, suppose I have 20 values of the
# letter "e" in one column. Rename those values so that all 20 would show as
# "excellent".

# Lookup table from province abbreviation to full province name.
# NOTE(review): "New Foundland" is kept as originally written; the province's
# name is normally spelled "Newfoundland" - confirm before changing the label.
province_names <- c(
  ALTA = "Alberta",
  BC = "British Columbia",
  MAN = "Manitoba",
  NB = "New Brunswick",
  NFLD = "New Foundland",
  NS = "Nova Scotia",
  ONT = "Ontario",
  PEI = "Prince Edward Island",
  QUE = "Quebec",
  SASK = "Saskatchewan"
)

# Replace every known abbreviation in `codes` with its full province name.
# Values that are not abbreviations in the lookup table (including NA and
# names that were already expanded) are returned unchanged, so the renaming
# can be re-run safely and does not depend on row order.
expand_province_code <- function(codes) {
  codes <- as.character(codes)
  known <- codes %in% names(province_names)
  codes[known] <- unname(province_names[codes[known]])
  codes
}

betterMigrationDataFrame$Migration_Source <-
  expand_province_code(betterMigrationDataFrame$Migration_Source)
betterMigrationDataFrame$Migration_Destination <-
  expand_province_code(betterMigrationDataFrame$Migration_Destination)


# 6. Display enough rows to see examples of all of steps 1-5 above.

betterMigrationDataFrame
##    Migrant_Id     Migration_Source Migration_Destination Total_Migrants
## 1           1 Prince Edward Island         New Foundland            255
## 2           2          Nova Scotia         New Foundland           2380
## 3           3        New Brunswick         New Foundland           1140
## 4           4               Quebec         New Foundland           2145
## 5           5              Ontario         New Foundland           6295
## 6           6             Manitoba         New Foundland            215
## 7           7         Saskatchewan         New Foundland            185
## 8           8              Alberta         New Foundland            425
## 9           9     British Columbia         New Foundland            425
## 10         10        New Foundland  Prince Edward Island            340
##    Distance_Travelled
## 1                 924
## 2                 952
## 3                1119
## 4                1641
## 5                1996
## 6                3159
## 7                3542
## 8                4059
## 9                4838
## 10                924
# 7. BONUS - place the original .csv in a GitHub repository and have R read it
# from the link. This will be a very useful skill as you progress in your data
# science education and career.

# NOTE(review): `data` is also the name of utils::data(); a more specific
# variable name would read more clearly, but the name is kept unchanged here.
data <- read.table(
  file = "https://raw.githubusercontent.com/maharjansudhan/Migration/master/Migration.csv",
  sep = ",",
  header = TRUE,
  stringsAsFactors = FALSE
)

data
##     X source destination migrants distance  pops66  pops71  popd66  popd71
## 1   1    PEI        NFLD      255      924  108535  111641  493396  522104
## 2   2     NS        NFLD     2380      952  756039  788960  493396  522104
## 3   3     NB        NFLD     1140     1119  616788  534557  493396  522104
## 4   4    QUE        NFLD     2145     1641 5780845 6027764  493396  522104
## 5   5    ONT        NFLD     6295     1996 6960870 7703106  493396  522104
## 6   6    MAN        NFLD      215     3159  963066  988247  493396  522104
## 7   7   SASK        NFLD      185     3542  955344  926242  493396  522104
## 8   8   ALTA        NFLD      425     4059 1463203 1627874  493396  522104
## 9   9     BC        NFLD      425     4838 1873674 2184621  493396  522104
## 10 10   NFLD         PEI      340      924  493396  522104  108535  111641
## 11 11     NS         PEI     1975      164  756039  788960  108535  111641
## 12 12     NB         PEI     1310      252  616788  534557  108535  111641
## 13 13    QUE         PEI      755      774 5780845 6027764  108535  111641
## 14 14    ONT         PEI     3060     1129 6960870 7703106  108535  111641
## 15 15    MAN         PEI      400     2293  963066  988247  108535  111641
## 16 16   SASK         PEI       95     2675  955344  926242  108535  111641
## 17 17   ALTA         PEI      185     3192 1463203 1627874  108535  111641
## 18 18     BC         PEI      330     3972 1873674 2184621  108535  111641
## 19 19   NFLD          NS     3340      952  493396  522104  756039  788960
## 20 20    PEI          NS     2185      164  108535  111641  756039  788960
## 21 21     NB          NS     8310      310  616788  534557  756039  788960
## 22 22    QUE          NS     6090      832 5780845 6027764  756039  788960
## 23 23    ONT          NS    18805     1187 6960870 7703106  756039  788960
## 24 24    MAN          NS     1825     2351  963066  988247  756039  788960
## 25 25   SASK          NS      840     2733  955344  926242  756039  788960
## 26 26   ALTA          NS     2000     3250 1463203 1627874  756039  788960
## 27 27     BC          NS     2490     4029 1873674 2184621  756039  788960
## 28 28   NFLD          NB     1740     1119  493396  522104  616788  534557
## 29 29    PEI          NB     1335      252  108535  111641  616788  534557
## 30 30     NS          NB     7635      310  756039  788960  616788  534557
## 31 31    QUE          NB     9315      522 5780845 6027764  616788  534557
## 32 32    ONT          NB    12455      877 6960870 7703106  616788  534557
## 33 33    MAN          NB     1405     2041  963066  988247  616788  534557
## 34 34   SASK          NB      480     2423  955344  926242  616788  534557
## 35 35   ALTA          NB     1130     2940 1463203 1627874  616788  534557
## 36 36     BC          NB     1195     3719 1873674 2184621  616788  534557
## 37 37   NFLD         QUE     2235     1641  493396  522104 5780845 6027764
## 38 38    PEI         QUE      635      774  108535  111641 5780845 6027764
## 39 39     NS         QUE     4350      832  756039  788960 5780845 6027764
## 40 40     NB         QUE     7905      522  616788  534557 5780845 6027764
## 41 41    ONT         QUE    48370      355 6960870 7703106 5780845 6027764
## 42 42    MAN         QUE     4630     1519  963066  988247 5780845 6027764
## 43 43   SASK         QUE     1515     1901  955344  926242 5780845 6027764
## 44 44   ALTA         QUE     3305     2418 1463203 1627874 5780845 6027764
## 45 45     BC         QUE     4740     3197 1873674 2184621 5780845 6027764
## 46 46   NFLD         ONT    17860     1996  493396  522104 6960870 7703106
## 47 47    PEI         ONT     3570     1129  108535  111641 6960870 7703106
## 48 48     NS         ONT    25730     1187  756039  788960 6960870 7703106
## 49 49     NB         ONT    18550      877  616788  534557 6960870 7703106
## 50 50    QUE         ONT    99430      355 5780845 6027764 6960870 7703106
## 51 51    MAN         ONT    23785     1380  963066  988247 6960870 7703106
## 52 52   SASK         ONT    11805     1763  955344  926242 6960870 7703106
## 53 53   ALTA         ONT    17655     2281 1463203 1627874 6960870 7703106
## 54 54     BC         ONT    21205     3059 1873674 2184621 6960870 7703106
## 55 55   NFLD         MAN      680     3159  493396  522104  963066  988247
## 56 56    PEI         MAN      265     2293  108535  111641  963066  988247
## 57 57     NS         MAN     1655     2351  756039  788960  963066  988247
## 58 58     NB         MAN     1355     2041  616788  534557  963066  988247
## 59 59    QUE         MAN     4330     1519 5780845 6027764  963066  988247
## 60 60    ONT         MAN    18245     1380 6960870 7703106  963066  988247
## 61 61   SASK         MAN    16365      382  955344  926242  963066  988247
## 62 62   ALTA         MAN     7190      899 1463203 1627874  963066  988247
## 63 63     BC         MAN     6310     1679 1873674 2184621  963066  988247
## 64 64   NFLD        SASK      280     3542  493396  522104  955344  926242
## 65 65    PEI        SASK      125     2675  108535  111641  955344  926242
## 66 66     NS        SASK      620     2733  756039  788960  955344  926242
## 67 67     NB        SASK      495     2423  616788  534557  955344  926242
## 68 68    QUE        SASK     1570     1901 5780845 6027764  955344  926242
## 69 69    ONT        SASK     6845     1763 6960870 7703106  955344  926242
## 70 70    MAN        SASK     9425      382  963066  988247  955344  926242
## 71 71   ALTA        SASK    10580      517 1463203 1627874  955344  926242
## 72 72     BC        SASK     6090     1297 1873674 2184621  955344  926242
## 73 73   NFLD        ALTA      805     4059  493396  522104 1463203 1627874
## 74 74    PEI        ALTA      505     3192  108535  111641 1463203 1627874
## 75 75     NS        ALTA     3300     3250  756039  788960 1463203 1627874
## 76 76     NB        ALTA     2150     2940  616788  534557 1463203 1627874
## 77 77    QUE        ALTA     7750     2418 5780845 6027764 1463203 1627874
## 78 78    ONT        ALTA    23550     2281 6960870 7703106 1463203 1627874
## 79 79    MAN        ALTA    17410      899  963066  988247 1463203 1627874
## 80 80   SASK        ALTA    41910      517  955344  926242 1463203 1627874
## 81 81     BC        ALTA    27765      987 1873674 2184621 1463203 1627874
## 82 82   NFLD          BC     1455     4838  493396  522104 1873674 2184621
## 83 83    PEI          BC      600     3972  108535  111641 1873674 2184621
## 84 84     NS          BC     6075     4029  756039  788960 1873674 2184621
## 85 85     NB          BC     3115     3719  616788  534557 1873674 2184621
## 86 86    QUE          BC    16740     3197 5780845 6027764 1873674 2184621
## 87 87    ONT          BC    47395     3059 6960870 7703106 1873674 2184621
## 88 88    MAN          BC    26910     1679  963066  988247 1873674 2184621
## 89 89   SASK          BC    29920     1297  955344  926242 1873674 2184621
## 90 90   ALTA          BC    58915      987 1463203 1627874 1873674 2184621