library(RCurl)
Data - https://vincentarelbundock.github.io/Rdatasets/csv/carData/Robey.csv
Docs - https://vincentarelbundock.github.io/Rdatasets/doc/carData/Robey.html
Column data:
# Load the Robey data set directly from the Rdatasets mirror.
# stringsAsFactors = TRUE is spelled out because the read.csv() default changed
# to FALSE in R 4.0.0; without it the text columns load as character and
# summary() reports only length/class for them instead of per-level counts.
df <- read.csv(
  'https://vincentarelbundock.github.io/Rdatasets/csv/carData/Robey.csv',
  stringsAsFactors = TRUE
)
# Per-column overview: level counts for factor columns, quartiles for numerics.
summary(df)
## X region tfr contraceptors
## Bangladesh: 1 Africa :18 Min. :1.700 Min. : 4.00
## Belize : 1 Asia :10 1st Qu.:3.600 1st Qu.:12.25
## Bolivia : 1 Latin.Amer:16 Median :4.600 Median :41.00
## Botswana : 1 Near.East : 6 Mean :4.688 Mean :37.44
## Brazil : 1 3rd Qu.:5.975 3rd Qu.:55.00
## Burundi : 1 Max. :7.300 Max. :77.00
## (Other) :44
World population statistics
# Mean of the tfr column over the full data set.
print(mean(df[["tfr"]]))
## [1] 4.688
# Median of the contraceptors column over the full data set.
print(median(df[["contraceptors"]]))
## [1] 41
# Keep only the Latin.Amer rows and the three columns of interest.
# which() discards NA comparisons, mirroring how subset() treats them.
sdf <- df[which(df$region == 'Latin.Amer'), c('region', 'tfr', 'contraceptors')]
sdf
## region tfr contraceptors
## 29 Latin.Amer 4.5 47
## 30 Latin.Amer 4.9 32
## 31 Latin.Amer 3.6 66
## 32 Latin.Amer 2.8 66
## 33 Latin.Amer 3.6 70
## 34 Latin.Amer 3.3 56
## 35 Latin.Amer 3.8 53
## 36 Latin.Amer 4.6 47
## 37 Latin.Amer 5.6 23
## 38 Latin.Amer 6.0 10
## 39 Latin.Amer 2.9 55
## 40 Latin.Amer 4.0 55
## 41 Latin.Amer 4.0 58
## 42 Latin.Amer 4.6 48
## 43 Latin.Amer 3.5 59
## 44 Latin.Amer 3.1 54
This subset trims the data frame from 50 rows and 4 columns to 16 rows and 3 columns.
#### 3. Create new column names for the new data frame.
# Replace the subset's column names; only the third one actually changes
# (contraceptors becomes contra).
names(sdf) <- c("region", "tfr", "contra")
The column name contraceptors is shortened to contra; the other two names are unchanged.
# Per-column summary of the Latin.Amer subset, using the renamed columns.
summary(sdf)
## region tfr contra
## Africa : 0 Min. :2.80 Min. :10.00
## Asia : 0 1st Qu.:3.45 1st Qu.:47.00
## Latin.Amer:16 Median :3.90 Median :54.50
## Near.East : 0 Mean :4.05 Mean :49.94
## 3rd Qu.:4.60 3rd Qu.:58.25
## Max. :6.00 Max. :70.00
For Latin America alone, the mean tfr is smaller (4.05 vs. 4.688) but the median contraceptors value is higher (54.5 vs. 41) than for the full data set.
# Mean of the tfr column within the Latin.Amer subset.
print(mean(sdf[["tfr"]]))
## [1] 4.05
# Median of the contra column within the Latin.Amer subset.
print(median(sdf[["contra"]]))
## [1] 54.5
# Relabel three of the region values in df; any other value is left untouched.
# None of the new labels matches another old label, so one lookup pass gives
# the same result as replacing the values one after another.
df$region <- local({
  relabel <- c(
    "Latin.Amer" = "LatinAmerica",
    "Near.East" = "East",
    "Africa" = "africa"
  )
  # Work on plain character values so new labels can be assigned freely.
  regions <- as.character(df$region)
  known <- regions %in% names(relabel)
  regions[known] <- unname(relabel[regions[known]])
  regions
})
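The Latin America subset (sdf) with its columns renamed. It still shows Latin.Amer because the region values were renamed in df only, after sdf had been created.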
# Show the Latin.Amer subset; the region relabelling was applied to df, not
# to sdf, so the region values printed here are the original ones.
print(sdf)
## region tfr contra
## 29 Latin.Amer 4.5 47
## 30 Latin.Amer 4.9 32
## 31 Latin.Amer 3.6 66
## 32 Latin.Amer 2.8 66
## 33 Latin.Amer 3.6 70
## 34 Latin.Amer 3.3 56
## 35 Latin.Amer 3.8 53
## 36 Latin.Amer 4.6 47
## 37 Latin.Amer 5.6 23
## 38 Latin.Amer 6.0 10
## 39 Latin.Amer 2.9 55
## 40 Latin.Amer 4.0 55
## 41 Latin.Amer 4.0 58
## 42 Latin.Amer 4.6 48
## 43 Latin.Amer 3.5 59
## 44 Latin.Amer 3.1 54
Original df with regions renamed based on question 5
# Show the full data frame, whose region column now holds the relabelled values.
print(df)
## X region tfr contraceptors
## 1 Botswana africa 4.8 35
## 2 Burundi africa 6.5 9
## 3 Cameroon africa 5.9 16
## 4 Ghana africa 6.1 13
## 5 Kenya africa 6.5 27
## 6 Liberia africa 6.4 6
## 7 Mali africa 6.8 5
## 8 Mauitius africa 2.2 75
## 9 Niger africa 7.3 4
## 10 Nigeria africa 5.7 6
## 11 Senegal africa 6.4 12
## 12 Sudan africa 4.8 9
## 13 Swaziland africa 5.0 21
## 14 Tanzania africa 6.1 10
## 15 Togo africa 6.1 12
## 16 Uganda africa 7.2 5
## 17 Zambia africa 6.3 15
## 18 Zimbabwe africa 5.3 45
## 19 Bangladesh Asia 5.5 40
## 20 China Asia 2.5 72
## 21 India Asia 4.3 45
## 22 Indonesia Asia 3.0 50
## 23 Korea.Rep.of Asia 1.7 77
## 24 Pakistan Asia 5.2 12
## 25 Philippines Asia 4.3 34
## 26 Sri.Lanka Asia 2.7 62
## 27 Thailand Asia 2.3 68
## 28 Vietnam Asia 3.9 53
## 29 Belize LatinAmerica 4.5 47
## 30 Bolivia LatinAmerica 4.9 32
## 31 Brazil LatinAmerica 3.6 66
## 32 Columbia LatinAmerica 2.8 66
## 33 Costa.Rica LatinAmerica 3.6 70
## 34 Dom.Republic LatinAmerica 3.3 56
## 35 Ecuador LatinAmerica 3.8 53
## 36 El.Salvador LatinAmerica 4.6 47
## 37 Guatemala LatinAmerica 5.6 23
## 38 Haiti LatinAmerica 6.0 10
## 39 Jamaica LatinAmerica 2.9 55
## 40 Mexico LatinAmerica 4.0 55
## 41 Panama LatinAmerica 4.0 58
## 42 Paraguay LatinAmerica 4.6 48
## 43 Peru LatinAmerica 3.5 59
## 44 Trinidad.Tobago LatinAmerica 3.1 54
## 45 Egypt East 4.6 40
## 46 Jordan East 5.5 35
## 47 Morocco East 4.0 42
## 48 Tunisia East 4.3 51
## 49 Turkey East 3.4 60
## 50 Yemen East 7.0 7
On GitHub, locate the CSV file, click "Raw" at the top right, and then use the resulting URL.
# Raw-file URL of the copy of the data set kept on GitHub.
link <- "https://raw.githubusercontent.com/ksooklall/CUNY-SPS-Masters-DS/main/bridge_r/w2/fertility_and_contraception.csv"
# Download the file body as a single string, then parse that string as CSV.
text_data <- getURL(url = link)
github_df <- read.csv(text = text_data)
# Show the first six rows as a quick check of the import.
head(github_df, n = 6L)
## X region tfr contraceptors
## 1 Botswana Africa 4.8 35
## 2 Burundi Africa 6.5 9
## 3 Cameroon Africa 5.9 16
## 4 Ghana Africa 6.1 13
## 5 Kenya Africa 6.5 27
## 6 Liberia Africa 6.4 6