#Loading

library(tidyverse)
library(moments)

Loading the tidyverse and moments packages into the current R session

# Ask the user to pick the CSV file interactively and read it into a data frame.
suicide_rate <- read.csv(file = file.choose())

Importing data set

# Compact overview of the columns and their types.
utils::str(suicide_rate)

'data.frame':   31756 obs. of  14 variables:
 $ country           : chr  "Albania" "Albania" "Albania" "Albania" ...
 $ year              : int  1987 1987 1987 1987 1987 1987 1987 1987 1987 1987 ...
 $ sex               : chr  "male" "male" "female" "male" ...
 $ age               : chr  "15-24 years" "35-54 years" "15-24 years" "75+ years" ...
 $ suicides_no       : int  21 16 14 1 9 1 6 4 1 0 ...
 $ population        : num  312900 308000 289700 21800 274300 ...
 $ suicides.100k.pop : num  6.71 5.19 4.83 4.59 3.28 2.81 2.15 1.56 0.73 0 ...
 $ country.year      : chr  "Albania1987" "Albania1987" "Albania1987" "Albania1987" ...
 $ HDI.for.year      : num  NA NA NA NA NA NA NA NA NA NA ...
 $ gdp_for_year....  : chr  "2,15,66,24,900" "2,15,66,24,900" "2,15,66,24,900" "2,15,66,24,900" ...
 $ gdp_per_capita....: num  796 796 796 796 796 796 796 796 796 796 ...
 $ generation        : chr  "Generation X" "Silent" "Generation X" "G.I. Generation" ...
 $ log_population    : num  12.65 12.64 12.58 9.99 12.52 ...
 $ sex_numeric       : num  1 1 0 1 1 0 0 0 1 0 ...

checking the structure of my data frame

##Plot 1 (Numeric Variable-“population”) Make sure the “population” column of the data frame is stored as numeric

# Coerce the population column to double.
suicide_rate[["population"]] <- as.numeric(suicide_rate[["population"]])

##Remove any missing or NA values from the “Population” column

# Drop the rows whose population is missing. Assigning na.omit(column) back to
# the column fails with a length-mismatch error as soon as a single NA exists,
# because the shortened vector no longer matches the number of rows.
suicide_rate <- suicide_rate[!is.na(suicide_rate$population), ]
# Sanity check: the number of remaining NA values should be 0.
sum(is.na(suicide_rate$population))

[1] 0

#Plotting a histogram of the Population variable

# Histogram of the raw population values.
hist(
  suicide_rate[["population"]],
  col = "blue",
  xlab = "Population",
  main = "Population Distribution"
)

##identifying outliers using the IQR method and removing them from the data

library(tidyverse)
library(moments)

The IQR-based outlier check returns ‘TRUE’ for most values, which suggests that many values fall outside the lower and upper outlier boundaries. (Flagging most values is itself a warning sign: the boundaries are only meaningful when the IQR is computed as Q3 − Q1.) The distribution of the data is not symmetric but strongly right-skewed, as the skewness below shows. The outliers cannot be omitted, since they are correct values and omitting them may negatively affect the analysis.

# Box plot of the raw population values.
boxplot(suicide_rate[["population"]])

##shape and skewness of the distribution

# Kernel density estimate of population, drawn with an explicit title.
suicide_rate$population %>%
  density() %>%
  plot(main = "Population Distribution")

# Sample skewness of population (moments package).
moments::skewness(suicide_rate[["population"]])

[1] 21.53854

The skewness is strongly positive: the tail of the distribution is longer on the right side than on the left side (the distribution is right-skewed).

We apply a logarithmic transformation to the data to reduce the skewness and make the distribution more symmetrical.

# Natural log of population, stored as its own column.
suicide_rate[["log_population"]] <- log(suicide_rate[["population"]])

# Density of the transformed values.
suicide_rate$log_population %>%
  density() %>%
  plot(main = "Log-transformed Population Distribution")

# Skewness after the transformation.
moments::skewness(suicide_rate[["log_population"]])

[1] -0.150254

The log transformation has reduced the skewness from about 21.54 to about −0.15, so the transformed distribution is close to symmetric.

# Summary statistics (min, quartiles, median, mean, max) of population.
summary(suicide_rate[["population"]])

     Min.   1st Qu.    Median      Mean   3rd Qu.      Max. 
2.780e+02 1.288e+05 5.468e+05 7.217e+06 2.909e+06 1.411e+09

# Median population.
median(suicide_rate[["population"]])

[1] 546832.5

# Mean population, for comparison with the median.
mean(suicide_rate[["population"]])

[1] 7217454

# Median population again, shown next to the skewness below.
median(suicide_rate[["population"]])

[1] 546832.5

# Skewness of the untransformed population values.
moments::skewness(suicide_rate[["population"]])

[1] 21.53854

Since the skewness is high, it suggests the presence of outliers or extreme values that can distort the mean. The median is a robust measure of central tendency that is less influenced by extreme values, and is therefore the appropriate measure of central tendency here.

# Interquartile range of population, used as the robust measure of spread.
# The IQR is Q3 - Q1; the previous Q1 - Q3 produced a negative "spread".
Q1 <- quantile(suicide_rate$population, 0.25)
Q3 <- quantile(suicide_rate$population, 0.75)
# unname() drops the quantile label so the result is not printed under a
# misleading "75%" heading.
IQR <- unname(Q3 - Q1)

IQR

     25% 
-2779942

Since the median is used as the robust measure of central tendency, a robust measure of spread such as the interquartile range (IQR = Q3 − Q1) is appropriate, because quartiles are less affected by outliers.

plot 2 (categorical variable- “country”)

create a data frame with counts of each country

# Frequency table of the country column as a data frame (columns Var1, Freq).
country_counts <- data.frame(table(suicide_rate[["country"]]))

plot the bar chart

# Bar chart of the number of data-set rows per country. `Freq` comes from
# table(suicide_rate$country), so it counts observations, not suicides; the
# title and y-axis label now say so explicitly.
ggplot(country_counts, aes(x = Var1, y = Freq)) +
  geom_col() +
  # Rotate the country names so that they do not overlap.
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1)) +
  labs(
    title = "Number of Observations by Country",
    x = "Country",
    y = "Number of observations",
    tag = "Plot 2.1"
  )

NA

**The graph is a bit crowded since there are more than 100 countries in the data set**

create a data frame with proportions of each country

# Number of rows per country and each country's share of all rows (in %).
# Group by the bare column name `country`: the data frame has no `Country`
# column, so `suicide_rate$Country` is NULL and cannot define the groups.
country_group <- suicide_rate %>%
  group_by(country) %>%
  summarise(Count = n()) %>%
  mutate(percentage = round(Count / sum(Count) * 100, 2))

# Flag population outliers with the 1.5 * IQR rule.
Q1 <- quantile(suicide_rate$population, 0.25)
Q3 <- quantile(suicide_rate$population, 0.75)
# The IQR is Q3 - Q1. With the sign reversed (Q1 - Q3) the "upper" bound falls
# below the "lower" bound and almost every value is flagged as an outlier.
IQR <- unname(Q3 - Q1)

upper_outlier_bound <- Q3 + 1.5 * IQR
lower_outlier_bound <- Q1 - 1.5 * IQR

# TRUE where a value lies outside [lower_outlier_bound, upper_outlier_bound].
outliers <- suicide_rate$population > upper_outlier_bound |
  suicide_rate$population < lower_outlier_bound

view(head(outliers))

pie chart plot

# Base palette, copied from the notebook source so that this chunk does not
# depend on an object defined in another chunk.
cbPalette <- c("#eb3443", "#78c95f", "#56B4E9", "#009E73",
               "#F0E442", "#0072B2", "#D55E00", "#CC79A7")

# scale_fill_manual() needs at least one colour per fill level and stops with
# an "insufficient values" error otherwise, so interpolate the palette when
# there are more levels than base colours.
n_fill_levels <- nlevels(as.factor(country_group$Count))
pie_colours <- if (n_fill_levels > length(cbPalette)) {
  grDevices::colorRampPalette(cbPalette)(n_fill_levels)
} else {
  cbPalette
}

# Pie chart: stacked bar of Count wrapped into polar coordinates.
# NOTE(review): the fill is mapped to Count although the legend is titled
# "Country" - confirm which variable the slices should be coloured by.
ggplot(country_group, aes(x = "", y = Count, fill = as.factor(Count))) +
  geom_col(width = 0.8) +
  coord_polar("y") +
  scale_fill_manual(values = pie_colours) +
  geom_text(
    aes(label = paste(percentage,"%")),
    position = position_stack(vjust = 0.5),
    size = 3.5
  ) +
  guides(fill = guide_legend(title = "Country")) +
  theme_void() +
  labs(
    title = "Distribution of proportions for variable Country (in Percentage)",
    tag = "Plot 2.2"
  )

Unusual observations for the “country” variable would be countries that have an unusually high or low count or proportion of observations compared with the rest of the dataset

# List the distinct country labels.
# NOTE(review): the printed listing contains near-duplicate spellings (e.g.
# "Czech Republic"/"Czechia", "United States"/"United States of America") -
# confirm whether these should be merged before counting per country.
unique(country_counts[["Var1"]])

  [1] Albania                          Antigua and Barbuda             
  [3] Argentina                        Armenia                         
  [5] Aruba                            Australia                       
  [7] Austria                          Azerbaijan                      
  [9] Bahamas                          Bahrain                         
 [11] Barbados                         Belarus                         
 [13] Belgium                          Belize                          
 [15] Bosnia and Herzegovina           Brazil                          
 [17] Brunei Darussalam                Bulgaria                        
 [19] Cabo Verde                       Canada                          
 [21] Chile                            China, Hong Kong SAR            
 [23] Colombia                         Costa Rica                      
 [25] Croatia                          Cuba                            
 [27] Cyprus                           Czech Republic                  
 [29] Czechia                          Denmark                         
 [31] Dominica                         Dominican Republic              
 [33] Ecuador                          Egypt                           
 [35] El Salvador                      Estonia                         
 [37] Fiji                             Finland                         
 [39] France                           Georgia                         
 [41] Germany                          Greece                          
 [43] Grenada                          Guatemala                       
 [45] Guyana                           Hungary                         
 [47] Iceland                          Ireland                         
 [49] Israel                           Italy                           
 [51] Jamaica                          Japan                           
 [53] Jordan                           Kazakhstan                      
 [55] Kiribati                         Kuwait                          
 [57] Kyrgyzstan                       Latvia                          
 [59] Lebanon                          Lithuania                       
 [61] Luxembourg                       Macau                           
 [63] Maldives                         Malta                           
 [65] Mauritius                        Mexico                          
 [67] Mongolia                         Montenegro                      
 [69] Netherlands                      New Zealand                     
 [71] Nicaragua                        North Macedonia                 
 [73] Norway                           Oman                            
 [75] Panama                           Paraguay                        
 [77] Peru                             Philippines                     
 [79] Poland                           Portugal                        
 [81] Puerto Rico                      Qatar                           
 [83] Republic of Korea                Republic of Moldova             
 [85] Romania                          Russian Federation              
 [87] Saint Kitts and Nevis            Saint Lucia                     
 [89] Saint Vincent and Grenadines     Saint Vincent and the Grenadines
 [91] San Marino                       Serbia                          
 [93] Seychelles                       Singapore                       
 [95] Slovakia                         Slovenia                        
 [97] South Africa                     Spain                           
 [99] Sri Lanka                        Suriname                        
[101] Sweden                           Switzerland                     
[103] Tajikistan                       Thailand                        
[105] Trinidad and Tobago              Turkey                          
[107] Turkmenistan                     Ukraine                         
[109] United Arab Emirates             United Kingdom                  
[111] United States                    United States of America        
[113] Uruguay                          Uzbekistan                      
114 Levels: Albania Antigua and Barbuda Argentina Armenia Aruba Australia ... Uzbekistan

Bivariate

# Scatter plot of total GDP per year against GDP per capita.
# gdp_for_year.... is read as character with comma group separators (e.g.
# "2,15,66,24,900"), which ggplot would treat as a discrete axis with one
# tick per distinct string; strip the commas and convert to numeric first.
ggplot(
  suicide_rate,
  aes(
    x = as.numeric(gsub(",", "", gdp_for_year....)),
    y = gdp_per_capita....
  )
) +
  geom_point() +
  labs(title = "Relationship between GDP per year and GDP per capita",
       x = "GDP per year",
       y = "GDP per capita")

# Jittered strip plot of the suicide rate (per 100k population) for each sex,
# one colour per sex.
ggplot(
  data = suicide_rate,
  mapping = aes(x = sex, y = suicides.100k.pop, colour = sex)
) +
  geom_jitter(width = 0.2, size = 3, alpha = 0.5) +
  scale_colour_manual(values = c("#E69F00", "#56B4E9")) +
  labs(
    title = "Relationship between Sex and Suicide Rate",
    x = "Sex",
    y = "Suicide Rate"
  ) +
  theme_minimal()

From the jittered scatter plot, we can see that suicide rates tend to be higher for males than for females. Because sex is a categorical variable, this is a difference between two groups rather than a linear relationship: the higher suicide rates are observed mostly among males.

The relationship between sex and suicide rate indicates that there is a difference in the suicide rate between males and females. Specifically, the data suggests that males have a higher suicide rate than females. This is an important finding that may have implications for suicide prevention and intervention efforts, as it highlights the need to focus on understanding and addressing the specific risk factors that may be contributing to higher suicide rates in males. It may also point to the need for targeted interventions that take into account gender differences in the experience and expression of mental health issues.

In the scatterplot, we can see that there is a considerable amount of variability in the data, with many points spread out across the range of suicide rates for both males and females. This indicates that there is not a perfect relationship between sex and suicide rate, as there are many factors that could influence suicide rates in a given country. However, we can still see a general trend of higher suicide rates among males compared to females, which is supported by the positive correlation coefficient we calculated. This suggests that sex is a factor that can help predict suicide rates, but it is not the only factor, as there is still a significant amount of variability in the data. Overall, the scatterplot and correlation coefficient provide useful information about the relationship between sex and suicide rates, but further analysis would be needed to fully understand the factors that contribute to suicide rates in different countries.

LS0tDQp0aXRsZTogIlIgTm90ZWJvb2siDQpvdXRwdXQ6IGh0bWxfbm90ZWJvb2sNCi0tLQ0KDQoNCg0KDQojTG9hZGluZyANCmBgYHtyIHdhcm5pbmc9RkFMU0UsIG1lc3NhZ2U9RkFMU0UsIGxpYnJhcmllc30NCmxpYnJhcnkodGlkeXZlcnNlKQ0KbGlicmFyeShtb21lbnRzKQ0KDQpgYGANCioqbG9hZGluZyBsaWJyYXJ5IHRpZHl2ZXJzZSBhbmQgbW9tZW50cyBpbnRvIG15IHdvcmtpbmcgZGlyZWN0b3J5KioNCg0KDQpgYGB7ciBkYXRhX3NldH0NCnN1aWNpZGVfcmF0ZTwtIHJlYWQuY3N2KGZpbGUuY2hvb3NlKCkpDQoNCmBgYA0KKipJbXBvcnRpbmcgZGF0YSBzZXQqKg0KDQoNCmBgYHtyIDF9DQpzdHIoc3VpY2lkZV9yYXRlKQ0KDQpgYGANCioqY2hlY2tpbmcgdGhlIHN0cnVjdHVyZSBvZiBteSBkYXRhIGZyYW1lKioNCg0KDQojI1Bsb3QgMSAoTnVtZXJpYyBWYXJpYWJsZS0icG9wdWxhdGlvbiIpDQoqKlN1YnNldCB0aGUgZGF0YSBmcmFtZSB0byBpbmNsdWRlIG9ubHkgdGhlICJQb3B1bGF0aW9uIiBjb2x1bW4gYW5kIG1ha2UgaXQgbnVtZXJpYyoqDQpgYGB7ciBwb3B1bGF0aW9ufQ0Kc3VpY2lkZV9yYXRlJHBvcHVsYXRpb248LSBhcy5udW1lcmljKHN1aWNpZGVfcmF0ZSRwb3B1bGF0aW9uKSANCg0KYGBgDQojI1JlbW92ZSBhbnkgbWlzc2luZyBvciBOQSB2YWx1ZXMgZnJvbSB0aGUgIlBvcHVsYXRpb24iIGNvbHVtbg0KYGBge3IgTkF9DQpzdWljaWRlX3JhdGUkcG9wdWxhdGlvbjwtIG5hLm9taXQoc3VpY2lkZV9yYXRlJHBvcHVsYXRpb24pDQpzdW0oaXMubmEoc3VpY2lkZV9yYXRlJHBvcHVsYXRpb24pKQ0KYGBgDQoNCiNQbG90dGluZyBhIGhpc3RvZ3JhbSBvZiB0aGUgUG9wdWxhdGlvbiB2YXJpYWJsZQ0KYGBge3IgaGlzdH0NCmhpc3Qoc3VpY2lkZV9yYXRlJHBvcHVsYXRpb24sIG1haW4gPSAiUG9wdWxhdGlvbiBEaXN0cmlidXRpb24iLCB4bGFiID0gIlBvcHVsYXRpb24iLCBjb2wgPSAiYmx1ZSIpIA0KDQpgYGANCiMjaWRlbnRpZnlpbmcgb3V0bGllcnMgdXNpbmcgdGhlIElRUiBtZXRob2QgYW5kIHJlbW92aW5nIHRoZW0gZnJvbSB0aGUgZGF0YQ0KYGBge3J9DQpRMTwtIHF1YW50aWxlKHN1aWNpZGVfcmF0ZSRwb3B1bGF0aW9uLCAwLjI1KQ0KUTM8LSBxdWFudGlsZShzdWljaWRlX3JhdGUkcG9wdWxhdGlvbiwgMC43NSkNCklRUjwtIFExIC0gUTMNCg0KdXBwZXJfb3V0bGllcl9ib3VuZCA8LSBRMyArIDEuNSpJUVINCmxvd2VyX291dGxpZXJfYm91bmQgPC0gUTEgLSAxLjUqSVFSDQoNCm91dGxpZXJzIDwtIHN1aWNpZGVfcmF0ZSRwb3B1bGF0aW9uID4gdXBwZXJfb3V0bGllcl9ib3VuZCB8IHN1aWNpZGVfcmF0ZSRwb3B1bGF0aW9uIDwgbG93ZXJfb3V0bGllcl9ib3VuZA0KDQp2aWV3KGhlYWQob3V0bGllcnMpKQ0KYGBgDQoqKlRoZSBvdXRwdXQgb2YgaWRlbnRpZnlpbmcgb3V0bGllcnMgdXNpbmcgdGhlIElRUiBtZXRob2QgaXMgJ1RSVUUnIGZvciBtb3N0ICB2YWx1ZSwgdGhpcyBzdWdnZXN0IHRoYXQgdGhlcmUg
YXJlIHZhbHVlcyB0aGF0IGZhbGwgb3V0c2lkZSB0aGUgbG93ZXIgYW5kIHVwcGVyIG91dGxpZXIgYm91bmRhcmllcyB0aGUgZGlzdHJpYnV0aW9uIG9mIHRoZSBkYXRhIGlzIHJlbGF0aXZlbHkgc3ltbWV0cmljLiBUaGUgb3V0bGllcnMgY2Fubm90IGJlIG9tbWl0ZWQgc2luY2UgdGhleSBhcmUgdGhlIGNvcnJlY3QgdmFsdWVzIGFuZCBtYXkgYWZmZWN0IHRoZSBhbmFseXNpcyBuZWdhdGl2ZWx5IGlmIG9tbWl0ZWQuKioNCg0KYGBge3IgYm94cGxvdH0NCmJveHBsb3Qoc3VpY2lkZV9yYXRlJHBvcHVsYXRpb24pDQpgYGANCg0KIyNzaGFwZSBhbmQgc2tld25lc3Mgb2YgdGhlIGRpc3RyaWJ1dGlvbg0KYGBge3Igc2tld25lc3N9DQpwbG90KGRlbnNpdHkoc3VpY2lkZV9yYXRlJHBvcHVsYXRpb24pLCBtYWluID0gIlBvcHVsYXRpb24gRGlzdHJpYnV0aW9uIikNCg0Kc2tld25lc3Moc3VpY2lkZV9yYXRlJHBvcHVsYXRpb24pDQoNCmBgYA0KKip0YWlsIG9mIHRoZSBkaXN0cmlidXRpb24gaXMgbG9uZ2VyIG9uIHRoZSByaWdodCBzaWRlIHRoYW4gb24gdGhlIGxlZnQgc2lkZSoqDQoNCioqd2UgdXNlIGxvZ2FyaXRobSB0cmFuc2Zvcm1hdGlvbiB0byB0cmFuc2Zvcm0gdGhlIGRhdGEgdG8gcmVkdWNlIHRoZSBza2V3bmVzcyBhbmQgbWFrZSB0aGUgZGlzdHJpYnV0aW9uIG1vcmUgc3ltbWV0cmljYWwqKg0KDQoNCmBgYHtyIDJ9DQpzdWljaWRlX3JhdGUkbG9nX3BvcHVsYXRpb248LSBsb2coc3VpY2lkZV9yYXRlJHBvcHVsYXRpb24pDQoNCnBsb3QoZGVuc2l0eShzdWljaWRlX3JhdGUkbG9nX3BvcHVsYXRpb24pLCBtYWluID0gIkxvZy10cmFuc2Zvcm1lZCBQb3B1bGF0aW9uIERpc3RyaWJ1dGlvbiIpDQoNCmBgYA0KYGBge3Igc2tld25lc3MyfQ0Kc2tld25lc3Moc3VpY2lkZV9yYXRlJGxvZ19wb3B1bGF0aW9uKQ0KYGBgDQoqKlRoZSBza2V3bmVzcyBoYXMgcmVkdWNlZCBhZnRlciB0aGUgbG9nIHRyYW5zZm9ybWF0aW9uKioNCg0KDQpgYGB7ciBjZW50cmFsX3RlbmRhbmN5fQ0Kc3VtbWFyeShzdWljaWRlX3JhdGUkcG9wdWxhdGlvbikNCm1lZGlhbihzdWljaWRlX3JhdGUkcG9wdWxhdGlvbikNCm1lYW4oc3VpY2lkZV9yYXRlJHBvcHVsYXRpb24pDQpgYGANCg0KDQpgYGB7ciBtZWRpYW59DQptZWRpYW4oc3VpY2lkZV9yYXRlJHBvcHVsYXRpb24pDQoNCnNrZXduZXNzKHN1aWNpZGVfcmF0ZSRwb3B1bGF0aW9uKQ0KYGBgDQoqKnNpbmNlIHNrZXduZXNzIGlzIGhpZ2gsIGl0IHN1Z2dlc3RzIHRoZSBwcmVzZW5jZSBvZiBvdXRsaWVycyBvciBleHRyZW1lIHZhbHVlcyB0aGF0IGNhbiBkaXN0b3J0IHRoZSBtZWFuLiBUaGVyZWZvcmUsIHRoZSBtZWRpYW4sIGlzIGEgcm9idXN0IG1lYXN1cmUgb2YgY2VudHJhbCB0ZW5kZW5jeSB0aGF0IGlzIGxlc3MgaW5mbHVlbmNlZCBieSBleHRyZW1lIHZhbHVlcyx0aHVzIGJlaW5nIGFuIGFwcHJvcHJpYXRlIG1lYXN1cmUgb2YgY2VudHJhbCB0ZW5kZW5jeSoqDQogDQoNCmBgYHtyIElRUn0NClExPC0g
cXVhbnRpbGUoc3VpY2lkZV9yYXRlJHBvcHVsYXRpb24sIDAuMjUpDQpRMzwtIHF1YW50aWxlKHN1aWNpZGVfcmF0ZSRwb3B1bGF0aW9uLCAwLjc1KQ0KSVFSPC0gUTEgLSBRMw0KDQpJUVINCg0KYGBgDQoqKlRoZSBtZWRpYW4gaXMgYSByb2J1c3QgbWVhc3VyZSBvZiBjZW50cmFsIHRlbmRlbmN5LCBhIHJvYnVzdCBtZWFzdXJlIG9mIHNwcmVhZCBzdWNoIGFzIHRoZSAgaW50ZXJxdWFydGlsZSByYW5nZSAoSVFSKSBpcyBhcHByb3ByaWF0ZSwgc2luY2UgUXVhcnRpbGVzIGFyZSBsZXNzIGltcGFjdGVkIGJ5IG91dGxpZXJzKioNCg0KDQoNCg0KDQoNCg0KIyBwbG90IDIgKGNhdGVnb3JpY2FsIHZhcmlhYmxlLSAiY291bnRyeSIpDQojIyBjcmVhdGUgYSBkYXRhIGZyYW1lIHdpdGggY291bnRzIG9mIGVhY2ggY291bnRyeQ0KYGBge3IgZGF0YWZyYW1lfQ0KY291bnRyeV9jb3VudHM8LSBkYXRhLmZyYW1lKHRhYmxlKHN1aWNpZGVfcmF0ZSRjb3VudHJ5KSkNCg0KYGBgDQojIyBwbG90IHRoZSBiYXIgY2hhcnQNCmBgYHtyIGJhcl9jaGFydH0NCmdncGxvdChjb3VudHJ5X2NvdW50cywgYWVzKHggPSBWYXIxLCB5ID0gRnJlcSkpICsNCiAgZ2VvbV9iYXIoc3RhdCA9ICJpZGVudGl0eSIpICsNCiAgdGhlbWUoYXhpcy50ZXh0LnggPSBlbGVtZW50X3RleHQoYW5nbGUgPSA5MCwgdmp1c3QgPSAwLjUsIGhqdXN0PTEpKSArDQogIHhsYWIoIkNvdW50cnkiKSArDQogIHlsYWIoIkNvdW50IikgKw0KICBsYWJzKHRpdGxlID0gIkNvdW50IG9mIFN1aWNpZGVzIGJ5IENvdW50cnkiLCB0YWcgPSAiUGxvdCAyLjEiKQ0KIA0KYGBgDQoqVGhlIGdyYXBoIGlzIGFiaXQgY2xvdWRlZCBzaW5jZSB0aGVyZSBhcmUgbW9yZSB0aGFuIDEwMCBjb3VudHJpZXMgaW4gb2JzZXJ2YXRpb24qKg0KDQojIyBjcmVhdGUgYSBkYXRhIGZyYW1lIHdpdGggcHJvcG9ydGlvbnMgb2YgZWFjaCBjb3VudHJ5DQpgYGB7ciBwcm9wb3J0aW9ufQ0KY291bnRyeV9ncm91cCA8LSBzdWljaWRlX3JhdGUgJT4lDQogIGdyb3VwX2J5KHN1aWNpZGVfcmF0ZSRDb3VudHJ5KSAlPiUNCiAgc3VtbWFyaXNlKENvdW50ID0gbigpKSAlPiUNCiAgbXV0YXRlKHBlcmNlbnRhZ2UgPSByb3VuZChDb3VudCAvIHN1bShDb3VudCkgKiAxMDAsIDIpKQ0KYGBgDQoNCg0KYGBge3J9DQpjYlBhbGV0dGUgPC0gYygiI2ViMzQ0MyIsICIjNzhjOTVmIiwgIiM1NkI0RTkiLCAiIzAwOUU3MyIsICIjRjBFNDQyIiwgIiMwMDcyQjIiLCAiI0Q1NUUwMCIsICIjQ0M3OUE3IikNCmBgYA0KDQojIyBwaWUgY2hhcnQgcGxvdA0KYGBge3IgcGllfQ0KZ2dwbG90KGNvdW50cnlfZ3JvdXAsIGFlcyh4PSIiLCB5PUNvdW50LCBmaWxsPWFzLmZhY3RvcihDb3VudCkpKSsNCiAgZ2VvbV9iYXIoc3RhdCA9ICdpZGVudGl0eScgLCB3aWR0aCA9IDAuOCkgKw0KICBjb29yZF9wb2xhcigieSIpKw0KICBzY2FsZV9maWxsX21hbnVhbCh2YWx1ZXM9Y2JQYWxldHRlKSsNCiAgZ2VvbV90ZXh0KGFlcyhsYWJlbCA9IHBhc3RlKHBlcmNl
bnRhZ2UsIiUiKSksDQogICAgICAgICAgICBwb3NpdGlvbiA9IHBvc2l0aW9uX3N0YWNrKHZqdXN0ID0gMC41KSxzaXplPTMuNSkgKw0KICBndWlkZXMoZmlsbCA9IGd1aWRlX2xlZ2VuZCh0aXRsZSA9ICJDb3VudHJ5IikpICsNCiAgdGhlbWVfdm9pZCgpKw0KICBsYWJzKHRpdGxlID0gIkRpc3RyaWJ1dGlvbiBvZiBwcm9wb3J0aW9ucyBmb3IgdmFyaWFibGUgQ291bnRyeSAoaW4gUGVyY2VudGFnZSkiLA0KICAgICAgIHRhZyA9ICJQbG90IDIuMiIpDQpgYGANCg0KDQoqKlVudXN1YWwgb2JzZXJ2YXRpb25zIGZvciB0aGUgImNvdW50cnkiIHZhcmlhYmxlIHdvdWxkIGJlIHNvbWUgY291bnRyaWVzICBoYXZlIGFuIHVudXN1YWxseSBoaWdoIG9yIGxvdyBjb3VudCBvciBwcm9wb3J0aW9uIG9mIHN1aWNpZGVzIGNvbXBhcmVkIHRvIHRoZSByZXN0IG9mIHRoZSBkYXRhc2V0KioNCg0KYGBge3IgY291bnR9DQp1bmlxdWUoY291bnRyeV9jb3VudHMkVmFyMSkNCmBgYA0KDQoNCiMgQml2YXJpYXRlDQpgYGB7ciB9DQpnZ3Bsb3Qoc3VpY2lkZV9yYXRlLCBhZXMoeCA9IGdkcF9mb3JfeWVhci4uLi4sIHkgPSBnZHBfcGVyX2NhcGl0YS4uLi4pKSArDQogIGdlb21fcG9pbnQoKSArDQogIGxhYnModGl0bGUgPSAiUmVsYXRpb25zaGlwIGJldHdlZW4gR0RQIHBlciB5ZWFyIGFuZCBHRFAgcGVyIGNhcGl0YSIsDQogICAgICAgeCA9ICJHRFAgcGVyIHllYXIiLA0KICAgICAgIHkgPSAiR0RQIHBlciBjYXBpdGEiKQ0KDQpgYGANCg0KYGBge3Igc2NhdHRlcnBsb3R9DQpnZ3Bsb3Qoc3VpY2lkZV9yYXRlLCBhZXMoeCA9IHNleCwgeSA9IHN1aWNpZGVzLjEwMGsucG9wLCBjb2xvciA9IHNleCkpICsNCiAgZ2VvbV9qaXR0ZXIoYWxwaGEgPSAwLjUsIHNpemUgPSAzLCB3aWR0aCA9IDAuMikgKw0KICBzY2FsZV9jb2xvcl9tYW51YWwodmFsdWVzPWMoIiNFNjlGMDAiLCAiIzU2QjRFOSIpKSArDQogIGxhYnModGl0bGUgPSAiUmVsYXRpb25zaGlwIGJldHdlZW4gU2V4IGFuZCBTdWljaWRlIFJhdGUiLA0KICAgICAgIHggPSAiU2V4IiwNCiAgICAgICB5ID0gIlN1aWNpZGUgUmF0ZSIpICsNCiAgdGhlbWVfbWluaW1hbCgpDQoNCmBgYA0KKipGcm9tIHRoZSBzY2F0dGVyIHBsb3QsIHdlIGNhbiBzZWUgdGhhdCB0aGVyZSBpcyBhIHNsaWdodGx5IHBvc2l0aXZlIGxpbmVhciByZWxhdGlvbnNoaXAgYmV0d2VlbiB0aGUgc3VpY2lkZSByYXRlIGFuZCBzZXguIEFzIHRoZSBzdWljaWRlIHJhdGUgaW5jcmVhc2VzLCB0aGUgbnVtYmVyIG9mIG1hbGVzIGNvbW1pdHRpbmcgc3VpY2lkZSBpcyBzbGlnaHRseSBoaWdoZXIgdGhhbiBmZW1hbGVzKioNCg0KDQoqKlRoZSByZWxhdGlvbnNoaXAgYmV0d2VlbiBzZXggYW5kIHN1aWNpZGUgcmF0ZSBpbmRpY2F0ZXMgdGhhdCB0aGVyZSBpcyBhIGRpZmZlcmVuY2UgaW4gdGhlIHN1aWNpZGUgcmF0ZSBiZXR3ZWVuIG1hbGVzIGFuZCBmZW1hbGVzLiBTcGVjaWZpY2FsbHksIHRoZSBkYXRhIHN1Z2dlc3RzIHRoYXQgbWFsZXMgaGF2
ZSBhIGhpZ2hlciBzdWljaWRlIHJhdGUgdGhhbiBmZW1hbGVzLiBUaGlzIGlzIGFuIGltcG9ydGFudCBmaW5kaW5nIHRoYXQgbWF5IGhhdmUgaW1wbGljYXRpb25zIGZvciBzdWljaWRlIHByZXZlbnRpb24gYW5kIGludGVydmVudGlvbiBlZmZvcnRzLCBhcyBpdCBoaWdobGlnaHRzIHRoZSBuZWVkIHRvIGZvY3VzIG9uIHVuZGVyc3RhbmRpbmcgYW5kIGFkZHJlc3NpbmcgdGhlIHNwZWNpZmljIHJpc2sgZmFjdG9ycyB0aGF0IG1heSBiZSBjb250cmlidXRpbmcgdG8gaGlnaGVyIHN1aWNpZGUgcmF0ZXMgaW4gbWFsZXMuIEl0IG1heSBhbHNvIHBvaW50IHRvIHRoZSBuZWVkIGZvciB0YXJnZXRlZCBpbnRlcnZlbnRpb25zIHRoYXQgdGFrZSBpbnRvIGFjY291bnQgZ2VuZGVyIGRpZmZlcmVuY2VzIGluIHRoZSBleHBlcmllbmNlIGFuZCBleHByZXNzaW9uIG9mIG1lbnRhbCBoZWFsdGggaXNzdWVzLioqDQoNCioqSW4gdGhlIHNjYXR0ZXJwbG90LCB3ZSBjYW4gc2VlIHRoYXQgdGhlcmUgaXMgYSBjb25zaWRlcmFibGUgYW1vdW50IG9mIHZhcmlhYmlsaXR5IGluIHRoZSBkYXRhLCB3aXRoIG1hbnkgcG9pbnRzIHNwcmVhZCBvdXQgYWNyb3NzIHRoZSByYW5nZSBvZiBzdWljaWRlIHJhdGVzIGZvciBib3RoIG1hbGVzIGFuZCBmZW1hbGVzLiBUaGlzIGluZGljYXRlcyB0aGF0IHRoZXJlIGlzIG5vdCBhIHBlcmZlY3QgcmVsYXRpb25zaGlwIGJldHdlZW4gc2V4IGFuZCBzdWljaWRlIHJhdGUsIGFzIHRoZXJlIGFyZSBtYW55IGZhY3RvcnMgdGhhdCBjb3VsZCBpbmZsdWVuY2Ugc3VpY2lkZSByYXRlcyBpbiBhIGdpdmVuIGNvdW50cnkuIEhvd2V2ZXIsIHdlIGNhbiBzdGlsbCBzZWUgYSBnZW5lcmFsIHRyZW5kIG9mIGhpZ2hlciBzdWljaWRlIHJhdGVzIGFtb25nIG1hbGVzIGNvbXBhcmVkIHRvIGZlbWFsZXMsIHdoaWNoIGlzIHN1cHBvcnRlZCBieSB0aGUgcG9zaXRpdmUgY29ycmVsYXRpb24gY29lZmZpY2llbnQgd2UgY2FsY3VsYXRlZC4gVGhpcyBzdWdnZXN0cyB0aGF0IHNleCBpcyBhIGZhY3RvciB0aGF0IGNhbiBoZWxwIHByZWRpY3Qgc3VpY2lkZSByYXRlcywgYnV0IGl0IGlzIG5vdCB0aGUgb25seSBmYWN0b3IsIGFzIHRoZXJlIGlzIHN0aWxsIGEgc2lnbmlmaWNhbnQgYW1vdW50IG9mIHZhcmlhYmlsaXR5IGluIHRoZSBkYXRhLiBPdmVyYWxsLCB0aGUgc2NhdHRlcnBsb3QgYW5kIGNvcnJlbGF0aW9uIGNvZWZmaWNpZW50IHByb3ZpZGUgdXNlZnVsIGluZm9ybWF0aW9uIGFib3V0IHRoZSByZWxhdGlvbnNoaXAgYmV0d2VlbiBzZXggYW5kIHN1aWNpZGUgcmF0ZXMsIGJ1dCBmdXJ0aGVyIGFuYWx5c2lzIHdvdWxkIGJlIG5lZWRlZCB0byBmdWxseSB1bmRlcnN0YW5kIHRoZSBmYWN0b3JzIHRoYXQgY29udHJpYnV0ZSB0byBzdWljaWRlIHJhdGVzIGluIGRpZmZlcmVudCBjb3VudHJpZXMuKioNCg0KDQo=

R Notebook

plot 2 (categorical variable- “country”)

create a data frame with counts of each country

plot the bar chart

create a data frame with proportions of each country

pie chart plot

Bivariate