Temporal Trends in FluNet Data
# Weekly specimen totals per influenza transmission zone (ITZ), keyed by ISO
# week and ISO year. na.rm = TRUE keeps a single missing weekly count from
# turning the whole group's total into NA.
by_week_and_region <- data %>%
  group_by(`ITZ`, `ISO_WEEK`, `ISO_YEAR`) %>%
  summarize(SPEC_PROCESSED_WEEKLY = sum(`SPEC_PROCESSED_NB`, na.rm = TRUE))
## `summarise()` has grouped output by 'ITZ', 'ISO_WEEK'. You can override using
## the `.groups` argument.
# Specimen totals per transmission zone (ITZ) and ISO week start date.
# na.rm = TRUE keeps a single missing count from turning the whole group's
# total into NA.
by_date_and_region <- data %>%
  group_by(`ITZ`, `ISO_SDATE`) %>%
  summarize(SPEC_PROCESSED_WEEKLY = sum(`SPEC_PROCESSED_NB`, na.rm = TRUE))
## `summarise()` has grouped output by 'ITZ'. You can override using the `.groups`
## argument.
# Specimen totals over all reporting units for each week (start date, ISO week,
# ISO year). na.rm = TRUE keeps one missing report from turning the whole
# week's total into NA.
by_week <- data %>%
  group_by(`ISO_SDATE`, `ISO_WEEK`, `ISO_YEAR`) %>%
  summarize(SPEC_PROCESSED = sum(`SPEC_PROCESSED_NB`, na.rm = TRUE))
## `summarise()` has grouped output by 'ISO_SDATE', 'ISO_WEEK'. You can override
## using the `.groups` argument.
# Weekly specimen totals drawn as one line per ISO year (interactive via plotly).
# ISO_YEAR is converted to character so ggplot treats it as a discrete variable,
# which scale_color_brewer() requires. The column is addressed by name rather
# than by position, so the conversion does not depend on column order.
by_week2 <- by_week %>%
  mutate(ISO_YEAR = as.character(`ISO_YEAR`))
f1 <- ggplot(
  by_week2,
  aes(x = `ISO_WEEK`, y = `SPEC_PROCESSED`, group = `ISO_YEAR`, color = `ISO_YEAR`)
) +
  geom_line() +
  scale_color_brewer("Year", palette = "Dark2") +
  labs(x = "Week", y = "Specimens Processed", title = "Specimens Processed from 2017-2022") +
  theme_classic()
ggplotly(f1)
# Annual specimen totals per country/area/territory (ITZ is kept as a label).
# na.rm = TRUE keeps one missing weekly count from turning the whole year's
# total into NA.
by_year_and_country <- data %>%
  group_by(`COUNTRY/AREA/TERRITORY`, `ISO_YEAR`, `ITZ`) %>%
  summarize(SPEC_PROCESSED_ANNUAL = sum(`SPEC_PROCESSED_NB`, na.rm = TRUE))
## `summarise()` has grouped output by 'COUNTRY/AREA/TERRITORY', 'ISO_YEAR'. You
## can override using the `.groups` argument.
# Annual specimen totals per influenza transmission zone (ITZ).
# na.rm = TRUE keeps one missing weekly count from turning the whole
# zone-year total into NA; totals of groups without missing counts are unchanged.
by_year_and_region <- data %>%
  group_by(`ISO_YEAR`, `ITZ`) %>%
  summarize(SPEC_PROCESSED_ANNUAL = sum(`SPEC_PROCESSED_NB`, na.rm = TRUE))
## `summarise()` has grouped output by 'ISO_YEAR'. You can override using the
## `.groups` argument.
Specimens Processed by Each Region
# Annual specimen totals per transmission zone, one facet per ITZ.
# Missing cells are zero-filled in place first.
by_year_and_region[is.na(by_year_and_region)] <- 0
# NOTE(review): 6434371 is a hard-coded total; presumably it singles out one
# row to leave out of the plot -- confirm which zone/year it refers to.
by_year_and_region2 <- filter(by_year_and_region, SPEC_PROCESSED_ANNUAL != 6434371)
f2 <- by_year_and_region2 %>%
  ggplot(aes(x = `ISO_YEAR`, y = `SPEC_PROCESSED_ANNUAL`)) +
  geom_line() +
  facet_wrap(. ~ `ITZ`) +
  labs(x = "Year", y = "Specimens Processed") +
  theme_bw()
ggplotly(f2)
# Annual specimen totals per country, one line per country and one facet per
# ITZ. Missing cells are zero-filled in place first.
by_year_and_country[is.na(by_year_and_country)] <- 0
# Only country-years with at most 20000 specimens are plotted.
by_year_and_country2 <- filter(by_year_and_country, SPEC_PROCESSED_ANNUAL <= 20000)
# NOTE(review): this reuses the name f2 and overwrites the region plot object;
# no f3 is defined in this part of the file -- confirm whether f3 was meant.
f2 <- by_year_and_country2 %>%
  ggplot(aes(
    x = `ISO_YEAR`,
    y = `SPEC_PROCESSED_ANNUAL`,
    color = `COUNTRY/AREA/TERRITORY`
  )) +
  geom_line() +
  facet_wrap(. ~ `ITZ`) +
  labs(x = "Year", y = "Specimens Processed") +
  theme_bw() +
  # The per-country colour legend is hidden.
  theme(legend.position = "none")
ggplotly(f2)
# Stacked bars of annual specimen totals: one bar per year, split by ITZ.
# Built from by_year_and_region itself, not the filtered by_year_and_region2.
f4 <- by_year_and_region %>%
  ggplot(aes(x = `ISO_YEAR`, y = `SPEC_PROCESSED_ANNUAL`, fill = `ITZ`)) +
  geom_bar(stat = "identity", position = "stack") +
  labs(
    y = "Specimens Processed",
    x = "Year",
    fill = "Influenza Transmission Zone (ITZ)"
  ) +
  theme_bw()
ggplotly(f4)
# Box plots of annual country totals within each ITZ, limited to country-years
# with at most 20000 specimens.
by_year_and_country2 <- filter(by_year_and_country, SPEC_PROCESSED_ANNUAL <= 20000)
f5 <- by_year_and_country2 %>%
  ggplot(aes(x = `ITZ`, y = `SPEC_PROCESSED_ANNUAL`, color = `ITZ`)) +
  geom_boxplot() +
  labs(y = "Specimens Processed", x = "") +
  theme_bw() +
  # The x tick labels are hidden; the zones are distinguished by colour.
  theme(axis.text.x = element_blank())
ggplotly(f5)
## Warning: The following aesthetics were dropped during statistical transformation:
## y_plotlyDomain
## ℹ This can happen when ggplot fails to infer the correct grouping structure in
## the data.
## ℹ Did you forget to specify a `group` aesthetic or to convert a numerical
## variable into a factor?
Preparing for Analysis of Contextual Factors
# Load the country indicator table from the working directory. readr reports
# five columns, all read as character (including `Values`).
cf <- read_csv(file = "Country_Indicators_Thesis.csv")
## Rows: 3779 Columns: 5
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (5): Series Name, Series Code, Country Name, Country Code, Values
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Harmonise the indicator table's country names with the names used by FluNet.
# Names that are not listed here are left unchanged.
# TODO: still need to figure out what to do with the UK.
cf <- mutate(
  cf,
  `Country Name` = recode(
    `Country Name`,
    "Antigua & Barbuda" = "Antigua and Barbuda",
    "Bahamas, The" = "Bahamas",
    "Bolivia" = "Bolivia (Plurinational State of)",
    "Bosnia and Hercegovina" = "Bosnia and Herzegovina",
    "Czech Republic" = "Czechia",
    "North Korea" = "Democratic People's Republic of Korea",
    "Korea, Dem. People's Rep." = "Democratic People's Republic of Korea",
    "Korea (Democratic People's Rep. of)" = "Democratic People's Republic of Korea",
    "Congo (Democratic Republic)" = "Democratic Republic of the Congo",
    "Congo, Dem. Rep." = "Democratic Republic of the Congo",
    "Egypt, Arab Rep." = "Egypt",
    "Iran" = "Iran (Islamic Republic of)",
    "Iran, Islamic Rep." = "Iran (Islamic Republic of)",
    "Kosovo" = "Kosovo (in accordance with UN Security Council resolution 1244 (1999))",
    "Kyrgyz Republic" = "Kyrgyzstan",
    "Laos" = "Lao People's Democratic Republic",
    "Lao PDR" = "Lao People's Democratic Republic",
    "Moldova" = "Republic of Moldova",
    "Cote d'Ivoire" = "Côte d'Ivoire",
    "Netherlands" = "Netherlands (Kingdom of the)",
    "South Korea" = "Republic of Korea",
    "Korea, Rep." = "Republic of Korea",
    "Korea (Republic of)" = "Republic of Korea",
    "Russia" = "Russian Federation",
    "St Kitts & Nevis" = "Saint Kitts and Nevis",
    "St. Kitts and Nevis" = "Saint Kitts and Nevis",
    "St Lucia" = "Saint Lucia",
    "St. Lucia" = "Saint Lucia",
    "St Vincent & The Grenadines" = "Saint Vincent and the Grenadines",
    "St. Vincent and the Grenadines" = "Saint Vincent and the Grenadines",
    "Slovak Republic" = "Slovakia",
    "Syria" = "Syrian Arab Republic",
    "Tanzania" = "United Republic of Tanzania",
    "Turkey" = "Türkiye",
    "Turkiye" = "Türkiye",
    "United Kingdom of Great Britain and Northern Ireland" = "United Kingdom",
    "Venezuela" = "Venezuela (Bolivarian Republic of)",
    "Venezuela, RB" = "Venezuela (Bolivarian Republic of)",
    "Vietnam" = "Viet Nam"
  )
)
# Reshape the social factors so each row is one country with one column per
# indicator series; the two code columns are dropped first.
cf1 <- cf %>%
  select(-c(`Series Code`, `Country Code`)) %>%
  pivot_wider(names_from = `Series Name`, values_from = `Values`)
# Drop rows that have no entry for this indicator; per the original note this
# gets rid of countries that did not submit to FluNet.
cf1 <- cf1 %>%
  drop_na(`Domestic general government health expenditure (% of general government expenditure)`)
# Replace all ".." values with NA.
# Context values can have NA but not 0, specimen values can have 0 but not NA.
cf1[cf1 == ".."] <- NA
# Convert columns 2 to 22 (the series columns that follow `Country Name`) to
# numeric. across() converts column by column and always returns a tibble,
# whereas the shape returned by sapply() depends on its input.
cf1 <- cf1 %>%
  mutate(across(2:22, as.numeric))
# create data set that is each country + ITZ + column for each year's total (pivot_wider)
# Annual specimen totals per country/area/territory and ITZ; this is the table
# that is later widened to one column per year. na.rm = TRUE keeps one missing
# weekly count from turning a whole year's total into NA, which the later
# zero-fill of data2 would then record as 0.
data2 <- data %>%
  group_by(`COUNTRY/AREA/TERRITORY`, `ISO_YEAR`, `ITZ`) %>%
  summarize(SPEC_PROCESSED_ANNUAL = sum(`SPEC_PROCESSED_NB`, na.rm = TRUE))
## `summarise()` has grouped output by 'COUNTRY/AREA/TERRITORY', 'ISO_YEAR'. You
## can override using the `.groups` argument.
# Widen the annual totals: one "Specimens Processed in <year>" column per ISO
# year.
data2 <- pivot_wider(
  data2,
  names_from = `ISO_YEAR`,
  values_from = `SPEC_PROCESSED_ANNUAL`,
  names_prefix = "Specimens Processed in "
)
# Cells without a total are recorded as 0.
data2[is.na(data2)] <- 0
# Join the social factors onto the yearly totals by country name. merge() with
# its default settings keeps only countries present in both tables.
data3 <- merge(
  x = data2,
  y = cf1,
  by.x = "COUNTRY/AREA/TERRITORY",
  by.y = "Country Name"
)
Creating a Data Table Based on Rankings of Values
# Replace every column of data3 by its within-column rank (tied values share
# the lowest rank) and store the result as a tibble.
# NOTE(review): rank() is applied to all columns, including the country and
# ITZ columns, so data4 no longer holds those labels -- confirm this is intended.
data4 <- as_tibble(lapply(data3, rank, ties.method = "min"))
Regression Analysis of Contextual Factors
Domestic general government health expenditure (% of general
government expenditure)
# Linear model with government health expenditure (% of government
# expenditure) as the response and the 2022 specimen total as the only
# predictor; summary() prints the coefficient table and fit statistics.
# NOTE(review): the scatter plot for this indicator puts it on the x axis
# against log10 specimen counts, so model and plot differ in orientation and
# scale -- confirm this is intended.
data3 %>%
lm(`Domestic general government health expenditure (% of general government expenditure)`~`Specimens Processed in 2022`, data=.) %>%
summary()
##
## Call:
## lm(formula = `Domestic general government health expenditure (% of general government expenditure)` ~
## `Specimens Processed in 2022`, data = .)
##
## Residuals:
## Min 1Q Median 3Q Max
## -9.8206 -3.4613 -0.3288 2.8199 13.7624
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.045e+01 4.121e-01 25.344 < 2e-16 ***
## `Specimens Processed in 2022` 1.090e-05 3.203e-06 3.404 0.000864 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 4.691 on 141 degrees of freedom
## (9 observations deleted due to missingness)
## Multiple R-squared: 0.07595, Adjusted R-squared: 0.0694
## F-statistic: 11.59 on 1 and 141 DF, p-value: 0.0008639
# Scatter of 2022 specimen totals (log10 y axis) against government health
# expenditure, with a straight-line fit from geom_smooth(). Rows with a zero
# total become -Inf on the log scale and are left out of the fit.
data3 %>%
  ggplot(aes(
    x = `Domestic general government health expenditure (% of general government expenditure)`,
    y = `Specimens Processed in 2022`
  )) +
  geom_point() +
  geom_smooth(method = "lm") +
  scale_y_log10()
## Warning: Transformation introduced infinite values in continuous y-axis
## Transformation introduced infinite values in continuous y-axis
## `geom_smooth()` using formula = 'y ~ x'
## Warning: Removed 20 rows containing non-finite values (`stat_smooth()`).
## Warning: Removed 9 rows containing missing values (`geom_point()`).

Current health expenditure (% of GDP)
# Linear model with current health expenditure (% of GDP) as the response and
# the 2022 specimen total as the only predictor; summary() prints the
# coefficient table and fit statistics.
# NOTE(review): the scatter plot for this indicator puts it on the x axis
# against log10 specimen counts, so model and plot differ in orientation and
# scale -- confirm this is intended.
data3 %>%
lm(`Current health expenditure (% of GDP)`~`Specimens Processed in 2022`, data=.) %>%
summary()
##
## Call:
## lm(formula = `Current health expenditure (% of GDP)` ~ `Specimens Processed in 2022`,
## data = .)
##
## Residuals:
## Min 1Q Median 3Q Max
## -4.1457 -1.8637 -0.0041 1.5331 7.0219
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 6.198e+00 1.987e-01 31.188 < 2e-16 ***
## `Specimens Processed in 2022` 6.728e-06 1.544e-06 4.356 2.53e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.262 on 141 degrees of freedom
## (9 observations deleted due to missingness)
## Multiple R-squared: 0.1186, Adjusted R-squared: 0.1124
## F-statistic: 18.98 on 1 and 141 DF, p-value: 2.53e-05
# Scatter of 2022 specimen totals (log10 y axis) against current health
# expenditure, with a straight-line fit from geom_smooth(). Rows with a zero
# total become -Inf on the log scale and are left out of the fit.
data3 %>%
  ggplot(aes(
    x = `Current health expenditure (% of GDP)`,
    y = `Specimens Processed in 2022`
  )) +
  geom_point() +
  geom_smooth(method = "lm") +
  scale_y_log10()
## Warning: Transformation introduced infinite values in continuous y-axis
## Transformation introduced infinite values in continuous y-axis
## `geom_smooth()` using formula = 'y ~ x'
## Warning: Removed 20 rows containing non-finite values (`stat_smooth()`).
## Warning: Removed 9 rows containing missing values (`geom_point()`).

Births attended by skilled health staff (% of total)
# Linear model with births attended by skilled health staff (% of total) as
# the response and the 2022 specimen total as the only predictor; summary()
# prints the coefficient table and fit statistics.
# NOTE(review): the scatter plot for this indicator puts it on the x axis
# against log10 specimen counts, so model and plot differ in orientation and
# scale -- confirm this is intended.
data3 %>%
lm(`Births attended by skilled health staff (% of total)`~`Specimens Processed in 2022`, data=.) %>%
summary()
##
## Call:
## lm(formula = `Births attended by skilled health staff (% of total)` ~
## `Specimens Processed in 2022`, data = .)
##
## Residuals:
## Min 1Q Median 3Q Max
## -54.407 1.488 5.231 6.434 6.797
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 9.320e+01 1.714e+00 54.375 <2e-16 ***
## `Specimens Processed in 2022` 1.066e-05 1.051e-05 1.014 0.314
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 13.34 on 67 degrees of freedom
## (83 observations deleted due to missingness)
## Multiple R-squared: 0.01511, Adjusted R-squared: 0.0004106
## F-statistic: 1.028 on 1 and 67 DF, p-value: 0.3143
# Scatter of 2022 specimen totals (log10 y axis) against births attended by
# skilled health staff, with a straight-line fit from geom_smooth(). Rows with
# a zero total become -Inf on the log scale and are left out of the fit.
data3 %>%
  ggplot(aes(
    x = `Births attended by skilled health staff (% of total)`,
    y = `Specimens Processed in 2022`
  )) +
  geom_point() +
  geom_smooth(method = "lm") +
  scale_y_log10()
## Warning: Transformation introduced infinite values in continuous y-axis
## Transformation introduced infinite values in continuous y-axis
## `geom_smooth()` using formula = 'y ~ x'
## Warning: Removed 90 rows containing non-finite values (`stat_smooth()`).
## Warning: Removed 83 rows containing missing values (`geom_point()`).

Prevalence of HIV, total (% of population ages 15-49)
# Linear model with HIV prevalence (% of population ages 15-49) as the response
# and the 2022 specimen total as the only predictor; summary() prints the
# coefficient table and fit statistics.
# NOTE(review): the scatter plot for this indicator puts it on the x axis
# against log10 specimen counts, so model and plot differ in orientation and
# scale -- confirm this is intended.
data3 %>%
lm(`Prevalence of HIV, total (% of population ages 15-49)`~`Specimens Processed in 2022`, data=.) %>%
summary()
##
## Call:
## lm(formula = `Prevalence of HIV, total (% of population ages 15-49)` ~
## `Specimens Processed in 2022`, data = .)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.1410 -1.0386 -0.8360 -0.1414 17.3894
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.241e+00 2.645e-01 4.693 7.73e-06 ***
## `Specimens Processed in 2022` -2.908e-06 2.745e-06 -1.060 0.292
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.663 on 111 degrees of freedom
## (39 observations deleted due to missingness)
## Multiple R-squared: 0.01001, Adjusted R-squared: 0.001096
## F-statistic: 1.123 on 1 and 111 DF, p-value: 0.2916
# Scatter of 2022 specimen totals (log10 y axis) against HIV prevalence, with a
# straight-line fit from geom_smooth(). Rows with a zero total become -Inf on
# the log scale and are left out of the fit.
data3 %>%
  ggplot(aes(
    x = `Prevalence of HIV, total (% of population ages 15-49)`,
    y = `Specimens Processed in 2022`
  )) +
  geom_point() +
  geom_smooth(method = "lm") +
  scale_y_log10()
## Warning: Transformation introduced infinite values in continuous y-axis
## Transformation introduced infinite values in continuous y-axis
## `geom_smooth()` using formula = 'y ~ x'
## Warning: Removed 43 rows containing non-finite values (`stat_smooth()`).
## Warning: Removed 39 rows containing missing values (`geom_point()`).

Antiretroviral therapy coverage (% of people living with HIV)
# Linear model with antiretroviral therapy coverage (% of people living with
# HIV) as the response and the 2022 specimen total as the only predictor;
# summary() prints the coefficient table and fit statistics.
# NOTE(review): the scatter plot for this indicator puts it on the x axis
# against log10 specimen counts, so model and plot differ in orientation and
# scale -- confirm this is intended.
data3 %>%
lm(`Antiretroviral therapy coverage (% of people living with HIV)`~`Specimens Processed in 2022`, data=.) %>%
summary()
##
## Call:
## lm(formula = `Antiretroviral therapy coverage (% of people living with HIV)` ~
## `Specimens Processed in 2022`, data = .)
##
## Residuals:
## Min 1Q Median 3Q Max
## -46.758 -10.658 2.358 12.255 30.237
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 5.757e+01 1.795e+00 32.068 < 2e-16 ***
## `Specimens Processed in 2022` 5.834e-05 1.853e-05 3.148 0.00213 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 17.79 on 107 degrees of freedom
## (43 observations deleted due to missingness)
## Multiple R-squared: 0.08478, Adjusted R-squared: 0.07623
## F-statistic: 9.912 on 1 and 107 DF, p-value: 0.002128
# Scatter of 2022 specimen totals (log10 y axis) against antiretroviral therapy
# coverage, with a straight-line fit from geom_smooth(). Rows with a zero total
# become -Inf on the log scale and are left out of the fit.
data3 %>%
  ggplot(aes(
    x = `Antiretroviral therapy coverage (% of people living with HIV)`,
    y = `Specimens Processed in 2022`
  )) +
  geom_point() +
  geom_smooth(method = "lm") +
  scale_y_log10()
## Warning: Transformation introduced infinite values in continuous y-axis
## Transformation introduced infinite values in continuous y-axis
## `geom_smooth()` using formula = 'y ~ x'
## Warning: Removed 47 rows containing non-finite values (`stat_smooth()`).
## Warning: Removed 43 rows containing missing values (`geom_point()`).

Immunization, measles (% of children ages 12-23 months)
# Linear model with measles immunization (% of children ages 12-23 months) as
# the response and the 2022 specimen total as the only predictor; summary()
# prints the coefficient table and fit statistics.
# NOTE(review): the scatter plot for this indicator puts it on the x axis
# against log10 specimen counts, so model and plot differ in orientation and
# scale -- confirm this is intended.
data3 %>%
lm(`Immunization, measles (% of children ages 12-23 months)`~`Specimens Processed in 2022`, data=.) %>%
summary()
##
## Call:
## lm(formula = `Immunization, measles (% of children ages 12-23 months)` ~
## `Specimens Processed in 2022`, data = .)
##
## Residuals:
## Min 1Q Median 3Q Max
## -54.812 -4.572 5.154 8.764 11.200
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 8.780e+01 1.172e+00 74.922 <2e-16 ***
## `Specimens Processed in 2022` 1.482e-05 9.200e-06 1.611 0.109
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 13.48 on 144 degrees of freedom
## (6 observations deleted due to missingness)
## Multiple R-squared: 0.01771, Adjusted R-squared: 0.01089
## F-statistic: 2.596 on 1 and 144 DF, p-value: 0.1093
# Scatter of 2022 specimen totals (log10 y axis) against measles immunization,
# with a straight-line fit from geom_smooth(). Rows with a zero total become
# -Inf on the log scale and are left out of the fit.
data3 %>%
  ggplot(aes(
    x = `Immunization, measles (% of children ages 12-23 months)`,
    y = `Specimens Processed in 2022`
  )) +
  geom_point() +
  geom_smooth(method = "lm") +
  scale_y_log10()
## Warning: Transformation introduced infinite values in continuous y-axis
## Transformation introduced infinite values in continuous y-axis
## `geom_smooth()` using formula = 'y ~ x'
## Warning: Removed 17 rows containing non-finite values (`stat_smooth()`).
## Warning: Removed 6 rows containing missing values (`geom_point()`).

Immunization, DPT (% of children ages 12-23 months)
# Linear model with DPT immunization (% of children ages 12-23 months) as the
# response and the 2022 specimen total as the only predictor; summary() prints
# the coefficient table and fit statistics.
# NOTE(review): the scatter plot for this indicator puts it on the x axis
# against log10 specimen counts, so model and plot differ in orientation and
# scale -- confirm this is intended.
data3 %>%
lm(`Immunization, DPT (% of children ages 12-23 months)`~`Specimens Processed in 2022`, data=.) %>%
summary()
##
## Call:
## lm(formula = `Immunization, DPT (% of children ages 12-23 months)` ~
## `Specimens Processed in 2022`, data = .)
##
## Residuals:
## Min 1Q Median 3Q Max
## -46.570 -3.588 3.601 8.390 10.441
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 8.856e+01 1.038e+00 85.302 <2e-16 ***
## `Specimens Processed in 2022` 1.363e-05 8.151e-06 1.672 0.0967 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 11.95 on 144 degrees of freedom
## (6 observations deleted due to missingness)
## Multiple R-squared: 0.01905, Adjusted R-squared: 0.01224
## F-statistic: 2.796 on 1 and 144 DF, p-value: 0.09665
# Scatter of 2022 specimen totals (log10 y axis) against DPT immunization, with
# a straight-line fit from geom_smooth(). Rows with a zero total become -Inf on
# the log scale and are left out of the fit.
data3 %>%
  ggplot(aes(
    x = `Immunization, DPT (% of children ages 12-23 months)`,
    y = `Specimens Processed in 2022`
  )) +
  geom_point() +
  geom_smooth(method = "lm") +
  scale_y_log10()
## Warning: Transformation introduced infinite values in continuous y-axis
## Transformation introduced infinite values in continuous y-axis
## `geom_smooth()` using formula = 'y ~ x'
## Warning: Removed 17 rows containing non-finite values (`stat_smooth()`).
## Warning: Removed 6 rows containing missing values (`geom_point()`).

Cause of death, by communicable diseases and maternal, prenatal and
nutrition conditions (% of total)
# Linear model with the share of deaths caused by communicable diseases and
# maternal, prenatal and nutrition conditions (% of total) as the response and
# the 2022 specimen total as the only predictor; summary() prints the
# coefficient table and fit statistics.
# NOTE(review): the scatter plot for this indicator puts it on the x axis
# against log10 specimen counts, so model and plot differ in orientation and
# scale -- confirm this is intended.
data3 %>%
lm(`Cause of death, by communicable diseases and maternal, prenatal and nutrition conditions (% of total)`~`Specimens Processed in 2022`, data=.) %>%
summary()
##
## Call:
## lm(formula = `Cause of death, by communicable diseases and maternal, prenatal and nutrition conditions (% of total)` ~
## `Specimens Processed in 2022`, data = .)
##
## Residuals:
## Min 1Q Median 3Q Max
## -17.802 -12.318 -6.969 4.497 46.006
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.924e+01 1.549e+00 12.421 <2e-16 ***
## `Specimens Processed in 2022` -2.779e-05 1.208e-05 -2.302 0.0228 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 17.68 on 142 degrees of freedom
## (8 observations deleted due to missingness)
## Multiple R-squared: 0.03596, Adjusted R-squared: 0.02917
## F-statistic: 5.297 on 1 and 142 DF, p-value: 0.02281
# Scatter of 2022 specimen totals (log10 y axis) against the share of deaths
# from communicable diseases and maternal, prenatal and nutrition conditions,
# with a straight-line fit from geom_smooth(). Rows with a zero total become
# -Inf on the log scale and are left out of the fit.
data3 %>%
  ggplot(aes(
    x = `Cause of death, by communicable diseases and maternal, prenatal and nutrition conditions (% of total)`,
    y = `Specimens Processed in 2022`
  )) +
  geom_point() +
  geom_smooth(method = "lm") +
  scale_y_log10()
## Warning: Transformation introduced infinite values in continuous y-axis
## Transformation introduced infinite values in continuous y-axis
## `geom_smooth()` using formula = 'y ~ x'
## Warning: Removed 17 rows containing non-finite values (`stat_smooth()`).
## Warning: Removed 8 rows containing missing values (`geom_point()`).

Life expectancy at birth, total (years)
# Linear model with life expectancy at birth (years) as the response and the
# 2022 specimen total as the only predictor; summary() prints the coefficient
# table and fit statistics.
# NOTE(review): the scatter plot for this indicator puts it on the x axis
# against log10 specimen counts, so model and plot differ in orientation and
# scale -- confirm this is intended.
data3 %>%
lm(`Life expectancy at birth, total (years)`~`Specimens Processed in 2022`, data=.) %>%
summary()
##
## Call:
## lm(formula = `Life expectancy at birth, total (years)` ~ `Specimens Processed in 2022`,
## data = .)
##
## Residuals:
## Min 1Q Median 3Q Max
## -20.014 -3.485 1.185 4.765 11.455
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 7.290e+01 5.940e-01 122.724 < 2e-16 ***
## `Specimens Processed in 2022` 1.763e-05 4.711e-06 3.743 0.000261 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 6.912 on 147 degrees of freedom
## (3 observations deleted due to missingness)
## Multiple R-squared: 0.08699, Adjusted R-squared: 0.08078
## F-statistic: 14.01 on 1 and 147 DF, p-value: 0.0002606
# Scatter of 2022 specimen totals (log10 y axis) against life expectancy at
# birth, with a straight-line fit from geom_smooth(). Rows with a zero total
# become -Inf on the log scale and are left out of the fit.
data3 %>%
  ggplot(aes(
    x = `Life expectancy at birth, total (years)`,
    y = `Specimens Processed in 2022`
  )) +
  geom_point() +
  geom_smooth(method = "lm") +
  scale_y_log10()
## Warning: Transformation introduced infinite values in continuous y-axis
## Transformation introduced infinite values in continuous y-axis
## `geom_smooth()` using formula = 'y ~ x'
## Warning: Removed 16 rows containing non-finite values (`stat_smooth()`).
## Warning: Removed 3 rows containing missing values (`geom_point()`).

GDP per capita (current US$)
# Linear model with GDP per capita (current US$) as the response and the 2022
# specimen total as the only predictor; summary() prints the coefficient table
# and fit statistics.
# NOTE(review): the scatter plot for this indicator puts it on the x axis
# against log10 specimen counts, so model and plot differ in orientation and
# scale -- confirm this is intended.
data3 %>%
lm(`GDP per capita (current US$)`~`Specimens Processed in 2022`, data=.) %>%
summary()
##
## Call:
## lm(formula = `GDP per capita (current US$)` ~ `Specimens Processed in 2022`,
## data = .)
##
## Residuals:
## Min 1Q Median 3Q Max
## -29505 -13177 -8874 2738 101187
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.497e+04 1.902e+03 7.870 7.74e-13 ***
## `Specimens Processed in 2022` 5.036e-02 1.493e-02 3.373 0.000954 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 21880 on 144 degrees of freedom
## (6 observations deleted due to missingness)
## Multiple R-squared: 0.07323, Adjusted R-squared: 0.0668
## F-statistic: 11.38 on 1 and 144 DF, p-value: 0.0009544
# Scatter of 2022 specimen totals (log10 y axis) against GDP per capita, with a
# straight-line fit from geom_smooth(). Rows with a zero total become -Inf on
# the log scale and are left out of the fit.
data3 %>%
  ggplot(aes(
    x = `GDP per capita (current US$)`,
    y = `Specimens Processed in 2022`
  )) +
  geom_point() +
  geom_smooth(method = "lm") +
  scale_y_log10()
## Warning: Transformation introduced infinite values in continuous y-axis
## Transformation introduced infinite values in continuous y-axis
## `geom_smooth()` using formula = 'y ~ x'
## Warning: Removed 21 rows containing non-finite values (`stat_smooth()`).
## Warning: Removed 6 rows containing missing values (`geom_point()`).

Access to electricity (% of population)
# Linear model with access to electricity (% of population) as the response
# and the 2022 specimen total as the only predictor; summary() prints the
# coefficient table and fit statistics.
# NOTE(review): the scatter plot for this indicator puts it on the x axis
# against log10 specimen counts, so model and plot differ in orientation and
# scale -- confirm this is intended.
data3 %>%
lm(`Access to electricity (% of population)`~`Specimens Processed in 2022`, data=.) %>%
summary()
##
## Call:
## lm(formula = `Access to electricity (% of population)` ~ `Specimens Processed in 2022`,
## data = .)
##
## Residuals:
## Min 1Q Median 3Q Max
## -80.527 2.236 12.018 12.640 12.783
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 8.722e+01 1.996e+00 43.689 <2e-16 ***
## `Specimens Processed in 2022` 2.830e-05 1.594e-05 1.776 0.0778 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 23.4 on 149 degrees of freedom
## (1 observation deleted due to missingness)
## Multiple R-squared: 0.02072, Adjusted R-squared: 0.01415
## F-statistic: 3.153 on 1 and 149 DF, p-value: 0.07784
# Scatter of 2022 specimen totals (log10 y axis) against access to electricity,
# with a straight-line fit from geom_smooth(). Rows with a zero total become
# -Inf on the log scale and are left out of the fit.
data3 %>%
  ggplot(aes(
    x = `Access to electricity (% of population)`,
    y = `Specimens Processed in 2022`
  )) +
  geom_point() +
  geom_smooth(method = "lm") +
  scale_y_log10()
## Warning: Transformation introduced infinite values in continuous y-axis
## Transformation introduced infinite values in continuous y-axis
## `geom_smooth()` using formula = 'y ~ x'
## Warning: Removed 17 rows containing non-finite values (`stat_smooth()`).
## Warning: Removed 1 rows containing missing values (`geom_point()`).

Literacy rate, adult total (% of people ages 15 and above)
# Linear model with the adult literacy rate (% of people ages 15 and above) as
# the response and the 2022 specimen total as the only predictor; summary()
# prints the coefficient table and fit statistics.
# NOTE(review): the scatter plot for this indicator puts it on the x axis
# against log10 specimen counts, so model and plot differ in orientation and
# scale -- confirm this is intended.
data3 %>%
lm(`Literacy rate, adult total (% of people ages 15 and above)`~`Specimens Processed in 2022`, data=.) %>%
summary()
##
## Call:
## lm(formula = `Literacy rate, adult total (% of people ages 15 and above)` ~
## `Specimens Processed in 2022`, data = .)
##
## Residuals:
## Min 1Q Median 3Q Max
## -31.843 -2.844 3.509 6.466 9.210
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 93.1982757 2.6191591 35.583 <2e-16 ***
## `Specimens Processed in 2022` -0.0001152 0.0001507 -0.764 0.453
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 10.66 on 23 degrees of freedom
## (127 observations deleted due to missingness)
## Multiple R-squared: 0.02475, Adjusted R-squared: -0.01765
## F-statistic: 0.5837 on 1 and 23 DF, p-value: 0.4526
# Scatter of 2022 specimen totals (log10 y axis) against the adult literacy
# rate, with a straight-line fit from geom_smooth(). Rows with a zero total
# become -Inf on the log scale and are left out of the fit.
data3 %>%
  ggplot(aes(
    x = `Literacy rate, adult total (% of people ages 15 and above)`,
    y = `Specimens Processed in 2022`
  )) +
  geom_point() +
  geom_smooth(method = "lm") +
  scale_y_log10()
## Warning: Transformation introduced infinite values in continuous y-axis
## Transformation introduced infinite values in continuous y-axis
## `geom_smooth()` using formula = 'y ~ x'
## Warning: Removed 127 rows containing non-finite values (`stat_smooth()`).
## Warning: Removed 127 rows containing missing values (`geom_point()`).

Primary completion rate, total (% of relevant age group)
# Linear model with the primary completion rate (% of relevant age group) as
# the response and the 2022 specimen total as the only predictor; summary()
# prints the coefficient table and fit statistics.
# NOTE(review): the scatter plot for this indicator puts it on the x axis
# against log10 specimen counts, so model and plot differ in orientation and
# scale -- confirm this is intended.
data3 %>%
lm(`Primary completion rate, total (% of relevant age group)`~`Specimens Processed in 2022`, data=.) %>%
summary()
##
## Call:
## lm(formula = `Primary completion rate, total (% of relevant age group)` ~
## `Specimens Processed in 2022`, data = .)
##
## Residuals:
## Min 1Q Median 3Q Max
## -38.364 -4.757 2.790 7.759 27.252
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 9.297e+01 1.321e+00 70.38 <2e-16 ***
## `Specimens Processed in 2022` 2.860e-05 1.498e-05 1.91 0.0591 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 12.35 on 98 degrees of freedom
## (52 observations deleted due to missingness)
## Multiple R-squared: 0.03588, Adjusted R-squared: 0.02604
## F-statistic: 3.647 on 1 and 98 DF, p-value: 0.05909
# Scatter of 2022 specimen totals (log10 y axis) against the primary completion
# rate, with a straight-line fit from geom_smooth(). Rows with a zero total
# become -Inf on the log scale and are left out of the fit.
data3 %>%
  ggplot(aes(
    x = `Primary completion rate, total (% of relevant age group)`,
    y = `Specimens Processed in 2022`
  )) +
  geom_point() +
  geom_smooth(method = "lm") +
  scale_y_log10()
## Warning: Transformation introduced infinite values in continuous y-axis
## Transformation introduced infinite values in continuous y-axis
## `geom_smooth()` using formula = 'y ~ x'
## Warning: Removed 57 rows containing non-finite values (`stat_smooth()`).
## Warning: Removed 52 rows containing missing values (`geom_point()`).

Unemployment, total (% of total labor force) (modeled ILO
estimate)
# Linear model with unemployment (% of total labor force, modeled ILO estimate)
# as the response and the 2022 specimen total as the only predictor; summary()
# prints the coefficient table and fit statistics.
# NOTE(review): the scatter plot for this indicator puts it on the x axis
# against log10 specimen counts, so model and plot differ in orientation and
# scale -- confirm this is intended.
data3 %>%
lm(`Unemployment, total (% of total labor force) (modeled ILO estimate)`~`Specimens Processed in 2022`, data=.) %>%
summary()
##
## Call:
## lm(formula = `Unemployment, total (% of total labor force) (modeled ILO estimate)` ~
## `Specimens Processed in 2022`, data = .)
##
## Residuals:
## Min 1Q Median 3Q Max
## -6.472 -3.028 -1.485 1.954 21.896
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 6.577e+00 4.216e-01 15.600 <2e-16 ***
## `Specimens Processed in 2022` -2.151e-07 3.253e-06 -0.066 0.947
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 4.758 on 139 degrees of freedom
## (11 observations deleted due to missingness)
## Multiple R-squared: 3.147e-05, Adjusted R-squared: -0.007163
## F-statistic: 0.004374 on 1 and 139 DF, p-value: 0.9474
# Scatter of 2022 specimen totals (log10 y axis) against unemployment, with a
# straight-line fit from geom_smooth(). Rows with a zero total become -Inf on
# the log scale and are left out of the fit.
data3 %>%
  ggplot(aes(
    x = `Unemployment, total (% of total labor force) (modeled ILO estimate)`,
    y = `Specimens Processed in 2022`
  )) +
  geom_point() +
  geom_smooth(method = "lm") +
  scale_y_log10()
## Warning: Transformation introduced infinite values in continuous y-axis
## Transformation introduced infinite values in continuous y-axis
## `geom_smooth()` using formula = 'y ~ x'
## Warning: Removed 18 rows containing non-finite values (`stat_smooth()`).
## Warning: Removed 11 rows containing missing values (`geom_point()`).

Urban population (% of total population)
# Linear model with urban population (% of total population) as the response
# and the 2022 specimen total as the only predictor; summary() prints the
# coefficient table and fit statistics.
# NOTE(review): the scatter plot for this indicator puts it on the x axis
# against log10 specimen counts, so model and plot differ in orientation and
# scale -- confirm this is intended.
data3 %>%
lm(`Urban population (% of total population)`~`Specimens Processed in 2022`, data=.) %>%
summary()
##
## Call:
## lm(formula = `Urban population (% of total population)` ~ `Specimens Processed in 2022`,
## data = .)
##
## Residuals:
## Min 1Q Median 3Q Max
## -43.197 -16.942 -0.957 15.612 40.421
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 5.958e+01 1.828e+00 32.598 < 2e-16 ***
## `Specimens Processed in 2022` 4.314e-05 1.454e-05 2.966 0.00352 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 21.35 on 148 degrees of freedom
## (2 observations deleted due to missingness)
## Multiple R-squared: 0.05611, Adjusted R-squared: 0.04974
## F-statistic: 8.799 on 1 and 148 DF, p-value: 0.003516
# Scatter of 2022 specimen totals (log10 y axis) against urban population
# share, with a straight-line fit from geom_smooth(). Rows with a zero total
# become -Inf on the log scale and are left out of the fit.
data3 %>%
  ggplot(aes(
    x = `Urban population (% of total population)`,
    y = `Specimens Processed in 2022`
  )) +
  geom_point() +
  geom_smooth(method = "lm") +
  scale_y_log10()
## Warning: Transformation introduced infinite values in continuous y-axis
## Transformation introduced infinite values in continuous y-axis
## `geom_smooth()` using formula = 'y ~ x'
## Warning: Removed 18 rows containing non-finite values (`stat_smooth()`).
## Warning: Removed 2 rows containing missing values (`geom_point()`).

Agricultural land (% of land area)
# Linear model with agricultural land (% of land area) as the response and the
# 2022 specimen total as the only predictor; summary() prints the coefficient
# table and fit statistics.
# NOTE(review): the scatter plot for this indicator puts it on the x axis
# against log10 specimen counts, so model and plot differ in orientation and
# scale -- confirm this is intended.
data3 %>%
lm(`Agricultural land (% of land area)`~`Specimens Processed in 2022`, data=.) %>%
summary()
##
## Call:
## lm(formula = `Agricultural land (% of land area)` ~ `Specimens Processed in 2022`,
## data = .)
##
## Residuals:
## Min 1Q Median 3Q Max
## -37.955 -17.137 1.341 14.856 42.398
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 3.849e+01 1.778e+00 21.645 <2e-16 ***
## `Specimens Processed in 2022` -1.405e-05 1.420e-05 -0.989 0.324
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 20.84 on 149 degrees of freedom
## (1 observation deleted due to missingness)
## Multiple R-squared: 0.006525, Adjusted R-squared: -0.0001423
## F-statistic: 0.9787 on 1 and 149 DF, p-value: 0.3241
# Scatter of 2022 specimen totals (log10 y axis) against agricultural land
# share, with a straight-line fit from geom_smooth(). Rows with a zero total
# become -Inf on the log scale and are left out of the fit.
data3 %>%
  ggplot(aes(
    x = `Agricultural land (% of land area)`,
    y = `Specimens Processed in 2022`
  )) +
  geom_point() +
  geom_smooth(method = "lm") +
  scale_y_log10()
## Warning: Transformation introduced infinite values in continuous y-axis
## Transformation introduced infinite values in continuous y-axis
## `geom_smooth()` using formula = 'y ~ x'
## Warning: Removed 17 rows containing non-finite values (`stat_smooth()`).
## Warning: Removed 1 rows containing missing values (`geom_point()`).

Population, total
# Linear model with total population as the response and the 2022 specimen
# total as the only predictor; summary() prints the coefficient table and fit
# statistics.
# NOTE(review): the scatter plot for this indicator puts it on the x axis
# against log10 specimen counts, so model and plot differ in orientation and
# scale -- confirm this is intended.
data3 %>%
lm(`Population, total`~`Specimens Processed in 2022`, data=.) %>%
summary()
##
## Call:
## lm(formula = `Population, total` ~ `Specimens Processed in 2022`,
## data = .)
##
## Residuals:
## Min 1Q Median 3Q Max
## -296313231 -34249923 -26839542 -7418448 1334095040
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 3.450e+07 1.349e+07 2.558 0.0115 *
## `Specimens Processed in 2022` 3.206e+02 1.081e+02 2.967 0.0035 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 158700000 on 150 degrees of freedom
## Multiple R-squared: 0.05543, Adjusted R-squared: 0.04913
## F-statistic: 8.802 on 1 and 150 DF, p-value: 0.003502
# Scatter of 2022 specimen totals (log10 y axis) against total population, with
# a straight-line fit from geom_smooth(). Rows with a zero total become -Inf on
# the log scale and are left out of the fit.
data3 %>%
  ggplot(aes(
    x = `Population, total`,
    y = `Specimens Processed in 2022`
  )) +
  geom_point() +
  geom_smooth(method = "lm") +
  scale_y_log10()
## Warning: Transformation introduced infinite values in continuous y-axis
## Transformation introduced infinite values in continuous y-axis
## `geom_smooth()` using formula = 'y ~ x'
## Warning: Removed 16 rows containing non-finite values (`stat_smooth()`).

Mobile cellular subscriptions (per 100 people)
# Linear model with mobile cellular subscriptions (per 100 people) as the
# response and the 2022 specimen total as the only predictor; summary() prints
# the coefficient table and fit statistics.
# NOTE(review): the scatter plot for this indicator puts it on the x axis
# against log10 specimen counts, so model and plot differ in orientation and
# scale -- confirm this is intended.
data3 %>%
lm(`Mobile cellular subscriptions (per 100 people)`~`Specimens Processed in 2022`, data=.) %>%
summary()
##
## Call:
## lm(formula = `Mobile cellular subscriptions (per 100 people)` ~
## `Specimens Processed in 2022`, data = .)
##
## Residuals:
## Min 1Q Median 3Q Max
## -95.907 -18.502 3.442 20.405 100.348
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.124e+02 2.852e+00 39.412 <2e-16 ***
## `Specimens Processed in 2022` 5.594e-06 2.270e-05 0.246 0.806
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 33.31 on 148 degrees of freedom
## (2 observations deleted due to missingness)
## Multiple R-squared: 0.0004102, Adjusted R-squared: -0.006344
## F-statistic: 0.06074 on 1 and 148 DF, p-value: 0.8057
# Scatter of 2022 specimen totals (log10 y axis) against mobile cellular
# subscriptions, with a straight-line fit from geom_smooth(). Rows with a zero
# total become -Inf on the log scale and are left out of the fit.
data3 %>%
  ggplot(aes(
    x = `Mobile cellular subscriptions (per 100 people)`,
    y = `Specimens Processed in 2022`
  )) +
  geom_point() +
  geom_smooth(method = "lm") +
  scale_y_log10()
## Warning: Transformation introduced infinite values in continuous y-axis
## Transformation introduced infinite values in continuous y-axis
## `geom_smooth()` using formula = 'y ~ x'
## Warning: Removed 17 rows containing non-finite values (`stat_smooth()`).
## Warning: Removed 2 rows containing missing values (`geom_point()`).

Scientific and technical journal articles
# Linear model with the number of scientific and technical journal articles as
# the response and the 2022 specimen total as the only predictor; summary()
# prints the coefficient table and fit statistics.
# NOTE(review): the scatter plot for this indicator puts it on the x axis
# against log10 specimen counts, so model and plot differ in orientation and
# scale -- confirm this is intended.
data3 %>%
lm(`Scientific and technical journal articles`~`Specimens Processed in 2022`, data=.) %>%
summary()
##
## Call:
## lm(formula = `Scientific and technical journal articles` ~ `Specimens Processed in 2022`,
## data = .)
##
## Residuals:
## Min 1Q Median 3Q Max
## -109302 -7259 -6761 -1868 436192
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 6819.9230 3744.5104 1.821 0.0706 .
## `Specimens Processed in 2022` 0.1739 0.0295 5.897 2.5e-08 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 43250 on 145 degrees of freedom
## (5 observations deleted due to missingness)
## Multiple R-squared: 0.1934, Adjusted R-squared: 0.1878
## F-statistic: 34.77 on 1 and 145 DF, p-value: 2.501e-08
# Scatter of 2022 specimen totals (log10 y axis) against scientific and
# technical journal articles, with a straight-line fit from geom_smooth(). Rows
# with a zero total become -Inf on the log scale and are left out of the fit.
data3 %>%
  ggplot(aes(
    x = `Scientific and technical journal articles`,
    y = `Specimens Processed in 2022`
  )) +
  geom_point() +
  geom_smooth(method = "lm") +
  scale_y_log10()
## Warning: Transformation introduced infinite values in continuous y-axis
## Transformation introduced infinite values in continuous y-axis
## `geom_smooth()` using formula = 'y ~ x'
## Warning: Removed 16 rows containing non-finite values (`stat_smooth()`).
## Warning: Removed 5 rows containing missing values (`geom_point()`).

Gender Inequality Index
# Linear model with the Gender Inequality Index as the response and the 2022
# specimen total as the only predictor; summary() prints the coefficient table
# and fit statistics.
# NOTE(review): the scatter plot for this indicator puts it on the x axis
# against log10 specimen counts, so model and plot differ in orientation and
# scale -- confirm this is intended.
data3 %>%
lm(`Gender Inequality Index`~`Specimens Processed in 2022`, data=.) %>%
summary()
##
## Call:
## lm(formula = `Gender Inequality Index` ~ `Specimens Processed in 2022`,
## data = .)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.32223 -0.14120 0.00793 0.13994 0.33871
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 3.419e-01 1.620e-02 21.111 < 2e-16 ***
## `Specimens Processed in 2022` -5.082e-07 1.236e-07 -4.111 6.79e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.1807 on 136 degrees of freedom
## (14 observations deleted due to missingness)
## Multiple R-squared: 0.1105, Adjusted R-squared: 0.104
## F-statistic: 16.9 on 1 and 136 DF, p-value: 6.785e-05
# Scatter of 2022 specimen totals (log10 y axis) against the Gender Inequality
# Index, with a straight-line fit from geom_smooth(). Rows with a zero total
# become -Inf on the log scale and are left out of the fit.
data3 %>%
  ggplot(aes(
    x = `Gender Inequality Index`,
    y = `Specimens Processed in 2022`
  )) +
  geom_point() +
  geom_smooth(method = "lm") +
  scale_y_log10()
## Warning: Transformation introduced infinite values in continuous y-axis
## Transformation introduced infinite values in continuous y-axis
## `geom_smooth()` using formula = 'y ~ x'
## Warning: Removed 21 rows containing non-finite values (`stat_smooth()`).
## Warning: Removed 14 rows containing missing values (`geom_point()`).

Human Development Index
# Linear model with the Human Development Index as the response and the 2022
# specimen total as the only predictor; summary() prints the coefficient table
# and fit statistics.
# NOTE(review): the scatter plot for this indicator puts it on the x axis
# against log10 specimen counts, so model and plot differ in orientation and
# scale -- confirm this is intended.
data3 %>%
lm(`Human Development Index`~`Specimens Processed in 2022`, data=.) %>%
summary()
##
## Call:
## lm(formula = `Human Development Index` ~ `Specimens Processed in 2022`,
## data = .)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.34217 -0.09132 0.01876 0.08463 0.23383
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 7.270e-01 1.227e-02 59.268 < 2e-16 ***
## `Specimens Processed in 2022` 3.634e-07 9.497e-08 3.827 0.000195 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.139 on 140 degrees of freedom
## (10 observations deleted due to missingness)
## Multiple R-squared: 0.0947, Adjusted R-squared: 0.08823
## F-statistic: 14.64 on 1 and 140 DF, p-value: 0.000195
# Scatter of 2022 specimen totals (log10 y axis) against the Human Development
# Index, with a straight-line fit from geom_smooth(). Rows with a zero total
# become -Inf on the log scale and are left out of the fit.
data3 %>%
  ggplot(aes(
    x = `Human Development Index`,
    y = `Specimens Processed in 2022`
  )) +
  geom_point() +
  geom_smooth(method = "lm") +
  scale_y_log10()
## Warning: Transformation introduced infinite values in continuous y-axis
## Transformation introduced infinite values in continuous y-axis
## `geom_smooth()` using formula = 'y ~ x'
## Warning: Removed 21 rows containing non-finite values (`stat_smooth()`).
## Warning: Removed 10 rows containing missing values (`geom_point()`).
