# Loading the packages used in this file
library(tidyverse)
## Warning: package 'tidyr' was built under R version 4.3.2
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.5
## ✔ forcats 1.0.0 ✔ stringr 1.5.1
## ✔ ggplot2 3.4.4 ✔ tibble 3.2.1
## ✔ lubridate 1.9.3 ✔ tidyr 1.3.1
## ✔ purrr 1.0.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(dplyr)
library(ggplot2)
# Read the osteoporosis CSV straight from its GitHub URL into a data frame,
# then print a column-by-column overview of the table
url <- "https://raw.githubusercontent.com/ursulapodosenin/DAT-607/main/osteoporosis.csv"
# read.csv() accepts a URL string directly and already returns a data.frame
raw_data <- read.csv(file = url)
dplyr::glimpse(raw_data)
## Rows: 1,958
## Columns: 16
## $ Id <int> 1734616, 1419098, 1797916, 1805337, 1351334, 17993…
## $ Age <int> 69, 32, 89, 78, 38, 41, 20, 39, 70, 19, 47, 55, 19…
## $ Gender <chr> "Female", "Female", "Female", "Female", "Male", "M…
## $ Hormonal.Changes <chr> "Normal", "Normal", "Postmenopausal", "Normal", "P…
## $ Family.History <chr> "Yes", "Yes", "No", "No", "Yes", "Yes", "Yes", "Ye…
## $ Race.Ethnicity <chr> "Asian", "Asian", "Caucasian", "Caucasian", "Afric…
## $ Body.Weight <chr> "Underweight", "Underweight", "Normal", "Underweig…
## $ Calcium.Intake <chr> "Low", "Low", "Adequate", "Adequate", "Low", "Low"…
## $ Vitamin.D.Intake <chr> "Sufficient", "Sufficient", "Sufficient", "Insuffi…
## $ Physical.Activity <chr> "Sedentary", "Sedentary", "Active", "Sedentary", "…
## $ Smoking <chr> "Yes", "No", "No", "Yes", "Yes", "Yes", "No", "No"…
## $ Alcohol.Consumption <chr> "Moderate", "None", "Moderate", "None", "None", "M…
## $ Medical.Conditions <chr> "Rheumatoid Arthritis", "None", "Hyperthyroidism",…
## $ Medications <chr> "Corticosteroids", "None", "Corticosteroids", "Cor…
## $ Prior.Fractures <chr> "Yes", "Yes", "No", "No", "Yes", "Yes", "No", "Yes…
## $ Osteoporosis <int> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,…
# Total number of missing (NA) cells across the whole table
raw_data |>
  is.na() |>
  sum()
## [1] 0
# Adding two derived columns to raw_data:
#   Osteoporosis.And.Over.50 - TRUE for people aged 50 or older who have osteoporosis
#                              (Osteoporosis is an integer 0/1 flag, so compare to a number).
#   Risk_Factors             - "yes" when at least one potential risk factor is present:
#                              low calcium intake, a family history, any recorded medical
#                              condition, or a prior fracture; otherwise "no".
# Columns are referenced through mutate()'s data mask rather than raw_data$..., so every
# expression is evaluated against the rows flowing through the pipe, not the global object.
raw_data <- raw_data |>
  mutate(
    Osteoporosis.And.Over.50 = Age >= 50 & Osteoporosis == 1,
    Risk_Factors = if_else(
      Calcium.Intake == "Low" |
        Family.History == "Yes" |
        Medical.Conditions != "None" |
        Prior.Fractures == "Yes",
      "yes",
      "no"
    )
  )
# Counting the people with osteoporosis by ethnicity, by activity level, and by gender and hormonal changes
# First summary: cases (Osteoporosis == 1) within each race/ethnicity group
summarise(
  group_by(raw_data, Race.Ethnicity),
  total_with_osteoperosis = sum(Osteoporosis == 1)
)
## # A tibble: 3 × 2
## Race.Ethnicity total_with_osteoperosis
## <chr> <int>
## 1 African American 344
## 2 Asian 314
## 3 Caucasian 321
# Osteoporosis cases (Osteoporosis == 1) within each physical-activity level
summarise(
  group_by(raw_data, Physical.Activity),
  total_with_osteoperosis = sum(Osteoporosis == 1)
)
## # A tibble: 2 × 2
## Physical.Activity total_with_osteoperosis
## <chr> <int>
## 1 Active 501
## 2 Sedentary 478
# Osteoporosis cases (Osteoporosis == 1) for every Gender x Hormonal.Changes combination.
# .groups = "drop" states the output grouping explicitly: summarise() no longer emits its
# "has grouped output by 'Gender'" message and the result is a plain, ungrouped tibble.
raw_data |>
  group_by(Gender, Hormonal.Changes) |>
  summarise(
    total_with_osteoperosis = sum(Osteoporosis == 1),
    .groups = "drop"
  )
## # A tibble: 4 × 3
## Gender Hormonal.Changes total_with_osteoperosis
## <chr> <chr> <int>
## 1 Female Normal 237
## 2 Female Postmenopausal 240
## 3 Male Normal 246
## 4 Male Postmenopausal 256
# Filtering by some general conditions to see whether any variables look linked to osteoporosis:
# people aged 50 or older with "Normal" hormonal changes, no family history, "Normal" body
# weight and "Adequate" calcium intake who nevertheless have osteoporosis.
# Comma-separated conditions in filter() are combined with AND.
raw_data |>
  filter(
    Age > 49,
    Hormonal.Changes == "Normal",
    Family.History == "No",
    Body.Weight == "Normal",
    Calcium.Intake == "Adequate",
    Osteoporosis == 1
  )
## Id Age Gender Hormonal.Changes Family.History Race.Ethnicity
## 1 1402680 88 Female Normal No Asian
## 2 1220288 58 Male Normal No African American
## 3 1424153 51 Female Normal No Asian
## 4 1341583 80 Male Normal No Caucasian
## 5 1900138 59 Male Normal No Caucasian
## 6 1695880 89 Female Normal No Asian
## 7 1810732 63 Male Normal No Caucasian
## 8 1766500 77 Female Normal No Caucasian
## 9 1471008 75 Male Normal No Asian
## 10 1719175 86 Female Normal No Asian
## 11 1651578 52 Female Normal No Asian
## 12 1219937 61 Male Normal No Caucasian
## 13 1904175 63 Male Normal No Asian
## 14 1719024 72 Male Normal No Caucasian
## 15 1746159 70 Female Normal No Asian
## 16 1609912 59 Female Normal No African American
## 17 1783672 83 Female Normal No African American
## 18 1121163 82 Male Normal No African American
## 19 1835582 62 Male Normal No Asian
## 20 1164489 50 Female Normal No African American
## 21 1563838 66 Male Normal No Asian
## 22 1169513 74 Female Normal No Caucasian
## 23 1145799 52 Male Normal No Asian
## 24 1322699 90 Male Normal No Caucasian
## 25 1201953 55 Female Normal No Caucasian
## 26 1104327 68 Male Normal No Caucasian
## 27 1531292 78 Male Normal No Asian
## 28 1100709 81 Male Normal No African American
## 29 1762519 81 Female Normal No Caucasian
## 30 1947977 79 Male Normal No Caucasian
## 31 1351055 79 Male Normal No African American
## 32 1203598 74 Female Normal No African American
## 33 1358700 90 Male Normal No African American
## 34 1764469 68 Female Normal No African American
## 35 1823633 87 Female Normal No Asian
## 36 1585642 68 Male Normal No Asian
## 37 1485095 69 Male Normal No Caucasian
## Body.Weight Calcium.Intake Vitamin.D.Intake Physical.Activity Smoking
## 1 Normal Adequate Sufficient Active No
## 2 Normal Adequate Insufficient Sedentary Yes
## 3 Normal Adequate Sufficient Sedentary Yes
## 4 Normal Adequate Sufficient Sedentary Yes
## 5 Normal Adequate Sufficient Active No
## 6 Normal Adequate Insufficient Sedentary Yes
## 7 Normal Adequate Sufficient Sedentary No
## 8 Normal Adequate Insufficient Active Yes
## 9 Normal Adequate Sufficient Active Yes
## 10 Normal Adequate Sufficient Active Yes
## 11 Normal Adequate Insufficient Active Yes
## 12 Normal Adequate Insufficient Active No
## 13 Normal Adequate Sufficient Sedentary No
## 14 Normal Adequate Sufficient Sedentary Yes
## 15 Normal Adequate Sufficient Sedentary Yes
## 16 Normal Adequate Sufficient Sedentary Yes
## 17 Normal Adequate Insufficient Sedentary Yes
## 18 Normal Adequate Sufficient Sedentary No
## 19 Normal Adequate Insufficient Active Yes
## 20 Normal Adequate Insufficient Active Yes
## 21 Normal Adequate Insufficient Active Yes
## 22 Normal Adequate Insufficient Sedentary Yes
## 23 Normal Adequate Insufficient Active No
## 24 Normal Adequate Sufficient Sedentary Yes
## 25 Normal Adequate Sufficient Active No
## 26 Normal Adequate Sufficient Active No
## 27 Normal Adequate Insufficient Sedentary No
## 28 Normal Adequate Insufficient Sedentary No
## 29 Normal Adequate Insufficient Sedentary No
## 30 Normal Adequate Sufficient Active No
## 31 Normal Adequate Insufficient Active Yes
## 32 Normal Adequate Insufficient Active No
## 33 Normal Adequate Insufficient Sedentary Yes
## 34 Normal Adequate Insufficient Active No
## 35 Normal Adequate Sufficient Sedentary Yes
## 36 Normal Adequate Sufficient Active Yes
## 37 Normal Adequate Insufficient Active Yes
## Alcohol.Consumption Medical.Conditions Medications Prior.Fractures
## 1 Moderate None None Yes
## 2 Moderate Hyperthyroidism None Yes
## 3 Moderate None None Yes
## 4 Moderate Hyperthyroidism None Yes
## 5 None None Corticosteroids No
## 6 None Hyperthyroidism None No
## 7 None None None Yes
## 8 Moderate Hyperthyroidism Corticosteroids Yes
## 9 None Rheumatoid Arthritis Corticosteroids Yes
## 10 None Hyperthyroidism Corticosteroids No
## 11 Moderate None None No
## 12 Moderate None Corticosteroids No
## 13 Moderate None None No
## 14 None Hyperthyroidism None Yes
## 15 None Hyperthyroidism None No
## 16 Moderate None None No
## 17 None None Corticosteroids No
## 18 None None None Yes
## 19 None Rheumatoid Arthritis Corticosteroids Yes
## 20 Moderate Rheumatoid Arthritis None No
## 21 None Rheumatoid Arthritis Corticosteroids Yes
## 22 None Hyperthyroidism Corticosteroids No
## 23 None Hyperthyroidism Corticosteroids Yes
## 24 Moderate Hyperthyroidism Corticosteroids Yes
## 25 Moderate None Corticosteroids Yes
## 26 None Hyperthyroidism None Yes
## 27 Moderate None None Yes
## 28 None Hyperthyroidism None Yes
## 29 Moderate None None No
## 30 Moderate None None Yes
## 31 Moderate None Corticosteroids Yes
## 32 None Hyperthyroidism Corticosteroids No
## 33 Moderate Hyperthyroidism None Yes
## 34 Moderate Rheumatoid Arthritis Corticosteroids No
## 35 None None None No
## 36 None Rheumatoid Arthritis None Yes
## 37 Moderate Rheumatoid Arthritis None No
## Osteoporosis Osteoporosis.And.Over.50 Risk_Factors
## 1 1 TRUE yes
## 2 1 TRUE yes
## 3 1 TRUE yes
## 4 1 TRUE yes
## 5 1 TRUE no
## 6 1 TRUE yes
## 7 1 TRUE yes
## 8 1 TRUE yes
## 9 1 TRUE yes
## 10 1 TRUE yes
## 11 1 TRUE no
## 12 1 TRUE no
## 13 1 TRUE no
## 14 1 TRUE yes
## 15 1 TRUE yes
## 16 1 TRUE no
## 17 1 TRUE no
## 18 1 TRUE yes
## 19 1 TRUE yes
## 20 1 TRUE yes
## 21 1 TRUE yes
## 22 1 TRUE yes
## 23 1 TRUE yes
## 24 1 TRUE yes
## 25 1 TRUE yes
## 26 1 TRUE yes
## 27 1 TRUE yes
## 28 1 TRUE yes
## 29 1 TRUE no
## 30 1 TRUE yes
## 31 1 TRUE yes
## 32 1 TRUE yes
## 33 1 TRUE yes
## 34 1 TRUE yes
## 35 1 TRUE no
## 36 1 TRUE yes
## 37 1 TRUE yes
# People aged 50 or older with "Normal" hormonal changes, no family history, "Normal" body
# weight and "Adequate" calcium intake who do NOT have osteoporosis (Osteoporosis == 0).
# Comma-separated conditions in filter() are combined with AND.
raw_data |>
  filter(
    Age > 49,
    Hormonal.Changes == "Normal",
    Family.History == "No",
    Body.Weight == "Normal",
    Calcium.Intake == "Adequate",
    Osteoporosis == 0
  )
## [1] Id Age Gender
## [4] Hormonal.Changes Family.History Race.Ethnicity
## [7] Body.Weight Calcium.Intake Vitamin.D.Intake
## [10] Physical.Activity Smoking Alcohol.Consumption
## [13] Medical.Conditions Medications Prior.Fractures
## [16] Osteoporosis Osteoporosis.And.Over.50 Risk_Factors
## <0 rows> (or 0-length row.names)
# Looking at the number of people with medical conditions and those on medication
# Bar chart: count of people in each Medical.Conditions category
raw_data |>
  ggplot(aes(x = Medical.Conditions)) +
  geom_bar(fill = "blue") +
  theme_bw()

# Bar chart: count of people in each Medications category
raw_data |>
  ggplot(aes(x = Medications)) +
  geom_bar(fill = "green") +
  theme_bw()
