# Loading the packages used in this file

library(tidyverse)
## Warning: package 'tidyr' was built under R version 4.3.2
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ ggplot2   3.4.4     ✔ tibble    3.2.1
## ✔ lubridate 1.9.3     ✔ tidyr     1.3.1
## ✔ purrr     1.0.2     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(dplyr)
library(ggplot2)
# Read the osteoporosis CSV straight from GitHub and preview its structure.
# read.csv() accepts a URL string directly and already returns a data frame,
# so no explicit url() connection or data.frame() re-wrap is needed.
url <- "https://raw.githubusercontent.com/ursulapodosenin/DAT-607/main/osteoporosis.csv"

raw_data <- read.csv(url)
dplyr::glimpse(raw_data)
## Rows: 1,958
## Columns: 16
## $ Id                  <int> 1734616, 1419098, 1797916, 1805337, 1351334, 17993…
## $ Age                 <int> 69, 32, 89, 78, 38, 41, 20, 39, 70, 19, 47, 55, 19…
## $ Gender              <chr> "Female", "Female", "Female", "Female", "Male", "M…
## $ Hormonal.Changes    <chr> "Normal", "Normal", "Postmenopausal", "Normal", "P…
## $ Family.History      <chr> "Yes", "Yes", "No", "No", "Yes", "Yes", "Yes", "Ye…
## $ Race.Ethnicity      <chr> "Asian", "Asian", "Caucasian", "Caucasian", "Afric…
## $ Body.Weight         <chr> "Underweight", "Underweight", "Normal", "Underweig…
## $ Calcium.Intake      <chr> "Low", "Low", "Adequate", "Adequate", "Low", "Low"…
## $ Vitamin.D.Intake    <chr> "Sufficient", "Sufficient", "Sufficient", "Insuffi…
## $ Physical.Activity   <chr> "Sedentary", "Sedentary", "Active", "Sedentary", "…
## $ Smoking             <chr> "Yes", "No", "No", "Yes", "Yes", "Yes", "No", "No"…
## $ Alcohol.Consumption <chr> "Moderate", "None", "Moderate", "None", "None", "M…
## $ Medical.Conditions  <chr> "Rheumatoid Arthritis", "None", "Hyperthyroidism",…
## $ Medications         <chr> "Corticosteroids", "None", "Corticosteroids", "Cor…
## $ Prior.Fractures     <chr> "Yes", "Yes", "No", "No", "Yes", "Yes", "No", "Yes…
## $ Osteoporosis        <int> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,…
# Total number of missing values across every cell of the table
raw_data |>
  is.na() |>
  sum()
## [1] 0
# Add two derived columns to raw_data:
#   Osteoporosis.And.Over.50 - TRUE for people aged 50 or older who have
#                              osteoporosis
#   Risk_Factors             - "yes" when at least one potential risk factor is
#                              present (low calcium intake, family history, a
#                              recorded medical condition, or a prior
#                              fracture), otherwise "no"
# Columns are referenced through mutate()'s data mask rather than
# `raw_data$...`, and the integer Osteoporosis column is compared with a
# number instead of the string "1".
raw_data <- raw_data |>
  mutate(
    Osteoporosis.And.Over.50 = Age >= 50 & Osteoporosis == 1,
    Risk_Factors = if_else(
      Calcium.Intake == "Low" |
        Family.History == "Yes" |
        Medical.Conditions != "None" |
        Prior.Fractures == "Yes",
      "yes",
      "no"
    )
  )

# Counting the number of people with osteoporosis by ethnicity, activity level
# and hormonal changes

# Osteoporosis cases per ethnicity
raw_data |>
  group_by(Race.Ethnicity) |>
  summarise(total_with_osteoporosis = sum(Osteoporosis == 1))
## # A tibble: 3 × 2
##   Race.Ethnicity   total_with_osteoporosis
##   <chr>                              <int>
## 1 African American                     344
## 2 Asian                                314
## 3 Caucasian                            321
# Osteoporosis cases per physical activity level
raw_data |>
  group_by(Physical.Activity) |>
  summarise(total_with_osteoporosis = sum(Osteoporosis == 1))
## # A tibble: 2 × 2
##   Physical.Activity total_with_osteoporosis
##   <chr>                               <int>
## 1 Active                                501
## 2 Sedentary                             478
# Osteoporosis cases per gender / hormonal-change combination.
# `.groups = "drop"` returns an ungrouped tibble and silences the
# "summarise() has grouped output" message.
raw_data |>
  group_by(Gender, Hormonal.Changes) |>
  summarise(
    total_with_osteoporosis = sum(Osteoporosis == 1),
    .groups = "drop"
  )
## # A tibble: 4 × 3
##   Gender Hormonal.Changes total_with_osteoporosis
##   <chr>  <chr>                              <int>
## 1 Female Normal                               237
## 2 Female Postmenopausal                       240
## 3 Male   Normal                               246
## 4 Male   Postmenopausal                       256
# Filtering by some general conditions to see whether any variables look
# linked to osteoporosis: people older than 49 with normal hormonal status,
# no family history, normal body weight and adequate calcium intake who
# have osteoporosis.
raw_data |>
  filter(
    Age > 49,
    Hormonal.Changes == "Normal",
    Family.History == "No",
    Body.Weight == "Normal",
    Calcium.Intake == "Adequate",
    Osteoporosis == 1
  )
##         Id Age Gender Hormonal.Changes Family.History   Race.Ethnicity
## 1  1402680  88 Female           Normal             No            Asian
## 2  1220288  58   Male           Normal             No African American
## 3  1424153  51 Female           Normal             No            Asian
## 4  1341583  80   Male           Normal             No        Caucasian
## 5  1900138  59   Male           Normal             No        Caucasian
## 6  1695880  89 Female           Normal             No            Asian
## 7  1810732  63   Male           Normal             No        Caucasian
## 8  1766500  77 Female           Normal             No        Caucasian
## 9  1471008  75   Male           Normal             No            Asian
## 10 1719175  86 Female           Normal             No            Asian
## 11 1651578  52 Female           Normal             No            Asian
## 12 1219937  61   Male           Normal             No        Caucasian
## 13 1904175  63   Male           Normal             No            Asian
## 14 1719024  72   Male           Normal             No        Caucasian
## 15 1746159  70 Female           Normal             No            Asian
## 16 1609912  59 Female           Normal             No African American
## 17 1783672  83 Female           Normal             No African American
## 18 1121163  82   Male           Normal             No African American
## 19 1835582  62   Male           Normal             No            Asian
## 20 1164489  50 Female           Normal             No African American
## 21 1563838  66   Male           Normal             No            Asian
## 22 1169513  74 Female           Normal             No        Caucasian
## 23 1145799  52   Male           Normal             No            Asian
## 24 1322699  90   Male           Normal             No        Caucasian
## 25 1201953  55 Female           Normal             No        Caucasian
## 26 1104327  68   Male           Normal             No        Caucasian
## 27 1531292  78   Male           Normal             No            Asian
## 28 1100709  81   Male           Normal             No African American
## 29 1762519  81 Female           Normal             No        Caucasian
## 30 1947977  79   Male           Normal             No        Caucasian
## 31 1351055  79   Male           Normal             No African American
## 32 1203598  74 Female           Normal             No African American
## 33 1358700  90   Male           Normal             No African American
## 34 1764469  68 Female           Normal             No African American
## 35 1823633  87 Female           Normal             No            Asian
## 36 1585642  68   Male           Normal             No            Asian
## 37 1485095  69   Male           Normal             No        Caucasian
##    Body.Weight Calcium.Intake Vitamin.D.Intake Physical.Activity Smoking
## 1       Normal       Adequate       Sufficient            Active      No
## 2       Normal       Adequate     Insufficient         Sedentary     Yes
## 3       Normal       Adequate       Sufficient         Sedentary     Yes
## 4       Normal       Adequate       Sufficient         Sedentary     Yes
## 5       Normal       Adequate       Sufficient            Active      No
## 6       Normal       Adequate     Insufficient         Sedentary     Yes
## 7       Normal       Adequate       Sufficient         Sedentary      No
## 8       Normal       Adequate     Insufficient            Active     Yes
## 9       Normal       Adequate       Sufficient            Active     Yes
## 10      Normal       Adequate       Sufficient            Active     Yes
## 11      Normal       Adequate     Insufficient            Active     Yes
## 12      Normal       Adequate     Insufficient            Active      No
## 13      Normal       Adequate       Sufficient         Sedentary      No
## 14      Normal       Adequate       Sufficient         Sedentary     Yes
## 15      Normal       Adequate       Sufficient         Sedentary     Yes
## 16      Normal       Adequate       Sufficient         Sedentary     Yes
## 17      Normal       Adequate     Insufficient         Sedentary     Yes
## 18      Normal       Adequate       Sufficient         Sedentary      No
## 19      Normal       Adequate     Insufficient            Active     Yes
## 20      Normal       Adequate     Insufficient            Active     Yes
## 21      Normal       Adequate     Insufficient            Active     Yes
## 22      Normal       Adequate     Insufficient         Sedentary     Yes
## 23      Normal       Adequate     Insufficient            Active      No
## 24      Normal       Adequate       Sufficient         Sedentary     Yes
## 25      Normal       Adequate       Sufficient            Active      No
## 26      Normal       Adequate       Sufficient            Active      No
## 27      Normal       Adequate     Insufficient         Sedentary      No
## 28      Normal       Adequate     Insufficient         Sedentary      No
## 29      Normal       Adequate     Insufficient         Sedentary      No
## 30      Normal       Adequate       Sufficient            Active      No
## 31      Normal       Adequate     Insufficient            Active     Yes
## 32      Normal       Adequate     Insufficient            Active      No
## 33      Normal       Adequate     Insufficient         Sedentary     Yes
## 34      Normal       Adequate     Insufficient            Active      No
## 35      Normal       Adequate       Sufficient         Sedentary     Yes
## 36      Normal       Adequate       Sufficient            Active     Yes
## 37      Normal       Adequate     Insufficient            Active     Yes
##    Alcohol.Consumption   Medical.Conditions     Medications Prior.Fractures
## 1             Moderate                 None            None             Yes
## 2             Moderate      Hyperthyroidism            None             Yes
## 3             Moderate                 None            None             Yes
## 4             Moderate      Hyperthyroidism            None             Yes
## 5                 None                 None Corticosteroids              No
## 6                 None      Hyperthyroidism            None              No
## 7                 None                 None            None             Yes
## 8             Moderate      Hyperthyroidism Corticosteroids             Yes
## 9                 None Rheumatoid Arthritis Corticosteroids             Yes
## 10                None      Hyperthyroidism Corticosteroids              No
## 11            Moderate                 None            None              No
## 12            Moderate                 None Corticosteroids              No
## 13            Moderate                 None            None              No
## 14                None      Hyperthyroidism            None             Yes
## 15                None      Hyperthyroidism            None              No
## 16            Moderate                 None            None              No
## 17                None                 None Corticosteroids              No
## 18                None                 None            None             Yes
## 19                None Rheumatoid Arthritis Corticosteroids             Yes
## 20            Moderate Rheumatoid Arthritis            None              No
## 21                None Rheumatoid Arthritis Corticosteroids             Yes
## 22                None      Hyperthyroidism Corticosteroids              No
## 23                None      Hyperthyroidism Corticosteroids             Yes
## 24            Moderate      Hyperthyroidism Corticosteroids             Yes
## 25            Moderate                 None Corticosteroids             Yes
## 26                None      Hyperthyroidism            None             Yes
## 27            Moderate                 None            None             Yes
## 28                None      Hyperthyroidism            None             Yes
## 29            Moderate                 None            None              No
## 30            Moderate                 None            None             Yes
## 31            Moderate                 None Corticosteroids             Yes
## 32                None      Hyperthyroidism Corticosteroids              No
## 33            Moderate      Hyperthyroidism            None             Yes
## 34            Moderate Rheumatoid Arthritis Corticosteroids              No
## 35                None                 None            None              No
## 36                None Rheumatoid Arthritis            None             Yes
## 37            Moderate Rheumatoid Arthritis            None              No
##    Osteoporosis Osteoporosis.And.Over.50 Risk_Factors
## 1             1                     TRUE          yes
## 2             1                     TRUE          yes
## 3             1                     TRUE          yes
## 4             1                     TRUE          yes
## 5             1                     TRUE           no
## 6             1                     TRUE          yes
## 7             1                     TRUE          yes
## 8             1                     TRUE          yes
## 9             1                     TRUE          yes
## 10            1                     TRUE          yes
## 11            1                     TRUE           no
## 12            1                     TRUE           no
## 13            1                     TRUE           no
## 14            1                     TRUE          yes
## 15            1                     TRUE          yes
## 16            1                     TRUE           no
## 17            1                     TRUE           no
## 18            1                     TRUE          yes
## 19            1                     TRUE          yes
## 20            1                     TRUE          yes
## 21            1                     TRUE          yes
## 22            1                     TRUE          yes
## 23            1                     TRUE          yes
## 24            1                     TRUE          yes
## 25            1                     TRUE          yes
## 26            1                     TRUE          yes
## 27            1                     TRUE          yes
## 28            1                     TRUE          yes
## 29            1                     TRUE           no
## 30            1                     TRUE          yes
## 31            1                     TRUE          yes
## 32            1                     TRUE          yes
## 33            1                     TRUE          yes
## 34            1                     TRUE          yes
## 35            1                     TRUE           no
## 36            1                     TRUE          yes
## 37            1                     TRUE          yes
# People older than 49 with normal hormonal status, no family history, normal
# body weight and adequate calcium intake who do NOT have osteoporosis.
raw_data |>
  filter(
    Age > 49,
    Hormonal.Changes == "Normal",
    Family.History == "No",
    Body.Weight == "Normal",
    Calcium.Intake == "Adequate",
    Osteoporosis == 0
  )
##  [1] Id                       Age                      Gender                  
##  [4] Hormonal.Changes         Family.History           Race.Ethnicity          
##  [7] Body.Weight              Calcium.Intake           Vitamin.D.Intake        
## [10] Physical.Activity        Smoking                  Alcohol.Consumption     
## [13] Medical.Conditions       Medications              Prior.Fractures         
## [16] Osteoporosis             Osteoporosis.And.Over.50 Risk_Factors            
## <0 rows> (or 0-length row.names)
# Looking at the number of people with medical conditions and those on
# medication

# Bar chart: number of people per recorded medical condition
raw_data |>
  ggplot(aes(x = Medical.Conditions)) +
  geom_bar(fill = "blue") +
  theme_bw()

# Bar chart: number of people per recorded medication
raw_data |>
  ggplot(aes(x = Medications)) +
  geom_bar(fill = "green") +
  theme_bw()