Assignment-1

library(survival)
library(ggpubr)
## Loading required package: ggplot2
library(survminer)
## 
## Attaching package: 'survminer'
## The following object is masked from 'package:survival':
## 
##     myeloma
library(gtsummary)
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(broom)
library(crosstable)
## 
## Attaching package: 'crosstable'
## The following object is masked from 'package:gtsummary':
## 
##     as_gt
# Load the package
library(survival)
# Load the ovarian dataset.
# In current survival releases `ovarian` ships inside the `cancer` data
# bundle, so data("ovarian") only warns that the data set is not found.
# Loading the bundle by name brings `ovarian` into the workspace cleanly.
data(cancer, package = "survival")
# View the dataset (first six rows)
head(ovarian)
##   futime fustat     age resid.ds rx ecog.ps
## 1     59      1 72.3315        2  1       1
## 2    115      1 74.4932        2  1       1
## 3    156      1 66.4658        2  1       2
## 4    421      0 53.3644        2  2       1
## 5    431      1 50.3397        2  1       1
## 6    448      0 56.4301        1  1       2

##Variable Types:

  1. futime → Survival time (days)

Type: Quantitative, Continuous (numeric)

  1. fustat → Status (1 = died, 0 = censored)

Type: Qualitative, Binary (categorical, nominal)

  1. age → Age in years

Type: Quantitative, Continuous (numeric)

  1. resid.ds → Residual disease present

Codes: 1 = no, 2 = yes (only the values 1 and 2 occur in the data)

Type: Qualitative, Binary (categorical, nominal)

  1. rx → Treatment group (1 or 2)

Type: Qualitative, Nominal (categorical)

  1. ecog.ps → ECOG performance status

Codes: 1 or 2 in this dataset (a lower score means better functional status)

Type: Qualitative, Ordinal

# Show the structure of the data frame: dimensions, column names,
# storage type of each column, and the leading values.
str(ovarian)
## 'data.frame':    26 obs. of  6 variables:
##  $ futime  : num  59 115 156 421 431 448 464 475 477 563 ...
##  $ fustat  : num  1 1 1 0 1 0 1 1 0 1 ...
##  $ age     : num  72.3 74.5 66.5 53.4 50.3 ...
##  $ resid.ds: num  2 2 2 2 2 1 2 2 2 1 ...
##  $ rx      : num  1 1 1 2 1 1 2 2 1 2 ...
##  $ ecog.ps : num  1 1 2 1 1 2 2 2 1 2 ...

## Interpretation: The variables in the dataset are listed below. Several of them can be analysed together in a multivariable model (e.g., age + rx + ecog.ps in a Cox regression).

  1. futime: survival or censoring time (days)

  2. fustat: censoring status (1 = dead, 0 = censored)

  3. age: patient age (years)

  4. resid.ds: residual disease present (1 = no, 2 = yes)

  5. rx: treatment group (1 or 2)

  6. ecog.ps: ECOG performance status (1 or 2; 1 is better)

# Preview the first six rows of the data before summarising it.
head(ovarian)
##   futime fustat     age resid.ds rx ecog.ps
## 1     59      1 72.3315        2  1       1
## 2    115      1 74.4932        2  1       1
## 3    156      1 66.4658        2  1       2
## 4    421      0 53.3644        2  2       1
## 5    431      1 50.3397        2  1       1
## 6    448      0 56.4301        1  1       2
# Per-column summary statistics (min, quartiles, median, mean, max).
# All columns are stored as numeric, so the coded variables
# (fustat, resid.ds, rx, ecog.ps) are summarised as numbers too.
summary(ovarian)
##      futime           fustat            age           resid.ds    
##  Min.   :  59.0   Min.   :0.0000   Min.   :38.89   Min.   :1.000  
##  1st Qu.: 368.0   1st Qu.:0.0000   1st Qu.:50.17   1st Qu.:1.000  
##  Median : 476.0   Median :0.0000   Median :56.85   Median :2.000  
##  Mean   : 599.5   Mean   :0.4615   Mean   :56.17   Mean   :1.577  
##  3rd Qu.: 794.8   3rd Qu.:1.0000   3rd Qu.:62.38   3rd Qu.:2.000  
##  Max.   :1227.0   Max.   :1.0000   Max.   :74.50   Max.   :2.000  
##        rx         ecog.ps     
##  Min.   :1.0   Min.   :1.000  
##  1st Qu.:1.0   1st Qu.:1.000  
##  Median :1.5   Median :1.000  
##  Mean   :1.5   Mean   :1.462  
##  3rd Qu.:2.0   3rd Qu.:2.000  
##  Max.   :2.0   Max.   :2.000

## Univariate analysis: extract age as a univariate dataset, view its summary, and plot a histogram of its distribution

# Pull the age column out as a standalone numeric vector for univariate work.
ovarian_age <- ovarian[["age"]]
# Minimum, quartiles, median, mean and maximum of patient age.
summary(object = ovarian_age)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   38.89   50.17   56.85   56.17   62.38   74.50
# Histogram showing how age is distributed across the patients.
hist(
  x = ovarian_age,
  main = "Distribution of Age in Ovarian Cancer Patients",
  xlab = "Age",
  col = "lightblue"
)

## Interpretation: This is a univariate analysis, i.e. one variable is analysed at a time; here the only variable is age.