#install.packages("MASS")
#install.packages("survival")
#??Rcmdr
# a.)
library(readr)
## Warning: package 'readr' was built under R version 4.5.1
library(tidyverse)
## Warning: package 'tidyverse' was built under R version 4.5.1
## Warning: package 'ggplot2' was built under R version 4.5.1
## Warning: package 'tibble' was built under R version 4.5.1
## Warning: package 'tidyr' was built under R version 4.5.1
## Warning: package 'purrr' was built under R version 4.5.1
## Warning: package 'dplyr' was built under R version 4.5.1
## Warning: package 'forcats' was built under R version 4.5.1
## Warning: package 'lubridate' was built under R version 4.5.1
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ purrr 1.0.4
## ✔ forcats 1.0.1 ✔ stringr 1.5.1
## ✔ ggplot2 3.5.2 ✔ tibble 3.3.0
## ✔ lubridate 1.9.4 ✔ tidyr 1.3.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(readxl)
## Warning: package 'readxl' was built under R version 4.5.1
# Import the BEST workbook and print a six-number summary of the exposure
# percentage recorded for each language.
df <- read_excel(path = "BEST.xlsx")
summary(df[["PERCENTAGE.EXPOSED.BASQUE"]])
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.00 20.00 30.00 32.72 40.00 90.00
summary(df[["PERCENTAGE.EXPOSED.ENGLISH"]])
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.00 10.00 10.00 11.15 10.00 70.00
summary(df[["PERCENTAGE.EXPOSED.SPANISH"]])
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 10.00 40.00 60.00 55.45 70.00 100.00
#b.)
# Re-import the same data from its CSV export and print Tukey's five-number
# summary (min, lower hinge, median, upper hinge, max) for each language.
df <- read.csv(file = "BEST.csv")
fivenum(df[["PERCENTAGE.EXPOSED.BASQUE"]])
## [1] 0 20 30 40 90
fivenum(df[["PERCENTAGE.EXPOSED.ENGLISH"]])
## [1] 0 10 10 10 70
fivenum(df[["PERCENTAGE.EXPOSED.SPANISH"]])
## [1] 10 40 60 70 100
Note: I had trouble loading the function for importing text files, so I read the CSV file with read.csv() instead.
#install.packages("vioplot")
library(MASS)
## Warning: package 'MASS' was built under R version 4.5.1
##
## Attaching package: 'MASS'
## The following object is masked from 'package:dplyr':
##
## select
library(vioplot)
## Warning: package 'vioplot' was built under R version 4.5.1
## Loading required package: sm
## Warning: package 'sm' was built under R version 4.5.1
## Package 'sm', version 2.2-6.0: type help(sm) for summary information
##
## Attaching package: 'sm'
## The following object is masked from 'package:MASS':
##
## muscle
## Loading required package: zoo
## Warning: package 'zoo' was built under R version 4.5.1
##
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
library(datasets)
# Load the anorexia data and inspect it: the class of every column, whether
# any column contains missing values, and whether any column is NULL.
data("anorexia")
class(anorexia[["Treat"]])
## [1] "factor"
class(anorexia[["Prewt"]])
## [1] "numeric"
class(anorexia[["Postwt"]])
## [1] "numeric"
anyNA(anorexia[["Treat"]])
## [1] FALSE
anyNA(anorexia[["Prewt"]])
## [1] FALSE
anyNA(anorexia[["Postwt"]])
## [1] FALSE
vapply(anorexia, is.null, logical(1))
## Treat Prewt Postwt
## FALSE FALSE FALSE
# Relabel the levels of the treatment factor. Three equivalent approaches are
# shown. Each one must see the original levels ("CBT", "Cont", "FT"), so the
# data set is reloaded before the second and third approach; otherwise they
# would operate on labels already produced by the previous approach.

# Option 1: base factor() with explicit levels and their new labels.
anorexia$Treat <- factor(
  anorexia$Treat,
  levels = c("CBT", "Cont", "FT"),
  labels = c("Cogn Beh Tr", "Contr", "Fam Tr")
)
levels(anorexia$Treat)
## [1] "Cogn Beh Tr" "Contr" "Fam Tr"

# Option 2: overwrite the labels in place (order must match the level order).
data("anorexia")
levels(anorexia$Treat) <- c("Cogn Beh Tr", "Contr", "Fam Tr")

# Option 3: forcats::fct_recode(), which takes "new label" = "old level" pairs.
data("anorexia")
anorexia$Treat <- fct_recode(
  anorexia$Treat,
  "Cogn Beh Tr" = "CBT",
  "Contr" = "Cont",
  "Fam Tr" = "FT"
)
#a.)
# Export the biopsy data set as a CSV file in the working directory.
data("biopsy")
write.csv(biopsy, file = "biopsy.csv")
#b.)
# Export the Melanoma data set as tab-delimited text, once to the working
# directory and once to the course folder. A non-empty separator is required:
# with sep = "" the fields of each row are written back to back, so the file
# cannot be split into columns again when it is read.
data("Melanoma")
write.table(Melanoma, file = "melanoma.txt", sep = "\t")
write.table(
  Melanoma,
  file = "C:/Users/atoth/Documents/Courses/Software para el analísis de datos/Activity1/Files/melanoma.txt",
  sep = "\t"
)
#install.packages("xlsx")
#library(rJava)
#library(xlsx)
#write.xlsx(Melanoma, "C:/Users/atoth/Documents/Courses/Software para el analísis de datos/Activity1/Files/melanoma.xlsx")
#c.)
# Print the summary of the age column, then write the same printed summary
# to a file in the working directory.
summary(Melanoma[["age"]])
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 4.00 42.00 54.00 52.46 65.00 95.00
capture.output(summary(Melanoma[["age"]]), file = "summary_mel_age.doc")
# d.) To better fit my field, I downloaded an open-access data set from:
# https://figshare.com/s/2b377367585a7e5353fb
df <- read_excel(path = "BEST.xlsx")
Note on b.): I could not use the write.xlsx() function because I do not have the appropriate Java version installed, and since I am on my work computer I cannot install programs without administrator rights.
# Load the birthwt data and answer the exercise questions by evaluating each
# expression inside the data frame.
data("birthwt")
with(birthwt, max(age)) # a.) 45
## [1] 45
with(birthwt, min(age)) # b.) 14
## [1] 14
diff(range(birthwt$age)) # c.) 31
## [1] 31
with(birthwt, smoke[bwt == min(bwt)]) # d.) Yes, the mother smoked
## [1] 1
with(birthwt, bwt[age == max(age)]) # e.) 4990g
## [1] 4990
# f.) birth weights for the rows where ftv is below 2
with(birthwt, bwt[ftv < 2])
## [1] 2523 2557 2600 2622 2637 2637 2663 2665 2722 2733 2751 2769 2769 2778 2807
## [16] 2821 2836 2863 2877 2906 2920 2920 2920 2948 2948 2977 2977 2922 3033 3062
## [31] 3062 3062 3062 3090 3090 3100 3104 3132 3175 3175 3203 3203 3203 3225 3225
## [46] 3232 3234 3260 3274 3317 3317 3331 3374 3374 3402 3416 3444 3459 3460 3473
## [61] 3544 3487 3544 3572 3572 3586 3600 3614 3614 3629 3637 3643 3651 3651 3651
## [76] 3651 3699 3728 3756 3770 3770 3770 3790 3799 3827 3884 3912 3940 3941 3941
## [91] 3969 3997 3997 4054 4054 4111 4174 4238 4593 4990 709 1135 1330 1474 1588
## [106] 1588 1701 1729 1790 1818 1885 1893 1899 1928 1936 1970 2055 2055 2084 2084
## [121] 2100 2125 2187 2187 2211 2225 2240 2240 2282 2296 2296 2325 2353 2353 2367
## [136] 2381 2381 2381 2410 2410 2410 2424 2442 2466 2466 2495 2495
# Build a two-column numeric matrix from the anorexia data: column 1 holds
# Prewt and column 2 holds Postwt, with one row per observation.
# matrix() fills column by column, so concatenating the two vectors and giving
# the row count places each vector in its own column. `nrow` and `ncol` are
# passed as separate scalars rather than as one length-2 vector, of which only
# the first element would be used as the row count.
data("anorexia")
matrix(
  c(anorexia$Prewt, anorexia$Postwt),
  nrow = length(anorexia$Prewt),
  ncol = 2
)
## [,1] [,2]
## [1,] 80.7 80.2
## [2,] 89.4 80.1
## [3,] 91.8 86.4
## [4,] 74.0 86.3
## [5,] 78.1 76.1
## [6,] 88.3 78.1
## [7,] 87.3 75.1
## [8,] 75.1 86.7
## [9,] 80.6 73.5
## [10,] 78.4 84.6
## [11,] 77.6 77.4
## [12,] 88.7 79.5
## [13,] 81.3 89.6
## [14,] 78.1 81.4
## [15,] 70.5 81.8
## [16,] 77.3 77.3
## [17,] 85.2 84.2
## [18,] 86.0 75.4
## [19,] 84.1 79.5
## [20,] 79.7 73.0
## [21,] 85.5 88.3
## [22,] 84.4 84.7
## [23,] 79.6 81.4
## [24,] 77.5 81.2
## [25,] 72.3 88.2
## [26,] 89.0 78.8
## [27,] 80.5 82.2
## [28,] 84.9 85.6
## [29,] 81.5 81.4
## [30,] 82.6 81.9
## [31,] 79.9 76.4
## [32,] 88.7 103.6
## [33,] 94.9 98.4
## [34,] 76.3 93.4
## [35,] 81.0 73.4
## [36,] 80.5 82.1
## [37,] 85.0 96.7
## [38,] 89.2 95.3
## [39,] 81.3 82.4
## [40,] 76.5 72.5
## [41,] 70.0 90.9
## [42,] 80.4 71.3
## [43,] 83.3 85.4
## [44,] 83.0 81.6
## [45,] 87.7 89.1
## [46,] 84.2 83.9
## [47,] 86.4 82.7
## [48,] 76.5 75.7
## [49,] 80.2 82.6
## [50,] 87.8 100.4
## [51,] 83.3 85.2
## [52,] 79.7 83.6
## [53,] 84.5 84.6
## [54,] 80.8 96.2
## [55,] 87.4 86.7
## [56,] 83.8 95.2
## [57,] 83.3 94.3
## [58,] 86.0 91.5
## [59,] 82.5 91.9
## [60,] 86.7 100.3
## [61,] 79.6 76.7
## [62,] 76.9 76.8
## [63,] 94.2 101.6
## [64,] 73.4 94.9
## [65,] 80.5 75.2
## [66,] 81.6 77.8
## [67,] 82.1 95.5
## [68,] 77.6 90.7
## [69,] 83.5 92.5
## [70,] 89.9 93.8
## [71,] 86.0 91.7
## [72,] 87.3 98.0
# Build the Trat_Pulmon data frame from six parallel vectors, one entry per
# subject (25 subjects), and print it.

# Subject identifiers "I1" ... "I25".
Identificador <- paste0("I", 1:25)

Edad <- c(
  23, 24, 21, 22, 23, 25, 26, 24, 21, 22, 23, 25, 26,
  24, 22, 21, 25, 26, 24, 21, 25, 27, 26, 22, 29
)

# 1 = women, 2 = men
Sexo <- c(
  1, 2, 1, 1, 1, 2, 2, 2, 1, 2, 1, 2, 2,
  2, 1, 1, 1, 2, 2, 2, 1, 2, 1, 1, 2
)

Peso <- c(
  76.5, 81.2, 79.3, 59.5, 67.3, 78.6, 67.9, 100.2, 97.8, 56.4,
  65.4, 67.5, 87.4, 99.7, 87.6, 93.4, 65.4, 73.7, 85.1, 61.2,
  54.8, 103.4, 65.8, 71.7, 85.0
)

# height in cm
Alt <- c(
  165, 154, 178, 165, 164, 175, 182, 165, 178, 165, 158, 183, 184,
  164, 189, 167, 182, 179, 165, 158, 183, 184, 189, 166, 175
)

Fuma <- c(
  "SÍ", "NO", "SÍ", "SÍ", "NO", "NO", "NO", "SÍ", "SÍ", "SÍ",
  "NO", "NO", "SÍ", "SÍ", "SÍ", "SÍ", "NO", "NO", "SÍ", "SÍ",
  "SÍ", "NO", "SÍ", "NO", "SÍ"
)

Trat_Pulmon <- data.frame(
  Identificador = Identificador,
  Edad = Edad,
  Sexo = Sexo,
  Peso = Peso,
  Alt = Alt,
  Fuma = Fuma
)
Trat_Pulmon
## Identificador Edad Sexo Peso Alt Fuma
## 1 I1 23 1 76.5 165 SÍ
## 2 I2 24 2 81.2 154 NO
## 3 I3 21 1 79.3 178 SÍ
## 4 I4 22 1 59.5 165 SÍ
## 5 I5 23 1 67.3 164 NO
## 6 I6 25 2 78.6 175 NO
## 7 I7 26 2 67.9 182 NO
## 8 I8 24 2 100.2 165 SÍ
## 9 I9 21 1 97.8 178 SÍ
## 10 I10 22 2 56.4 165 SÍ
## 11 I11 23 1 65.4 158 NO
## 12 I12 25 2 67.5 183 NO
## 13 I13 26 2 87.4 184 SÍ
## 14 I14 24 2 99.7 164 SÍ
## 15 I15 22 1 87.6 189 SÍ
## 16 I16 21 1 93.4 167 SÍ
## 17 I17 25 1 65.4 182 NO
## 18 I18 26 2 73.7 179 NO
## 19 I19 24 2 85.1 165 SÍ
## 20 I20 21 2 61.2 158 SÍ
## 21 I21 25 1 54.8 183 SÍ
## 22 I22 27 2 103.4 184 NO
## 23 I23 26 1 65.8 189 SÍ
## 24 I24 22 1 71.7 166 NO
## 25 I25 29 2 85.0 175 SÍ
#a.)
# Keep the rows whose Edad is above 22, using a logical row index.
Trat_Pulmon[Trat_Pulmon[["Edad"]] > 22, , drop = FALSE]
## Identificador Edad Sexo Peso Alt Fuma
## 1 I1 23 1 76.5 165 SÍ
## 2 I2 24 2 81.2 154 NO
## 5 I5 23 1 67.3 164 NO
## 6 I6 25 2 78.6 175 NO
## 7 I7 26 2 67.9 182 NO
## 8 I8 24 2 100.2 165 SÍ
## 11 I11 23 1 65.4 158 NO
## 12 I12 25 2 67.5 183 NO
## 13 I13 26 2 87.4 184 SÍ
## 14 I14 24 2 99.7 164 SÍ
## 17 I17 25 1 65.4 182 NO
## 18 I18 26 2 73.7 179 NO
## 19 I19 24 2 85.1 165 SÍ
## 21 I21 25 1 54.8 183 SÍ
## 22 I22 27 2 103.4 184 NO
## 23 I23 26 1 65.8 189 SÍ
## 25 I25 29 2 85.0 175 SÍ
#OR
# Same selection with subset(): the condition is evaluated inside the frame.
subset(x = Trat_Pulmon, subset = Edad > 22)
## Identificador Edad Sexo Peso Alt Fuma
## 1 I1 23 1 76.5 165 SÍ
## 2 I2 24 2 81.2 154 NO
## 5 I5 23 1 67.3 164 NO
## 6 I6 25 2 78.6 175 NO
## 7 I7 26 2 67.9 182 NO
## 8 I8 24 2 100.2 165 SÍ
## 11 I11 23 1 65.4 158 NO
## 12 I12 25 2 67.5 183 NO
## 13 I13 26 2 87.4 184 SÍ
## 14 I14 24 2 99.7 164 SÍ
## 17 I17 25 1 65.4 182 NO
## 18 I18 26 2 73.7 179 NO
## 19 I19 24 2 85.1 165 SÍ
## 21 I21 25 1 54.8 183 SÍ
## 22 I22 27 2 103.4 184 NO
## 23 I23 26 1 65.8 189 SÍ
## 25 I25 29 2 85.0 175 SÍ
#b.)
# Value in the third row of the fourth column (Peso).
Trat_Pulmon[3, "Peso"]
## [1] 79.3
#c.)
# Rows with Edad below 27, with the Alt column dropped.
subset(x = Trat_Pulmon, subset = Edad < 27, select = -Alt)
## Identificador Edad Sexo Peso Fuma
## 1 I1 23 1 76.5 SÍ
## 2 I2 24 2 81.2 NO
## 3 I3 21 1 79.3 SÍ
## 4 I4 22 1 59.5 SÍ
## 5 I5 23 1 67.3 NO
## 6 I6 25 2 78.6 NO
## 7 I7 26 2 67.9 NO
## 8 I8 24 2 100.2 SÍ
## 9 I9 21 1 97.8 SÍ
## 10 I10 22 2 56.4 SÍ
## 11 I11 23 1 65.4 NO
## 12 I12 25 2 67.5 NO
## 13 I13 26 2 87.4 SÍ
## 14 I14 24 2 99.7 SÍ
## 15 I15 22 1 87.6 SÍ
## 16 I16 21 1 93.4 SÍ
## 17 I17 25 1 65.4 NO
## 18 I18 26 2 73.7 NO
## 19 I19 24 2 85.1 SÍ
## 20 I20 21 2 61.2 SÍ
## 21 I21 25 1 54.8 SÍ
## 23 I23 26 1 65.8 SÍ
## 24 I24 22 1 71.7 NO
#a.)
# Load the ChickWeight data set into the workspace.
data("ChickWeight")
#b.)
# Plot the weight column against its observation index; point colours are
# taken from the blues9 palette and recycled across the points.
plot(ChickWeight$weight, col=blues9)
#c.)
# Box plot of the Time column.
boxplot(ChickWeight$Time)
# Reload the anorexia data, add the weight difference and compare its mean
# across treatment groups. Diffwt is Prewt minus Postwt, so a negative value
# means Postwt is larger than Prewt.
data("anorexia")
anorexia_treat_df <- anorexia
anorexia_treat_df$Diffwt <- with(anorexia_treat_df, Prewt - Postwt)
with(anorexia_treat_df, tapply(Diffwt, Treat, mean))
## CBT Cont FT
## -3.006897 0.450000 -7.264706
# Two-step selection: first the rows with a negative difference, then the
# "Cont" treatment rows among them.
anorexia_treat_df_gan <- anorexia_treat_df[anorexia_treat_df[["Diffwt"]] < 0, ]
anorexia_treat_C_df <- subset(anorexia_treat_df_gan, Treat == "Cont")
# Or both conditions in a single subset() call.
anorexia_treat_C_df2 <- subset(anorexia_treat_df, Treat == "Cont" & Diffwt < 0)
c.) [Link to RPubs page](https://rpubs.com/talaura)
#Caso practico
#a.)
# Simulate a 30-subject data set. The seed is fixed and the random draws are
# made in a fixed order, so the resulting data are reproducible.
set.seed(1)
ID <- 1:30
Edad <- round(rnorm(30, mean = 45, sd = 10))
# 15 subjects with each gender code, shuffled into random order.
gender <- rep(c(1, 2), each = 15)
Gene <- sample(gender, 30, replace = FALSE)
table(Gene)
## Gene
## 1 2
## 15 15
Treat <- c("A", "B", "C")
Trat <- sample(Treat, 30, replace = TRUE)
Peso <- round(rnorm(30, mean = 70, sd = 25))
Alt <- round(rnorm(30, mean = 175, sd = 15))
df <- data.frame(ID, Edad, Gene, Trat, Peso, Alt)
#b.)
# Summary of Peso within each Gene group.
tapply(Peso, Gene, summary)
## $`1`
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 34.0 56.5 70.0 69.2 85.5 102.0
##
## $`2`
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 47.00 63.00 74.00 76.53 96.50 106.00
#c.)
# Body mass index = weight / height^2. Alt is divided by 100 to convert it
# (cm to m) before squaring; dividing by the height only once would give
# weight per metre rather than a BMI.
df$IMC <- df$Peso / (df$Alt / 100)^2
#d.)
# Split by gender code, then (e) stack the two parts back into one data frame.
Df_Hombres <- subset(df, Gene == 2)
Df_Mujeres <- subset(df, Gene == 1)
#e.)
df2 <- rbind(Df_Hombres, Df_Mujeres)