# Load the table from ENCFF166SFX.tsv; the first row supplies column names.
# The separator is set to a tab explicitly: read.table() otherwise splits on
# any run of whitespace, which misaligns rows that contain empty fields or
# values with embedded spaces. Quote and comment handling are switched off so
# that a "'" or "#" inside a field is read literally instead of swallowing or
# truncating rows.
# NOTE(review): assumes the file is tab-delimited, as the .tsv extension
# suggests -- confirm against the actual file.
myGex <- read.table(
  "ENCFF166SFX.tsv",
  header = TRUE,
  sep = "\t",
  quote = "",
  comment.char = ""
)

# Histogram of the `length` column, using bins 50 units wide.
ggplot(myGex, aes(x = length)) +
  geom_histogram(binwidth = 50)

# Keep only the rows of myGex whose `length` is below 10000.
normalLength <- filter(myGex, length < 10000)

# Histogram of `length` for the retained rows, using bins 1000 units wide.
ggplot(data = normalLength, mapping = aes(x = length)) +
  geom_histogram(binwidth = 1000)

# Keep only the rows of myGex whose FPKM exceeds 5000.
hiEx <- filter(myGex, FPKM > 5000)

# One bar per gene_id; bar height and fill colour both encode FPKM.
# The x-axis labels are rotated 90 degrees.
ggplot(data = hiEx, mapping = aes(x = gene_id, y = FPKM, fill = FPKM)) +
  geom_col() +
  theme(
    axis.text.x = element_text(angle = 90, vjust = 1, hjust = 1)
  )

# Linear fit of TPM on FPKM over the length-filtered rows.
reg <- lm(TPM ~ FPKM, data = normalLength)

# Pull the fitted intercept (first coefficient) and slope (second).
coeff <- coef(reg)
int1 <- coeff[1]
slo <- coeff[2]

# Jittered, semi-transparent scatter of TPM against FPKM, with the fitted
# line drawn on top as a wide, faint violet stroke.
ggplot(data = normalLength, mapping = aes(x = FPKM, y = TPM)) +
  geom_point(position = 'jitter', alpha = 0.5) +
  geom_abline(
    intercept = int1,
    slope = slo,
    color = "violet",
    linewidth = 2,
    alpha = 0.1
  )

# Linear fit of expected_count on FPKM over the length-filtered rows.
# This overwrites reg, coeff, int1 and slo.
reg <- lm(expected_count ~ FPKM, data = normalLength)

# Pull the fitted intercept (first coefficient) and slope (second).
coeff <- coef(reg)
int1 <- coeff[1]
slo <- coeff[2]

# Jittered, semi-transparent scatter of expected_count against FPKM, with the
# fitted line drawn on top as a wide, faint magenta stroke.
ggplot(data = normalLength, mapping = aes(x = FPKM, y = expected_count)) +
  geom_point(position = 'jitter', alpha = 0.5) +
  geom_abline(
    intercept = int1,
    slope = slo,
    color = "magenta",
    linewidth = 2,
    alpha = 0.1
  )
