# Load the PKD1 and PKD2 tables from CSV files in the working directory.
# NOTE(review): later code uses %>%, ggplot() and ggplotly(), but no library()
# calls are visible in this section -- confirm dplyr, ggplot2 and plotly are
# attached before this point.
PKD1 <- read.csv(file = "PKD1.csv")
PKD2 <- read.csv(file = "PKD2.csv")



# Top mutations by clinical significance ----
# PKD1: count rows per (Clinical.Significance, Region) pair, drop pairs seen
# only once and the "Likely Hypomorphic" / "Indeterminate" classes, then keep
# the five largest counts within each remaining class (ties included).
PKD1_CS <- PKD1 %>%
  group_by(Clinical.Significance, Region) %>%
  tally() %>%
  filter(
    n != 1,
    Clinical.Significance != "Likely Hypomorphic",
    Clinical.Significance != "Indeterminate"
  ) %>%
  arrange(desc(n)) %>%
  # tally() leaves the result grouped by Clinical.Significance, so this is a
  # per-class top 5. Naming the weight column silences "Selecting by n".
  top_n(5, wt = n) %>%
  ungroup()

# Order the bars by count. The same Region can appear under several classes
# and factor() does not accept duplicated levels, so keep only the first
# (largest-count) occurrence of each Region.
PKD1_CS$Region <- factor(
  PKD1_CS$Region,
  levels = unique(PKD1_CS$Region[order(-PKD1_CS$n)])
)

a <- ggplot(PKD1_CS, aes(x = Region, y = n, fill = Clinical.Significance)) +
  geom_bar(stat = "identity") +
  labs(
    title = "Total Number of Mutations in PKD1 Gene by Exon",
    y = "# of Mutations"
  ) +
  coord_flip()
# Pass the plot explicitly instead of relying on ggplot2's last_plot().
ggplotly(a)
# PKD2: same summary as for PKD1 -- count rows per
# (Clinical.Significance, Region) pair, drop pairs seen only once and the
# "Likely Hypomorphic" / "Indeterminate" classes, then keep the five largest
# counts within each remaining class (ties included).
PKD2_CS <- PKD2 %>%
  group_by(Clinical.Significance, Region) %>%
  tally() %>%
  filter(
    n != 1,
    Clinical.Significance != "Likely Hypomorphic",
    Clinical.Significance != "Indeterminate"
  ) %>%
  arrange(desc(n)) %>%
  # tally() leaves the result grouped by Clinical.Significance, so this is a
  # per-class top 5. Naming the weight column silences "Selecting by n".
  top_n(5, wt = n) %>%
  ungroup()

# Order the bars by count. The same Region can appear under several classes
# and factor() does not accept duplicated levels, so keep only the first
# (largest-count) occurrence of each Region.
PKD2_CS$Region <- factor(
  PKD2_CS$Region,
  levels = unique(PKD2_CS$Region[order(-PKD2_CS$n)])
)

b <- ggplot(PKD2_CS, aes(x = Region, y = n, fill = Clinical.Significance)) +
  geom_bar(stat = "identity") +
  labs(
    title = "Total Number of Mutations in PKD2 Gene by Exon",
    y = "# of Mutations"
  ) +
  coord_flip()
# Pass the plot explicitly instead of relying on ggplot2's last_plot().
ggplotly(b)
# Mutation Type vs. Clinical Significance ----
# PKD1: count rows per (Clinical.Significance, Mutation.Type) pair, drop pairs
# seen only once, the "Likely Hypomorphic" / "Indeterminate" classes and
# "SYNONYMOUS" mutations, then sort by descending count.
PKD1_MT <- PKD1 %>%
  group_by(Clinical.Significance, Mutation.Type) %>%
  tally() %>%
  filter(
    n != 1,
    Clinical.Significance != "Likely Hypomorphic",
    Clinical.Significance != "Indeterminate",
    Mutation.Type != "SYNONYMOUS"
  ) %>%
  # Largest counts first; equal counts are ordered by Clinical.Significance.
  arrange(desc(n), Clinical.Significance) %>%
  ungroup()

# Order the bars by count. A Mutation.Type can appear under several classes
# and factor() does not accept duplicated levels, so keep only the first
# (largest-count) occurrence of each type.
PKD1_MT$Mutation.Type <- factor(
  PKD1_MT$Mutation.Type,
  levels = unique(PKD1_MT$Mutation.Type[order(-PKD1_MT$n)])
)

c <- ggplot(
  PKD1_MT,
  aes(x = Mutation.Type, y = n, fill = Clinical.Significance)
) +
  geom_bar(stat = "identity") +
  labs(
    title = "Total Number of Mutations in PKD1 Gene by Mutation Type",
    y = "# of Mutations",
    x = "Mutation Type"
  ) +
  coord_flip()
# Pass the plot explicitly instead of relying on ggplot2's last_plot().
ggplotly(c)
# PKD2: count rows per (Clinical.Significance, Mutation.Type) pair, drop pairs
# seen only once, the "Likely Hypomorphic" / "Indeterminate" classes and
# "SYNONYMOUS" mutations, then sort by descending count.
PKD2_MT <- PKD2 %>%
  group_by(Clinical.Significance, Mutation.Type) %>%
  tally() %>%
  filter(
    n != 1,
    Clinical.Significance != "Likely Hypomorphic",
    Clinical.Significance != "Indeterminate",
    Mutation.Type != "SYNONYMOUS"
  ) %>%
  # Largest counts first; equal counts are ordered by Clinical.Significance.
  arrange(desc(n), Clinical.Significance) %>%
  ungroup()

# Order the bars by count. A Mutation.Type can appear under several classes
# and factor() does not accept duplicated levels, so keep only the first
# (largest-count) occurrence of each type.
PKD2_MT$Mutation.Type <- factor(
  PKD2_MT$Mutation.Type,
  levels = unique(PKD2_MT$Mutation.Type[order(-PKD2_MT$n)])
)

d <- ggplot(
  PKD2_MT,
  aes(x = Mutation.Type, y = n, fill = Clinical.Significance)
) +
  geom_bar(stat = "identity") +
  labs(
    title = "Total Number of Mutations in PKD2 Gene by Mutation Type",
    y = "# of Mutations",
    x = "Type of Mutation"
  ) +
  coord_flip()
# Pass the plot explicitly instead of relying on ggplot2's last_plot().
ggplotly(d)
## Selecting by n

## Selecting by n