# Update R (uses the installr package)
# Re-installing installr and launching the R updater on every run is slow and
# stalls non-interactive execution, so the package is installed only when it
# is missing and the updater is started only from an interactive session.
if (!requireNamespace("installr", quietly = TRUE)) {
  install.packages("installr")
}
library(installr)
if (interactive()) {
  updater()
}

#CDD - RNA-Seq Volcano Plots (BIHi264-A, 263-A, 272-A, 273-A, 005-A) - Figure 1C----
library(ggplot2)
library(ggrepel)
library(ggpubr) #warning message: ggpubr wurde unter R Version 4.1.3 erstellt
library(readxl)
library(writexl)
library(tidyverse) # includes ggplot2, for data visualisation. dplyr, for data manipulation.
library(RColorBrewer) # for a colourful plot
library(ggrepel) # for nice annotations
library(dplyr)

#Import data for Volcano Plots
setwd("C:/Users/Christian Schinke/Desktop/R-Kurs_Timeline_Project/202501_Revision")

#Read xlsx
# Pooled RNA-Seq contrast of all cell lines
# (BIHi263-A, BIHi264-A, BIHi272-A, BIHi273-A, BIHi005-A).
RNA_All <- read_excel(path = "00_202501_All_lines_contrasts_full.PTX_vs_DMSO_ID0.xlsx")
RNA_All <- as.data.frame(RNA_All)

# Drop genes without an adjusted p-value and put the largest absolute
# fold changes first.
RNA_All <- RNA_All %>%
  filter(!is.na(padj)) %>%
  arrange(desc(abs(log2FoldChange)))

#Define up- and downregulated as well as n.s. genes
# "UP"/"DOWN" require |log2FC| >= 0.5 and padj < 0.05; everything else is "NO".
RNA_All$diffexpr <- "NO"
RNA_All$diffexpr[RNA_All$log2FoldChange >= 0.5 & RNA_All$padj < 0.05] <- "UP"
RNA_All$diffexpr[RNA_All$log2FoldChange <= -0.5 & RNA_All$padj < 0.05] <- "DOWN"

#Label genes
# Only differentially expressed genes get a text label; the rest stay NA.
RNA_All$Labels <- NA
RNA_All$Labels[RNA_All$diffexpr != "NO"] <- RNA_All$symbol[RNA_All$diffexpr != "NO"]

#VolcanoPlots
#All_Cell_Lines
# The colours are named by class. An unnamed vector is matched by position to
# the classes that happen to be present, so a data set without e.g. "DOWN"
# genes would silently draw "NO" in blue and "UP" in black.
volcano_colors <- c(DOWN = "darkblue", NO = "black", UP = "darkred")

volcano_all <- ggplot(
  data = RNA_All,
  aes(x = log2FoldChange, y = -log10(padj), col = diffexpr, label = Labels)
) +
  geom_point(size = 2, alpha = 0.8) +
  geom_text_repel(max.overlaps = 15, size = 4, force = 1.2) + # spread labels with ggrepel
  scale_color_manual(values = volcano_colors) +
  geom_vline(xintercept = 0, col = "black", lty = 2) +
  geom_hline(yintercept = -log10(0.05), col = "black", lty = 4) + # padj = 0.05 cut-off
  xlim(-2.5, 5) +
  theme(text = element_text(size = 12))

print(volcano_all)

# Pass the plot explicitly instead of relying on the last plot drawn.
ggsave("Lines263-A_264-A_272-A_273_A_005-A_Volcano_p0.05.tiff",
       plot = volcano_all, width = 8, height = 8, dpi = 600)

#CDD - Venn-Diagram and intersect tables of all Cell Lines BIHi263-A, 264-A, 272-A, 273-A, 005-A - Figure 3 ----

#Load library
library(readxl)
library(writexl)
library(tidyverse)
library(ggVennDiagram)
library(gplots)

# Input for the Venn diagram: one contrast table per cell line
setwd("C:/Users/Christian Schinke/Desktop/R-Kurs_Timeline_Project/202501_Revision")

RNA_All <- read_excel(path = "00_202501_All_lines_contrasts_full.PTX_vs_DMSO_ID0.xlsx")

# Read one contrast workbook as a plain data frame and drop rows whose
# adjusted p-value is missing.
read_contrast_table <- function(xlsx_file) {
  contrast <- as.data.frame(read_excel(path = xlsx_file))
  filter(contrast, !is.na(padj))
}

RNA_263A <- read_contrast_table("contrasts_full.Paclitaxel_263a_ID7.xlsx") # BIHi263-A
RNA_264A <- read_contrast_table("05_contrasts_full.48h_PTX_vs_DMSO_ID5.xlsx") # BIHi264-A
RNA_272A <- read_contrast_table("contrasts_full.Paclitaxel_272a_ID8.xlsx") # BIHi272-A
RNA_273A <- read_contrast_table("contrasts_full.Paclitaxel_273a_ID6.xlsx") # BIHi273-A
RNA_005A <- read_contrast_table("contrasts_full.PTX_vs_VEH_ID0_005a.xlsx") # BIHi005-A

# Keep rows with padj < 0.05 and a non-zero log2 fold change. Rows where the
# condition evaluates to NA are dropped, exactly as subset() would do.
keep_significant_rows <- function(contrast) {
  hit <- contrast$padj < 0.05 & abs(contrast$log2FoldChange) > 0
  contrast[hit & !is.na(hit), , drop = FALSE]
}

V263a <- keep_significant_rows(RNA_263A)
V264a <- keep_significant_rows(RNA_264A)
V272a <- keep_significant_rows(RNA_272A)
V273a <- keep_significant_rows(RNA_273A)
V005a <- keep_significant_rows(RNA_005A)

# Five-set Venn diagram of the significant gene IDs
venn_sets <- list(
  V263a$gene_id,
  V264a$gene_id,
  V272a$gene_id,
  V273a$gene_id,
  V005a$gene_id
)
venn_names <- c("BIHi263-A","BIHi264-A", "BIHi272-A", "BIHi273-A", "BIHi005-A")

ggVennDiagram(
  venn_sets,
  category.names = venn_names,
  label = "count",       # show counts only, no percentages
  label_geom = "label",
  label_alpha = 0,
  label_size = 6,
  label_color = "black",
  set_color = "black",
  set_size = 3.5,
  edge_lty = "solid",
  edge_size = 0.2
) +
  ggplot2::scale_fill_gradient(low = "beige", high = "#94D2BD")

# Saves the diagram drawn above
ggsave("Venn_Diagram_263a_264a_272a_273a_005-a_p0.05.tiff", width = 8, height = 8, dpi = 600)

#Create table with intersects
# Load necessary libraries
library(dplyr)
library(purrr)
library(writexl)

# One set of significant gene IDs per cell line. Each set is de-duplicated so
# that a gene listed twice within one line cannot inflate its overlap count
# (a count above 5 would also drop it from the "all lines" sheet below).
gene_lists <- lapply(
  list(
    "BIHi263-A" = V263a$gene_id,
    "BIHi264-A" = V264a$gene_id,
    "BIHi272-A" = V272a$gene_id,
    "BIHi273-A" = V273a$gene_id,
    "BIHi005-A" = V005a$gene_id
  ),
  unique
)

# Count in how many cell lines each gene is significant
gene_counts <- table(unlist(gene_lists, use.names = FALSE))

# Convert to a data frame, highest overlap first. gene_id is kept as character
# (not factor) so that it joins cleanly with the character IDs in RNA_All.
gene_overlap_df <- as.data.frame(gene_counts, stringsAsFactors = FALSE) %>%
  dplyr::rename(gene_id = Var1, overlap_count = Freq) %>%
  arrange(desc(overlap_count))

# Lookup table mapping gene_id to symbol and pooled log2FC.
# dplyr::select is spelled out because attaching org.Hs.eg.db (below) masks
# select() with the AnnotationDbi version when this section is re-run.
lookup_table <- RNA_All %>%
  dplyr::select(gene_id, symbol, log2FoldChange) %>%
  distinct()

# Merge with annotated data
annotated_overlap <- left_join(gene_overlap_df, lookup_table, by = "gene_id")

# Merge full gene names with the dataset.
# `gene_annotations` is not created anywhere in this script. If it is not
# already present in the session, build it from org.Hs.eg.db.
# NOTE(review): the fallback produces the columns `external_gene_name` and
# `gene_name`; confirm this matches the table that was used originally.
if (!exists("gene_annotations")) {
  known_symbols <- unique(annotated_overlap$symbol[!is.na(annotated_overlap$symbol)])
  full_names <- tryCatch(
    AnnotationDbi::mapIds(
      org.Hs.eg.db::org.Hs.eg.db,
      keys = known_symbols,
      column = "GENENAME",
      keytype = "SYMBOL",
      multiVals = "first"
    ),
    error = function(e) {
      warning("Could not look up full gene names: ", conditionMessage(e), call. = FALSE)
      setNames(rep(NA_character_, length(known_symbols)), known_symbols)
    }
  )
  gene_annotations <- data.frame(
    external_gene_name = known_symbols,
    gene_name = as.character(unname(full_names[known_symbols])),
    stringsAsFactors = FALSE
  )
}
annotated_overlap <- left_join(annotated_overlap, gene_annotations, by = c("symbol" = "external_gene_name"))

# Filter genes by overlap count
overlap_2 <- annotated_overlap %>% filter(overlap_count >= 2)
overlap_3 <- annotated_overlap %>% filter(overlap_count >= 3)
overlap_4 <- annotated_overlap %>% filter(overlap_count >= 4)
overlap_all <- annotated_overlap %>% filter(overlap_count == 5)  # Present in all cell lines

# Save results to an Excel file, one sheet per overlap level
write_xlsx(list(
  "Overlap_2+" = overlap_2,
  "Overlap_3+" = overlap_3,
  "Overlap_4+" = overlap_4,
  "Overlap_All" = overlap_all
), "202501_Gene_Overlap_Analysis_263a_264a_272a_273a_005a.xlsx")

# Print summary
print(annotated_overlap)

library(org.Hs.eg.db)

#CDD - Discordance-Concordance (Disco-) Plots: 005a, 263a, 264a, 272a, 273a - Suppl. Figures----
setwd("C:/Users/Christian Schinke/Desktop/R-Kurs_Timeline_Project/202501_Revision/")

# Read one contrast workbook and return it as a tibble (unfiltered).
load_contrast_tibble <- function(xlsx_file) {
  as_tibble(as.data.frame(read_excel(path = xlsx_file)))
}

RNA264A <- load_contrast_tibble("05_contrasts_full.48h_PTX_vs_DMSO_ID5.xlsx")
RNA263A <- load_contrast_tibble("contrasts_full.Paclitaxel_263a_ID7.xlsx")
RNA272A <- load_contrast_tibble("contrasts_full.Paclitaxel_272a_ID8.xlsx")
RNA273A <- load_contrast_tibble("contrasts_full.Paclitaxel_273a_ID6.xlsx")
RNA005A <- load_contrast_tibble("contrasts_full.PTX_vs_VEH_ID0_005a.xlsx")

# Join two contrast tables on gene_id (columns of `reference` get the suffix
# .x, those of `other` the suffix .y) and add the disco score:
# log2FC.x * log2FC.y * |log10(padj.x) + log10(padj.y)|
add_disco_score <- function(reference, other) {
  paired <- left_join(reference, other, by = "gene_id")
  mutate(
    paired,
    disco = log2FoldChange.x * log2FoldChange.y * abs(log10(padj.x) + log10(padj.y))
  )
}

#show disco-values
discovalues_005a_vs_264a <- add_disco_score(RNA005A, RNA264A) # BIHi005-A and BIHi264-A
discovalues_005a_vs_263a <- add_disco_score(RNA005A, RNA263A) # BIHi005-A and BIHi263-A
discovalues_005a_vs_272a <- add_disco_score(RNA005A, RNA272A) # BIHi005-A and BIHi272-A
discovalues_005a_vs_273a <- add_disco_score(RNA005A, RNA273A) # BIHi005-A and BIHi273-A
discovalues_263a_vs_264a <- add_disco_score(RNA263A, RNA264A) # BIHi263-A and BIHi264-A
discovalues_272a_vs_273a <- add_disco_score(RNA272A, RNA273A) # BIHi272-A and BIHi273-A
discovalues_263a_vs_272a <- add_disco_score(RNA263A, RNA272A) # BIHi263-A and BIHi272-A

# Reduce a disco table to the columns of interest and sort it by descending
# disco score.
sort_by_disco <- function(disco_table) {
  disco_table %>%
    dplyr::select(gene_id, disco, symbol.x, symbol.y, log2FoldChange.x, log2FoldChange.y, padj.x, padj.y) %>%
    arrange(desc(disco))
}

# Keep only genes whose adjusted p-value is below 0.05 in both cell lines.
keep_padj_below_0.05 <- function(sorted_table) {
  sorted_table %>%
    filter(padj.x < 0.05, padj.y < 0.05)
}

# Sorted disco tables.
# The 005-A vs 263-A table was missing although it is written to disk and
# used for the correlation below, so the script stopped with
# "object not found".
discovalues_005a_vs_264a_sorted <- sort_by_disco(discovalues_005a_vs_264a) # 005-A and 264-A
discovalues_005a_vs_263a_sorted <- sort_by_disco(discovalues_005a_vs_263a) # 005-A and 263-A
discovalues_005a_vs_272a_sorted <- sort_by_disco(discovalues_005a_vs_272a) # 005-A and 272-A
discovalues_272a_vs_273a_sorted <- sort_by_disco(discovalues_272a_vs_273a) # 272-A and 273-A
discovalues_263a_vs_272a_sorted <- sort_by_disco(discovalues_263a_vs_272a) # 263-A and 272-A

# Subset of data with padj < 0.05 in both lines
discovalues_005a_vs_264a_padj_smaller_0.05 <- keep_padj_below_0.05(discovalues_005a_vs_264a_sorted)
discovalues_005a_vs_263a_padj_smaller_0.05 <- keep_padj_below_0.05(discovalues_005a_vs_263a_sorted)
discovalues_005a_vs_272a_padj_smaller_0.05 <- keep_padj_below_0.05(discovalues_005a_vs_272a_sorted)
discovalues_272a_vs_273a_padj_smaller_0.05 <- keep_padj_below_0.05(discovalues_272a_vs_273a_sorted)
discovalues_263a_vs_272a_padj_smaller_0.05 <- keep_padj_below_0.05(discovalues_263a_vs_272a_sorted)

# Export the sorted tables
write_xlsx(discovalues_005a_vs_264a_sorted, path = "C:/Users/Christian Schinke/Desktop/R-Kurs_Timeline_Project/202501_Revision/discovalues_005a_vs_264a_sorted.xlsx")
write_xlsx(discovalues_005a_vs_263a_sorted, path = "C:/Users/Christian Schinke/Desktop/R-Kurs_Timeline_Project/202501_Revision/discovalues_005a_vs_263a_sorted.xlsx")
write_xlsx(discovalues_272a_vs_273a_sorted, path = "C:/Users/Christian Schinke/Desktop/R-Kurs_Timeline_Project/202501_Revision/discovalues_272a_vs_273a_sorted.xlsx")
write_xlsx(discovalues_263a_vs_272a_sorted, path = "C:/Users/Christian Schinke/Desktop/R-Kurs_Timeline_Project/202501_Revision/discovalues_263a_vs_272a_sorted.xlsx")

# For every pair below, cor.test() is run twice on the log2 fold changes:
# once on all joined genes and once on the padj < 0.05 subset. The p-value
# and the estimate (Pearson r, the cor.test default) are stored separately.

#Pearson correlation of the log2FCs between BIHi005-A and BIHi264-A
correlation_log2FC_005a_264a <- cor.test(
  discovalues_005a_vs_264a$log2FoldChange.x,
  discovalues_005a_vs_264a$log2FoldChange.y
)
print(correlation_log2FC_005a_264a)
p_value_corr_005a_264a <- correlation_log2FC_005a_264a$p.value
pearson_r_corr_005a_264a <- correlation_log2FC_005a_264a$estimate

correlation_log2FC_005a_264a_p_smaller_0.05 <- cor.test(
  discovalues_005a_vs_264a_padj_smaller_0.05$log2FoldChange.x,
  discovalues_005a_vs_264a_padj_smaller_0.05$log2FoldChange.y
)
print(correlation_log2FC_005a_264a_p_smaller_0.05)
p_value_corr_005a_264a_p_smaller_0.05 <- correlation_log2FC_005a_264a_p_smaller_0.05$p.value
pearson_r_corr_005a_264a_p_smaller_0.05 <- correlation_log2FC_005a_264a_p_smaller_0.05$estimate

#Pearson correlation of the log2FCs between BIHi005-A and BIHi263-A
correlation_log2FC_005a_263a <- cor.test(
  discovalues_005a_vs_263a$log2FoldChange.x,
  discovalues_005a_vs_263a$log2FoldChange.y
)
print(correlation_log2FC_005a_263a)
p_value_corr_005a_263a <- correlation_log2FC_005a_263a$p.value
pearson_r_corr_005a_263a <- correlation_log2FC_005a_263a$estimate

correlation_log2FC_005a_263a_p_smaller_0.05 <- cor.test(
  discovalues_005a_vs_263a_padj_smaller_0.05$log2FoldChange.x,
  discovalues_005a_vs_263a_padj_smaller_0.05$log2FoldChange.y
)
print(correlation_log2FC_005a_263a_p_smaller_0.05)
p_value_corr_005a_263a_p_smaller_0.05 <- correlation_log2FC_005a_263a_p_smaller_0.05$p.value
pearson_r_corr_005a_263a_p_smaller_0.05 <- correlation_log2FC_005a_263a_p_smaller_0.05$estimate

#Pearson correlation of the log2FCs between BIHi272-A and BIHi273-A
correlation_log2FC_272a_273a <- cor.test(
  discovalues_272a_vs_273a$log2FoldChange.x,
  discovalues_272a_vs_273a$log2FoldChange.y
)
print(correlation_log2FC_272a_273a)
p_value_corr_272a_273a <- correlation_log2FC_272a_273a$p.value
pearson_r_corr_272a_273a <- correlation_log2FC_272a_273a$estimate

correlation_log2FC_272a_273a_p_smaller_0.05 <- cor.test(
  discovalues_272a_vs_273a_padj_smaller_0.05$log2FoldChange.x,
  discovalues_272a_vs_273a_padj_smaller_0.05$log2FoldChange.y
)
print(correlation_log2FC_272a_273a_p_smaller_0.05)
p_value_corr_272a_273a_p_smaller_0.05 <- correlation_log2FC_272a_273a_p_smaller_0.05$p.value
pearson_r_corr_272a_273a_p_smaller_0.05 <- correlation_log2FC_272a_273a_p_smaller_0.05$estimate

#Pearson correlation of the log2FCs between BIHi263-A and BIHi272-A
correlation_log2FC_263a_272a <- cor.test(
  discovalues_263a_vs_272a$log2FoldChange.x,
  discovalues_263a_vs_272a$log2FoldChange.y
)
print(correlation_log2FC_263a_272a)
p_value_corr_263a_272a <- correlation_log2FC_263a_272a$p.value
pearson_r_corr_263a_272a <- correlation_log2FC_263a_272a$estimate

correlation_log2FC_263a_272a_p_smaller_0.05 <- cor.test(
  discovalues_263a_vs_272a_padj_smaller_0.05$log2FoldChange.x,
  discovalues_263a_vs_272a_padj_smaller_0.05$log2FoldChange.y
)
print(correlation_log2FC_263a_272a_p_smaller_0.05)
p_value_corr_263a_272a_p_smaller_0.05 <- correlation_log2FC_263a_272a_p_smaller_0.05$p.value
pearson_r_corr_263a_272a_p_smaller_0.05 <- correlation_log2FC_263a_272a_p_smaller_0.05$estimate

# Disco plots (simple version with labels, no correlation coefficient)
#
# make_disco_plot() draws the log2 fold changes of two cell lines against
# each other, coloured by disco score.
#   x_data, y_data     contrast tables of the line on the x- and y-axis
#   labelled_genes     rows whose symbol.x is written next to the point
#   significant_genes  rows (padj < 0.05 in both lines) used for the second
#                      regression line
#   x_line, y_line     cell-line names used in the axis titles
# Returns the ggplot object.
make_disco_plot <- function(x_data, y_data, labelled_genes, significant_genes,
                            x_line, y_line) {
  x_data %>%
    left_join(y_data, by = "gene_id") %>%
    mutate(disco = log2FoldChange.x * log2FoldChange.y * abs(log10(padj.x) + log10(padj.y))) %>%
    ggplot(aes(x = log2FoldChange.x, y = log2FoldChange.y, col = disco, label = symbol.x)) +
    geom_point(size = 2, alpha = 0.8) +
    geom_text_repel(
      data = labelled_genes,
      aes(label = symbol.x),
      nudge_x = 0.5,
      nudge_y = 0.5,
      size = 5,
      max.overlaps = Inf,
      force = 2,  # Adjust force to spread labels out
      force_pull = 0.5,  # Controls how far labels can move
      box.padding = 0.6,  # Increase space around text
      point.padding = 0.5,  # Increase space from points
      min.segment.length = 0  # Ensure all labels have connecting lines
    ) +
    # Linear regression over all genes of the joined table
    geom_smooth(
      method = "lm", level = 0.95, col = alpha("#0A9396", 0.5)) +
    # Linear regression over the genes with padj < 0.05 in both lines only
    geom_smooth(
      data = significant_genes,
      aes(log2FoldChange.x, log2FoldChange.y),
      method = "lm", level = 0.95, col = alpha("#9B2226", 0.5)) +
    geom_vline(xintercept = 0, col = "black", lty = 2) +
    geom_hline(yintercept = 0, col = "black", lty = 4) +
    xlim(-4.5, 6) + ylim(-5, 6) +
    xlab(paste0("log2FoldChange(", x_line, ")")) +
    ylab(paste0("log2FoldChange(", y_line, ")")) +
    scale_color_gradient(low = "#005F73", high = "#9B2226", limits = c(0, 400)) +
    theme(axis.title = element_text(size = 13))
}

# BIHi005-A (x) vs BIHi264-A (y), top 50 disco genes labelled
disco_plot_005a_264a <- make_disco_plot(
  RNA005A, RNA264A,
  labelled_genes = head(discovalues_005a_vs_264a_sorted, 50),
  significant_genes = discovalues_005a_vs_264a_padj_smaller_0.05,
  x_line = "BIHi005-A", y_line = "BIHi264-A"
)
print(disco_plot_005a_264a)
ggsave("Discoplot_264-A_005-A_ylim5_simple_20250717.tiff",
       plot = disco_plot_005a_264a, width = 9, height = 6.5, dpi = 600)

# BIHi005-A (x) vs BIHi272-A (y), top 50 disco genes labelled
disco_plot_005a_272a <- make_disco_plot(
  RNA005A, RNA272A,
  labelled_genes = head(discovalues_005a_vs_272a_sorted, 50),
  significant_genes = discovalues_005a_vs_272a_padj_smaller_0.05,
  x_line = "BIHi005-A", y_line = "BIHi272-A"
)
print(disco_plot_005a_272a)
ggsave("Discoplot_272-A_005-A_ylim5_simple_20250717.tiff",
       plot = disco_plot_005a_272a, width = 9, height = 6.5, dpi = 600)

# BIHi272-A (x) vs BIHi273-A (y), top 50 disco genes labelled
disco_plot_272a_273a <- make_disco_plot(
  RNA272A, RNA273A,
  labelled_genes = head(discovalues_272a_vs_273a_sorted, 50),
  significant_genes = discovalues_272a_vs_273a_padj_smaller_0.05,
  x_line = "BIHi272-A", y_line = "BIHi273-A"
)
print(disco_plot_272a_273a)
# This plot was the only one of the four that was never written to disk.
# NOTE(review): the file name follows the pattern of the sibling plots;
# adjust it if the figure is expected under a different name.
ggsave("Discoplot_272-A_273-A_ylim5_simple_20250717.tiff",
       plot = disco_plot_272a_273a, width = 9, height = 6.5, dpi = 600)

# BIHi263-A (x) vs BIHi272-A (y), top 50 disco genes labelled
disco_plot_263a_272a <- make_disco_plot(
  RNA263A, RNA272A,
  labelled_genes = head(discovalues_263a_vs_272a_sorted, 50),
  significant_genes = discovalues_263a_vs_272a_padj_smaller_0.05,
  x_line = "BIHi263-A", y_line = "BIHi272-A"
)
print(disco_plot_263a_272a)
ggsave("Discoplot_263-A_272-A_ylim5_simple_20250717.tiff",
       plot = disco_plot_263a_272a, width = 9, height = 6.5, dpi = 600)

#CDD - Plot Genes of Interest - Figure 2----
# Load necessary libraries
library(ggplot2)
library(ggrepel)
library(readxl)
library(tidyverse)

# Start from an empty workspace: every object created by the preceding
# sections is removed here.
rm(list = ls())

# Folder holding the input workbook; the figures are written here as well
setwd("C:/Users/Christian Schinke/Desktop/R-Kurs_Timeline_Project/202501_Revision")

# Gene list to plot, as a plain data frame
Genes_of_interest <- as.data.frame(read_excel("Genes_of_interest_one_list.xlsx"))

# Keep the genes in the order in which they first appear in the sheet
Genes_of_interest$symbol <- factor(Genes_of_interest$symbol, levels = unique(Genes_of_interest$symbol))
current_levels <- levels(Genes_of_interest$symbol)

# Genes that are followed by an empty row (visual gap) on the y-axis
genes_to_gap <- c("SESN2", "PPP1R17", "NPFFR2", "THBS2", "HTR7", "SYNPO2L")

# Interleave placeholder levels "gap1", "gap2", ... directly after each of
# those genes; the running count numbers the gaps in order of appearance.
gap_follows <- current_levels %in% genes_to_gap
gap_label <- paste0("gap", cumsum(gap_follows))
new_levels <- unlist(
  lapply(seq_along(current_levels), function(i) {
    c(current_levels[i], if (gap_follows[i]) gap_label[i])
  }),
  use.names = FALSE
)

# Re-level the symbols so the placeholder levels exist on the axis
Genes_of_interest$symbol <- factor(Genes_of_interest$symbol, levels = new_levels)

# --- Plot components shared by both figures --------------------------------

# y-axis: genes top-to-bottom in sheet order; gap levels take up space but
# get no tick label
gene_axis <- function() {
  scale_y_discrete(
    limits = rev(new_levels),
    breaks = rev(new_levels[!grepl("^gap", new_levels)])
  )
}

# log10 colour gradient for the adjusted p-value
padj_colour_scale <- function() {
  scale_color_gradientn(
    colors = c("red", "darkred", "darkblue"),
    trans = "log10",
    limits = c(9e-75, 0.02),
    breaks = c(9e-75, 1e-50, 1e-25, 1e-10, 1e-5, 0.02),
    labels = c("9e-75", "1e-50", "1e-25", "1e-10", "", "0.02")
  )
}

# Point size by the Overlaps column; 0 is shown as "average" in the legend
overlap_size_scale <- function() {
  scale_size_continuous(
    range = c(2, 4),
    limits = c(0, 5),
    breaks = as.numeric(0:5),
    labels = c("average", "1", "2", "3", "4", "5")
  )
}

# Black-and-white theme with dashed vertical grid lines and small text
figure_theme <- function() {
  list(
    theme_bw(),
    theme(
      panel.grid.major.x = element_line(color = "grey90", linetype = "dashed"),
      axis.text = element_text(size = 9),
      axis.title = element_text(size = 10),
      legend.text = element_text(size = 8),
      legend.title = element_text(size = 10)
    )
  )
}

# --- Figure without abundance column ---------------------------------------
p <- ggplot(Genes_of_interest, aes(x = log2FoldChange, y = symbol, color = padj, size = Overlaps)) +
  geom_vline(xintercept = 0, color = "black", linewidth = 0.5) +
  geom_point() +
  scale_x_continuous(limits = c(-1.5, 4.5), breaks = seq(-2, 4, by = 0.5)) +
  gene_axis() +
  labs(x = "Log2FC", y = "", color = "padj", size = "Overlaps") +
  padj_colour_scale() +
  overlap_size_scale() +
  figure_theme()

p

# Save the plot as a TIFF file
ggsave(filename = "Genes_of_interest_plot.tiff", plot = p, device = "tiff", width = 6, height = 12, dpi = 300)

# --- Figure with TPM (from 264-A) ------------------------------------------
p <- ggplot(Genes_of_interest, aes(x = log2FoldChange, y = symbol, color = padj, size = Overlaps)) +
  geom_vline(xintercept = 0, color = "black", linewidth = 0.5) +
  # Main points
  geom_point() +
  # Squares at a fixed x position next to the gene names, filled by AVG_TPM.
  # inherit.aes = FALSE so this layer uses only the aesthetics given here.
  geom_point(aes(x = -1.5, y = symbol, fill = AVG_TPM),
             shape = 22, color = "white", size = 3, inherit.aes = FALSE) +
  scale_x_continuous(limits = c(-1.5, 4.5), breaks = seq(-2.5, 4, by = 0.5)) +
  gene_axis() +
  labs(x = "Log2FC", y = "", color = "padj", size = "Overlaps", fill = "Abundance [tpm]") +
  padj_colour_scale() +
  scale_fill_gradient(
    low = "#94D2BD",
    high = "#EE9B00",
    trans = "log10",
    limits = c(0.08, 249)
  ) +
  overlap_size_scale() +
  # Fixed legend order: padj, overlaps, abundance
  guides(
    color = guide_colorbar(order = 1),
    size = guide_legend(order = 2),
    fill = guide_colorbar(order = 3)
  ) +
  figure_theme()

p

# Save the plot as a TIFF file
ggsave(filename = "Genes_of_interest_plot_with_TPM.tiff", plot = p, device = "tiff", width = 6, height = 12, dpi = 300)

#Merge list of Genes of Interest with RLD to create Figure with abundancy measures
# Load necessary libraries
library(dplyr)
library(readxl)
library(writexl)

# Working directory with the input workbooks
setwd("C:/Users/Christian Schinke/Desktop/R-Kurs_Timeline_Project/202501_Revision")

# Gene list and the DESeq2 rlog (RLD) table, both as plain data frames
Genes_of_interest <- as.data.frame(read_excel("Genes_of_interest_one_list.xlsx"))
RNA_RLD_all <- as.data.frame(read_excel("All_Cell_Lines_DESeq2.all.rld.blind.xlsx"))

# Join key plus the sample columns to carry over. The order below is the
# column order of the exported sheet, so it must not be rearranged.
columns_needed <- c(
  "gene_id",
  "p264_1_Treatment", "p264_2_Treatment", "p264_3_Treatment",
  "p264_1_Control", "p264_2_Control", "p264_3_Control",
  "p263_4_Treatment", "p263_5_Treatment", "p263_6_Treatment",
  "p273_7_Treatment", "p273_8_Treatment", "p273_9_Treatment",
  "p272_10_Treatment", "p272_11_Treatment", "p272_12_Treatment",
  "p263_4_Control", "p263_5_Control", "p263_6_Control",
  "p273_7_Control", "p273_8_Control", "p273_9_Control",
  "p272_10_Control", "p272_11_Control", "p272_12_Control",
  "p005_13_Control",
  "p005_13_Treatment", "p005_14_Treatment", "p005_15_Treatment",
  "p005_14_Control", "p005_15_Control"
)

# Restrict the RLD table to those columns
RNA_RLD_all_subset <- RNA_RLD_all[columns_needed]

# Left join: every row of Genes_of_interest is kept, RLD values are attached
# where the gene_id matches
Genes_of_interest_merged_RLD <- left_join(Genes_of_interest, RNA_RLD_all_subset, by = "gene_id")

# Quick look at the first rows
head(Genes_of_interest_merged_RLD)

# Export the merged table
write_xlsx(Genes_of_interest_merged_RLD, "Genes_of_interest_merged_RLD.xlsx")

#Plot Genes of interest with RLD
# Load necessary libraries
library(ggplot2)
library(ggrepel)
library(readxl)
library(tidyverse)

# Wipe the workspace; all objects from the sections above are removed.
rm(list = ls())

# Folder with the merged workbook; figures are written here too
setwd("C:/Users/Christian Schinke/Desktop/R-Kurs_Timeline_Project/202501_Revision")

# Gene list with RLD columns, as a plain data frame
Genes_of_interest <- as.data.frame(read_excel("Genes_of_interest_merged_RLD.xlsx"))

# Preserve the sheet order of the gene symbols
Genes_of_interest$symbol <- factor(Genes_of_interest$symbol, levels = unique(Genes_of_interest$symbol))
current_levels <- levels(Genes_of_interest$symbol)

# A blank row is inserted on the y-axis after each of these genes
genes_to_gap <- c("SESN2", "PPP1R17", "NPFFR2", "THBS2", "HTR7", "SYNPO2L")

# Build the level vector gene by gene, appending "gap<k>" after the k-th
# gene that needs a gap.
needs_gap <- current_levels %in% genes_to_gap
new_levels <- unlist(
  Map(
    function(gene, add_gap, gap_id) {
      if (add_gap) c(gene, paste0("gap", gap_id)) else gene
    },
    current_levels, needs_gap, cumsum(needs_gap)
  ),
  use.names = FALSE
)

# Apply the extended levels to the symbol column
Genes_of_interest$symbol <- factor(Genes_of_interest$symbol, levels = new_levels)

# Scales, labels and theme shared by both figures, returned in the order in
# which they are added to the plot. NULL entries are skipped by ggplot2.
#   x_breaks      tick positions on the x-axis
#   axis_labels   labs() object with axis and legend titles
#   fill_scale    optional fill scale for the abundance squares
#   legend_order  optional guides() object fixing the legend order
figure_layers <- function(x_breaks, axis_labels, fill_scale = NULL, legend_order = NULL) {
  list(
    scale_x_continuous(limits = c(-1.5, 4.5), breaks = x_breaks),
    # Genes run top-to-bottom; gap levels occupy a row but get no label
    scale_y_discrete(
      limits = rev(new_levels),
      breaks = rev(new_levels[!grepl("^gap", new_levels)])
    ),
    axis_labels,
    scale_color_gradientn(
      colors = c("red", "darkred", "darkblue"),
      trans = "log10",
      limits = c(9e-75, 0.02),
      breaks = c(9e-75, 1e-50, 1e-25, 1e-10, 1e-5, 0.02),
      labels = c("9e-75", "1e-50", "1e-25", "1e-10", "", "0.02")
    ),
    fill_scale,
    scale_size_continuous(
      range = c(2, 4),
      limits = c(0, 5),
      breaks = as.numeric(0:5),
      labels = c("average", "1", "2", "3", "4", "5")
    ),
    legend_order,
    theme_bw(),
    theme(
      panel.grid.major.x = element_line(color = "grey90", linetype = "dashed"),
      axis.text = element_text(size = 9),
      axis.title = element_text(size = 10),
      legend.text = element_text(size = 8),
      legend.title = element_text(size = 10)
    )
  )
}

# Core of both figures: zero line plus one point per gene
dot_plot_core <- ggplot(Genes_of_interest, aes(x = log2FoldChange, y = symbol, color = padj, size = Overlaps)) +
  geom_vline(xintercept = 0, color = "black", linewidth = 0.5) +
  geom_point()

# Figure without abundance squares
p <- dot_plot_core +
  figure_layers(
    x_breaks = seq(-2, 4, by = 0.5),
    axis_labels = labs(x = "Log2FC", y = "", color = "padj", size = "Overlaps")
  )

p

# Save the plot as a TIFF file
ggsave(filename = "Genes_of_interest_plot.tiff", plot = p, device = "tiff", width = 6, height = 12, dpi = 300)

#Plot with AVG_RLD
# NOTE(review): AVG_RLD is read from the workbook; the merge step earlier in
# this file does not create such a column - confirm the sheet provides it.
p <- dot_plot_core +
  # Squares at a fixed x position next to the gene names, filled by AVG_RLD.
  # inherit.aes = FALSE so only the aesthetics given here apply to this layer.
  geom_point(aes(x = -1.5, y = symbol, fill = AVG_RLD),
             shape = 22, color = "white", size = 3, inherit.aes = FALSE) +
  figure_layers(
    x_breaks = seq(-2.5, 4, by = 0.5),
    axis_labels = labs(x = "Log2FC", y = "", color = "padj", size = "Overlaps", fill = "Abundance [RLD]"),
    fill_scale = scale_fill_gradient(
      low = "#94D2BD",
      high = "#EE9B00",
      limits = c(5, 13)
    ),
    # Fixed legend order: padj, overlaps, abundance
    legend_order = guides(
      color = guide_colorbar(order = 1),
      size = guide_legend(order = 2),
      fill = guide_colorbar(order = 3)
    )
  )

p

# Save the plot as a TIFF file - according to Figure 2
ggsave(filename = "Genes_of_interest_plot_with_RLD.tiff", plot = p, device = "tiff", width = 6, height = 12, dpi = 300)

#CDD - Correlative analyses, Suppl. Fig. 3----
# --- 0) Libraries
library(readxl)
library(dplyr)
library(stringr)
library(WGCNA)
library(igraph)
library(pheatmap)
library(scales)
# optional
# library(STRINGdb); library(minet)

# Global settings for the WGCNA-based analysis: keep strings as characters and
# allow WGCNA to use multiple threads.
options(stringsAsFactors = FALSE)
allowWGCNAThreads()

# --- 1) Load data
df <- readxl::read_excel("Merged_All_Cell_lines_DEGs_RLD_Log2FC_Padj_overlap1_log2FC0.5.xlsx") |> as.data.frame()

# Identify gene-id + symbol columns present in your sheet.
# intersect(...)[1] yields NA (still length 1) when none of the candidate names
# is present, so "not found" has to be detected with is.na(), not length().
gene_id_col <- intersect(c("gene_id","GeneID","ensembl_id","EnsemblID","id"), colnames(df))[1]
symbol_col  <- intersect(c("symbol","Symbol","symbol.x","gene_symbol","hgnc_symbol"), colnames(df))[1]
# A gene-id column is mandatory; the symbol column is optional (NA if absent).
stopifnot(!is.na(gene_id_col))

# Expression columns (genes x samples), e.g. p123_1_Treatment / Control
expr_cols <- grep("^p\\d+_\\d+_(Treatment|Control)$", colnames(df), value = TRUE)
stopifnot(length(expr_cols) > 1)

# Expression table: one row per gene (row names = gene ids), one column per sample.
expr <- df[, expr_cols, drop = FALSE]
rownames(expr) <- df[[gene_id_col]]

# --- 2) Filter low-variance genes (tune threshold if needed)
# Per-gene variance across samples. Genes whose variance is NA are dropped
# explicitly; an NA in the logical row index would otherwise create all-NA rows.
gene_var <- apply(expr, 1, function(x) var(as.numeric(x), na.rm = TRUE))
expr <- expr[!is.na(gene_var) & gene_var > 0.1, , drop = FALSE]

# --- 3) Map IDs -> Symbols with robust fallbacks
# Build a symbol vector the same length/order as rownames(expr).
# symbol_col was picked with intersect(...)[1], so a missing symbol column is
# NA rather than NULL; both cases are treated as "no symbols available".
if (!is.null(symbol_col) && !is.na(symbol_col)) {
  id_to_symbol <- df[, c(gene_id_col, symbol_col)] |>
    distinct(.data[[gene_id_col]], .keep_all = TRUE) |>
    tibble::deframe()  # named vector: names = gene_id, values = symbol
  sym <- id_to_symbol[rownames(expr)]
} else {
  sym <- rep(NA_character_, nrow(expr))
}

# Fallback: if symbol is NA or empty, use the gene_id of THAT row.
# The replacement is subset with the same mask so ids stay aligned with rows.
missing_sym <- is.na(sym) | sym == ""
sym[missing_sym] <- rownames(expr)[missing_sym]

# --- 4) Collapse duplicate symbols: keep one representative row per symbol.
# method = "MaxMean" selects, within each symbol group, the row with the
# highest mean expression.
collapse <- collapseRows(
  datET    = expr,
  rowGroup = sym,
  rowID    = rownames(expr),
  method   = "MaxMean"
)
expr_sym <- collapse$datETcollapsed
# rownames(expr_sym) are now unique symbols

# --- 5) Correlation network (symbols)
# Gene-by-gene Pearson correlation across samples (genes are rows of expr_sym,
# hence the transpose); missing values are handled pairwise.
cor_mat <- cor(t(expr_sym), method = "pearson", use = "pairwise.complete.obs")

# Pick "top" genes by connectivity: rank by the row sum of |r| and keep at
# most 60. head() also behaves correctly when cor_mat has no rows.
kTotal <- rowSums(abs(cor_mat), na.rm = TRUE)
top_genes <- head(names(sort(kTotal, decreasing = TRUE)), 60)

# Build edge list for |r| > thr. Only the upper triangle is used so that each
# gene pair appears once and self-correlations are excluded.
thr <- 0.80
upper_idx <- which(abs(cor_mat) > thr & upper.tri(cor_mat), arr.ind = TRUE)
interactions <- data.frame(
  gene1 = rownames(cor_mat)[upper_idx[, 1]],
  gene2 = colnames(cor_mat)[upper_idx[, 2]],
  correlation = cor_mat[upper_idx],
  row.names = NULL
)

# --- Quick graph (size = Overlaps, color = Log2FC, edge width & color = correlation strength/sign, with transparency)
# symbol_col is chosen with intersect(...)[1], so a missing symbol column shows
# up as NA. Node attributes can only be looked up by symbol if it really exists.
have_symbols <- !is.null(symbol_col) && !is.na(symbol_col)

if (nrow(interactions) > 0) {
  # Extra columns of `interactions` (here: correlation) become edge attributes.
  g <- graph_from_data_frame(interactions, directed = FALSE)

  # Focus on the top genes
  keep <- intersect(V(g)$name, top_genes)
  if (length(keep) >= 2) {
    g <- induced_subgraph(g, vids = keep)
  }

  # --- Map Overlaps to nodes (lookup by symbol; first row per symbol wins)
  overlap_col <- intersect(c("Overlaps", "overlaps", "overlap", "Overlap"), colnames(df))[1]
  if (have_symbols && !is.na(overlap_col)) {
    overlap_map <- df[, c(symbol_col, overlap_col)] |>
      dplyr::distinct(.data[[symbol_col]], .keep_all = TRUE) |>
      tibble::deframe()
    V(g)$overlap <- overlap_map[V(g)$name]
  } else {
    V(g)$overlap <- NA_real_
  }
  # Nodes without a value get the median of the known values; if nothing is
  # known at all, fall back to 1 so that node sizes are never NA.
  overlap_fill <- stats::median(V(g)$overlap, na.rm = TRUE)
  if (is.na(overlap_fill)) {
    overlap_fill <- 1
  }
  V(g)$overlap[is.na(V(g)$overlap)] <- overlap_fill

  # Larger node sizes, still proportional to overlaps
  V(g)$size <- scales::rescale(V(g)$overlap, to = c(4, 10))

  # --- Map log2FoldChange to nodes (missing values are treated as 0)
  logfc_col <- intersect(c("log2FoldChange", "log2FC", "Log2FC", "logFC"), colnames(df))[1]
  if (have_symbols && !is.na(logfc_col)) {
    logfc_map <- df[, c(symbol_col, logfc_col)] |>
      dplyr::distinct(.data[[symbol_col]], .keep_all = TRUE) |>
      tibble::deframe()
    V(g)$log2FC <- logfc_map[V(g)$name]
  } else {
    V(g)$log2FC <- 0
  }
  V(g)$log2FC[is.na(V(g)$log2FC)] <- 0

  # --- Smooth diverging color scale with the near-white colour at 0.
  # Values are clamped to the range of `breaks` before being mapped.
  breaks <- c(-2, -1, 0, 2, 4.5)
  cols   <- c("darkblue", "blue", "#f7f7f7", "darkred", "red")
  rng    <- range(breaks)
  pal    <- scales::gradient_n_pal(cols, values = scales::rescale(breaks, to = c(0, 1), from = rng))

  val <- V(g)$log2FC
  val[is.na(val)] <- 0
  val <- pmin(pmax(val, rng[1]), rng[2])
  V(g)$color <- pal(scales::rescale(val, to = c(0, 1), from = rng))

  # --- Labels
  V(g)$label         <- V(g)$name
  V(g)$label.cex     <- 1.0
  V(g)$label.family  <- "sans"
  V(g)$label.color   <- "black"

  # --- Edge width and color ~ correlation, with transparency
  if (!is.null(E(g)$correlation)) {
    E(g)$width <- pmax(1, 3 * abs(E(g)$correlation))
    base_colors <- ifelse(E(g)$correlation > 0, "#f8766d", "#00b0f6")  # light red/blue
    E(g)$color <- adjustcolor(base_colors, alpha.f = 0.5)  # 50% transparency
  } else {
    E(g)$width <- 1
    E(g)$color <- adjustcolor("grey80", alpha.f = 0.5)
  }

  # --- Build layout; the seed makes the layout reproducible.
  set.seed(7)
  layout_fr <- layout_with_fr(
    g,
    niter      = 3000,
    start.temp = sqrt(vcount(g)) * 3,
    grid       = "nogrid",
    weights    = NULL
  )

  # Normalize coordinates to [-1, 1]; use norm_coords() when this igraph
  # version exports it, otherwise the older layout.norm().
  if ("norm_coords" %in% ls("package:igraph")) {
    layout_fr <- norm_coords(layout_fr, ymin = -1, ymax = 1, xmin = -1, xmax = 1)
  } else {
    layout_fr <- layout.norm(layout_fr, ymin = -1, ymax = 1, xmin = -1, xmax = 1)
  }

  layout_fr <- layout_fr * 1.2  # global expansion

  # --- Plot network (vertex/edge appearance comes from the attributes set above)
  plot(
    g,
    layout = layout_fr,
    vertex.frame.color = NA,
    main = ""
  )

  # --- Legend (1) log2FC color scale
  legend_vals <- breaks
  legend_cols <- pal(scales::rescale(legend_vals, to = c(0, 1), from = rng))
  legend("bottomleft",
         legend = sprintf("%+.1f", legend_vals),
         fill   = legend_cols,
         title  = "log2FC",
         border = NA,
         bty    = "n",
         cex    = 1.0)

  # --- Legend (2) Overlaps (fixed 1-5)
  # NOTE(review): node sizes above are rescaled over the observed overlap
  # range, while this legend assumes the range 1-5; the two only agree when
  # the plotted nodes span 1 to 5 - confirm for the final figure.
  overlap_vals <- 1:5
  size_seq     <- scales::rescale(overlap_vals, to = c(4, 10))
  legend("topright",
         legend = overlap_vals,
         pt.cex = size_seq / 3.5,
         pch = 21,
         pt.bg = "grey80",
         col = "black",
         title = "Overlaps",
         bty = "n",
         cex = 1.0)

  # --- Legend (3) Edge thickness = correlation strength and color = sign
  # NOTE(review): legend lines use alpha 0.75, the plotted edges alpha 0.5.
  corr_vals_signed <- c(-1.0, -0.9, -0.8, 0.8, 0.9, 1.0)
  line_widths <- pmax(1, 3 * abs(corr_vals_signed))  # thickness by |r|
  legend_cols <- ifelse(corr_vals_signed < 0,
                        adjustcolor("#00b0f6", alpha.f = 0.75),  # light blue (negative)
                        adjustcolor("#f8766d", alpha.f = 0.75))  # light red (positive)

  legend("bottomright",
         legend = sprintf("%+.1f", corr_vals_signed),
         lwd = line_widths,
         col = legend_cols,
         title = "Correlation (r)",
         bty = "n",
         cex = 1.0)

  # Save the current plot. This lives inside the if-branch so that nothing is
  # copied (and no device is closed) when no network was drawn.
  dev.copy(tiff, "Gene_correlation_network_60_genes.tiff", width = 10, height = 8, units = "in", res = 300, compression = "lzw")
  dev.off() # This is Suppl. Figure 3b.
} else {
  message("No gene pair exceeds the correlation threshold; network plot skipped.")
}

# --- 6) Heatmap on top symbols by connectivity
# Correlations among the genes in top_genes, with rows and columns clustered
# by complete linkage and all names shown.
pheatmap(
  cor_mat[top_genes, top_genes],
  clustering_method = "complete",
  show_rownames     = TRUE,
  show_colnames     = TRUE,
  fontsize_row      = 9,
  fontsize_col      = 9
)

# Copy the plot on the active device into a TIFF file, then close that file.
dev.copy(
  tiff,
  "Heatmap_Top60_genes.tiff",
  width       = 8,
  height      = 8,
  units       = "in",
  res         = 300,
  compression = "lzw"
)
dev.off() # This is Suppl. Figure 3a.



#CDD - RNA-Seq Volcano-Plots for different time points - Figure 3----
# load libraries
library(ggplot2)
library(ggrepel)
library(ggpubr) #warning message: ggpubr wurde unter R Version 4.1.3 erstellt
library(readxl)
library(writexl)
library(tidyverse) # includes ggplot2, for data visualisation. dplyr, for data manipulation.
library(RColorBrewer) # for a colourful plot
library(ggrepel) # for nice annotations
library(dplyr)

# Load data
setwd("C:/Users/Christian Schinke/Desktop/R-Kurs_Timeline_Project/202501_Revision")

# Each contrast table goes through the same four steps: read the spreadsheet,
# convert to a plain data.frame, drop rows without an adjusted p-value, and
# sort by absolute log2 fold change (largest first).

# 2 h
RNA2h <- read_excel(path = "01_contrasts_full.2h_PTX_vs_DMSO_ID1.xlsx") %>%
  as.data.frame() %>%
  filter(!is.na(padj)) %>%
  arrange(desc(abs(log2FoldChange)))

# 6 h
RNA6h <- read_excel(path = "02_contrasts_full.6h_PTX_vs_DMSO_ID2.xlsx") %>%
  as.data.frame() %>%
  filter(!is.na(padj)) %>%
  arrange(desc(abs(log2FoldChange)))

# 12 h
RNA12h <- read_excel(path = "03_contrast.12h_PTX_vs_DMSO_ID3.andTPMs.xls") %>%
  as.data.frame() %>%
  filter(!is.na(padj)) %>%
  arrange(desc(abs(log2FoldChange)))

# 24 h
RNA24h <- read_excel(path = "04_contrast.24h_PTX_vs_DMSO_ID4.andTPMs.xls") %>%
  as.data.frame() %>%
  filter(!is.na(padj)) %>%
  arrange(desc(abs(log2FoldChange)))

# 48 h
RNA48h <- read_excel(path = "05_contrasts_full.48h_PTX_vs_DMSO_ID5.xlsx") %>%
  as.data.frame() %>%
  filter(!is.na(padj)) %>%
  arrange(desc(abs(log2FoldChange)))

# 48 h, cell line 005a
RNA48h_005a <- read_excel(path = "005a_100nM_PTX_48h_Andranik_full_List.xlsx") %>%
  as.data.frame() %>%
  filter(!is.na(padj)) %>%
  arrange(desc(abs(log2FoldChange)))

# 72 h
RNA72h <- read_excel(path = "06_contrasts_full.72h_PTX_vs_DMSO_ID0.xlsx") %>%
  as.data.frame() %>%
  filter(!is.na(padj)) %>%
  arrange(desc(abs(log2FoldChange)))

# 5 d
RNA5d <- read_excel(path = "07_contrast.5d_PTX_vs_DMSO_ID6.andTPMs.xls") %>%
  as.data.frame() %>%
  filter(!is.na(padj)) %>%
  arrange(desc(abs(log2FoldChange)))

# Define up- and downregulated as well as non-significant genes, and label
# the regulated ones. The same rule is applied to every contrast table, so it
# lives in one helper instead of eight copies.

# Add the columns `diffexpr` and `Labels` to one contrast table.
#   df           data frame with columns log2FoldChange, padj and symbol
#   lfc_cutoff   absolute log2 fold-change threshold (inclusive)
#   padj_cutoff  adjusted p-value threshold (exclusive)
# Returns df with
#   diffexpr  "UP"   if log2FoldChange >=  lfc_cutoff and padj < padj_cutoff,
#             "DOWN" if log2FoldChange <= -lfc_cutoff and padj < padj_cutoff,
#             "NO"   otherwise
#   Labels    the gene symbol for UP/DOWN rows, NA for all other rows
annotate_diffexpr <- function(df, lfc_cutoff = 0.5, padj_cutoff = 0.05) {
  significant <- df$padj < padj_cutoff

  df$diffexpr <- "NO"
  df$diffexpr[df$log2FoldChange >= lfc_cutoff & significant] <- "UP"
  df$diffexpr[df$log2FoldChange <= -lfc_cutoff & significant] <- "DOWN"

  # Only differentially expressed genes get a label.
  df$Labels <- NA
  is_deg <- df$diffexpr != "NO"
  df$Labels[is_deg] <- df$symbol[is_deg]

  df
}

RNA2h       <- annotate_diffexpr(RNA2h)
RNA6h       <- annotate_diffexpr(RNA6h)
RNA12h      <- annotate_diffexpr(RNA12h)
RNA24h      <- annotate_diffexpr(RNA24h)
RNA48h      <- annotate_diffexpr(RNA48h)
RNA72h      <- annotate_diffexpr(RNA72h)
RNA5d       <- annotate_diffexpr(RNA5d)
RNA48h_005a <- annotate_diffexpr(RNA48h_005a)

# VolcanoPlots
# One volcano plot per time point: x = log2 fold change, y = -log10(padj),
# colour = diffexpr class, text = Labels. The dashed vertical line marks
# log2FC = 0, the dot-dashed horizontal line marks padj = 0.05.
# Each ggsave() call writes the plot created immediately before it.

# Colours are bound to the diffexpr classes BY NAME. Unnamed values are matched
# to the classes in alphabetical order of those that occur, so the colours
# would shift whenever a time point has no DOWN (or no UP) genes.
volcano_colours <- c(DOWN = "darkblue", NO = "black", UP = "darkred")

#2h
ggplot(data = RNA2h, aes(x = log2FoldChange, y = -log10(padj), col = diffexpr, label = Labels)) +
  geom_point(size = 2, alpha = 0.8) +
  geom_text_repel(max.overlaps = 12, size = 4.5) + # organize labels nicely with ggrepel
  scale_color_manual(values = volcano_colours) +
  geom_vline(xintercept = 0, col = "black", lty = 2) +
  geom_hline(yintercept = -log10(0.05), col = "black", lty = 4) +
  xlim(-5, 5) +
  theme(text = element_text(size = 14))

ggsave("02h_Volcano_p0.05.tiff", width = 8, height = 8, dpi = 600)

#2h - bigger letters
ggplot(data = RNA2h, aes(x = log2FoldChange, y = -log10(padj), col = diffexpr, label = Labels)) +
  geom_point(size = 6, alpha = 0.5) +
  geom_text_repel(max.overlaps = 12, size = 4.5) + # organize labels nicely with ggrepel
  scale_color_manual(values = volcano_colours) +
  geom_vline(xintercept = 0, col = "black", lty = 2) +
  geom_hline(yintercept = -log10(0.05), col = "black", lty = 4) +
  xlim(-5, 5) +
  theme(text = element_text(size = 25))

ggsave("02h_Volcano_p0.05_big_letters.tiff", width = 8, height = 7, dpi = 600)

#6h
ggplot(data = RNA6h, aes(x = log2FoldChange, y = -log10(padj), col = diffexpr, label = Labels)) +
  geom_point(size = 6, alpha = 0.5) +
  geom_text_repel(max.overlaps = 12, size = 4.5) + # organize labels nicely with ggrepel
  scale_color_manual(values = volcano_colours) +
  geom_vline(xintercept = 0, col = "black", lty = 2) +
  geom_hline(yintercept = -log10(0.05), col = "black", lty = 4) +
  xlim(-5, 5) +
  theme(text = element_text(size = 25))

ggsave("06h_Volcano_p0.05_big_Letters.tiff", width = 8, height = 7, dpi = 600)

#12h
ggplot(data = RNA12h, aes(x = log2FoldChange, y = -log10(padj), col = diffexpr, label = Labels)) +
  geom_point(size = 2, alpha = 0.8) +
  geom_text_repel(max.overlaps = 12, size = 5.5) +
  scale_color_manual(values = volcano_colours) +
  geom_vline(xintercept = 0, col = "black", lty = 2) +
  geom_hline(yintercept = -log10(0.05), col = "black", lty = 4) +
  xlim(-5, 5) +
  theme(
    text = element_text(size = 14),
    axis.title = element_text(size = 18),
    axis.text = element_text(size = 16),
    legend.text = element_text(size = 14),
    legend.title = element_text(size = 15)
  )

ggsave("12h_Volcano_p0.05_bigletters2_scaled.tiff", width = 7.5, height = 7, dpi = 600)

#24h
ggplot(data = RNA24h, aes(x = log2FoldChange, y = -log10(padj), col = diffexpr, label = Labels)) +
  geom_point(size = 2, alpha = 0.8) +
  geom_text_repel(max.overlaps = 13, size = 5.5) +
  scale_color_manual(values = volcano_colours) +
  geom_vline(xintercept = 0, col = "black", lty = 2) +
  geom_hline(yintercept = -log10(0.05), col = "black", lty = 4) +
  xlim(-5, 5) +
  theme(
    text = element_text(size = 14),
    axis.title = element_text(size = 18),
    axis.text = element_text(size = 16),
    legend.text = element_text(size = 14),
    legend.title = element_text(size = 15)
  )

ggsave("24h_Volcano_p0.05_bigletters_scaled.tiff", width = 8, height = 7, dpi = 600)

#48h
ggplot(data = RNA48h, aes(x = log2FoldChange, y = -log10(padj), col = diffexpr, label = Labels)) +
  geom_point(size = 2, alpha = 0.8) +
  geom_text_repel(max.overlaps = 12, size = 5) +
  scale_color_manual(values = volcano_colours) +
  geom_vline(xintercept = 0, col = "black", lty = 2) +
  geom_hline(yintercept = -log10(0.05), col = "black", lty = 4) +
  xlim(-5, 5) +
  theme(
    text = element_text(size = 14),
    axis.title = element_text(size = 18),
    axis.text = element_text(size = 16),
    legend.text = element_text(size = 14),
    legend.title = element_text(size = 15)
  )

ggsave("48h_Volcano_p0.05_bigletters_for_revision_scaled.tiff", width = 8, height = 7, dpi = 600)

#72h
ggplot(data = RNA72h, aes(x = log2FoldChange, y = -log10(padj), col = diffexpr, label = Labels)) +
  geom_point(size = 2, alpha = 0.8) +
  geom_text_repel(max.overlaps = 13, size = 5.5) + # organize labels nicely with ggrepel
  scale_color_manual(values = volcano_colours) +
  geom_vline(xintercept = 0, col = "black", lty = 2) +
  geom_hline(yintercept = -log10(0.05), col = "black", lty = 4) +
  xlim(-5, 5) +
  theme(
    text = element_text(size = 14),
    axis.title = element_text(size = 18),
    axis.text = element_text(size = 16),
    legend.text = element_text(size = 14),
    legend.title = element_text(size = 15)
  )

ggsave("72h_Volcano_p0.05_bigletters_scaled.tiff", width = 8, height = 7, dpi = 600)

#5d
ggplot(data = RNA5d, aes(x = log2FoldChange, y = -log10(padj), col = diffexpr, label = Labels)) +
  geom_point(size = 2, alpha = 0.8) +
  geom_text_repel(max.overlaps = 12, size = 5.5) + # organize labels nicely with ggrepel
  scale_color_manual(values = volcano_colours) +
  geom_vline(xintercept = 0, col = "black", lty = 2) +
  geom_hline(yintercept = -log10(0.05), col = "black", lty = 4) +
  xlim(-5, 5) +
  theme(text = element_text(size = 14))

ggsave("5d_Volcano_p0.05_bigletters.tiff", width = 8, height = 7, dpi = 600)

#Other Cell line- 005a, treated with Paclitaxel 100nM 48h
ggplot(data = RNA48h_005a, aes(x = log2FoldChange, y = -log10(padj), col = diffexpr, label = Labels)) +
  geom_point(size = 2, alpha = 0.8) +
  geom_text_repel(max.overlaps = 12, size = 4.5) + # organize labels nicely with ggrepel
  scale_color_manual(values = volcano_colours) +
  geom_vline(xintercept = 0, col = "black", lty = 2) +
  geom_hline(yintercept = -log10(0.05), col = "black", lty = 4) +
  xlim(-5, 5) +
  theme(text = element_text(size = 14))

ggsave("005a_48h_Volcano_p0.05.tiff", width = 8, height = 8, dpi = 600)

# Volcanoplots with 35 most differently expressed genes
# All points are drawn, but text labels are restricted to the first 35 rows of
# each table. The tables were sorted by |log2FoldChange| (descending) when they
# were loaded, so these are the 35 largest absolute fold changes; rows whose
# Labels entry is NA get no text.

# Colours are bound to the diffexpr classes BY NAME. Unnamed values are matched
# to the classes in alphabetical order of those that occur, so the colours
# would shift whenever a table has no DOWN (or no UP) genes.
volcano_colours <- c(DOWN = "darkblue", NO = "black", UP = "darkred")

#12h - TOP35
ggplot(data = RNA12h, aes(x = log2FoldChange, y = -log10(padj), col = diffexpr, label = Labels)) +
  geom_point(size = 2, alpha = 0.8) +
  geom_text_repel(data = head(RNA12h, 35), max.overlaps = Inf, size = 4.5) + # organize labels nicely with ggrepel
  scale_color_manual(values = volcano_colours) +
  geom_vline(xintercept = 0, col = "black", lty = 2) +
  geom_hline(yintercept = -log10(0.05), col = "black", lty = 4) +
  xlim(-5, 5) +
  theme(text = element_text(size = 14))

ggsave("12h_Volcano_p0.05_TOP35.tiff", width = 8, height = 8, dpi = 600)

#24h - TOP35
ggplot(data = RNA24h, aes(x = log2FoldChange, y = -log10(padj), col = diffexpr, label = Labels)) +
  geom_point(size = 2, alpha = 0.8) +
  geom_text_repel(data = head(RNA24h, 35), max.overlaps = Inf, size = 4.5) + # organize labels nicely with ggrepel
  scale_color_manual(values = volcano_colours) +
  geom_vline(xintercept = 0, col = "black", lty = 2) +
  geom_hline(yintercept = -log10(0.05), col = "black", lty = 4) +
  xlim(-5, 5) +
  theme(text = element_text(size = 14))

ggsave("24h_Volcano_p0.05_TOP35.tiff", width = 8, height = 8, dpi = 600)

#48h - TOP35
ggplot(data = RNA48h, aes(x = log2FoldChange, y = -log10(padj), col = diffexpr, label = Labels)) +
  geom_point(size = 2, alpha = 0.8) +
  geom_text_repel(data = head(RNA48h, 35), max.overlaps = Inf, size = 4.5) + # organize labels nicely with ggrepel
  scale_color_manual(values = volcano_colours) +
  geom_vline(xintercept = 0, col = "black", lty = 2) +
  geom_hline(yintercept = -log10(0.05), col = "black", lty = 4) +
  xlim(-5, 5) +
  theme(text = element_text(size = 14))

ggsave("48h_Volcano_p0.05_TOP35.tiff", width = 8, height = 8, dpi = 600)

#72h - TOP35
# Labels are limited to the top 35 rows here as well, like every other TOP35 plot.
ggplot(data = RNA72h, aes(x = log2FoldChange, y = -log10(padj), col = diffexpr, label = Labels)) +
  geom_point(size = 2, alpha = 0.8) +
  geom_text_repel(data = head(RNA72h, 35), max.overlaps = Inf, size = 4.5) + # organize labels nicely with ggrepel
  scale_color_manual(values = volcano_colours) +
  geom_vline(xintercept = 0, col = "black", lty = 2) +
  geom_hline(yintercept = -log10(0.05), col = "black", lty = 4) +
  xlim(-5, 5) +
  theme(text = element_text(size = 14))

ggsave("72h_Volcano_p0.05_TOP35.tiff", width = 8, height = 8, dpi = 600)

#5d - TOP35
ggplot(data = RNA5d, aes(x = log2FoldChange, y = -log10(padj), col = diffexpr, label = Labels)) +
  geom_point(size = 2, alpha = 0.8) +
  geom_text_repel(data = head(RNA5d, 35), max.overlaps = Inf, size = 4.5) + # organize labels nicely with ggrepel
  scale_color_manual(values = volcano_colours) +
  geom_vline(xintercept = 0, col = "black", lty = 2) +
  geom_hline(yintercept = -log10(0.05), col = "black", lty = 4) +
  xlim(-5, 5) +
  theme(text = element_text(size = 14))

ggsave("5d_Volcano_p0.05_TOP35.tiff", width = 8, height = 8, dpi = 600)

#Other Cell line- 005a, treated with Paclitaxel 100nM 48h - TOP35
ggplot(data = RNA48h_005a, aes(x = log2FoldChange, y = -log10(padj), col = diffexpr, label = Labels)) +
  geom_point(size = 2, alpha = 0.8) +
  geom_text_repel(data = head(RNA48h_005a, 35), max.overlaps = Inf, size = 4.5) + # organize labels nicely with ggrepel
  scale_color_manual(values = volcano_colours) +
  geom_vline(xintercept = 0, col = "black", lty = 2) +
  geom_hline(yintercept = -log10(0.05), col = "black", lty = 4) +
  xlim(-5, 5) +
  theme(text = element_text(size = 14))

# This plot was previously never written to disk; the file name follows the
# pattern of the other TOP35 exports.
ggsave("005a_48h_Volcano_p0.05_TOP35.tiff", width = 8, height = 8, dpi = 600)

#CDD - Lipidomic analyses - Rest see also GraphPadFile----
library(limma)
library(readxl)
library(writexl)
library(ggrepel)
library(tidyverse)
library(dplyr)
library(gplots)

# Prepare data
setwd("C:/Users/Christian Schinke/Desktop/R-Kurs_Timeline_Project/RNA_Seq_Timeline_Andranik")
d <- read_xlsx("20220902_data_matrix_15pc_pool_cut_off_valid_lipids_samples_only.xlsx")
d <- as.data.frame(d)
# NOTE(review): as_lipidomics_experiment() and add_sample_annotation() are not
# provided by any package attached in this section - presumably they come from
# lipidr; confirm that it is loaded before this point.
d <- as_lipidomics_experiment(d)
d <- add_sample_annotation(d, "20220902_data_annotations_15pc_pool_cut_off_valid_lipids_samples_only.csv")

# Volcanoplots
# Read the per-lipid statistics, drop rows without an adjusted p-value and sort
# by absolute log2 fold change (largest first).
lipids48h <- read_excel(path = "20220902_descriptives_15pc_pool_cut_off_valid_lipids_samples_only.xlsx")
lipids48h <- as.data.frame(lipids48h)
lipids48h <- lipids48h %>% filter(!is.na(padj)) # filter out na values
lipids48h <- lipids48h %>% arrange(desc(abs(log2FoldChange)))

# Define up- and downregulated as well as non-significant lipids.
# UP: log2FC >= 0.2, DOWN: log2FC <= -0.2, both only with padj < 0.05.
# The DOWN threshold must be negative; with "<= 0.2" every significant lipid
# with a small positive change would be classified as DOWN.
lipids48h$diffexpr <- "NO"
lipids48h$diffexpr[lipids48h$log2FoldChange >= 0.2 & lipids48h$padj < 0.05] <- "UP"
lipids48h$diffexpr[lipids48h$log2FoldChange <= -0.2 & lipids48h$padj < 0.05] <- "DOWN"

# Label lipids: only UP/DOWN lipids get their name as label, the rest stay NA.
lipids48h$Labels <- NA
lipids48h$Labels[lipids48h$diffexpr != "NO"] <- lipids48h$lipids[lipids48h$diffexpr != "NO"]

# Volcano lipids 48h
# Colours are bound to the classes by name so they do not shift when one of
# the classes is absent from the data.
ggplot(data = lipids48h, aes(x = log2FoldChange, y = -log10(padj), col = diffexpr, label = Labels)) +
  geom_point(size = 2, alpha = 0.8) +
  geom_text_repel(max.overlaps = 18, size = 6) + # organize labels nicely with ggrepel
  scale_color_manual(values = c(DOWN = "#0A9396", NO = "black", UP = "#8D3B72")) +
  geom_vline(xintercept = 0, col = "black", lty = 2) +
  geom_hline(yintercept = -log10(0.05), col = "black", lty = 4) +
  xlim(-5, 5) +
  theme(text = element_text(size = 18))

ggsave("lipids48h.tiff", width = 8, height = 8, dpi = 600)
