1 Introduction

This document guides you through the analysis of the MS data for the PTex HEK293 project. Input data must be provided in the folder inputdata inside the working directory. The required files are:

working directory/
  PTex.Rmd
  inputdata/
    proteinGroups.txt                   # MS data
    RBP table-Table 1.csv               # Gerstberger review (2014) RBPs
    human TFs-Table 1.csv               # Gerstberger review (2014) TFs
    2015-06-09_human_interactomes.csv   # Landthaler human mRNA IC
    preiss_genenames.csv                # Preiss
    Brannan_annotated.txt               # Brannan SONAR annotated RBPs
    Brannan_SONAR_0.79.txt              # Brannan SONAR predicted
    hubstenberger.csv                   # Hubstenberger P-Body Proteins
    RICK_high_conf.csv                  # RICK high confidence
    RICK_low_conf.csv                   # RICK low confidence
    RICK_unique.csv                     # RICK unique
    caric.csv                           # CARIC
    human_proteome_reviewed_291118.list # UniProt SwissProt
    UP000005640_9606_all.fasta.pI.csv   # Isoelectricpointdb

2 Libraries

The following libraries are needed for the analysis. Packages have to be installed prior to loading.

library("dplyr")

Attaching package: ‘dplyr’

The following objects are masked from ‘package:stats’:

    filter, lag

The following objects are masked from ‘package:base’:

    intersect, setdiff, setequal, union
library("limma")
library("reshape2")
library("ggplot2")
library("GGally")

Attaching package: ‘GGally’

The following object is masked from ‘package:dplyr’:

    nasa
library("Peptides")
library("DT")

To produce pairwise scatter plots with correlation information we use the ggpairs() function from the GGally package. Custom theme for pairwise scatter plots:

# Shared theme for the pairwise scatter plots: no legend, no major grid
# lines, no axis ticks and a solid black frame around every panel.
scatter_theme <- theme(
  legend.position = "none",
  panel.grid.major = element_blank(),
  axis.ticks = element_blank(),
  panel.border = element_rect(linetype = "solid", colour = "black", fill = NA)
)
# Wrapper around ggplot2::ggplot() that always appends a manual black/red
# colour scale and fill scale.
ggplot <- function(...) {
  ggplot2::ggplot(...) +
    scale_colour_manual(values = c("black", "red")) +
    scale_fill_manual(values = c("black", "red"))
}
# Replace the `ggplot` binding in the parent environment of the GGally
# namespace with the wrapper above.
# NOTE(review): presumably this makes ggpairs() pick up the black/red scales;
# it depends on GGally internals, so confirm for the installed version.
unlockBinding("ggplot", parent.env(asNamespace("GGally")))
assign("ggplot", ggplot, envir = parent.env(asNamespace("GGally")))

3 Data preparation

The primary MS data are contained in the file proteinGroups.txt.

# Read the primary MS table (tab-separated, first line holds the column names).
primaryMassSpecData <- read.table(
  file = "inputdata/proteinGroups.txt",
  sep = "\t",
  header = TRUE
)

To ease later analyses, character vectors of column names are created.

# Sample identifiers in the fixed order used throughout the analysis:
# fraction (Input, PTex) > condition (NoCl, 0015, 015, 15) > replicate
# (I, II, III). Positions 1-12 are Input, positions 13-24 are PTex.
sample_ids <- paste(
  rep(c("Input", "PTex"), each = 12),
  rep(rep(c("NoCl", "0015", "015", "15"), each = 3), times = 2),
  rep(c("I", "II", "III"), times = 8),
  sep = "_"
)
# Column names of the raw intensity, LFQ intensity and iBAQ columns.
intensities <- paste0("Intensity.", sample_ids)
LFQ <- paste0("LFQ.intensity.", sample_ids)
IBAQ <- paste0("iBAQ.", sample_ids)
rm(sample_ids)

3.1 First filter

In a first filtering step, reverse hits and hits only identified by site are removed.

# Drop reverse hits and hits only identified by site (flagged with "+").
# `%in%` is used instead of `!=` because it never returns NA: a row whose flag
# is NA is kept as-is instead of being turned into an all-NA row by the
# logical row index.
MassSpecData <- primaryMassSpecData[!(primaryMassSpecData$Reverse %in% "+"), ]
MassSpecData <- MassSpecData[!(MassSpecData$Only.identified.by.site %in% "+"), ]

3.2 Transformation

All intensities are log2 transformed.

# log2-transform every raw intensity, iBAQ and LFQ column in place.
MassSpecData[c(intensities, IBAQ, LFQ)] <- lapply(
  MassSpecData[c(intensities, IBAQ, LFQ)],
  log2
)

Infinity values that resulted from log2 transformation are converted to NA:

# Replace infinite values in the transformed columns by NA, column by column.
MassSpecData[c(intensities, IBAQ, LFQ)] <- lapply(
  MassSpecData[c(intensities, IBAQ, LFQ)],
  function(column) {
    column[is.infinite(column)] <- NA
    column
  }
)

3.3 Trypsin normalisation factor

The LFQ intensities are normalised under the assumption that most intensities won’t change. This is not true for PTex data, so that the normalisation has to be corrected. Correction is done by using trypsin (added to all samples in the same amount) to calculate a normalisation factor.

# Trypsin-based normalisation factors.
#
# The trypsin row (CON__P00761) is used as a spike-in reference: for every
# condition the mean log2 LFQ intensity of the three replicates is computed
# for Input and for PTex, and the factor is the difference PTex - Input.
#
# Fix: the replicates were previously passed as separate arguments,
# mean(a, b, c). mean() interprets its 2nd and 3rd arguments as `trim` and
# `na.rm`, so only the first replicate entered the result. The replicates are
# now combined into one vector before averaging.
# NOTE(review): this changes the normalisation factors and therefore all
# downstream fold changes and p-values. Re-check the reported row counts and
# the manual-curation step that addresses rows by position.
trypsin <- MassSpecData[MassSpecData$Majority.protein.IDs == "CON__P00761", ]
if (nrow(trypsin) == 0) {
  stop("Trypsin (CON__P00761) not found in MassSpecData; ",
       "normalisation factors cannot be computed.", call. = FALSE)
}
# Input samples: LFQ[1:12], three replicates per condition.
trp_mean_Inp_NoCl <- mean(unlist(trypsin[, LFQ[1:3]]))
trp_mean_Inp_0015 <- mean(unlist(trypsin[, LFQ[4:6]]))
trp_mean_Inp_015 <- mean(unlist(trypsin[, LFQ[7:9]]))
trp_mean_Inp_15 <- mean(unlist(trypsin[, LFQ[10:12]]))
# PTex samples: LFQ[13:24], three replicates per condition.
trp_mean_PTex_NoCl <- mean(unlist(trypsin[, LFQ[13:15]]))
trp_mean_PTex_0015 <- mean(unlist(trypsin[, LFQ[16:18]]))
trp_mean_PTex_015 <- mean(unlist(trypsin[, LFQ[19:21]]))
trp_mean_PTex_15 <- mean(unlist(trypsin[, LFQ[22:24]]))
# Normalisation factors on the log2 scale (PTex minus Input).
nf_NoCl <- trp_mean_PTex_NoCl - trp_mean_Inp_NoCl
nf_0015Cl <- trp_mean_PTex_0015 - trp_mean_Inp_0015
nf_015Cl <- trp_mean_PTex_015 - trp_mean_Inp_015
nf_15Cl <- trp_mean_PTex_15 - trp_mean_Inp_15

Tidy up:

# Remove the intermediate trypsin means and the trypsin row itself.
rm(list = c(ls(pattern = "trp_mean_"), "trypsin"))

3.4 Contaminant removal

After calculation of the normalisation factors the contaminants are removed from the data set.

# Drop potential contaminants (flagged with "+"). `%in%` never returns NA, so
# rows with a missing flag are kept instead of becoming all-NA rows.
MassSpecData <- MassSpecData[!(MassSpecData$Potential.contaminant %in% "+"), ]

3.5 Trypsin normalisation

The normalisation factors are applied by subtraction, since the data were already log transformed.

# Subtract the condition-specific normalisation factor from the PTex LFQ
# columns (LFQ[13:24]); each factor applies to the three replicates of its
# condition, in the order NoCl, 0015, 015, 15.
norm_MassSpecData <- MassSpecData
norm_MassSpecData[LFQ[13:24]] <- Map(
  function(column, offset) column - offset,
  norm_MassSpecData[LFQ[13:24]],
  rep(c(nf_NoCl, nf_0015Cl, nf_015Cl, nf_15Cl), each = 3)
)

Tidy up:

# Remove every object whose name contains "nf_" (the normalisation factors).
rm(list = grep("nf_", ls(), value = TRUE))

3.6 Incomplete observation removal

We only want to consider those proteins, that were found in all replicates of all experiments.

# Keep only proteins quantified in all three replicates of every experiment.
# For each experiment the matching LFQ columns are selected and rows with
# fewer than three non-missing values are dropped.
# NOTE(review): "noCL" here vs. "NoCl" in the column names - this relies on
# matches() ignoring case by default; confirm for the installed dplyr.
for (exp in c("PTex_noCL", "PTex_0015", "PTex_015", "PTex_15",
              "Input_noCL", "Input_0015", "Input_015", "Input_15")) {
  sub_MassSpecData <- select(
    norm_MassSpecData,
    matches(paste0("LFQ.intensity.", exp))
  )
  x <- rowSums(!is.na(sub_MassSpecData)) > 2
  norm_MassSpecData <- norm_MassSpecData[x, ]
}

Tidy up:

# Remove the temporaries left behind by the filtering loop.
rm(list = ls(pattern = "sub_"))
rm(exp, x)

3.7 Plots

3.7.1 Intensities Scatterplots

## Input noCL
input_nocl_scatter <- ggpairs(norm_MassSpecData[,LFQ[1:3]], 
        lower = list(continuous = wrap("cor", size = 7)), 
        upper = list(continuous = "smooth"),
        diag = list(continuous = "densityDiag")) + scatter_theme
## Input 0.015
input_0015_scatter <- ggpairs(norm_MassSpecData[,LFQ[4:6]], 
        lower = list(continuous = wrap("cor", size = 7)), 
        upper = list(continuous = "smooth"),
        diag = list(continuous = "densityDiag")) + scatter_theme
## Input 0.15
input_015_scatter <- ggpairs(norm_MassSpecData[,LFQ[7:9]], 
        lower = list(continuous = wrap("cor", size = 7)), 
        upper = list(continuous = "smooth"),
        diag = list(continuous = "densityDiag"))  + scatter_theme
## Input 1.5
input_15_scatter <- ggpairs(norm_MassSpecData[,LFQ[10:12]], 
        lower = list(continuous = wrap("cor", size = 7)), 
        upper = list(continuous = "smooth"),
        diag = list(continuous = "densityDiag"))  + scatter_theme
## PTex noCL
ptex_nocl_scatter <- ggpairs(norm_MassSpecData[,LFQ[13:15]], 
        lower = list(continuous = wrap("cor", size = 7)), 
        upper = list(continuous = "smooth"),
        diag = list(continuous = "densityDiag"))  + scatter_theme
## PTex 0.015
ptex_0015_scatter <- ggpairs(norm_MassSpecData[,LFQ[16:18]], 
        lower = list(continuous = wrap("cor", size = 7)), 
        upper = list(continuous = "smooth"),
        diag = list(continuous = "densityDiag"))  + scatter_theme
## PTex 0.15
ptex_015_scatter <- ggpairs(norm_MassSpecData[,LFQ[19:21]], 
        lower = list(continuous = wrap("cor", size = 7)), 
        upper = list(continuous = "smooth"),
        diag = list(continuous = "densityDiag"))  + scatter_theme
## PTex 1.5
ptex_15_scatter <- ggpairs(norm_MassSpecData[,LFQ[22:24]],
        lower = list(continuous = wrap("cor", size = 7)), 
        upper = list(continuous = "smooth"),
        diag = list(continuous = "densityDiag"))  + scatter_theme
input_nocl_scatter

input_0015_scatter

input_015_scatter

input_15_scatter

ptex_nocl_scatter

ptex_0015_scatter

ptex_015_scatter

ptex_15_scatter

Tidy up:

# Remove every object whose name contains "_scatter".
rm(list = grep("_scatter", ls(), value = TRUE))

4 Enrichment analysis

After preparation the fold changes between respective Input/PTex pairs can be calculated and a moderated t-test with following Benjamini-Hochberg correction is used to determine the false discovery rate (FDR).

4.1 Fold Changes

The fold changes are calculated by subtracting the log-transformed LFQ intensity values of the non-crosslinked (-CL) from the crosslinked (+CL) samples.

# Per-replicate log2 fold changes: crosslinked condition minus the
# non-crosslinked (NoCl) sample of the same fraction and replicate.
# Columns are appended in the order Input 0015/015/15, then PTex 0015/015/15,
# each with rep1..rep3.
norm_MassSpecData <- local({
  annotated <- norm_MassSpecData
  replicate_tags <- c("I", "II", "III")
  for (fraction in c("Input", "PTex")) {
    for (dose in c("0015", "015", "15")) {
      for (r in seq_along(replicate_tags)) {
        treated <- paste0("LFQ.intensity.", fraction, "_", dose, "_",
                          replicate_tags[r])
        control <- paste0("LFQ.intensity.", fraction, "_NoCl_",
                          replicate_tags[r])
        target <- paste0("FC.", fraction, "_", dose, ".rep", r)
        annotated[[target]] <- annotated[[treated]] - annotated[[control]]
      }
    }
  }
  annotated
})

Mean fold changes are calculated.

# Mean log2 fold change over the three replicates of each condition.
# The replicate columns are selected by name rather than by position
# (previously 251:268), so the result no longer depends on how many columns
# the input table happens to have.
norm_MassSpecData$FC.Input_0015.mean <- rowMeans(norm_MassSpecData[, paste0("FC.Input_0015.rep", 1:3)])
norm_MassSpecData$FC.Input_015.mean <- rowMeans(norm_MassSpecData[, paste0("FC.Input_015.rep", 1:3)])
norm_MassSpecData$FC.Input_15.mean <- rowMeans(norm_MassSpecData[, paste0("FC.Input_15.rep", 1:3)])
norm_MassSpecData$FC.PTex_0015.mean <- rowMeans(norm_MassSpecData[, paste0("FC.PTex_0015.rep", 1:3)])
norm_MassSpecData$FC.PTex_015.mean <- rowMeans(norm_MassSpecData[, paste0("FC.PTex_015.rep", 1:3)])
norm_MassSpecData$FC.PTex_15.mean <- rowMeans(norm_MassSpecData[, paste0("FC.PTex_15.rep", 1:3)])

4.2 Moderated t-test and Benjamini-Hochberg correction

A moderated t-test including Benjamini-Hochberg p-value correction is carried out on the fold changes.

# For every condition: fit limma's linear model to the three per-replicate
# fold-change columns, moderate it with eBayes(), store the p-values and the
# Benjamini-Hochberg adjusted p-values as pval.<condition> / padj.<condition>.
for (condition in c("Input_0015", "Input_015", "Input_15",
                    "PTex_0015", "PTex_015", "PTex_15")) {
  pval <- eBayes(lmFit(
    norm_MassSpecData[, grep(paste0("FC.", condition, ".rep"),
                             names(norm_MassSpecData))]
  ))
  norm_MassSpecData[[paste0("pval.", condition)]] <- pval$p.value
  norm_MassSpecData[[paste0("padj.", condition)]] <- p.adjust(
    norm_MassSpecData[[paste0("pval.", condition)]],
    method = "BH"
  )
}
rm(condition)

Tidy up:

# Remove the last fitted model object.
rm(list = "pval")

4.3 Mean intensities

Mean intensities (LFQ and iBAQ) are calculated.

### LFQ
norm_MassSpecData$LFQ.intensity.Input_noCl_mean <- rowMeans(norm_MassSpecData[,c("LFQ.intensity.Input_NoCl_I", "LFQ.intensity.Input_NoCl_II", "LFQ.intensity.Input_NoCl_III")], na.rm = FALSE)
norm_MassSpecData$LFQ.intensity.Input_0015_mean <- rowMeans(norm_MassSpecData[,c("LFQ.intensity.Input_0015_I", "LFQ.intensity.Input_0015_II", "LFQ.intensity.Input_0015_III")], na.rm = FALSE)
norm_MassSpecData$LFQ.intensity.Input_015_mean <- rowMeans(norm_MassSpecData[,c("LFQ.intensity.Input_015_I", "LFQ.intensity.Input_015_II", "LFQ.intensity.Input_015_III")], na.rm = FALSE)
norm_MassSpecData$LFQ.intensity.Input_15_mean <- rowMeans(norm_MassSpecData[,c("LFQ.intensity.Input_15_I", "LFQ.intensity.Input_15_II", "LFQ.intensity.Input_15_III")], na.rm = FALSE)
norm_MassSpecData$LFQ.intensity.PTex_noCl_mean <- rowMeans(norm_MassSpecData[,c("LFQ.intensity.PTex_NoCl_I", "LFQ.intensity.PTex_NoCl_II", "LFQ.intensity.PTex_NoCl_III")], na.rm = FALSE)
norm_MassSpecData$LFQ.intensity.PTex_0015_mean <- rowMeans(norm_MassSpecData[,c("LFQ.intensity.PTex_0015_I", "LFQ.intensity.PTex_0015_II", "LFQ.intensity.PTex_0015_III")], na.rm = FALSE)
norm_MassSpecData$LFQ.intensity.PTex_015_mean <- rowMeans(norm_MassSpecData[,c("LFQ.intensity.PTex_015_I", "LFQ.intensity.PTex_015_II", "LFQ.intensity.PTex_015_III")], na.rm = FALSE)
norm_MassSpecData$LFQ.intensity.PTex_15_mean <- rowMeans(norm_MassSpecData[,c("LFQ.intensity.PTex_15_I", "LFQ.intensity.PTex_15_II", "LFQ.intensity.PTex_15_III")], na.rm = FALSE)
### iBAQ
norm_MassSpecData$iBAQ.Input_noCl_mean <- rowMeans(norm_MassSpecData[,c("iBAQ.Input_NoCl_I", "iBAQ.Input_NoCl_II", "iBAQ.Input_NoCl_III")], na.rm = FALSE)
norm_MassSpecData$iBAQ.Input_0015_mean <- rowMeans(norm_MassSpecData[,c("iBAQ.Input_0015_I", "iBAQ.Input_0015_II", "iBAQ.Input_0015_III")], na.rm = FALSE)
norm_MassSpecData$iBAQ.Input_015_mean <- rowMeans(norm_MassSpecData[,c("iBAQ.Input_015_I", "iBAQ.Input_015_II", "iBAQ.Input_015_III")], na.rm = FALSE)
norm_MassSpecData$iBAQ.Input_15_mean <- rowMeans(norm_MassSpecData[,c("iBAQ.Input_15_I", "iBAQ.Input_15_II", "iBAQ.Input_15_III")], na.rm = FALSE)
norm_MassSpecData$iBAQ.PTex_noCl_mean <- rowMeans(norm_MassSpecData[,c("iBAQ.PTex_NoCl_I", "iBAQ.PTex_NoCl_II", "iBAQ.PTex_NoCl_III")], na.rm = FALSE)
norm_MassSpecData$iBAQ.PTex_0015_mean <- rowMeans(norm_MassSpecData[,c("iBAQ.PTex_0015_I", "iBAQ.PTex_0015_II", "iBAQ.PTex_0015_III")], na.rm = FALSE)
norm_MassSpecData$iBAQ.PTex_015_mean <- rowMeans(norm_MassSpecData[,c("iBAQ.PTex_015_I", "iBAQ.PTex_015_II", "iBAQ.PTex_015_III")], na.rm = FALSE)
norm_MassSpecData$iBAQ.PTex_15_mean <- rowMeans(norm_MassSpecData[,c("iBAQ.PTex_15_I", "iBAQ.PTex_15_II", "iBAQ.PTex_15_III")], na.rm = FALSE)

4.4 FDR and FC filter subset

We want to additionally exclude proteins that are not significantly enriched in all 3 libraries and those, which are significantly depleted:

# Keep proteins with an adjusted p-value <= 0.01 in all three PTex conditions.
sig_norm_MassSpecData <- norm_MassSpecData[
  with(
    norm_MassSpecData,
    padj.PTex_0015 <= 0.01 & padj.PTex_015 <= 0.01 & padj.PTex_15 <= 0.01
  ),
]
dim(sig_norm_MassSpecData)
[1] 3042  302
# Of those, keep only proteins with a positive mean fold change in all three
# PTex conditions.
sig_norm_MassSpecData <- sig_norm_MassSpecData[
  with(
    sig_norm_MassSpecData,
    FC.PTex_0015.mean > 0 & FC.PTex_015.mean > 0 & FC.PTex_15.mean > 0
  ),
]
dim(sig_norm_MassSpecData)
[1] 3037  302

4.5 Plots

4.5.1 Enrichments

# Names of the 18 per-replicate fold-change columns, selected by name pattern
# instead of by position (previously 251:268), in their order of creation.
FC <- grep("^FC\\..*\\.rep[0-9]+$", colnames(norm_MassSpecData), value = TRUE)
# Scatter matrix of the three replicate fold changes of one condition,
# coloured by significance (adjusted p-value <= 0.01).
#   condition: condition tag such as "Input_0015" or "PTex_15"; it selects the
#              columns FC.<condition>.rep1..3 and padj.<condition>.
# The labels are built with ifelse() instead of overwriting a logical vector
# with strings. The helper name contains "_scatter", so the tidy-up step
# removes it together with the plots.
fc_replicate_scatter <- function(condition) {
  fc_columns <- paste0("FC.", condition, ".rep", 1:3)
  padj_co <- ifelse(
    norm_MassSpecData[[paste0("padj.", condition)]] <= 0.01,
    "significant",
    "non-significant"
  )
  ggpairs(
    cbind(norm_MassSpecData[, fc_columns], padj_co = padj_co),
    columns = 1:3,
    mapping = ggplot2::aes(colour = padj_co, alpha = 0.5),
    lower = list(continuous = wrap("cor", size = 5)),
    upper = list(continuous = "smooth"),
    diag = list(continuous = "densityDiag")
  ) + scatter_theme
}
## Input 0.015, 0.15, 1.5
fc_input_0015_scatter <- fc_replicate_scatter("Input_0015")
fc_input_015_scatter <- fc_replicate_scatter("Input_015")
fc_input_15_scatter <- fc_replicate_scatter("Input_15")
## PTex 0.015, 0.15, 1.5
fc_ptex_0015_scatter <- fc_replicate_scatter("PTex_0015")
fc_ptex_015_scatter <- fc_replicate_scatter("PTex_015")
fc_ptex_15_scatter <- fc_replicate_scatter("PTex_15")
## Print the plots
fc_input_0015_scatter
fc_input_015_scatter
fc_input_15_scatter
fc_ptex_0015_scatter
fc_ptex_015_scatter
fc_ptex_15_scatter

4.5.2 Volcano Plots

## Input 0.015
padj_co <- norm_MassSpecData$padj.Input_0015 <= 0.01
padj_co[which(padj_co)] <- "significant"
padj_co[which(padj_co==FALSE)] <- "non-significant"
fc_input_0015_volcano <- ggplot(data=cbind(norm_MassSpecData, padj_co), aes(x=FC.Input_0015.mean, y=-log10(padj.Input_0015), colour = padj_co ) ) +
  geom_point(alpha=0.5, size=1.75) +
#  xlim(c(-6, 6)) + ylim(c(0, 10)) +
  xlab("log2 fold change") + ylab("-log10 adj. p-value")
## Input 0.15
padj_co <- norm_MassSpecData$padj.Input_015 <= 0.01
padj_co[which(padj_co)] <- "significant"
padj_co[which(padj_co==FALSE)] <- "non-significant"
fc_input_015_volcano <- ggplot(data=cbind(norm_MassSpecData, padj_co), aes(x=FC.Input_015.mean, y=-log10(padj.Input_015), colour = padj_co ) ) +
  geom_point(alpha=0.5, size=1.75) +
#  xlim(c(-6, 6)) + ylim(c(0, 10)) +
  xlab("log2 fold change") + ylab("-log10 adj. p-value")
## Input 1.5
padj_co <- norm_MassSpecData$padj.Input_15 <= 0.01
padj_co[which(padj_co)] <- "significant"
padj_co[which(padj_co==FALSE)] <- "non-significant"
fc_input_15_volcano <- ggplot(data=cbind(norm_MassSpecData, padj_co), aes(x=FC.Input_15.mean, y=-log10(padj.Input_15), colour = padj_co ) ) +
  geom_point(alpha=0.5, size=1.75) +
#  xlim(c(-6, 6)) + ylim(c(0, 10)) +
  xlab("log2 fold change") + ylab("-log10 adj. p-value")
## PTex 0.015
padj_co <- norm_MassSpecData$padj.PTex_0015 <= 0.01
padj_co[which(padj_co)] <- "significant"
padj_co[which(padj_co==FALSE)] <- "non-significant"
fc_ptex_0015_volcano <- ggplot(data=cbind(norm_MassSpecData, padj_co), aes(x=FC.PTex_0015.mean, y=-log10(padj.PTex_0015), colour = padj_co ) ) +
  geom_point(alpha=0.5, size=1.75) +
#  xlim(c(-6, 6)) + ylim(c(0, 10)) +
  xlab("log2 fold change") + ylab("-log10 adj. p-value")
## PTex 0.15
padj_co <- norm_MassSpecData$padj.PTex_015 <= 0.01
padj_co[which(padj_co)] <- "significant"
padj_co[which(padj_co==FALSE)] <- "non-significant"
fc_ptex_015_volcano <- ggplot(data=cbind(norm_MassSpecData, padj_co), aes(x=FC.PTex_015.mean, y=-log10(padj.PTex_015), colour = padj_co ) ) +
  geom_point(alpha=0.5, size=1.75) +
#  xlim(c(-6, 6)) + ylim(c(0, 10)) +
  xlab("log2 fold change") + ylab("-log10 adj. p-value")
## PTex 1.5
padj_co <- norm_MassSpecData$padj.PTex_15 <= 0.01
padj_co[which(padj_co)] <- "significant"
padj_co[which(padj_co==FALSE)] <- "non-significant"
fc_ptex_15_volcano <- ggplot(data=cbind(norm_MassSpecData, padj_co), aes(x=FC.PTex_15.mean, y=-log10(padj.PTex_15), colour = padj_co ) ) +
  geom_point(alpha=0.5, size=1.75) +
#  xlim(c(-6, 6)) + ylim(c(0, 10)) +
  xlab("log2 fold change") + ylab("-log10 adj. p-value")
fc_input_0015_volcano

fc_input_015_volcano

fc_input_15_volcano

fc_ptex_0015_volcano

fc_ptex_015_volcano

fc_ptex_15_volcano

Tidy up:

# Remove all scatter and volcano plot objects and the significance labels.
rm(list = ls(pattern = "_scatter|_volcano"))
rm(padj_co)

5 Matching

To assess performance of PTex to find RBPs, the significantly enriched proteins are compared to other studies. The following studies are taken into account: - Gerstberger review 2014 - Landthaler HEK293 interactome capture - Preiss - SONAR (annotated and predicted) - Hubstenberger P-Body proteins - RICK (high, low and unique) - CARIC

## Gerstberger review (2014) RBPs
ge <- read.csv("inputdata/RBP table-Table 1.csv")
## Gerstberger review (2014) TFs
tf <- read.csv("inputdata/human TFs-Table 1.csv")
## Human mRNA interactome capture data:
ic <- read.csv("inputdata/2015-06-09_human_interactomes.csv",na.strings=c("", "NA") )
## Preiss
preiss_gn <- read.csv("inputdata/preiss_genenames.csv", header=T, sep="\t")
## annotated RBPs from SONAR paper (Brannan et al.)
sonar_all <- read.csv("inputdata/Brannan_annotated.txt", header = FALSE)
## Predicted SONAR proteins (with score > 0.79, see Brannan et al. Mol Cell 2016)
sonar_079 <- read.csv("inputdata/Brannan_SONAR_0.79.txt", header = FALSE)
## Hubstenberger P-Body Proteins
hubst <- read.csv("inputdata/hubstenberger.csv", header = TRUE, sep = "\t")
## RICK
rick_high <- read.csv("inputdata/RICK_high_conf.csv", sep = "\t", stringsAsFactors = F)
rick_low <- read.csv("inputdata/RICK_low_conf.csv", sep = "\t", stringsAsFactors = F)
rick_unique <- read.csv("inputdata/RICK_unique.csv", sep="\t", stringsAsFactors = F)
## CARIC
caric <- read.csv("inputdata/caric.csv", sep = "\t", stringsAsFactors = F)

5.1 Matching table

A matching table is created to perform the matching and store the results. It contains the following identifier columns: - Majority.protein.IDs: the Majority.protein.IDs of the significantly enriched PTex hits - MPIDsNoIso: Majority.protein.IDs with isoforms resolved to canonical form - gene_name: the gene names of the significantly enriched PTex hits

# Matching table with one row per significantly enriched protein group.
#   Majority.protein.IDs: list column, the group's IDs split at ";"
#   MPIDsNoIso:           list column, the same IDs with the isoform suffix
#                         ("-" followed by 1-2 digits) removed, de-duplicated
#   gene_name:            the group's gene names as given in the MS table
# The IDs are split with strsplit() directly. The previous apply() over the
# data frame returned a matrix instead of a list whenever every group had the
# same number of IDs, which broke the list column.
matching_table <- data.frame(
  Majority.protein.IDs = as.character(sig_norm_MassSpecData$Majority.protein.IDs)
)
matching_table$Majority.protein.IDs <- strsplit(
  as.character(sig_norm_MassSpecData$Majority.protein.IDs),
  split = ";"
)
matching_table$MPIDsNoIso <- lapply(
  strsplit(
    gsub("-[0-9]{1,2}", "", sig_norm_MassSpecData$Majority.protein.IDs),
    split = ";"
  ),
  unique
)
matching_table$gene_name <- sig_norm_MassSpecData$Gene.names

The canonical MPIDs were matched with all reviewed human UniProt (SwissProt) entries.

# Restrict every group's canonical IDs to reviewed (SwissProt) accessions.
# lapply()/Map() always return lists. The previous apply()/mapply() calls
# could simplify their result to a matrix or vector when all groups had the
# same number of IDs, which misaligned groups and IDs.
all_rev_uniprot <- read.table("inputdata/human_proteome_reviewed_291118.list", header = FALSE, col.names = "uniprot")
# Per group: logical vector marking which IDs are reviewed.
MPIDsNoIso_rev_stat <- lapply(
  matching_table$MPIDsNoIso,
  function(ids) ids %in% all_rev_uniprot$uniprot
)
# Per group: the reviewed IDs only.
MPIDsNoIso.rev <- Map(
  function(ids, reviewed) ids[reviewed],
  matching_table$MPIDsNoIso,
  MPIDsNoIso_rev_stat
)
# Groups without any reviewed ID are marked NA.
MPIDsNoIso.rev[lengths(MPIDsNoIso.rev) == 0] <- NA
sum(is.na(MPIDsNoIso.rev))
[1] 16
# Store the reviewed accessions per protein group as column MPIDr.
matching_table$MPIDr <- MPIDsNoIso.rev

For 16 majority protein ID groups no SwissProt entry could be found.

# Show the groups without a reviewed accession. is.na() is used, as in the
# count above, instead of comparing against the string "NA", which only
# worked because list elements are coerced to character for matching.
datatable(matching_table[which(is.na(MPIDsNoIso.rev)), ])

Those 16 hits were subjected to manual curation, meaning the accession numbers and gene names were used for a UniProt query. If an unambiguous hit was found it was corrected. The following were corrected:

# Manually curated accessions, assigned by row position.
# NOTE(review): the positions are only valid for the exact set and order of
# rows in matching_table; re-check them whenever an upstream step changes.
matching_table$MPIDr[c(74, 83, 93, 134, 226, 264, 268, 674, 2937, 3034, 3035)] <- c(
  "P43243", "O75122", "Q9UBX3", "O95819", "Q9UPN3", "Q96MU7",
  "P36776", "P0DMV8", "P0DPB6", "P17980", "Q9UKV3"
)

After manual correction, the following were still not identifiable.

# Show the groups that still have no accession after manual curation
# (is.na() instead of matching against the string "NA").
datatable(matching_table[which(is.na(matching_table$MPIDr)), ])

These hits were excluded from further analyses.

# Remove the groups that could not be identified, addressed by row position.
# NOTE(review): the positions are only valid for the exact set and order of
# rows in matching_table; re-check them whenever an upstream step changes.
matching_table <- matching_table[-c(162, 169, 178, 896, 3037), ]
# Check for remaining groups without an accession (is.na() instead of
# matching against the string "NA").
datatable(matching_table[which(is.na(matching_table$MPIDr)), ])

WARNING:235 Q13404;A5PLL7 UBE2V1;TMEM189 T;F stays! Others might, too!

5.1.1 Matching with other datasets

## Gerstberger RBPs
matching_table$Gerstberger.RBP <- apply(matching_table,1,function(x) any(ge$gene.name %in% unlist(strsplit(as.character(x$gene_name),split=";"))))
## Gerstberger TFs (negative control)
matching_table$Gerstberger.TF <- apply(matching_table,1,function(x) any(tf$gene.name %in% unlist(strsplit(as.character(x$gene_name),split=";"))))
## Landthaler HEK293 interactome capture data
hekmRBP <- subset(ic, ic$HEK293 == "mRNA interactome")
matching_table$Landthaler.mRBP <- apply(matching_table, 1, function(x) any(hekmRBP$Uniprot.entry %in% unlist(x$MPIDr)))
## Preiss
matching_table$Preiss <- apply(matching_table, 1, function(x) any(preiss_gn$gene.name %in% unlist(strsplit(as.character(x$gene_name),split=";"))))
## Annotated RBPs (from the SONAR paper)
matching_table$Brannan.annotated <- apply(matching_table, 1, function(x) any(sonar_all$V1 %in% unlist(strsplit(as.character(x$gene_name),split=";"))))
## SONAR-predicted RBPs
matching_table$Brannan.predicted <- apply(matching_table, 1, function(x) any(sonar_079$V1 %in% unlist(strsplit(as.character(x$gene_name),split=";"))))
## Hubstenberger P-Body proteins
matching_table$Hubstenberger.PBP <- apply(matching_table, 1, function(x) any(hubst$gene.name %in% unlist(strsplit(as.character(x$gene_name),split=";"))))
## RICK data
matching_table$Rick.high <- apply(matching_table, 1, function(x) any(rick_high$Protein_ID %in% unlist(x$MPIDr)))
matching_table$Rick.low <- apply(matching_table, 1, function(x) any(rick_low$Protein_ID %in% unlist(x$MPIDr)))
matching_table$Rick.unique <- apply(matching_table, 1, function(x) any(rick_unique$Protein_ID %in% unlist(x$MPIDr)))
## CARIC RBPs
matching_table$Caric <- apply(matching_table, 1, function(x) any(caric$UniProt.accession %in% unlist(x$MPIDr)))

Note that matching via stringsplitted gene names can produce multiple hits in reference for a single PTex hit.

5.2 Matching df

# inPTex, not inPTex
matching_df <- data.frame(
  match = c("yes", "no"),
  Gerstberger.RBPs = c(round(sum(matching_table$Gerstberger.RBP)/nrow(ge),4), round((nrow(ge) - sum(matching_table$Gerstberger.RBP))/nrow(ge),4)),
  Gerstberger.TFs =c(round(sum(matching_table$Gerstberger.TF)/nrow(tf),4), round((nrow(tf) - sum(matching_table$Gerstberger.TF))/nrow(tf),4)),
  Landthaler = c(round(sum(matching_table$Landthaler.mRBP)/nrow(hekmRBP),4), round((nrow(hekmRBP) - sum(matching_table$Landthaler.mRBP))/nrow(hekmRBP),4)),
  Preiss = c(round(sum(matching_table$Preiss)/nrow(preiss_gn),4), round((nrow(preiss_gn)-sum(matching_table$Preiss))/nrow(preiss_gn),4)),
  Brannan.annotated = c(round(sum(matching_table$Brannan.annotated)/nrow(sonar_all),4), round((nrow(sonar_all) - sum(matching_table$Brannan.annotated))/nrow(sonar_all),4)),
  Brannan.predicted = c(round(sum(matching_table$Brannan.predicted)/nrow(sonar_079),4), round((nrow(sonar_079) - sum(matching_table$Brannan.predicted))/nrow(sonar_079),4)),
  Hubstenberger.PBP = c(round(sum(matching_table$Hubstenberger.PBP)/nrow(hubst),4), round((nrow(hubst) - sum(matching_table$Hubstenberger.PBP))/nrow(hubst),4)),
  Rick.high = c(round(sum(matching_table$Rick.high)/nrow(rick_high),4), round((nrow(rick_high) - sum(matching_table$Rick.high))/nrow(rick_high),4)),
  Rick.low = c(round(sum(matching_table$Rick.low)/nrow(rick_low),4), round((nrow(rick_low) - sum(matching_table$Rick.low))/nrow(rick_low),4)),
  Rick.unique = c(round(sum(matching_table$Rick.unique)/nrow(rick_unique),4), round((nrow(rick_unique) - sum(matching_table$Rick.unique))/nrow(rick_unique),4)),
  Caric = c(round(sum(matching_table$Caric)/nrow(caric),4), round((nrow(caric) - sum(matching_table$Caric))/nrow(caric),4))
)
ggplot(melt(matching_df, id.vars = "match"), aes(x=variable, y=value, fill=match)) +
  geom_bar(stat = "identity", alpha=.6) +
  ylab("fraction") + xlab("") +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

6 Annotations

The significantly enriched hits are annotated with: isoelectric point, molecular weight, hydrophobicity, (RBD) and (GO).

This requires a list of UniProt accession numbers. They are taken from the MPIDr column of the matching table; for entries with multiple accession numbers, only the first one is used.

# Reduce every MPIDr entry to its first accession (entries may hold several
# accessions separated by ";").
first_accession <- function(mpidr_entry) {
  split_ids <- strsplit(as.character(mpidr_entry), split = ";")
  split_ids[[1]][1]
}
accessions <- unlist(lapply(matching_table$MPIDr, first_accession))
# Write the accessions to disk without header and without row names
write.table(accessions, file = "accessions.tsv", row.names = FALSE, col.names = FALSE)

6.1 Isoelectric Point, Molecular Weight, Sequences and hydrophobicity

For comparison of the MS data with the human proteome a dataset from isoelectricpointdb.org was retrieved (including isoelectric points, molecular weights and sequences) (as of 29.11.18):

isoelectricpointdb <- read.csv("inputdata/UP000005640_9606_all.fasta.pI.csv", header = TRUE, sep = ",")

# Row index of the first isoelectricpointdb header that contains the literal
# text "|<accession>|", or NA when no header matches.
# Returning NA (instead of assigning a zero-length result inside try()) avoids
# one error message per missing accession and per annotated column.
find_ipdb_row <- function(accession, headers) {
  hits <- grep(paste0("|", accession, "|"), headers, fixed = TRUE)
  if (length(hits) == 0L) NA_integer_ else hits[1L]
}

# Search each accession once and reuse the row index for all three columns
ipdb_headers <- as.character(isoelectricpointdb$header)
ipdb_rows <- vapply(as.character(accessions), find_ipdb_row, integer(1),
                    headers = ipdb_headers, USE.NAMES = FALSE)
if (anyNA(ipdb_rows)) {
  message("No isoelectricpointdb entry found for: ",
          paste(accessions[is.na(ipdb_rows)], collapse = ", "))
}

IPDB_matching <- data.frame(
  UniProt = accessions,
  Sequence = rep(NA, length(accessions)),
  MolWeight = rep(NA, length(accessions)),
  IPC_protein = rep(NA, length(accessions)),
  Hydrophobicity = rep(NA, length(accessions))
)
# Indexing with NA yields NA, so accessions without a database entry stay NA
IPDB_matching$Sequence <- as.character(isoelectricpointdb$sequence)[ipdb_rows]
IPDB_matching$MolWeight <- isoelectricpointdb$molecular_weight[ipdb_rows]
IPDB_matching$IPC_protein <- isoelectricpointdb$IPC_protein[ipdb_rows]
Error in IPDB_matching$Sequence[i] <- as.character(isoelectricpointdb$sequence[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching$MolWeight[i] <- isoelectricpointdb$molecular_weight[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching$IPC_protein[i] <- isoelectricpointdb$IPC_protein[grep(pattern = paste("|",  : 
  replacement has length zero

Not found: P0DPB6 which is >sp|Q9Y2S0|RPAC2_HUMAN DNA-directed RNA polymerases I and III subunit RPAC2 OS=Homo sapiens GN=POLR1D PE=1 SV=1

# Hydrophobicity: the "Mole%" value of the "NonPolar" class in the aaComp()
# result for each sequence. A failing row is reported by try() and keeps its NA.
for (row_idx in seq(nrow(IPDB_matching))) {
  try({
    composition <- aaComp(IPDB_matching$Sequence[row_idx])[[1]]
    IPDB_matching$Hydrophobicity[row_idx] <- composition["NonPolar", "Mole%"]
  })
}
Sequence 1 has unrecognized amino acid types. Output value might be wrong calculatedSequence 1 has unrecognized amino acid types. Output value might be wrong calculatedSequence 1 has unrecognized amino acid types. Output value might be wrong calculated

The entries of all_rev_uniprot are matched against the isoelectricpointdb in the same way. (Note that the following chunk takes a long time to run, because every accession is searched for in all database headers.)

# Row index of the first isoelectricpointdb header that contains the literal
# text "|<accession>|", or NA when no header matches.
find_ipdb_row <- function(accession, headers) {
  hits <- grep(paste0("|", accession, "|"), headers, fixed = TRUE)
  if (length(hits) == 0L) NA_integer_ else hits[1L]
}

proteome_accessions <- as.character(all_rev_uniprot$uniprot)
# Size the placeholder columns by the number of accessions;
# length(all_rev_uniprot) would count the elements (columns) of the object,
# not its entries, and only works through recycling.
n_proteome <- length(proteome_accessions)
IPDB_matching_proteome <- data.frame(
  UniProt = proteome_accessions,
  Sequence = rep(NA, n_proteome),
  MolWeight = rep(NA, n_proteome),
  IPC_protein = rep(NA, n_proteome),
  Hydrophobicity = rep(NA, n_proteome)
)

# Search each accession once and reuse the row index for all three columns
proteome_headers <- as.character(isoelectricpointdb$header)
proteome_rows <- vapply(proteome_accessions, find_ipdb_row, integer(1),
                        headers = proteome_headers, USE.NAMES = FALSE)
if (anyNA(proteome_rows)) {
  message("No isoelectricpointdb entry found for ", sum(is.na(proteome_rows)),
          " accession(s): ",
          paste(proteome_accessions[is.na(proteome_rows)], collapse = ", "))
}
# Indexing with NA yields NA, so accessions without a database entry stay NA
IPDB_matching_proteome$Sequence <- as.character(isoelectricpointdb$sequence)[proteome_rows]
IPDB_matching_proteome$MolWeight <- isoelectricpointdb$molecular_weight[proteome_rows]
IPDB_matching_proteome$IPC_protein <- isoelectricpointdb$IPC_protein[proteome_rows]
Error in IPDB_matching_proteome$Sequence[i] <- as.character(isoelectricpointdb$sequence[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$MolWeight[i] <- isoelectricpointdb$molecular_weight[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$IPC_protein[i] <- isoelectricpointdb$IPC_protein[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$Sequence[i] <- as.character(isoelectricpointdb$sequence[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$MolWeight[i] <- isoelectricpointdb$molecular_weight[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$IPC_protein[i] <- isoelectricpointdb$IPC_protein[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$Sequence[i] <- as.character(isoelectricpointdb$sequence[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$MolWeight[i] <- isoelectricpointdb$molecular_weight[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$IPC_protein[i] <- isoelectricpointdb$IPC_protein[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$Sequence[i] <- as.character(isoelectricpointdb$sequence[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$MolWeight[i] <- isoelectricpointdb$molecular_weight[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$IPC_protein[i] <- isoelectricpointdb$IPC_protein[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$Sequence[i] <- as.character(isoelectricpointdb$sequence[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$MolWeight[i] <- isoelectricpointdb$molecular_weight[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$IPC_protein[i] <- isoelectricpointdb$IPC_protein[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$Sequence[i] <- as.character(isoelectricpointdb$sequence[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$MolWeight[i] <- isoelectricpointdb$molecular_weight[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$IPC_protein[i] <- isoelectricpointdb$IPC_protein[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$Sequence[i] <- as.character(isoelectricpointdb$sequence[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$MolWeight[i] <- isoelectricpointdb$molecular_weight[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$IPC_protein[i] <- isoelectricpointdb$IPC_protein[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$Sequence[i] <- as.character(isoelectricpointdb$sequence[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$MolWeight[i] <- isoelectricpointdb$molecular_weight[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$IPC_protein[i] <- isoelectricpointdb$IPC_protein[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$Sequence[i] <- as.character(isoelectricpointdb$sequence[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$MolWeight[i] <- isoelectricpointdb$molecular_weight[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$IPC_protein[i] <- isoelectricpointdb$IPC_protein[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$Sequence[i] <- as.character(isoelectricpointdb$sequence[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$MolWeight[i] <- isoelectricpointdb$molecular_weight[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$IPC_protein[i] <- isoelectricpointdb$IPC_protein[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$Sequence[i] <- as.character(isoelectricpointdb$sequence[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$MolWeight[i] <- isoelectricpointdb$molecular_weight[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$IPC_protein[i] <- isoelectricpointdb$IPC_protein[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$Sequence[i] <- as.character(isoelectricpointdb$sequence[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$MolWeight[i] <- isoelectricpointdb$molecular_weight[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$IPC_protein[i] <- isoelectricpointdb$IPC_protein[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$Sequence[i] <- as.character(isoelectricpointdb$sequence[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$MolWeight[i] <- isoelectricpointdb$molecular_weight[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$IPC_protein[i] <- isoelectricpointdb$IPC_protein[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$Sequence[i] <- as.character(isoelectricpointdb$sequence[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$MolWeight[i] <- isoelectricpointdb$molecular_weight[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$IPC_protein[i] <- isoelectricpointdb$IPC_protein[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$Sequence[i] <- as.character(isoelectricpointdb$sequence[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$MolWeight[i] <- isoelectricpointdb$molecular_weight[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$IPC_protein[i] <- isoelectricpointdb$IPC_protein[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$Sequence[i] <- as.character(isoelectricpointdb$sequence[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$MolWeight[i] <- isoelectricpointdb$molecular_weight[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$IPC_protein[i] <- isoelectricpointdb$IPC_protein[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$Sequence[i] <- as.character(isoelectricpointdb$sequence[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$MolWeight[i] <- isoelectricpointdb$molecular_weight[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$IPC_protein[i] <- isoelectricpointdb$IPC_protein[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$Sequence[i] <- as.character(isoelectricpointdb$sequence[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$MolWeight[i] <- isoelectricpointdb$molecular_weight[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$IPC_protein[i] <- isoelectricpointdb$IPC_protein[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$Sequence[i] <- as.character(isoelectricpointdb$sequence[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$MolWeight[i] <- isoelectricpointdb$molecular_weight[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$IPC_protein[i] <- isoelectricpointdb$IPC_protein[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$Sequence[i] <- as.character(isoelectricpointdb$sequence[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$MolWeight[i] <- isoelectricpointdb$molecular_weight[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$IPC_protein[i] <- isoelectricpointdb$IPC_protein[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$Sequence[i] <- as.character(isoelectricpointdb$sequence[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$MolWeight[i] <- isoelectricpointdb$molecular_weight[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$IPC_protein[i] <- isoelectricpointdb$IPC_protein[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$Sequence[i] <- as.character(isoelectricpointdb$sequence[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$MolWeight[i] <- isoelectricpointdb$molecular_weight[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$IPC_protein[i] <- isoelectricpointdb$IPC_protein[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$Sequence[i] <- as.character(isoelectricpointdb$sequence[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$MolWeight[i] <- isoelectricpointdb$molecular_weight[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$IPC_protein[i] <- isoelectricpointdb$IPC_protein[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$Sequence[i] <- as.character(isoelectricpointdb$sequence[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$MolWeight[i] <- isoelectricpointdb$molecular_weight[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$IPC_protein[i] <- isoelectricpointdb$IPC_protein[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$Sequence[i] <- as.character(isoelectricpointdb$sequence[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$MolWeight[i] <- isoelectricpointdb$molecular_weight[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$IPC_protein[i] <- isoelectricpointdb$IPC_protein[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$Sequence[i] <- as.character(isoelectricpointdb$sequence[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$MolWeight[i] <- isoelectricpointdb$molecular_weight[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$IPC_protein[i] <- isoelectricpointdb$IPC_protein[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$Sequence[i] <- as.character(isoelectricpointdb$sequence[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$MolWeight[i] <- isoelectricpointdb$molecular_weight[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$IPC_protein[i] <- isoelectricpointdb$IPC_protein[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$Sequence[i] <- as.character(isoelectricpointdb$sequence[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$MolWeight[i] <- isoelectricpointdb$molecular_weight[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$IPC_protein[i] <- isoelectricpointdb$IPC_protein[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$Sequence[i] <- as.character(isoelectricpointdb$sequence[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$MolWeight[i] <- isoelectricpointdb$molecular_weight[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$IPC_protein[i] <- isoelectricpointdb$IPC_protein[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$Sequence[i] <- as.character(isoelectricpointdb$sequence[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$MolWeight[i] <- isoelectricpointdb$molecular_weight[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$IPC_protein[i] <- isoelectricpointdb$IPC_protein[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$Sequence[i] <- as.character(isoelectricpointdb$sequence[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$MolWeight[i] <- isoelectricpointdb$molecular_weight[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$IPC_protein[i] <- isoelectricpointdb$IPC_protein[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$Sequence[i] <- as.character(isoelectricpointdb$sequence[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$MolWeight[i] <- isoelectricpointdb$molecular_weight[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$IPC_protein[i] <- isoelectricpointdb$IPC_protein[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$Sequence[i] <- as.character(isoelectricpointdb$sequence[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$MolWeight[i] <- isoelectricpointdb$molecular_weight[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$IPC_protein[i] <- isoelectricpointdb$IPC_protein[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$Sequence[i] <- as.character(isoelectricpointdb$sequence[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$MolWeight[i] <- isoelectricpointdb$molecular_weight[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$IPC_protein[i] <- isoelectricpointdb$IPC_protein[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$Sequence[i] <- as.character(isoelectricpointdb$sequence[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$MolWeight[i] <- isoelectricpointdb$molecular_weight[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$IPC_protein[i] <- isoelectricpointdb$IPC_protein[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$Sequence[i] <- as.character(isoelectricpointdb$sequence[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$MolWeight[i] <- isoelectricpointdb$molecular_weight[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$IPC_protein[i] <- isoelectricpointdb$IPC_protein[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$Sequence[i] <- as.character(isoelectricpointdb$sequence[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$MolWeight[i] <- isoelectricpointdb$molecular_weight[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$IPC_protein[i] <- isoelectricpointdb$IPC_protein[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$Sequence[i] <- as.character(isoelectricpointdb$sequence[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$MolWeight[i] <- isoelectricpointdb$molecular_weight[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$IPC_protein[i] <- isoelectricpointdb$IPC_protein[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$Sequence[i] <- as.character(isoelectricpointdb$sequence[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$MolWeight[i] <- isoelectricpointdb$molecular_weight[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$IPC_protein[i] <- isoelectricpointdb$IPC_protein[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$Sequence[i] <- as.character(isoelectricpointdb$sequence[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$MolWeight[i] <- isoelectricpointdb$molecular_weight[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$IPC_protein[i] <- isoelectricpointdb$IPC_protein[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$Sequence[i] <- as.character(isoelectricpointdb$sequence[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$MolWeight[i] <- isoelectricpointdb$molecular_weight[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$IPC_protein[i] <- isoelectricpointdb$IPC_protein[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$Sequence[i] <- as.character(isoelectricpointdb$sequence[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$MolWeight[i] <- isoelectricpointdb$molecular_weight[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$IPC_protein[i] <- isoelectricpointdb$IPC_protein[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$Sequence[i] <- as.character(isoelectricpointdb$sequence[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$MolWeight[i] <- isoelectricpointdb$molecular_weight[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$IPC_protein[i] <- isoelectricpointdb$IPC_protein[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$Sequence[i] <- as.character(isoelectricpointdb$sequence[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$MolWeight[i] <- isoelectricpointdb$molecular_weight[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$IPC_protein[i] <- isoelectricpointdb$IPC_protein[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$Sequence[i] <- as.character(isoelectricpointdb$sequence[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$MolWeight[i] <- isoelectricpointdb$molecular_weight[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$IPC_protein[i] <- isoelectricpointdb$IPC_protein[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$Sequence[i] <- as.character(isoelectricpointdb$sequence[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$MolWeight[i] <- isoelectricpointdb$molecular_weight[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$IPC_protein[i] <- isoelectricpointdb$IPC_protein[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$Sequence[i] <- as.character(isoelectricpointdb$sequence[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$MolWeight[i] <- isoelectricpointdb$molecular_weight[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$IPC_protein[i] <- isoelectricpointdb$IPC_protein[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$Sequence[i] <- as.character(isoelectricpointdb$sequence[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$MolWeight[i] <- isoelectricpointdb$molecular_weight[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$IPC_protein[i] <- isoelectricpointdb$IPC_protein[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$Sequence[i] <- as.character(isoelectricpointdb$sequence[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$MolWeight[i] <- isoelectricpointdb$molecular_weight[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$IPC_protein[i] <- isoelectricpointdb$IPC_protein[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$Sequence[i] <- as.character(isoelectricpointdb$sequence[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$MolWeight[i] <- isoelectricpointdb$molecular_weight[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$IPC_protein[i] <- isoelectricpointdb$IPC_protein[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$Sequence[i] <- as.character(isoelectricpointdb$sequence[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$MolWeight[i] <- isoelectricpointdb$molecular_weight[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$IPC_protein[i] <- isoelectricpointdb$IPC_protein[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$Sequence[i] <- as.character(isoelectricpointdb$sequence[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$MolWeight[i] <- isoelectricpointdb$molecular_weight[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$IPC_protein[i] <- isoelectricpointdb$IPC_protein[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$Sequence[i] <- as.character(isoelectricpointdb$sequence[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$MolWeight[i] <- isoelectricpointdb$molecular_weight[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$IPC_protein[i] <- isoelectricpointdb$IPC_protein[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$Sequence[i] <- as.character(isoelectricpointdb$sequence[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$MolWeight[i] <- isoelectricpointdb$molecular_weight[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$IPC_protein[i] <- isoelectricpointdb$IPC_protein[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$Sequence[i] <- as.character(isoelectricpointdb$sequence[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$MolWeight[i] <- isoelectricpointdb$molecular_weight[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$IPC_protein[i] <- isoelectricpointdb$IPC_protein[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$Sequence[i] <- as.character(isoelectricpointdb$sequence[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$MolWeight[i] <- isoelectricpointdb$molecular_weight[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$IPC_protein[i] <- isoelectricpointdb$IPC_protein[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$Sequence[i] <- as.character(isoelectricpointdb$sequence[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$MolWeight[i] <- isoelectricpointdb$molecular_weight[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$IPC_protein[i] <- isoelectricpointdb$IPC_protein[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$Sequence[i] <- as.character(isoelectricpointdb$sequence[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$MolWeight[i] <- isoelectricpointdb$molecular_weight[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$IPC_protein[i] <- isoelectricpointdb$IPC_protein[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$Sequence[i] <- as.character(isoelectricpointdb$sequence[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$MolWeight[i] <- isoelectricpointdb$molecular_weight[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$IPC_protein[i] <- isoelectricpointdb$IPC_protein[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$Sequence[i] <- as.character(isoelectricpointdb$sequence[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$MolWeight[i] <- isoelectricpointdb$molecular_weight[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$IPC_protein[i] <- isoelectricpointdb$IPC_protein[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$Sequence[i] <- as.character(isoelectricpointdb$sequence[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$MolWeight[i] <- isoelectricpointdb$molecular_weight[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$IPC_protein[i] <- isoelectricpointdb$IPC_protein[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$Sequence[i] <- as.character(isoelectricpointdb$sequence[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$MolWeight[i] <- isoelectricpointdb$molecular_weight[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$IPC_protein[i] <- isoelectricpointdb$IPC_protein[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$Sequence[i] <- as.character(isoelectricpointdb$sequence[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$MolWeight[i] <- isoelectricpointdb$molecular_weight[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$IPC_protein[i] <- isoelectricpointdb$IPC_protein[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$Sequence[i] <- as.character(isoelectricpointdb$sequence[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$MolWeight[i] <- isoelectricpointdb$molecular_weight[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$IPC_protein[i] <- isoelectricpointdb$IPC_protein[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$Sequence[i] <- as.character(isoelectricpointdb$sequence[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$MolWeight[i] <- isoelectricpointdb$molecular_weight[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$IPC_protein[i] <- isoelectricpointdb$IPC_protein[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$Sequence[i] <- as.character(isoelectricpointdb$sequence[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$MolWeight[i] <- isoelectricpointdb$molecular_weight[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$IPC_protein[i] <- isoelectricpointdb$IPC_protein[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$Sequence[i] <- as.character(isoelectricpointdb$sequence[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$MolWeight[i] <- isoelectricpointdb$molecular_weight[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$IPC_protein[i] <- isoelectricpointdb$IPC_protein[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$Sequence[i] <- as.character(isoelectricpointdb$sequence[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$MolWeight[i] <- isoelectricpointdb$molecular_weight[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$IPC_protein[i] <- isoelectricpointdb$IPC_protein[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$Sequence[i] <- as.character(isoelectricpointdb$sequence[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$MolWeight[i] <- isoelectricpointdb$molecular_weight[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$IPC_protein[i] <- isoelectricpointdb$IPC_protein[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$Sequence[i] <- as.character(isoelectricpointdb$sequence[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$MolWeight[i] <- isoelectricpointdb$molecular_weight[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$IPC_protein[i] <- isoelectricpointdb$IPC_protein[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$Sequence[i] <- as.character(isoelectricpointdb$sequence[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$MolWeight[i] <- isoelectricpointdb$molecular_weight[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$IPC_protein[i] <- isoelectricpointdb$IPC_protein[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$Sequence[i] <- as.character(isoelectricpointdb$sequence[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$MolWeight[i] <- isoelectricpointdb$molecular_weight[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$IPC_protein[i] <- isoelectricpointdb$IPC_protein[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$Sequence[i] <- as.character(isoelectricpointdb$sequence[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$MolWeight[i] <- isoelectricpointdb$molecular_weight[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$IPC_protein[i] <- isoelectricpointdb$IPC_protein[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$Sequence[i] <- as.character(isoelectricpointdb$sequence[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$MolWeight[i] <- isoelectricpointdb$molecular_weight[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$IPC_protein[i] <- isoelectricpointdb$IPC_protein[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$Sequence[i] <- as.character(isoelectricpointdb$sequence[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$MolWeight[i] <- isoelectricpointdb$molecular_weight[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$IPC_protein[i] <- isoelectricpointdb$IPC_protein[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$Sequence[i] <- as.character(isoelectricpointdb$sequence[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$MolWeight[i] <- isoelectricpointdb$molecular_weight[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$IPC_protein[i] <- isoelectricpointdb$IPC_protein[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$Sequence[i] <- as.character(isoelectricpointdb$sequence[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$MolWeight[i] <- isoelectricpointdb$molecular_weight[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$IPC_protein[i] <- isoelectricpointdb$IPC_protein[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$Sequence[i] <- as.character(isoelectricpointdb$sequence[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$MolWeight[i] <- isoelectricpointdb$molecular_weight[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$IPC_protein[i] <- isoelectricpointdb$IPC_protein[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$Sequence[i] <- as.character(isoelectricpointdb$sequence[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$MolWeight[i] <- isoelectricpointdb$molecular_weight[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$IPC_protein[i] <- isoelectricpointdb$IPC_protein[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$Sequence[i] <- as.character(isoelectricpointdb$sequence[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$MolWeight[i] <- isoelectricpointdb$molecular_weight[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$IPC_protein[i] <- isoelectricpointdb$IPC_protein[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$Sequence[i] <- as.character(isoelectricpointdb$sequence[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$MolWeight[i] <- isoelectricpointdb$molecular_weight[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$IPC_protein[i] <- isoelectricpointdb$IPC_protein[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$Sequence[i] <- as.character(isoelectricpointdb$sequence[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$MolWeight[i] <- isoelectricpointdb$molecular_weight[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$IPC_protein[i] <- isoelectricpointdb$IPC_protein[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$Sequence[i] <- as.character(isoelectricpointdb$sequence[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$MolWeight[i] <- isoelectricpointdb$molecular_weight[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$IPC_protein[i] <- isoelectricpointdb$IPC_protein[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$Sequence[i] <- as.character(isoelectricpointdb$sequence[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$MolWeight[i] <- isoelectricpointdb$molecular_weight[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$IPC_protein[i] <- isoelectricpointdb$IPC_protein[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$Sequence[i] <- as.character(isoelectricpointdb$sequence[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$MolWeight[i] <- isoelectricpointdb$molecular_weight[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$IPC_protein[i] <- isoelectricpointdb$IPC_protein[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$Sequence[i] <- as.character(isoelectricpointdb$sequence[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$MolWeight[i] <- isoelectricpointdb$molecular_weight[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$IPC_protein[i] <- isoelectricpointdb$IPC_protein[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$Sequence[i] <- as.character(isoelectricpointdb$sequence[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$MolWeight[i] <- isoelectricpointdb$molecular_weight[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$IPC_protein[i] <- isoelectricpointdb$IPC_protein[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$Sequence[i] <- as.character(isoelectricpointdb$sequence[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$MolWeight[i] <- isoelectricpointdb$molecular_weight[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$IPC_protein[i] <- isoelectricpointdb$IPC_protein[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$Sequence[i] <- as.character(isoelectricpointdb$sequence[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$MolWeight[i] <- isoelectricpointdb$molecular_weight[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$IPC_protein[i] <- isoelectricpointdb$IPC_protein[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$Sequence[i] <- as.character(isoelectricpointdb$sequence[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$MolWeight[i] <- isoelectricpointdb$molecular_weight[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$IPC_protein[i] <- isoelectricpointdb$IPC_protein[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$Sequence[i] <- as.character(isoelectricpointdb$sequence[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$MolWeight[i] <- isoelectricpointdb$molecular_weight[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$IPC_protein[i] <- isoelectricpointdb$IPC_protein[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$Sequence[i] <- as.character(isoelectricpointdb$sequence[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$MolWeight[i] <- isoelectricpointdb$molecular_weight[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$IPC_protein[i] <- isoelectricpointdb$IPC_protein[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$Sequence[i] <- as.character(isoelectricpointdb$sequence[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$MolWeight[i] <- isoelectricpointdb$molecular_weight[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$IPC_protein[i] <- isoelectricpointdb$IPC_protein[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$Sequence[i] <- as.character(isoelectricpointdb$sequence[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$MolWeight[i] <- isoelectricpointdb$molecular_weight[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$IPC_protein[i] <- isoelectricpointdb$IPC_protein[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$Sequence[i] <- as.character(isoelectricpointdb$sequence[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$MolWeight[i] <- isoelectricpointdb$molecular_weight[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$IPC_protein[i] <- isoelectricpointdb$IPC_protein[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$Sequence[i] <- as.character(isoelectricpointdb$sequence[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$MolWeight[i] <- isoelectricpointdb$molecular_weight[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$IPC_protein[i] <- isoelectricpointdb$IPC_protein[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$Sequence[i] <- as.character(isoelectricpointdb$sequence[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$MolWeight[i] <- isoelectricpointdb$molecular_weight[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$IPC_protein[i] <- isoelectricpointdb$IPC_protein[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$Sequence[i] <- as.character(isoelectricpointdb$sequence[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$MolWeight[i] <- isoelectricpointdb$molecular_weight[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$IPC_protein[i] <- isoelectricpointdb$IPC_protein[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$Sequence[i] <- as.character(isoelectricpointdb$sequence[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$MolWeight[i] <- isoelectricpointdb$molecular_weight[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$IPC_protein[i] <- isoelectricpointdb$IPC_protein[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$Sequence[i] <- as.character(isoelectricpointdb$sequence[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$MolWeight[i] <- isoelectricpointdb$molecular_weight[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$IPC_protein[i] <- isoelectricpointdb$IPC_protein[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$Sequence[i] <- as.character(isoelectricpointdb$sequence[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$MolWeight[i] <- isoelectricpointdb$molecular_weight[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$IPC_protein[i] <- isoelectricpointdb$IPC_protein[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$Sequence[i] <- as.character(isoelectricpointdb$sequence[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$MolWeight[i] <- isoelectricpointdb$molecular_weight[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$IPC_protein[i] <- isoelectricpointdb$IPC_protein[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$Sequence[i] <- as.character(isoelectricpointdb$sequence[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$MolWeight[i] <- isoelectricpointdb$molecular_weight[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$IPC_protein[i] <- isoelectricpointdb$IPC_protein[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$Sequence[i] <- as.character(isoelectricpointdb$sequence[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$MolWeight[i] <- isoelectricpointdb$molecular_weight[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$IPC_protein[i] <- isoelectricpointdb$IPC_protein[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$Sequence[i] <- as.character(isoelectricpointdb$sequence[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$MolWeight[i] <- isoelectricpointdb$molecular_weight[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$IPC_protein[i] <- isoelectricpointdb$IPC_protein[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$Sequence[i] <- as.character(isoelectricpointdb$sequence[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$MolWeight[i] <- isoelectricpointdb$molecular_weight[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$IPC_protein[i] <- isoelectricpointdb$IPC_protein[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$Sequence[i] <- as.character(isoelectricpointdb$sequence[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$MolWeight[i] <- isoelectricpointdb$molecular_weight[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$IPC_protein[i] <- isoelectricpointdb$IPC_protein[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$Sequence[i] <- as.character(isoelectricpointdb$sequence[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$MolWeight[i] <- isoelectricpointdb$molecular_weight[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$IPC_protein[i] <- isoelectricpointdb$IPC_protein[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$Sequence[i] <- as.character(isoelectricpointdb$sequence[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$MolWeight[i] <- isoelectricpointdb$molecular_weight[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$IPC_protein[i] <- isoelectricpointdb$IPC_protein[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$Sequence[i] <- as.character(isoelectricpointdb$sequence[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$MolWeight[i] <- isoelectricpointdb$molecular_weight[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$IPC_protein[i] <- isoelectricpointdb$IPC_protein[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$Sequence[i] <- as.character(isoelectricpointdb$sequence[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$MolWeight[i] <- isoelectricpointdb$molecular_weight[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$IPC_protein[i] <- isoelectricpointdb$IPC_protein[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$Sequence[i] <- as.character(isoelectricpointdb$sequence[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$MolWeight[i] <- isoelectricpointdb$molecular_weight[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$IPC_protein[i] <- isoelectricpointdb$IPC_protein[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$Sequence[i] <- as.character(isoelectricpointdb$sequence[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$MolWeight[i] <- isoelectricpointdb$molecular_weight[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$IPC_protein[i] <- isoelectricpointdb$IPC_protein[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$Sequence[i] <- as.character(isoelectricpointdb$sequence[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$MolWeight[i] <- isoelectricpointdb$molecular_weight[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$IPC_protein[i] <- isoelectricpointdb$IPC_protein[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$Sequence[i] <- as.character(isoelectricpointdb$sequence[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$MolWeight[i] <- isoelectricpointdb$molecular_weight[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$IPC_protein[i] <- isoelectricpointdb$IPC_protein[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$Sequence[i] <- as.character(isoelectricpointdb$sequence[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$MolWeight[i] <- isoelectricpointdb$molecular_weight[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$IPC_protein[i] <- isoelectricpointdb$IPC_protein[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$Sequence[i] <- as.character(isoelectricpointdb$sequence[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$MolWeight[i] <- isoelectricpointdb$molecular_weight[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$IPC_protein[i] <- isoelectricpointdb$IPC_protein[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$Sequence[i] <- as.character(isoelectricpointdb$sequence[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$MolWeight[i] <- isoelectricpointdb$molecular_weight[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$IPC_protein[i] <- isoelectricpointdb$IPC_protein[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$Sequence[i] <- as.character(isoelectricpointdb$sequence[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$MolWeight[i] <- isoelectricpointdb$molecular_weight[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$IPC_protein[i] <- isoelectricpointdb$IPC_protein[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$Sequence[i] <- as.character(isoelectricpointdb$sequence[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$MolWeight[i] <- isoelectricpointdb$molecular_weight[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$IPC_protein[i] <- isoelectricpointdb$IPC_protein[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$Sequence[i] <- as.character(isoelectricpointdb$sequence[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$MolWeight[i] <- isoelectricpointdb$molecular_weight[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$IPC_protein[i] <- isoelectricpointdb$IPC_protein[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$Sequence[i] <- as.character(isoelectricpointdb$sequence[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$MolWeight[i] <- isoelectricpointdb$molecular_weight[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$IPC_protein[i] <- isoelectricpointdb$IPC_protein[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$Sequence[i] <- as.character(isoelectricpointdb$sequence[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$MolWeight[i] <- isoelectricpointdb$molecular_weight[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$IPC_protein[i] <- isoelectricpointdb$IPC_protein[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$Sequence[i] <- as.character(isoelectricpointdb$sequence[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$MolWeight[i] <- isoelectricpointdb$molecular_weight[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$IPC_protein[i] <- isoelectricpointdb$IPC_protein[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$Sequence[i] <- as.character(isoelectricpointdb$sequence[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$MolWeight[i] <- isoelectricpointdb$molecular_weight[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$IPC_protein[i] <- isoelectricpointdb$IPC_protein[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$Sequence[i] <- as.character(isoelectricpointdb$sequence[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$MolWeight[i] <- isoelectricpointdb$molecular_weight[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$IPC_protein[i] <- isoelectricpointdb$IPC_protein[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$Sequence[i] <- as.character(isoelectricpointdb$sequence[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$MolWeight[i] <- isoelectricpointdb$molecular_weight[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$IPC_protein[i] <- isoelectricpointdb$IPC_protein[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$Sequence[i] <- as.character(isoelectricpointdb$sequence[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$MolWeight[i] <- isoelectricpointdb$molecular_weight[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$IPC_protein[i] <- isoelectricpointdb$IPC_protein[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$Sequence[i] <- as.character(isoelectricpointdb$sequence[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$MolWeight[i] <- isoelectricpointdb$molecular_weight[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$IPC_protein[i] <- isoelectricpointdb$IPC_protein[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$Sequence[i] <- as.character(isoelectricpointdb$sequence[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$MolWeight[i] <- isoelectricpointdb$molecular_weight[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$IPC_protein[i] <- isoelectricpointdb$IPC_protein[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$Sequence[i] <- as.character(isoelectricpointdb$sequence[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$MolWeight[i] <- isoelectricpointdb$molecular_weight[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$IPC_protein[i] <- isoelectricpointdb$IPC_protein[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$Sequence[i] <- as.character(isoelectricpointdb$sequence[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$MolWeight[i] <- isoelectricpointdb$molecular_weight[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$IPC_protein[i] <- isoelectricpointdb$IPC_protein[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$Sequence[i] <- as.character(isoelectricpointdb$sequence[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$MolWeight[i] <- isoelectricpointdb$molecular_weight[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$IPC_protein[i] <- isoelectricpointdb$IPC_protein[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$Sequence[i] <- as.character(isoelectricpointdb$sequence[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$MolWeight[i] <- isoelectricpointdb$molecular_weight[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$IPC_protein[i] <- isoelectricpointdb$IPC_protein[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$Sequence[i] <- as.character(isoelectricpointdb$sequence[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$MolWeight[i] <- isoelectricpointdb$molecular_weight[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$IPC_protein[i] <- isoelectricpointdb$IPC_protein[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$Sequence[i] <- as.character(isoelectricpointdb$sequence[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$MolWeight[i] <- isoelectricpointdb$molecular_weight[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$IPC_protein[i] <- isoelectricpointdb$IPC_protein[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$Sequence[i] <- as.character(isoelectricpointdb$sequence[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$MolWeight[i] <- isoelectricpointdb$molecular_weight[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$IPC_protein[i] <- isoelectricpointdb$IPC_protein[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$Sequence[i] <- as.character(isoelectricpointdb$sequence[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$MolWeight[i] <- isoelectricpointdb$molecular_weight[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$IPC_protein[i] <- isoelectricpointdb$IPC_protein[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$Sequence[i] <- as.character(isoelectricpointdb$sequence[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$MolWeight[i] <- isoelectricpointdb$molecular_weight[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$IPC_protein[i] <- isoelectricpointdb$IPC_protein[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$Sequence[i] <- as.character(isoelectricpointdb$sequence[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$MolWeight[i] <- isoelectricpointdb$molecular_weight[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$IPC_protein[i] <- isoelectricpointdb$IPC_protein[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$Sequence[i] <- as.character(isoelectricpointdb$sequence[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$MolWeight[i] <- isoelectricpointdb$molecular_weight[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$IPC_protein[i] <- isoelectricpointdb$IPC_protein[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$Sequence[i] <- as.character(isoelectricpointdb$sequence[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$MolWeight[i] <- isoelectricpointdb$molecular_weight[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$IPC_protein[i] <- isoelectricpointdb$IPC_protein[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$Sequence[i] <- as.character(isoelectricpointdb$sequence[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$MolWeight[i] <- isoelectricpointdb$molecular_weight[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$IPC_protein[i] <- isoelectricpointdb$IPC_protein[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$Sequence[i] <- as.character(isoelectricpointdb$sequence[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$MolWeight[i] <- isoelectricpointdb$molecular_weight[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$IPC_protein[i] <- isoelectricpointdb$IPC_protein[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$Sequence[i] <- as.character(isoelectricpointdb$sequence[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$MolWeight[i] <- isoelectricpointdb$molecular_weight[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$IPC_protein[i] <- isoelectricpointdb$IPC_protein[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$Sequence[i] <- as.character(isoelectricpointdb$sequence[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$MolWeight[i] <- isoelectricpointdb$molecular_weight[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$IPC_protein[i] <- isoelectricpointdb$IPC_protein[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$Sequence[i] <- as.character(isoelectricpointdb$sequence[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$MolWeight[i] <- isoelectricpointdb$molecular_weight[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$IPC_protein[i] <- isoelectricpointdb$IPC_protein[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$Sequence[i] <- as.character(isoelectricpointdb$sequence[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$MolWeight[i] <- isoelectricpointdb$molecular_weight[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$IPC_protein[i] <- isoelectricpointdb$IPC_protein[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$Sequence[i] <- as.character(isoelectricpointdb$sequence[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$MolWeight[i] <- isoelectricpointdb$molecular_weight[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$IPC_protein[i] <- isoelectricpointdb$IPC_protein[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$Sequence[i] <- as.character(isoelectricpointdb$sequence[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$MolWeight[i] <- isoelectricpointdb$molecular_weight[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$IPC_protein[i] <- isoelectricpointdb$IPC_protein[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$Sequence[i] <- as.character(isoelectricpointdb$sequence[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$MolWeight[i] <- isoelectricpointdb$molecular_weight[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$IPC_protein[i] <- isoelectricpointdb$IPC_protein[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$Sequence[i] <- as.character(isoelectricpointdb$sequence[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$MolWeight[i] <- isoelectricpointdb$molecular_weight[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$IPC_protein[i] <- isoelectricpointdb$IPC_protein[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$Sequence[i] <- as.character(isoelectricpointdb$sequence[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$MolWeight[i] <- isoelectricpointdb$molecular_weight[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$IPC_protein[i] <- isoelectricpointdb$IPC_protein[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$Sequence[i] <- as.character(isoelectricpointdb$sequence[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$MolWeight[i] <- isoelectricpointdb$molecular_weight[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$IPC_protein[i] <- isoelectricpointdb$IPC_protein[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$Sequence[i] <- as.character(isoelectricpointdb$sequence[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$MolWeight[i] <- isoelectricpointdb$molecular_weight[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$IPC_protein[i] <- isoelectricpointdb$IPC_protein[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$Sequence[i] <- as.character(isoelectricpointdb$sequence[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$MolWeight[i] <- isoelectricpointdb$molecular_weight[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$IPC_protein[i] <- isoelectricpointdb$IPC_protein[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$Sequence[i] <- as.character(isoelectricpointdb$sequence[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$MolWeight[i] <- isoelectricpointdb$molecular_weight[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$IPC_protein[i] <- isoelectricpointdb$IPC_protein[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$Sequence[i] <- as.character(isoelectricpointdb$sequence[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$MolWeight[i] <- isoelectricpointdb$molecular_weight[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$IPC_protein[i] <- isoelectricpointdb$IPC_protein[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$Sequence[i] <- as.character(isoelectricpointdb$sequence[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$MolWeight[i] <- isoelectricpointdb$molecular_weight[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$IPC_protein[i] <- isoelectricpointdb$IPC_protein[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$Sequence[i] <- as.character(isoelectricpointdb$sequence[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$MolWeight[i] <- isoelectricpointdb$molecular_weight[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$IPC_protein[i] <- isoelectricpointdb$IPC_protein[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$Sequence[i] <- as.character(isoelectricpointdb$sequence[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$MolWeight[i] <- isoelectricpointdb$molecular_weight[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$IPC_protein[i] <- isoelectricpointdb$IPC_protein[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$Sequence[i] <- as.character(isoelectricpointdb$sequence[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$MolWeight[i] <- isoelectricpointdb$molecular_weight[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$IPC_protein[i] <- isoelectricpointdb$IPC_protein[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$Sequence[i] <- as.character(isoelectricpointdb$sequence[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$MolWeight[i] <- isoelectricpointdb$molecular_weight[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$IPC_protein[i] <- isoelectricpointdb$IPC_protein[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$Sequence[i] <- as.character(isoelectricpointdb$sequence[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$MolWeight[i] <- isoelectricpointdb$molecular_weight[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$IPC_protein[i] <- isoelectricpointdb$IPC_protein[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$Sequence[i] <- as.character(isoelectricpointdb$sequence[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$MolWeight[i] <- isoelectricpointdb$molecular_weight[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$IPC_protein[i] <- isoelectricpointdb$IPC_protein[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$Sequence[i] <- as.character(isoelectricpointdb$sequence[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$MolWeight[i] <- isoelectricpointdb$molecular_weight[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$IPC_protein[i] <- isoelectricpointdb$IPC_protein[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$Sequence[i] <- as.character(isoelectricpointdb$sequence[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$MolWeight[i] <- isoelectricpointdb$molecular_weight[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$IPC_protein[i] <- isoelectricpointdb$IPC_protein[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$Sequence[i] <- as.character(isoelectricpointdb$sequence[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$MolWeight[i] <- isoelectricpointdb$molecular_weight[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$IPC_protein[i] <- isoelectricpointdb$IPC_protein[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$Sequence[i] <- as.character(isoelectricpointdb$sequence[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$MolWeight[i] <- isoelectricpointdb$molecular_weight[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$IPC_protein[i] <- isoelectricpointdb$IPC_protein[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$Sequence[i] <- as.character(isoelectricpointdb$sequence[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$MolWeight[i] <- isoelectricpointdb$molecular_weight[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$IPC_protein[i] <- isoelectricpointdb$IPC_protein[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$Sequence[i] <- as.character(isoelectricpointdb$sequence[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$MolWeight[i] <- isoelectricpointdb$molecular_weight[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$IPC_protein[i] <- isoelectricpointdb$IPC_protein[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$Sequence[i] <- as.character(isoelectricpointdb$sequence[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$MolWeight[i] <- isoelectricpointdb$molecular_weight[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$IPC_protein[i] <- isoelectricpointdb$IPC_protein[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$Sequence[i] <- as.character(isoelectricpointdb$sequence[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$MolWeight[i] <- isoelectricpointdb$molecular_weight[grep(pattern = paste("|",  : 
  replacement has length zero
Error in IPDB_matching_proteome$IPC_protein[i] <- isoelectricpointdb$IPC_protein[grep(pattern = paste("|",  : 
  replacement has length zero
# Fill the Hydrophobicity column of IPDB_matching_proteome row by row with the
# "NonPolar" / "Mole%" entry of aaComp() for that row's sequence.
# seq_len() gives an empty sequence for a zero-row table, whereas
# seq(nrow(...)) would iterate over c(1, 0).
# try() keeps the loop running when aaComp() fails for a single entry; no value
# is assigned for that row in that iteration.
for (i in seq_len(nrow(IPDB_matching_proteome))) {
  try(
    IPDB_matching_proteome$Hydrophobicity[i] <-
      aaComp(IPDB_matching_proteome$Sequence[i])[[1]]["NonPolar", "Mole%"]
  )
}
Sequence 1 has unrecognized amino acid types. Output value might be wrong calculatedSequence 1 has unrecognized amino acid types. Output value might be wrong calculatedSequence 1 has unrecognized amino acid types. Output value might be wrong calculatedSequence 1 has unrecognized amino acid types. Output value might be wrong calculatedSequence 1 has unrecognized amino acid types. Output value might be wrong calculatedSequence 1 has unrecognized amino acid types. Output value might be wrong calculatedSequence 1 has unrecognized amino acid types. Output value might be wrong calculatedSequence 1 has unrecognized amino acid types. Output value might be wrong calculatedSequence 1 has unrecognized amino acid types. Output value might be wrong calculatedSequence 1 has unrecognized amino acid types. Output value might be wrong calculatedSequence 1 has unrecognized amino acid types. Output value might be wrong calculatedSequence 1 has unrecognized amino acid types. Output value might be wrong calculatedSequence 1 has unrecognized amino acid types. Output value might be wrong calculatedSequence 1 has unrecognized amino acid types. Output value might be wrong calculatedSequence 1 has unrecognized amino acid types. Output value might be wrong calculatedSequence 1 has unrecognized amino acid types. Output value might be wrong calculatedSequence 1 has unrecognized amino acid types. Output value might be wrong calculatedSequence 1 has unrecognized amino acid types. Output value might be wrong calculatedSequence 1 has unrecognized amino acid types. Output value might be wrong calculatedSequence 1 has unrecognized amino acid types. Output value might be wrong calculatedSequence 1 has unrecognized amino acid types. Output value might be wrong calculatedSequence 1 has unrecognized amino acid types. Output value might be wrong calculatedSequence 1 has unrecognized amino acid types. Output value might be wrong calculatedSequence 1 has unrecognized amino acid types. 
Output value might be wrong calculatedSequence 1 has unrecognized amino acid types. Output value might be wrong calculatedSequence 1 has unrecognized amino acid types. Output value might be wrong calculatedSequence 1 has unrecognized amino acid types. Output value might be wrong calculatedSequence 1 has unrecognized amino acid types. Output value might be wrong calculatedSequence 1 has unrecognized amino acid types. Output value might be wrong calculatedSequence 1 has unrecognized amino acid types. Output value might be wrong calculatedSequence 1 has unrecognized amino acid types. Output value might be wrong calculatedSequence 1 has unrecognized amino acid types. Output value might be wrong calculatedSequence 1 has unrecognized amino acid types. Output value might be wrong calculatedSequence 1 has unrecognized amino acid types. Output value might be wrong calculatedSequence 1 has unrecognized amino acid types. Output value might be wrong calculatedSequence 1 has unrecognized amino acid types. Output value might be wrong calculatedSequence 1 has unrecognized amino acid types. Output value might be wrong calculatedSequence 1 has unrecognized amino acid types. Output value might be wrong calculatedSequence 1 has unrecognized amino acid types. Output value might be wrong calculatedSequence 1 has unrecognized amino acid types. Output value might be wrong calculatedSequence 1 has unrecognized amino acid types. Output value might be wrong calculatedSequence 1 has unrecognized amino acid types. Output value might be wrong calculatedSequence 1 has unrecognized amino acid types. Output value might be wrong calculatedSequence 1 has unrecognized amino acid types. Output value might be wrong calculatedSequence 1 has unrecognized amino acid types. Output value might be wrong calculatedSequence 1 has unrecognized amino acid types. Output value might be wrong calculatedSequence 1 has unrecognized amino acid types. 
Output value might be wrong calculatedSequence 1 has unrecognized amino acid types. Output value might be wrong calculatedSequence 1 has unrecognized amino acid types. Output value might be wrong calculatedSequence 1 has unrecognized amino acid types. Output value might be wrong calculatedSequence 1 has unrecognized amino acid types. Output value might be wrong calculatedSequence 1 has unrecognized amino acid types. Output value might be wrong calculatedSequence 1 has unrecognized amino acid types. Output value might be wrong calculatedSequence 1 has unrecognized amino acid types. Output value might be wrong calculatedSequence 1 has unrecognized amino acid types. Output value might be wrong calculatedSequence 1 has unrecognized amino acid types. Output value might be wrong calculatedSequence 1 has unrecognized amino acid types. Output value might be wrong calculatedSequence 1 has unrecognized amino acid types. Output value might be wrong calculatedSequence 1 has unrecognized amino acid types. Output value might be wrong calculatedSequence 1 has unrecognized amino acid types. Output value might be wrong calculatedSequence 1 has unrecognized amino acid types. Output value might be wrong calculatedSequence 1 has unrecognized amino acid types. Output value might be wrong calculatedSequence 1 has unrecognized amino acid types. Output value might be wrong calculatedSequence 1 has unrecognized amino acid types. Output value might be wrong calculatedSequence 1 has unrecognized amino acid types. Output value might be wrong calculatedSequence 1 has unrecognized amino acid types. Output value might be wrong calculatedSequence 1 has unrecognized amino acid types. Output value might be wrong calculatedSequence 1 has unrecognized amino acid types. Output value might be wrong calculatedSequence 1 has unrecognized amino acid types. Output value might be wrong calculatedSequence 1 has unrecognized amino acid types. 
Output value might be wrong calculatedSequence 1 has unrecognized amino acid types. Output value might be wrong calculatedSequence 1 has unrecognized amino acid types. Output value might be wrong calculatedSequence 1 has unrecognized amino acid types. Output value might be wrong calculatedSequence 1 has unrecognized amino acid types. Output value might be wrong calculatedSequence 1 has unrecognized amino acid types. Output value might be wrong calculatedSequence 1 has unrecognized amino acid types. Output value might be wrong calculatedSequence 1 has unrecognized amino acid types. Output value might be wrong calculatedSequence 1 has unrecognized amino acid types. Output value might be wrong calculatedSequence 1 has unrecognized amino acid types. Output value might be wrong calculatedSequence 1 has unrecognized amino acid types. Output value might be wrong calculatedSequence 1 has unrecognized amino acid types. Output value might be wrong calculatedSequence 1 has unrecognized amino acid types. Output value might be wrong calculatedSequence 1 has unrecognized amino acid types. Output value might be wrong calculatedSequence 1 has unrecognized amino acid types. Output value might be wrong calculatedSequence 1 has unrecognized amino acid types. Output value might be wrong calculatedSequence 1 has unrecognized amino acid types. Output value might be wrong calculatedSequence 1 has unrecognized amino acid types. Output value might be wrong calculatedSequence 1 has unrecognized amino acid types. Output value might be wrong calculatedSequence 1 has unrecognized amino acid types. Output value might be wrong calculatedSequence 1 has unrecognized amino acid types. Output value might be wrong calculatedSequence 1 has unrecognized amino acid types. Output value might be wrong calculatedSequence 1 has unrecognized amino acid types. Output value might be wrong calculatedSequence 1 has unrecognized amino acid types. 
Output value might be wrong calculatedSequence 1 has unrecognized amino acid types. Output value might be wrong calculatedSequence 1 has unrecognized amino acid types. Output value might be wrong calculatedSequence 1 has unrecognized amino acid types. Output value might be wrong calculatedSequence 1 has unrecognized amino acid types. Output value might be wrong calculatedSequence 1 has unrecognized amino acid types. Output value might be wrong calculatedSequence 1 has unrecognized amino acid types. Output value might be wrong calculatedSequence 1 has unrecognized amino acid types. Output value might be wrong calculatedSequence 1 has unrecognized amino acid types. Output value might be wrong calculatedSequence 1 has unrecognized amino acid types. Output value might be wrong calculatedSequence 1 has unrecognized amino acid types. Output value might be wrong calculatedSequence 1 has unrecognized amino acid types. Output value might be wrong calculatedSequence 1 has unrecognized amino acid types. Output value might be wrong calculatedSequence 1 has unrecognized amino acid types. Output value might be wrong calculatedSequence 1 has unrecognized amino acid types. Output value might be wrong calculatedSequence 1 has unrecognized amino acid types. Output value might be wrong calculatedSequence 1 has unrecognized amino acid types. Output value might be wrong calculatedSequence 1 has unrecognized amino acid types. Output value might be wrong calculatedSequence 1 has unrecognized amino acid types. Output value might be wrong calculatedSequence 1 has unrecognized amino acid types. Output value might be wrong calculatedSequence 1 has unrecognized amino acid types. Output value might be wrong calculatedSequence 1 has unrecognized amino acid types. Output value might be wrong calculatedSequence 1 has unrecognized amino acid types. Output value might be wrong calculatedSequence 1 has unrecognized amino acid types. 
Output value might be wrong calculatedSequence 1 has unrecognized amino acid types. Output value might be wrong calculatedSequence 1 has unrecognized amino acid types. Output value might be wrong calculatedSequence 1 has unrecognized amino acid types. Output value might be wrong calculatedSequence 1 has unrecognized amino acid types. Output value might be wrong calculatedSequence 1 has unrecognized amino acid types. Output value might be wrong calculatedSequence 1 has unrecognized amino acid types. Output value might be wrong calculatedSequence 1 has unrecognized amino acid types. Output value might be wrong calculatedSequence 1 has unrecognized amino acid types. Output value might be wrong calculatedSequence 1 has unrecognized amino acid types. Output value might be wrong calculatedSequence 1 has unrecognized amino acid types. Output value might be wrong calculatedSequence 1 has unrecognized amino acid types. Output value might be wrong calculatedSequence 1 has unrecognized amino acid types. Output value might be wrong calculatedSequence 1 has unrecognized amino acid types. Output value might be wrong calculatedSequence 1 has unrecognized amino acid types. Output value might be wrong calculatedSequence 1 has unrecognized amino acid types. Output value might be wrong calculatedSequence 1 has unrecognized amino acid types. Output value might be wrong calculatedSequence 1 has unrecognized amino acid types. Output value might be wrong calculatedSequence 1 has unrecognized amino acid types. Output value might be wrong calculatedSequence 1 has unrecognized amino acid types. Output value might be wrong calculatedSequence 1 has unrecognized amino acid types. Output value might be wrong calculatedSequence 1 has unrecognized amino acid types. Output value might be wrong calculatedSequence 1 has unrecognized amino acid types. Output value might be wrong calculatedSequence 1 has unrecognized amino acid types. 
Output value might be wrong calculatedSequence 1 has unrecognized amino acid types. Output value might be wrong calculatedSequence 1 has unrecognized amino acid types. Output value might be wrong calculatedSequence 1 has unrecognized amino acid types. Output value might be wrong calculatedSequence 1 has unrecognized amino acid types. Output value might be wrong calculatedSequence 1 has unrecognized amino acid types. Output value might be wrong calculatedSequence 1 has unrecognized amino acid types. Output value might be wrong calculatedSequence 1 has unrecognized amino acid types. Output value might be wrong calculatedSequence 1 has unrecognized amino acid types. Output value might be wrong calculatedSequence 1 has unrecognized amino acid types. Output value might be wrong calculatedSequence 1 has unrecognized amino acid types. Output value might be wrong calculatedSequence 1 has unrecognized amino acid types. Output value might be wrong calculatedSequence 1 has unrecognized amino acid types. Output value might be wrong calculatedSequence 1 has unrecognized amino acid types. Output value might be wrong calculatedSequence 1 has unrecognized amino acid types. Output value might be wrong calculatedSequence 1 has unrecognized amino acid types. Output value might be wrong calculatedSequence 1 has unrecognized amino acid types. Output value might be wrong calculatedSequence 1 has unrecognized amino acid types. Output value might be wrong calculatedSequence 1 has unrecognized amino acid types. Output value might be wrong calculatedSequence 1 has unrecognized amino acid types. Output value might be wrong calculatedSequence 1 has unrecognized amino acid types. Output value might be wrong calculatedSequence 1 has unrecognized amino acid types. Output value might be wrong calculatedSequence 1 has unrecognized amino acid types. Output value might be wrong calculatedSequence 1 has unrecognized amino acid types. 
Output value might be wrong calculatedSequence 1 has unrecognized amino acid types. Output value might be wrong calculatedSequence 1 has unrecognized amino acid types. Output value might be wrong calculatedSequence 1 has unrecognized amino acid types. Output value might be wrong calculatedSequence 1 has unrecognized amino acid types. Output value might be wrong calculatedSequence 1 has unrecognized amino acid types. Output value might be wrong calculatedSequence 1 has unrecognized amino acid types. Output value might be wrong calculatedSequence 1 has unrecognized amino acid types. Output value might be wrong calculatedSequence 1 has unrecognized amino acid types. Output value might be wrong calculatedSequence 1 has unrecognized amino acid types. Output value might be wrong calculatedSequence 1 has unrecognized amino acid types. Output value might be wrong calculatedSequence 1 has unrecognized amino acid types. Output value might be wrong calculatedSequence 1 has unrecognized amino acid types. Output value might be wrong calculatedSequence 1 has unrecognized amino acid types. Output value might be wrong calculatedSequence 1 has unrecognized amino acid types. Output value might be wrong calculatedSequence 1 has unrecognized amino acid types. Output value might be wrong calculatedSequence 1 has unrecognized amino acid types. Output value might be wrong calculatedSequence 1 has unrecognized amino acid types. Output value might be wrong calculatedSequence 1 has unrecognized amino acid types. Output value might be wrong calculatedSequence 1 has unrecognized amino acid types. Output value might be wrong calculatedSequence 1 has unrecognized amino acid types. Output value might be wrong calculatedSequence 1 has unrecognized amino acid types. Output value might be wrong calculatedSequence 1 has unrecognized amino acid types. Output value might be wrong calculatedSequence 1 has unrecognized amino acid types. 
Output value might be wrong calculatedSequence 1 has unrecognized amino acid types. Output value might be wrong calculatedSequence 1 has unrecognized amino acid types. Output value might be wrong calculatedSequence 1 has unrecognized amino acid types. Output value might be wrong calculatedSequence 1 has unrecognized amino acid types. Output value might be wrong calculatedSequence 1 has unrecognized amino acid types. Output value might be wrong calculatedSequence 1 has unrecognized amino acid types. Output value might be wrong calculatedSequence 1 has unrecognized amino acid types. Output value might be wrong calculatedSequence 1 has unrecognized amino acid types. Output value might be wrong calculatedSequence 1 has unrecognized amino acid types. Output value might be wrong calculatedSequence 1 has unrecognized amino acid types. Output value might be wrong calculatedSequence 1 has unrecognized amino acid types. Output value might be wrong calculatedSequence 1 has unrecognized amino acid types. Output value might be wrong calculatedSequence 1 has unrecognized amino acid types. Output value might be wrong calculatedSequence 1 has unrecognized amino acid types. Output value might be wrong calculatedSequence 1 has unrecognized amino acid types. Output value might be wrong calculatedSequence 1 has unrecognized amino acid types. Output value might be wrong calculatedSequence 1 has unrecognized amino acid types. Output value might be wrong calculatedSequence 1 has unrecognized amino acid types. Output value might be wrong calculatedSequence 1 has unrecognized amino acid types. Output value might be wrong calculatedSequence 1 has unrecognized amino acid types. Output value might be wrong calculatedSequence 1 has unrecognized amino acid types. Output value might be wrong calculatedSequence 1 has unrecognized amino acid types. Output value might be wrong calculatedSequence 1 has unrecognized amino acid types. 
Output value might be wrong calculatedSequence 1 has unrecognized amino acid types. Output value might be wrong calculatedSequence 1 has unrecognized amino acid types. Output value might be wrong calculatedSequence 1 has unrecognized amino acid types. Output value might be wrong calculatedSequence 1 has unrecognized amino acid types. Output value might be wrong calculatedSequence 1 has unrecognized amino acid types. Output value might be wrong calculatedSequence 1 has unrecognized amino acid types. Output value might be wrong calculatedSequence 1 has unrecognized amino acid types. Output value might be wrong calculatedSequence 1 has unrecognized amino acid types. Output value might be wrong calculated

Some of the accession numbers were not included in the isoelectricpointdb.

# Count, per derived column of IPDB_matching_proteome, how many entries are
# missing. Sequence, MolWeight and IPC_protein are tested with is.na();
# Hydrophobicity is tested with is.nan().
sum(sapply(IPDB_matching_proteome$Sequence, function(x)is.na(x)))
[1] 173
sum(sapply(IPDB_matching_proteome$MolWeight, function(x)is.na(x)))
[1] 173
sum(sapply(IPDB_matching_proteome$IPC_protein, function(x)is.na(x)))
[1] 173
sum(sapply(IPDB_matching_proteome$Hydrophobicity, function(x)is.nan(x)))
[1] 173

6.1.1 Plots

6.1.1.1 Molecular Weight

# Long-format table: molecular weights of the PTex hits followed by those of
# the proteome, each value tagged with the set it belongs to.
mw_distribution <- data.frame(
  MolWeight = c(IPDB_matching$MolWeight, IPDB_matching_proteome$MolWeight),
  set = rep(
    c("PTex", "Proteome"),
    times = c(
      length(IPDB_matching$MolWeight),
      length(IPDB_matching_proteome$MolWeight)
    )
  )
)
# One box per set on a log10 y axis; the x axis title is left empty.
ggplot(mw_distribution, aes(x = set, y = MolWeight)) +
  geom_boxplot() +
  scale_y_log10() +
  labs(x = "", y = "Molecular Weight")
# Draw the most recent ggplot again, this time into a PDF file.
pdf("MolWeight_boxplot.pdf", width = 10, height = 7)
last_plot()
dev.off()
png 
  2 

6.1.1.2 Isoelectric Point

6.1.1.2.1 Boxplot
# Long-format table: isoelectric points (IPC_protein) of the PTex hits followed
# by those of the proteome, each value tagged with the set it belongs to.
pi_distribution <- data.frame(
  IPC_protein = c(IPDB_matching$IPC_protein, IPDB_matching_proteome$IPC_protein),
  set = rep(
    c("PTex", "Proteome"),
    times = c(
      length(IPDB_matching$IPC_protein),
      length(IPDB_matching_proteome$IPC_protein)
    )
  )
)
# One box per set; the x axis title is left empty.
ggplot(pi_distribution, aes(x = set, y = IPC_protein)) +
  geom_boxplot() +
  labs(x = "", y = "IPC_protein")
# Draw the most recent ggplot again, this time into a PDF file.
pdf("IPC_protein_boxplot.pdf", width = 10, height = 7)
last_plot()
dev.off()
png 
  2 

6.1.1.2.2 Density Plot

A density plot of the IPC_protein distributions comparing PTex, Landthaler, Rick unique, Rick high and proteome is prepared.

# For each reference set, collect the IPC_protein values of the proteome
# entries whose UniProt accession occurs in that set. The `set` label is
# repeated once per value actually selected, so both columns always have the
# same length, even if a reference set contains accessions that are absent
# from (or duplicated relative to) IPDB_matching_proteome.
rick_high_ip <- data.frame(
  IPC_protein = IPDB_matching_proteome$IPC_protein[IPDB_matching_proteome$UniProt %in% rick_high$Protein_ID]
)
rick_high_ip$set <- rep("Rick.high", nrow(rick_high_ip))

rick_unique_ip <- data.frame(
  IPC_protein = IPDB_matching_proteome$IPC_protein[IPDB_matching_proteome$UniProt %in% rick_unique$Protein_ID]
)
rick_unique_ip$set <- rep("Rick.unique", nrow(rick_unique_ip))

# Landthaler accessions that are not included in the isoelectricpointdb cannot
# match any proteome entry, so they drop out of the %in% selection on their
# own. No negative `which()` index is used: `x[-which(cond)]` returns an empty
# vector when `cond` is never TRUE.
landthaler_ip <- data.frame(
  IPC_protein = IPDB_matching_proteome$IPC_protein[IPDB_matching_proteome$UniProt %in% hekmRBP$Uniprot.entry]
)
landthaler_ip$set <- rep("Landthaler.mRBP", nrow(landthaler_ip))

# Stack all five sets into one long table for the density plot.
ip_density_distribution <- data.frame(
  IPC_protein = c(
    IPDB_matching$IPC_protein,
    landthaler_ip$IPC_protein,
    rick_unique_ip$IPC_protein,
    rick_high_ip$IPC_protein,
    IPDB_matching_proteome$IPC_protein
  ),
  set = c(
    rep("PTex", nrow(IPDB_matching)),
    as.character(landthaler_ip$set),
    as.character(rick_unique_ip$set),
    as.character(rick_high_ip$set),
    rep("Proteome", nrow(IPDB_matching_proteome))
  )
)
# ggplot2::ggplot() is called with an explicit namespace here, and the plot
# maps five sets to colour.
ggplot2::ggplot(ip_density_distribution, aes(x = IPC_protein, colour = set)) +
  geom_density()
# Draw the most recent ggplot again, this time into a PDF file.
pdf("IPC_protein_density.pdf", width = 10, height = 7)
last_plot()
dev.off()
png 
  2 

6.1.1.3 Hydrophobicity

# Long-format table: hydrophobicity of the PTex hits followed by that of the
# proteome, each value tagged with the set it belongs to.
hydrophobicity_distribution <- data.frame(
  Hydrophobicity = c(IPDB_matching$Hydrophobicity, IPDB_matching_proteome$Hydrophobicity),
  set = rep(
    c("PTex", "Proteome"),
    times = c(
      length(IPDB_matching$Hydrophobicity),
      length(IPDB_matching_proteome$Hydrophobicity)
    )
  )
)
# One box per set; the x axis title is left empty.
ggplot(hydrophobicity_distribution, aes(x = set, y = Hydrophobicity)) +
  geom_boxplot() +
  labs(x = "", y = "Hydrophobicity")
# Draw the most recent ggplot again, this time into a PDF file.
pdf("Hydrophobicity_boxplot.pdf", width = 10, height = 7)
last_plot()
dev.off()
png 
  2 

6.2 RBD

The RBD domain annotation is performed using the DAVID online tool. Since it only accepts lists with a maximum of 3000 entries, 10 randomly sampled lists were created.

# Draw ten random samples of 3000 accessions each and write every sample to
# its own file without row or column names. The samples are drawn in the order
# S1..S10 from a fixed seed; writing a file does not consume random numbers, so
# the draws are the same as when all ten samples are taken first and written
# afterwards. Each sample is also kept in the workspace under its name
# (accessions_S1 ... accessions_S10).
set.seed(42)
for (sample_name in paste0("accessions_S", seq_len(10))) {
  assign(sample_name, sample(accessions, 3000))
  write.table(
    get(sample_name),
    paste0(sample_name, ".tsv"),
    row.names = FALSE,
    col.names = FALSE
  )
}
rm(sample_name)

6.3 GO

GO term analysis was performed with the PANTHER online tool using the accessions table.

7 Master Table

A master table containing identifiers, enrichments, matches and annotations is created.

# create vector holding rows of sig_norm_MassSpecData that are in matching_table
# Each Majority.protein.IDs entry is split at ";" before being matched against
# matching_table$Majority.protein.IDs.
# NOTE(review): for entries holding several IDs the sapply() result has
# elements of length > 1; confirm that `%in%` matches those rows as intended.
MSD_to_MT <- which(sapply(X = sig_norm_MassSpecData$Majority.protein.IDs, FUN = function(x)unlist(strsplit(as.character(x), split=";"))) %in% matching_table$Majority.protein.IDs)
# Assemble the master table column by column. Columns taken from
# sig_norm_MassSpecData are restricted to the rows in MSD_to_MT; columns taken
# from accessions, matching_table and IPDB_matching are used as they are.
# NOTE(review): the columns are combined purely by position, so this assumes
# all four sources have the same length and row order - TODO confirm.
master_table <- data.frame(
  UniProt = accessions,
  Majority.protein.IDs = sig_norm_MassSpecData$Majority.protein.IDs[MSD_to_MT],
  MPID.reviewed = as.character(matching_table$MPIDr),
  Protein.name = sig_norm_MassSpecData$Protein.names[MSD_to_MT],
  Gene.name = matching_table$gene_name,
  # mean fold change and adjusted p-value of the three PTex comparisons
  FC.PTex_0015.mean = sig_norm_MassSpecData$FC.PTex_0015.mean[MSD_to_MT],
  padj.PTex_0015 = sig_norm_MassSpecData$padj.PTex_0015[MSD_to_MT],
  FC.PTex_015.mean = sig_norm_MassSpecData$FC.PTex_015.mean[MSD_to_MT],
  padj.PTex_015 = sig_norm_MassSpecData$padj.PTex_015[MSD_to_MT],
  FC.PTex_15.mean = sig_norm_MassSpecData$FC.PTex_15.mean[MSD_to_MT],
  padj.PTex_15 = sig_norm_MassSpecData$padj.PTex_15[MSD_to_MT],
  # per-reference-set columns copied from matching_table
  Gerstberger.RBP = matching_table$Gerstberger.RBP,
  Gerstberger.TF = matching_table$Gerstberger.TF,
  Landthaler.mRBP = matching_table$Landthaler.mRBP,
  Preiss = matching_table$Preiss,
  Brannan.annotated = matching_table$Brannan.annotated,
  Brannan.predicted = matching_table$Brannan.predicted,
  Hubstenberger.PBP = matching_table$Hubstenberger.PBP,
  Rick.high = matching_table$Rick.high,
  Rick.low = matching_table$Rick.low,
  Rick.unique = matching_table$Rick.unique,
  Caric = matching_table$Caric,
  # physicochemical properties copied from IPDB_matching
  IPC_protein = IPDB_matching$IPC_protein,
  MolWeight = IPDB_matching$MolWeight,
  Hydrophobicity = IPDB_matching$Hydrophobicity
)
# Write the table as a tab-separated file without row names.
write.table(master_table, "master_table.tsv", sep = "\t", row.names = F)

8 Session Info

sessionInfo()
R version 3.5.1 (2018-07-02)
Platform: x86_64-pc-linux-gnu (64-bit)
Running under: Arch Linux

Matrix products: default
BLAS/LAPACK: /usr/lib/libopenblas_haswellp-r0.3.3.so

locale:
 [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C               LC_TIME=en_US.UTF-8        LC_COLLATE=en_US.UTF-8     LC_MONETARY=en_US.UTF-8   
 [6] LC_MESSAGES=en_US.UTF-8    LC_PAPER=en_US.UTF-8       LC_NAME=C                  LC_ADDRESS=C               LC_TELEPHONE=C            
[11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C       

attached base packages:
[1] stats     graphics  grDevices utils     datasets  methods   base     

other attached packages:
[1] DT_0.4         Peptides_2.4   GGally_1.4.0   ggplot2_3.0.0  reshape2_1.4.3 limma_3.36.3   dplyr_0.7.6   

loaded via a namespace (and not attached):
 [1] Rcpp_1.0.0         later_0.7.4        RColorBrewer_1.1-2 pillar_1.3.0       compiler_3.5.1     plyr_1.8.4         bindr_0.1.1        base64enc_0.1-3   
 [9] tools_3.5.1        digest_0.6.17      jsonlite_1.5       evaluate_0.11      tibble_1.4.2       gtable_0.2.0       pkgconfig_2.0.2    rlang_0.2.2       
[17] shiny_1.1.0        rstudioapi_0.7     crosstalk_1.0.0    yaml_2.2.0         bindrcpp_0.2.2     withr_2.1.2        stringr_1.3.1      knitr_1.20        
[25] htmlwidgets_1.2    rprojroot_1.3-2    grid_3.5.1         tidyselect_0.2.4   reshape_0.8.7      glue_1.3.0         R6_2.3.0           rmarkdown_1.10    
[33] purrr_0.2.5        magrittr_1.5       promises_1.0.1     backports_1.1.2    scales_1.0.0       htmltools_0.3.6    assertthat_0.2.0   xtable_1.8-3      
[41] mime_0.5           colorspace_1.3-2   httpuv_1.4.5       labeling_0.3       stringi_1.2.4      lazyeval_0.2.1     munsell_0.5.0      crayon_1.3.4      
---
title: "PTex HEK293 MS Analysis"
output:
  html_notebook: 
    toc: yes
    number_sections: true
    smart: false
    md_extensions: +smart
---

# Introduction

This document guides through the analysis of the MS data for the PTex HEK293 project.
Inputdata have to be provided in the folder `inputdata` inside the working directory. These are:

    working directory/
      PTex.Rmd
      inputdata/
        proteinGroups.txt                   # MS data
        RBP table-Table 1.csv               # Gerstberger review (2014) RBPs
        human TFs-Table 1.csv               # Gerstberger review (2014) TFs
        2015-06-09_human_interactomes.csv   # Landthaler human mRNA IC
        preiss_genenames.csv                # Preiss
        Brannan_annotated.txt               # Brannan SONAR annotated RBPs
        Brannan_SONAR_0.79.txt              # Brannan SONAR predicted
        hubstenberger.csv                   # Hubstenberger P-Body Proteins
        RICK_high_conf.csv                  # RICK high confidence
        RICK_low_conf.csv                   # RICK low confidence
        RICK_unique.csv                     # RICK unique
        caric.csv                           # CARIC
        human_proteome_reviewed_290118.list # UniProt SwissProt
        UP000005640_9606_all.fasta.pI.csv   # Isoelectricpointdb

# Libraries 

The following libraries are needed for the analysis. Packages have to be installed prior to loading.

```{r}
library("dplyr")
library("limma")
library("reshape2")
library("ggplot2")
library("GGally")
library("Peptides")
library("DT")
```

To produce pairwise scatter plots with correlation information we use the `ggpairs()` function from the `GGally` package.
Custom theme for pairwise scatter plots:

```{r}
# Shared theme for the pairwise scatter plots: no legend, no major grid lines,
# no axis ticks, and a solid black, unfilled border around each panel.
scatter_theme <- theme(legend.position = "none", 
      panel.grid.major = element_blank(), 
      axis.ticks = element_blank(), 
      panel.border = element_rect(linetype = "solid", colour = "black", fill = NA))

# Wrapper around ggplot2::ggplot() that always appends manual colour and fill
# scales with the two values "black" and "red". Unqualified ggplot() calls that
# resolve to this definition therefore get these scales as well.
ggplot <- function(...) ggplot2::ggplot(...) + scale_colour_manual(values = c("black","red")) + scale_fill_manual(values = c("black","red"))
# Replace the `ggplot` binding in the parent environment of the GGally
# namespace with the wrapper; the binding has to be unlocked first.
# NOTE(review): presumably this makes ggpairs() build its panels with the
# black/red scales - confirm against the installed GGally version.
unlockBinding("ggplot",parent.env(asNamespace("GGally")))
assign("ggplot",ggplot,parent.env(asNamespace("GGally")))
```

# Data preparation

The primary MS data are contained in the file `proteinGroups.txt`.

```{r load primary MS data}
# Read the tab-separated table; its first line holds the column names.
# NOTE(review): read.table() keeps its default quote and comment characters
# here; confirm that no field contains an apostrophe, a double quote or "#".
primaryMassSpecData <- read.table(
  file = file.path("inputdata", "proteinGroups.txt"),
  sep = "\t",
  header = TRUE
)
```

To ease later analyses, character vectors of column names are created.

```{r colnames intensities}
# Names of the 24 raw intensity columns, generated instead of listed:
# fraction (Input, PTex) x condition (NoCl, 0015, 015, 15) x replicate
# (I, II, III), with the replicate varying fastest and the fraction slowest.
intensities <- paste(
  "Intensity",
  paste(
    rep(c("Input", "PTex"), each = 12),
    rep(rep(c("NoCl", "0015", "015", "15"), each = 3), times = 2),
    rep(c("I", "II", "III"), times = 8),
    sep = "_"
  ),
  sep = "."
)
```

```{r colnames LFQ}
# Names of the 24 LFQ intensity columns, generated instead of listed:
# fraction (Input, PTex) x condition (NoCl, 0015, 015, 15) x replicate
# (I, II, III), with the replicate varying fastest and the fraction slowest.
LFQ <- paste(
  "LFQ.intensity",
  paste(
    rep(c("Input", "PTex"), each = 12),
    rep(rep(c("NoCl", "0015", "015", "15"), each = 3), times = 2),
    rep(c("I", "II", "III"), times = 8),
    sep = "_"
  ),
  sep = "."
)
```

```{r colnames iBAQ}
# Names of the 24 iBAQ columns, generated instead of listed:
# fraction (Input, PTex) x condition (NoCl, 0015, 015, 15) x replicate
# (I, II, III), with the replicate varying fastest and the fraction slowest.
IBAQ <- paste(
  "iBAQ",
  paste(
    rep(c("Input", "PTex"), each = 12),
    rep(rep(c("NoCl", "0015", "015", "15"), each = 3), times = 2),
    rep(c("I", "II", "III"), times = 8),
    sep = "_"
  ),
  sep = "."
)
```

## First filter

In a first filtering step, reverse hits and hits only identified by site are removed.

```{r first filter step}
# Drop rows flagged "+" in the Reverse column, then rows flagged "+" in the
# Only.identified.by.site column. The two filters are applied one after the
# other.
MassSpecData <- primaryMassSpecData[!(primaryMassSpecData$Reverse == "+"), ]
MassSpecData <- MassSpecData[!(MassSpecData$Only.identified.by.site == "+"), ]
```

## Transformation

All intensities are log2 transformed.

```{r log2 transformation}
# Replace every raw intensity, iBAQ and LFQ column by its log2 values,
# one column at a time.
MassSpecData[c(intensities, IBAQ, LFQ)] <- lapply(
  MassSpecData[c(intensities, IBAQ, LFQ)],
  log2
)
```

Infinity values that resulted from log2 transformation are converted to `NA`:

```{r conversion of infinity values}
# Overwrite every infinite value in the intensity, iBAQ and LFQ columns with
# NA. The mask is built per column with is.infinite().
MassSpecData[c(intensities, IBAQ, LFQ)][
  sapply(MassSpecData[c(intensities, IBAQ, LFQ)], is.infinite)
] <- NA
```

## Trypsin normalisation factor

The LFQ intensities are normalised under the assumption that most intensities won't change. This is not true for PTex data, so the normalisation has to be corrected. The correction uses trypsin (added to all samples in the same amount) to calculate a normalisation factor.

```{r calculation of trypsin normalisation factor}
# Row of the trypsin entry (identifier "CON__P00761"); it is looked up before
# the contaminants are removed from the data set.
trypsin <- MassSpecData[MassSpecData$Majority.protein.IDs == "CON__P00761", ]
# Exactly one trypsin row is required for the factors below to be meaningful.
stopifnot(nrow(trypsin) == 1L)

# Mean log2 LFQ intensity of trypsin over the three replicates of each
# condition. The three values are combined into ONE vector before averaging:
# mean(a, b, c) would treat `b` as the `trim` argument and `c` as `na.rm`, and
# would therefore return only the first replicate's value. Replicates without
# a trypsin value (NA) are left out of the average.
trp_mean_Inp_NoCl <- mean(unlist(trypsin[, LFQ[1:3]]), na.rm = TRUE)
trp_mean_Inp_0015 <- mean(unlist(trypsin[, LFQ[4:6]]), na.rm = TRUE)
trp_mean_Inp_015 <- mean(unlist(trypsin[, LFQ[7:9]]), na.rm = TRUE)
trp_mean_Inp_15 <- mean(unlist(trypsin[, LFQ[10:12]]), na.rm = TRUE)

trp_mean_PTex_NoCl <- mean(unlist(trypsin[, LFQ[13:15]]), na.rm = TRUE)
trp_mean_PTex_0015 <- mean(unlist(trypsin[, LFQ[16:18]]), na.rm = TRUE)
trp_mean_PTex_015 <- mean(unlist(trypsin[, LFQ[19:21]]), na.rm = TRUE)
trp_mean_PTex_15 <- mean(unlist(trypsin[, LFQ[22:24]]), na.rm = TRUE)

# Normalisation factor per condition: PTex trypsin mean minus Input trypsin
# mean, i.e. a difference on the log2 scale.
nf_NoCl <- trp_mean_PTex_NoCl - trp_mean_Inp_NoCl
nf_0015Cl <- trp_mean_PTex_0015 - trp_mean_Inp_0015
nf_015Cl <- trp_mean_PTex_015 - trp_mean_Inp_015
nf_15Cl <- trp_mean_PTex_15 - trp_mean_Inp_15
```

Tidy up:

```{r}
# Remove every object whose name contains "trp_mean_", then the trypsin row.
rm(list = ls(pattern = "trp_mean_"))
rm(trypsin)
```


## Contaminant removal

After calculation of the normalisation factors the contaminants are removed from the data set.

```{r contaminant removal}
# Drop all rows flagged "+" in the Potential.contaminant column.
MassSpecData <- MassSpecData[!(MassSpecData$Potential.contaminant == "+"), ]
```

## Trypsin normalisation

The normalisation factors are applied by subtraction, since the data were already log transformed.

```{r trypsin normalisation}
# Work on a copy so that MassSpecData itself stays unnormalised.
norm_MassSpecData <- MassSpecData

# Subtract the condition-specific normalisation factor from each of the twelve
# PTex LFQ columns (LFQ[13:24]); every factor is repeated for the three
# replicate columns of its condition. The Input columns are left unchanged.
norm_MassSpecData[LFQ[13:24]] <- Map(
  function(lfq_values, offset) lfq_values - offset,
  norm_MassSpecData[LFQ[13:24]],
  rep(c(nf_NoCl, nf_0015Cl, nf_015Cl, nf_15Cl), each = 3)
)
```

Tidy up:

```{r}
# Remove every object whose name contains "nf_" (the normalisation factors).
rm(list = ls(pattern = "nf_"))
```


## Incomplete observation removal

We only want to consider those proteins, that were found in all replicates of all experiments.

```{r removal of incomplete observations}
# For each experiment in turn, keep only the rows that have more than two
# non-missing LFQ values among that experiment's columns.
for (exp in c("PTex_noCL", "PTex_0015", "PTex_015", "PTex_15",
              "Input_noCL", "Input_0015", "Input_015", "Input_15")) {
  # LFQ columns of this experiment, selected by pattern; matches() ignores
  # case by default, so "noCL" also selects the "NoCl" columns.
  sub_MassSpecData <- select(
    norm_MassSpecData,
    matches(paste0("LFQ.intensity.", exp))
  )
  # TRUE for rows with more than two observed values
  x <- rowSums(!is.na(sub_MassSpecData)) > 2
  norm_MassSpecData <- norm_MassSpecData[x, ]
}
```

Tidy up:

```{r}
# Remove the leftovers of the filtering loop: every object whose name contains
# "sub_", the loop variable `exp` and the row mask `x`.
rm(list = ls(pattern = "sub_"))
rm(exp)
rm(x)
```


## Plots

### Intensities Scatterplots

```{r}
## Pairwise replicate plot for three LFQ columns: correlation coefficients in
## the lower triangle, smoothed scatter plots in the upper triangle, density
## curves on the diagonal, styled with scatter_theme.
## The helper's name contains "_scatter", so the existing clean-up of
## "_scatter" objects removes it together with the plots.
lfq_replicate_scatter <- function(lfq_cols) {
  ggpairs(
    norm_MassSpecData[, lfq_cols],
    lower = list(continuous = wrap("cor", size = 7)),
    upper = list(continuous = "smooth"),
    diag = list(continuous = "densityDiag")
  ) + scatter_theme
}

## Input: noCL, 0.015, 0.15, 1.5
input_nocl_scatter <- lfq_replicate_scatter(LFQ[1:3])
input_0015_scatter <- lfq_replicate_scatter(LFQ[4:6])
input_015_scatter <- lfq_replicate_scatter(LFQ[7:9])
input_15_scatter <- lfq_replicate_scatter(LFQ[10:12])

## PTex: noCL, 0.015, 0.15, 1.5
ptex_nocl_scatter <- lfq_replicate_scatter(LFQ[13:15])
ptex_0015_scatter <- lfq_replicate_scatter(LFQ[16:18])
ptex_015_scatter <- lfq_replicate_scatter(LFQ[19:21])
ptex_15_scatter <- lfq_replicate_scatter(LFQ[22:24])

```

```{r, fig.width=12, fig.height=7}
# Print the four Input replicate plots, then the four PTex replicate plots.
input_nocl_scatter
input_0015_scatter
input_015_scatter
input_15_scatter

ptex_nocl_scatter
ptex_0015_scatter
ptex_015_scatter
ptex_15_scatter
```

Tidy up:

```{r}
# Remove every object whose name contains "_scatter".
rm(list = ls(pattern = "_scatter"))
```


# Enrichment analysis

After preparation the fold changes between respective Input/PTex pairs can be calculated and a moderated t-test with following Benjamini-Hochberg correction is used to determine the false discovery rate (FDR).

## Fold Changes

The fold changes are calculated by subtracting the log-transformed LFQ intensity values of the non-crosslinked (-CL) from the crosslinked (+CL) samples.

```{r calculation of individual fold changes}
# Add one fold-change column per fraction, crosslinking condition and
# replicate: the log2 LFQ value of the condition minus the log2 LFQ value of
# the NoCl sample with the same fraction and replicate number.
# Columns are appended in the order Input_0015, Input_015, Input_15,
# PTex_0015, PTex_015, PTex_15, each with rep1..rep3. The work happens inside
# local() so that no helper variables are left in the workspace.
norm_MassSpecData <- local({
  with_fc <- norm_MassSpecData
  replicate_suffix <- c("I", "II", "III")
  for (fraction in c("Input", "PTex")) {
    for (condition in c("0015", "015", "15")) {
      for (rep_no in seq_along(replicate_suffix)) {
        treated_col <- paste0(
          "LFQ.intensity.", fraction, "_", condition, "_", replicate_suffix[rep_no]
        )
        control_col <- paste0(
          "LFQ.intensity.", fraction, "_NoCl_", replicate_suffix[rep_no]
        )
        fc_col <- paste0("FC.", fraction, "_", condition, ".rep", rep_no)
        with_fc[[fc_col]] <- with_fc[[treated_col]] - with_fc[[control_col]]
      }
    }
  }
  with_fc
})


```

Mean fold changes are calculated.

```{r calculation of mean fold changes}
# Mean fold change per comparison: row-wise mean of its three replicate
# fold-change columns. The replicate columns are addressed by NAME instead of
# by position (251:268), so the result no longer depends on how many columns
# the input table happens to have.
norm_MassSpecData$FC.Input_0015.mean <- rowMeans(norm_MassSpecData[, paste0("FC.Input_0015.rep", 1:3)])
norm_MassSpecData$FC.Input_015.mean <- rowMeans(norm_MassSpecData[, paste0("FC.Input_015.rep", 1:3)])
norm_MassSpecData$FC.Input_15.mean <- rowMeans(norm_MassSpecData[, paste0("FC.Input_15.rep", 1:3)])
norm_MassSpecData$FC.PTex_0015.mean <- rowMeans(norm_MassSpecData[, paste0("FC.PTex_0015.rep", 1:3)])
norm_MassSpecData$FC.PTex_015.mean <- rowMeans(norm_MassSpecData[, paste0("FC.PTex_015.rep", 1:3)])
norm_MassSpecData$FC.PTex_15.mean <- rowMeans(norm_MassSpecData[, paste0("FC.PTex_15.rep", 1:3)])
```

## Moderated t-test and Benjamini-Hochberg correction

A moderated t-test including Benjamini-Hochberg p-value correction is carried out on the fold changes.

```{r mod t-test and BH correction}
# For every comparison: fit limma's linear model to its three replicate
# fold-change columns, moderate it with eBayes(), store the resulting p-values
# as pval.<comparison> and their Benjamini-Hochberg adjusted values as
# padj.<comparison>. No design matrix is passed to lmFit().
# `pval` is left in the workspace holding the last fit; the loop helpers are
# removed again.
for (fc_group in c("Input_0015", "Input_015", "Input_15",
                   "PTex_0015", "PTex_015", "PTex_15")) {
  replicate_cols <- grep(
    paste0("FC.", fc_group, ".rep"),
    names(norm_MassSpecData)
  )
  pval <- eBayes(lmFit(norm_MassSpecData[, replicate_cols]))
  norm_MassSpecData[[paste0("pval.", fc_group)]] <- pval$p.value
  norm_MassSpecData[[paste0("padj.", fc_group)]] <- p.adjust(
    norm_MassSpecData[[paste0("pval.", fc_group)]],
    method = "BH"
  )
}
rm(fc_group, replicate_cols)
```

Tidy up:

```{r}
# Remove the last model fit; its p-values are already stored as columns.
rm(pval)
```


## Mean intensities

Mean intensities (LFQ and iBAQ) are calculated.

```{r}
### LFQ and iBAQ
# Add one mean column per measure (LFQ first, then iBAQ), fraction and
# condition: the row-wise mean of the three replicate columns. na.rm = FALSE,
# so a row with a missing replicate gets NA. The work happens inside local()
# so that no helper variables are left in the workspace.
norm_MassSpecData <- local({
  averaged <- norm_MassSpecData
  for (measure in c("LFQ.intensity.", "iBAQ.")) {
    for (fraction in c("Input", "PTex")) {
      for (condition in c("NoCl", "0015", "015", "15")) {
        replicate_cols <- paste0(
          measure, fraction, "_", condition, "_", c("I", "II", "III")
        )
        # the mean columns spell the control condition "noCl", not "NoCl"
        mean_condition <- if (condition == "NoCl") "noCl" else condition
        mean_col <- paste0(measure, fraction, "_", mean_condition, "_mean")
        averaged[[mean_col]] <- rowMeans(
          averaged[, replicate_cols],
          na.rm = FALSE
        )
      }
    }
  }
  averaged
})
```


## FDR and FC filter subset

We want to additionally exclude proteins that are not significantly enriched in all 3 libraries and those, which are significantly depleted:

```{r}
# Keep the rows whose adjusted p-value is at most 0.01 in all three PTex
# comparisons, then show the dimensions of the result.
sig_norm_MassSpecData <- norm_MassSpecData[
  with(
    norm_MassSpecData,
    padj.PTex_0015 <= 0.01 & padj.PTex_015 <= 0.01 & padj.PTex_15 <= 0.01
  ),
]
dim(sig_norm_MassSpecData)
```

```{r}
# Of those, keep the rows whose mean fold change is positive in all three PTex
# comparisons, then show the dimensions of the result.
sig_norm_MassSpecData <- sig_norm_MassSpecData[
  with(
    sig_norm_MassSpecData,
    FC.PTex_0015.mean > 0 & FC.PTex_015.mean > 0 & FC.PTex_15.mean > 0
  ),
]
dim(sig_norm_MassSpecData)
```


## Plots

### Enrichments

```{r}
# Names of the 18 per-replicate fold-change columns, in the order Input_0015,
# Input_015, Input_15, PTex_0015, PTex_015, PTex_15 (rep1..rep3 each). They
# are spelled out by NAME instead of being read from positions 251:268, so the
# plots no longer depend on how many columns the input table happens to have.
FC <- paste0(
  "FC.",
  rep(
    c("Input_0015", "Input_015", "Input_15", "PTex_0015", "PTex_015", "PTex_15"),
    each = 3
  ),
  ".rep",
  1:3
)

# Pairwise replicate plot of three fold-change columns. Points are coloured by
# whether the comparison's adjusted p-value is at most 0.01 ("significant") or
# not ("non-significant"). Correlation coefficients go in the lower triangle,
# smoothed scatter plots in the upper triangle, densities on the diagonal.
#   fc_cols: names of the three replicate fold-change columns
#   padj:    adjusted p-values of the same comparison, one per row
make_fc_scatter <- function(fc_cols, padj) {
  padj_co <- ifelse(padj <= 0.01, "significant", "non-significant")
  ggpairs(
    cbind(norm_MassSpecData[, fc_cols], padj_co),
    columns = 1:3,
    mapping = ggplot2::aes(colour = padj_co, alpha = 0.5),
    lower = list(continuous = wrap("cor", size = 5)),
    upper = list(continuous = "smooth"),
    diag = list(continuous = "densityDiag")
  ) + scatter_theme
}

## Input 0.015, 0.15, 1.5
fc_input_0015_scatter <- make_fc_scatter(FC[1:3], norm_MassSpecData$padj.Input_0015)
fc_input_015_scatter <- make_fc_scatter(FC[4:6], norm_MassSpecData$padj.Input_015)
fc_input_15_scatter <- make_fc_scatter(FC[7:9], norm_MassSpecData$padj.Input_15)

## PTex 0.015, 0.15, 1.5
fc_ptex_0015_scatter <- make_fc_scatter(FC[10:12], norm_MassSpecData$padj.PTex_0015)
fc_ptex_015_scatter <- make_fc_scatter(FC[13:15], norm_MassSpecData$padj.PTex_015)
fc_ptex_15_scatter <- make_fc_scatter(FC[16:18], norm_MassSpecData$padj.PTex_15)

# Keep `padj_co` in the workspace with the labels of the last comparison
# (PTex 1.5), so that the workspace still contains this object afterwards.
padj_co <- ifelse(
  norm_MassSpecData$padj.PTex_15 <= 0.01,
  "significant",
  "non-significant"
)

```

```{r, fig.width=12, fig.height=7}
# Print the three Input fold-change plots, then the three PTex ones.
fc_input_0015_scatter
fc_input_015_scatter
fc_input_15_scatter

fc_ptex_0015_scatter
fc_ptex_015_scatter
fc_ptex_15_scatter
```

### Volcano Plots

```{r}
# One volcano plot per condition: mean log2 fold change on x, -log10 adjusted
# p-value on y, points coloured by whether the adjusted p-value is <= 0.01.
# The labels are built with ifelse() (missing p-values stay NA) instead of
# overwriting a logical vector with strings and comparing "FALSE" to FALSE.

## Input 0.015
padj_co <- ifelse(norm_MassSpecData$padj.Input_0015 <= 0.01,
                  "significant", "non-significant")

fc_input_0015_volcano <- ggplot(
  data = cbind(norm_MassSpecData, padj_co),
  aes(x = FC.Input_0015.mean, y = -log10(padj.Input_0015), colour = padj_co)
) +
  geom_point(alpha = 0.5, size = 1.75) +
#  xlim(c(-6, 6)) + ylim(c(0, 10)) +
  labs(x = "log2 fold change", y = "-log10 adj. p-value")


## Input 0.15
padj_co <- ifelse(norm_MassSpecData$padj.Input_015 <= 0.01,
                  "significant", "non-significant")

fc_input_015_volcano <- ggplot(
  data = cbind(norm_MassSpecData, padj_co),
  aes(x = FC.Input_015.mean, y = -log10(padj.Input_015), colour = padj_co)
) +
  geom_point(alpha = 0.5, size = 1.75) +
#  xlim(c(-6, 6)) + ylim(c(0, 10)) +
  labs(x = "log2 fold change", y = "-log10 adj. p-value")


## Input 1.5
padj_co <- ifelse(norm_MassSpecData$padj.Input_15 <= 0.01,
                  "significant", "non-significant")

fc_input_15_volcano <- ggplot(
  data = cbind(norm_MassSpecData, padj_co),
  aes(x = FC.Input_15.mean, y = -log10(padj.Input_15), colour = padj_co)
) +
  geom_point(alpha = 0.5, size = 1.75) +
#  xlim(c(-6, 6)) + ylim(c(0, 10)) +
  labs(x = "log2 fold change", y = "-log10 adj. p-value")

## PTex 0.015
padj_co <- ifelse(norm_MassSpecData$padj.PTex_0015 <= 0.01,
                  "significant", "non-significant")

fc_ptex_0015_volcano <- ggplot(
  data = cbind(norm_MassSpecData, padj_co),
  aes(x = FC.PTex_0015.mean, y = -log10(padj.PTex_0015), colour = padj_co)
) +
  geom_point(alpha = 0.5, size = 1.75) +
#  xlim(c(-6, 6)) + ylim(c(0, 10)) +
  labs(x = "log2 fold change", y = "-log10 adj. p-value")


## PTex 0.15
padj_co <- ifelse(norm_MassSpecData$padj.PTex_015 <= 0.01,
                  "significant", "non-significant")

fc_ptex_015_volcano <- ggplot(
  data = cbind(norm_MassSpecData, padj_co),
  aes(x = FC.PTex_015.mean, y = -log10(padj.PTex_015), colour = padj_co)
) +
  geom_point(alpha = 0.5, size = 1.75) +
#  xlim(c(-6, 6)) + ylim(c(0, 10)) +
  labs(x = "log2 fold change", y = "-log10 adj. p-value")

## PTex 1.5
padj_co <- ifelse(norm_MassSpecData$padj.PTex_15 <= 0.01,
                  "significant", "non-significant")

fc_ptex_15_volcano <- ggplot(
  data = cbind(norm_MassSpecData, padj_co),
  aes(x = FC.PTex_15.mean, y = -log10(padj.PTex_15), colour = padj_co)
) +
  geom_point(alpha = 0.5, size = 1.75) +
#  xlim(c(-6, 6)) + ylim(c(0, 10)) +
  labs(x = "log2 fold change", y = "-log10 adj. p-value")

```

```{r, fig.width=12, fig.height=7}
# Evaluate the stored volcano plot objects at top level so that they are
# rendered: the three Input plots first, then the three PTex plots.
fc_input_0015_volcano
fc_input_015_volcano
fc_input_15_volcano

fc_ptex_0015_volcano
fc_ptex_015_volcano
fc_ptex_15_volcano
```

Tidy up:

```{r}
# Drop every object whose name contains "_scatter" or "_volcano", plus the
# temporary significance label vector.
rm(list = c(ls(pattern = "_scatter"), ls(pattern = "_volcano"), "padj_co"))
```

# Matching

To assess performance of PTex to find RBPs, the significantly enriched proteins are compared to other studies.
The following studies are taken into account:
  - Gerstberger review 2014
  - Landthaler HEK293 interactome capture
  - Preiss
  - SONAR (annotated and predicted)
  - Hubstenberger P-Body proteins
  - RICK (high, low and unique)
  - CARIC

```{r data set loading}
# Reference data sets used for matching. Comma-separated files are read with
# read.csv(), tab-separated files with read.delim().

## Gerstberger review (2014) RBPs
ge <- read.csv(file = "inputdata/RBP table-Table 1.csv")

## Gerstberger review (2014) TFs
tf <- read.csv(file = "inputdata/human TFs-Table 1.csv")

## Human mRNA interactome capture data (empty cells are read as NA)
ic <- read.csv(
  file = "inputdata/2015-06-09_human_interactomes.csv",
  na.strings = c("", "NA")
)

## Preiss
preiss_gn <- read.delim(file = "inputdata/preiss_genenames.csv", header = TRUE)

## annotated RBPs from SONAR paper (Brannan et al.)
sonar_all <- read.csv(file = "inputdata/Brannan_annotated.txt", header = FALSE)

## Predicted SONAR proteins (with score > 0.79, see Brannan et al. Mol Cell 2016)
sonar_079 <- read.csv(file = "inputdata/Brannan_SONAR_0.79.txt", header = FALSE)

## Hubstenberger P-Body Proteins
hubst <- read.delim(file = "inputdata/hubstenberger.csv", header = TRUE)

## RICK
rick_high <- read.delim(
  file = "inputdata/RICK_high_conf.csv",
  stringsAsFactors = FALSE
)
rick_low <- read.delim(
  file = "inputdata/RICK_low_conf.csv",
  stringsAsFactors = FALSE
)
rick_unique <- read.delim(
  file = "inputdata/RICK_unique.csv",
  stringsAsFactors = FALSE
)

## CARIC
caric <- read.delim(file = "inputdata/caric.csv", stringsAsFactors = FALSE)
```

## Matching table

A matching table is created to perform the matching and store the results. It contains the following identifier columns:
  - Majority.protein.IDs: the Majority.protein.IDs of the significantly enriched PTex hits
  - MPIDsNoIso: Majority.protein.IDs with isoforms resolved to canonical form
  - gene_name: the gene names of the significantly enriched PTex hits

```{r}
# Build the matching table from the significantly enriched hits.
# Start with the Majority.protein.IDs strings, one row per hit.
matching_table <- data.frame(Majority.protein.IDs = as.character(sig_norm_MassSpecData$Majority.protein.IDs))
# Split each ID string at ";" so a row holds its individual accessions.
# NOTE(review): the shape returned by apply() depends on whether all rows yield
# the same number of IDs; presumably a list here, confirm.
matching_table$Majority.protein.IDs <- apply(X = matching_table, MARGIN = 1, FUN = function(x)unlist(strsplit(as.character(x), split=";")))
# Remove "-" followed by one or two digits (the isoform suffix) from the ID
# strings, split at ";" and keep each resulting accession once per row.
matching_table$MPIDsNoIso <- lapply(strsplit(gsub("-[0-9]{1,2}","",sig_norm_MassSpecData$Majority.protein.IDs),split=";"), function(x)unique(x))
# Gene names are taken over unchanged (still ";"-separated strings).
matching_table$gene_name <- sig_norm_MassSpecData$Gene.names
```

The canonical MPIDs were matched with all reviewed human UniProt (SwissProt) entries.

```{r}
# Reviewed UniProt accessions, read into a single column named "uniprot".
# NOTE(review): the input overview at the top of this document lists this file
# as `human_proteome_reviewed_290118.list`; confirm which file name is correct.
all_rev_uniprot <- read.table("inputdata/human_proteome_reviewed_291118.list", header=F, col.names = "uniprot")

# Per row: for each canonical accession, is it contained in the reviewed list?
MPIDsNoIso_rev_stat <- apply(matching_table, 1, function(x) unlist(x$MPIDsNoIso) %in% all_rev_uniprot$uniprot)
# Per row: keep only the accessions flagged as reviewed.
MPIDsNoIso.rev <- mapply(x=matching_table$MPIDsNoIso, y=MPIDsNoIso_rev_stat, function(x,y)unlist(x[unlist(y)]))
# Rows for which no reviewed accession is left are set to NA.
MPIDsNoIso.rev[unlist(lapply(MPIDsNoIso.rev, function(x)length(x)==0))] <- NA
# Number of rows without a reviewed accession.
sum(is.na(MPIDsNoIso.rev))

# Store the reviewed accessions in the matching table.
matching_table$MPIDr <- MPIDsNoIso.rev
```

For 16 majority protein ID groups no SwissProt entry could be found.

```{r}
# Show the rows for which no reviewed accession was found.
# NOTE(review): `%in% "NA"` is applied to a list-like object; this presumably
# matches the NA entries via their character form "NA" -- confirm.
datatable(matching_table[which(MPIDsNoIso.rev%in%"NA"),])
```

Those 16 hits were subjected to manual curation, meaning the accession numbers and gene names were used for a UniProt query. If an unambiguous hit was found it was corrected. The following were corrected:

```{r}
# Manually curated accessions, written into MPIDr by row position.
# NOTE(review): the positions are hard-coded, so they are only valid for the
# exact row order of matching_table produced from the original input data.
matching_table$MPIDr[74] <- "P43243"
matching_table$MPIDr[83] <- "O75122"
matching_table$MPIDr[93] <- "Q9UBX3"
matching_table$MPIDr[134] <- "O95819"
matching_table$MPIDr[226] <- "Q9UPN3"
matching_table$MPIDr[264] <- "Q96MU7"
matching_table$MPIDr[268] <- "P36776"
matching_table$MPIDr[674] <- "P0DMV8"
matching_table$MPIDr[2937] <- "P0DPB6"
matching_table$MPIDr[3034] <- "P17980"
matching_table$MPIDr[3035] <- "Q9UKV3"
```

After manual correction, the following were still not identifiable.

```{r}
# Show the rows whose MPIDr is still unresolved after manual curation.
# NOTE(review): `%in% "NA"` on the MPIDr column presumably matches NA entries
# via their character form "NA" -- confirm.
datatable(matching_table[which(matching_table$MPIDr%in%"NA"),])
```

These hits were excluded from further analyses.

```{r}
# Drop five rows by position.
# NOTE(review): the positions are hard-coded and refer to the row order before
# this removal; they are only valid for the original input data.
matching_table <- matching_table[-c(162,169,178,896,3037),]
# Display the rows still selected by the same `%in% "NA"` filter on MPIDr.
datatable(matching_table[which(matching_table$MPIDr%in%"NA"),])
```

WARNING:`235 Q13404;A5PLL7 UBE2V1;TMEM189  T;F` stays! Others might, too!

### Matching with other datasets

```{r}
# Each statement below adds one logical column to matching_table. A row is TRUE
# when any entry of the reference identifier column occurs among that row's
# identifiers: gene-name based matches split `gene_name` at ";", accession
# based matches use the accessions stored in `MPIDr`.

## Gerstberger RBPs

matching_table$Gerstberger.RBP <- apply(matching_table,1,function(x) any(ge$gene.name %in% unlist(strsplit(as.character(x$gene_name),split=";"))))

## Gerstberger TFs (negative control)

matching_table$Gerstberger.TF <- apply(matching_table,1,function(x) any(tf$gene.name %in% unlist(strsplit(as.character(x$gene_name),split=";"))))

## Landthaler HEK293 interactome capture data

# Only entries whose HEK293 column equals "mRNA interactome" are used.
hekmRBP <- subset(ic, ic$HEK293 == "mRNA interactome")
matching_table$Landthaler.mRBP <- apply(matching_table, 1, function(x) any(hekmRBP$Uniprot.entry %in% unlist(x$MPIDr)))

## Preiss

matching_table$Preiss <- apply(matching_table, 1, function(x) any(preiss_gn$gene.name %in% unlist(strsplit(as.character(x$gene_name),split=";"))))

## Annotated RBPs (from the SONAR paper)

# Read without a header, so the identifiers are in the default column V1.
matching_table$Brannan.annotated <- apply(matching_table, 1, function(x) any(sonar_all$V1 %in% unlist(strsplit(as.character(x$gene_name),split=";"))))

## SONAR-predicted RBPs

matching_table$Brannan.predicted <- apply(matching_table, 1, function(x) any(sonar_079$V1 %in% unlist(strsplit(as.character(x$gene_name),split=";"))))

## Hubstenberger P-Body proteins

matching_table$Hubstenberger.PBP <- apply(matching_table, 1, function(x) any(hubst$gene.name %in% unlist(strsplit(as.character(x$gene_name),split=";"))))

## RICK data

matching_table$Rick.high <- apply(matching_table, 1, function(x) any(rick_high$Protein_ID %in% unlist(x$MPIDr)))
matching_table$Rick.low <- apply(matching_table, 1, function(x) any(rick_low$Protein_ID %in% unlist(x$MPIDr)))
matching_table$Rick.unique <- apply(matching_table, 1, function(x) any(rick_unique$Protein_ID %in% unlist(x$MPIDr)))

## CARIC RBPs

matching_table$Caric <- apply(matching_table, 1, function(x) any(caric$UniProt.accession %in% unlist(x$MPIDr)))
```

Note that matching via gene names split at `;` can produce multiple hits in a reference data set for a single PTex hit.

## Matching df

```{r}
# inPTex, not inPTex

# For one reference table, return c(yes, no): the number of flagged PTex rows
# divided by the number of reference rows, and the complementary share, both
# rounded to 4 digits.
match_fractions <- function(hit_flags, reference) {
  n_reference <- nrow(reference)
  n_matched <- sum(hit_flags)
  c(
    round(n_matched / n_reference, 4),
    round((n_reference - n_matched) / n_reference, 4)
  )
}

matching_df <- data.frame(
  match = c("yes", "no"),
  Gerstberger.RBPs = match_fractions(matching_table$Gerstberger.RBP, ge),
  Gerstberger.TFs = match_fractions(matching_table$Gerstberger.TF, tf),
  Landthaler = match_fractions(matching_table$Landthaler.mRBP, hekmRBP),
  Preiss = match_fractions(matching_table$Preiss, preiss_gn),
  Brannan.annotated = match_fractions(matching_table$Brannan.annotated, sonar_all),
  Brannan.predicted = match_fractions(matching_table$Brannan.predicted, sonar_079),
  Hubstenberger.PBP = match_fractions(matching_table$Hubstenberger.PBP, hubst),
  Rick.high = match_fractions(matching_table$Rick.high, rick_high),
  Rick.low = match_fractions(matching_table$Rick.low, rick_low),
  Rick.unique = match_fractions(matching_table$Rick.unique, rick_unique),
  Caric = match_fractions(matching_table$Caric, caric)
)

```

```{r}
# Stacked bars of the yes/no fractions, one bar per reference data set.
ggplot(
  data = melt(matching_df, id.vars = "match"),
  mapping = aes(x = variable, y = value, fill = match)
) +
  geom_bar(stat = "identity", alpha = 0.6) +
  labs(x = "", y = "fraction") +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))
```

# Annotations

The significantly enriched hits are annotated with
- isoelectric point
- molecular weight
- hydrophobicity
- (RBD)
- (GO)

These annotations require a list of UniProt accession numbers. It is taken from the `MPIDr` column of the matching table; for entries with multiple accession numbers, only the first one is used.

```{r}
# One accession per matching-table row: the first entry of MPIDr.
accessions <- unlist(
  lapply(
    X = matching_table$MPIDr,
    FUN = function(ids) {
      id_parts <- strsplit(as.character(ids), split = ";")
      id_parts[[1]][1]
    }
  )
)

# Plain list of accessions, without row or column names.
write.table(accessions, "accessions.tsv", row.names = FALSE, col.names = FALSE)
```

## Isoelectric Point, Molecular Weight, Sequences and hydrophobicity

For comparison of the MS data with the human proteome a dataset from `isoelectricpointdb.org` was retrieved (including isoelectric points, molecular weights and sequences) (as of 29.11.18):
```{r}
# Comma-separated table with a header row (the read.csv() defaults).
isoelectricpointdb <- read.csv(file = "inputdata/UP000005640_9606_all.fasta.pI.csv")
```

```{r}
# Scaffold with one row per accession; the annotation columns start as NA and
# are filled in by the lookups that follow.
IPDB_matching <- data.frame(
  UniProt = accessions,
  Sequence = rep_len(NA, length(accessions)),
  MolWeight = rep_len(NA, length(accessions)),
  IPC_protein = rep_len(NA, length(accessions)),
  Hydrophobicity = rep_len(NA, length(accessions))
)
```

```{r}
# Fill Sequence, MolWeight and IPC_protein for every accession from the
# isoelectricpointdb table.
# seq_len() iterates zero times for an empty table (seq(0) would give 1, 0).
for (i in seq_len(nrow(IPDB_matching))) {
  # Literal (fixed = TRUE) search for "|<accession>|" in the header column,
  # done once per accession instead of once per annotation column.
  hit <- grep(
    pattern = paste0("|", IPDB_matching$UniProt[i], "|"),
    x = isoelectricpointdb$header,
    fixed = TRUE
  )
  # No database entry: leave the NA placeholders untouched.
  if (length(hit) == 0) {
    next
  }
  # Several entries: use the first one, which is the value the element-wise
  # assignment kept before as well.
  hit <- hit[1]
  IPDB_matching$Sequence[i] <- as.character(isoelectricpointdb$sequence[hit])
  IPDB_matching$MolWeight[i] <- isoelectricpointdb$molecular_weight[hit]
  IPDB_matching$IPC_protein[i] <- isoelectricpointdb$IPC_protein[hit]
}
```

Not found: P0DPB6 which is >sp|Q9Y2S0|RPAC2_HUMAN DNA-directed RNA polymerases I and III subunit RPAC2 OS=Homo sapiens GN=POLR1D PE=1 SV=1

```{r}
# Hydrophobicity = the "NonPolar" / "Mole%" cell of the aaComp() result for
# each sequence.
# seq_len() iterates zero times for an empty table (seq(0) would give 1, 0).
for (i in seq_len(nrow(IPDB_matching))) {
  # try() keeps the loop going if aaComp() fails for a sequence; the value
  # then stays at its NA placeholder.
  try(IPDB_matching$Hydrophobicity[i] <- aaComp(IPDB_matching$Sequence[i])[[1]]["NonPolar", "Mole%"])
}
```

The `all_rev_uniprot` accessions were matched with the isoelectricpointdb. (Note that the following chunk takes a long time to run because every accession is looked up individually in a for loop.)

```{r}
# Scaffold with one row per reviewed accession; annotation columns start as NA.
# The placeholder length is the number of rows of all_rev_uniprot; length() of
# a data frame is its number of columns and only worked through recycling.
IPDB_matching_proteome <- data.frame(
  UniProt = as.character(all_rev_uniprot$uniprot),
  Sequence = rep(NA, nrow(all_rev_uniprot)),
  MolWeight = rep(NA, nrow(all_rev_uniprot)),
  IPC_protein = rep(NA, nrow(all_rev_uniprot)),
  Hydrophobicity = rep(NA, nrow(all_rev_uniprot))
)

# Fill Sequence, MolWeight and IPC_protein from the isoelectricpointdb table.
# seq_len() iterates zero times for an empty table (seq(0) would give 1, 0).
for (i in seq_len(nrow(IPDB_matching_proteome))) {
  # Literal (fixed = TRUE) search for "|<accession>|" in the header column,
  # done once per accession instead of once per annotation column.
  hit <- grep(
    pattern = paste0("|", IPDB_matching_proteome$UniProt[i], "|"),
    x = isoelectricpointdb$header,
    fixed = TRUE
  )
  # No database entry: leave the NA placeholders untouched.
  if (length(hit) == 0) {
    next
  }
  # Several entries: use the first one, which is the value the element-wise
  # assignment kept before as well.
  hit <- hit[1]
  IPDB_matching_proteome$Sequence[i] <- as.character(isoelectricpointdb$sequence[hit])
  IPDB_matching_proteome$MolWeight[i] <- isoelectricpointdb$molecular_weight[hit]
  IPDB_matching_proteome$IPC_protein[i] <- isoelectricpointdb$IPC_protein[hit]
}

# Hydrophobicity = the "NonPolar" / "Mole%" cell of the aaComp() result.
for (i in seq_len(nrow(IPDB_matching_proteome))) {
  # try() keeps the loop going if aaComp() fails for a sequence; the value
  # then stays at its NA placeholder.
  try(IPDB_matching_proteome$Hydrophobicity[i] <- aaComp(IPDB_matching_proteome$Sequence[i])[[1]]["NonPolar", "Mole%"])
}
```

Some of the accession numbers were not included in the isoelectricpointdb.

```{r}
# Number of proteome entries without sequence, molecular weight and
# isoelectric point (NA), and with a NaN hydrophobicity.
sum(is.na(IPDB_matching_proteome$Sequence))
sum(is.na(IPDB_matching_proteome$MolWeight))
sum(is.na(IPDB_matching_proteome$IPC_protein))
sum(is.nan(IPDB_matching_proteome$Hydrophobicity))
```

### Plots

#### Molecular Weight

```{r}
# Molecular weights of the PTex hits and of the reviewed proteome, stacked
# into one long table with a `set` label per source.
mw_distribution <- data.frame(
  MolWeight = c(IPDB_matching$MolWeight, IPDB_matching_proteome$MolWeight),
  set = rep(
    c("PTex", "Proteome"),
    times = c(
      length(IPDB_matching$MolWeight),
      length(IPDB_matching_proteome$MolWeight)
    )
  )
)

# One box per set on a log10-scaled y axis.
ggplot(data = mw_distribution, mapping = aes(x = set, y = MolWeight)) +
  geom_boxplot() +
  scale_y_log10() +
  labs(x = "", y = "Molecular Weight")

# Draw the most recent plot again into a PDF file.
pdf(file = "MolWeight_boxplot.pdf", width = 10, height = 7)
last_plot()
dev.off()
```

#### Isoelectric Point

##### Boxplot

```{r}
# IPC_protein values of the PTex hits and of the reviewed proteome, stacked
# into one long table with a `set` label per source.
pi_distribution <- data.frame(
  IPC_protein = c(IPDB_matching$IPC_protein, IPDB_matching_proteome$IPC_protein),
  set = rep(
    c("PTex", "Proteome"),
    times = c(
      length(IPDB_matching$IPC_protein),
      length(IPDB_matching_proteome$IPC_protein)
    )
  )
)

# One box per set.
ggplot(data = pi_distribution, mapping = aes(x = set, y = IPC_protein)) +
  geom_boxplot() +
  labs(x = "", y = "IPC_protein")

# Draw the most recent plot again into a PDF file.
pdf(file = "IPC_protein_boxplot.pdf", width = 10, height = 7)
last_plot()
dev.off()
```

##### Density Plot

A density plot of the IPC_protein distributions comparing PTex, Landthaler, Rick unique, Rick high and proteome is prepared.

```{r}
# For each reference set, collect the IPC_protein values of the proteome
# entries whose accession occurs in that set. The `set` label is repeated once
# per selected value, so both columns always have the same length even when
# reference IDs are missing from, or duplicated relative to, the proteome table.
rick_high_ip <- data.frame(
  IPC_protein = IPDB_matching_proteome$IPC_protein[IPDB_matching_proteome$UniProt %in% rick_high$Protein_ID],
  set = rep("Rick.high", sum(IPDB_matching_proteome$UniProt %in% rick_high$Protein_ID))
)

rick_unique_ip <- data.frame(
  IPC_protein = IPDB_matching_proteome$IPC_protein[IPDB_matching_proteome$UniProt %in% rick_unique$Protein_ID],
  set = rep("Rick.unique", sum(IPDB_matching_proteome$UniProt %in% rick_unique$Protein_ID))
)

# Landthaler accessions that are absent from the proteome table simply never
# match, so they need no explicit removal (negative indexing with an empty
# which() result would have dropped every accession).
landthaler_ip <- data.frame(
  IPC_protein = IPDB_matching_proteome$IPC_protein[IPDB_matching_proteome$UniProt %in% hekmRBP$Uniprot.entry],
  set = rep("Landthaler.mRBP", sum(IPDB_matching_proteome$UniProt %in% hekmRBP$Uniprot.entry))
)

# Stack all sets into one long table for plotting.
ip_density_distribution <- data.frame(
  IPC_protein = c(IPDB_matching$IPC_protein, landthaler_ip$IPC_protein, rick_unique_ip$IPC_protein, rick_high_ip$IPC_protein, IPDB_matching_proteome$IPC_protein),
  set = c(rep("PTex", nrow(IPDB_matching)), as.character(landthaler_ip$set), as.character(rick_unique_ip$set), as.character(rick_high_ip$set), rep("Proteome", nrow(IPDB_matching_proteome)))
)

# One density curve per set.
ggplot2::ggplot(ip_density_distribution, aes(x = IPC_protein, colour = set)) +
  geom_density()

# Draw the most recent plot again into a PDF file.
pdf("IPC_protein_density.pdf", width = 10, height = 7)
last_plot()
dev.off()
```


#### Hydrophobicity

```{r}
# Hydrophobicity values of the PTex hits and of the reviewed proteome, stacked
# into one long table with a `set` label per source.
hydrophobicity_distribution <- data.frame(
  Hydrophobicity = c(IPDB_matching$Hydrophobicity, IPDB_matching_proteome$Hydrophobicity),
  set = rep(
    c("PTex", "Proteome"),
    times = c(
      length(IPDB_matching$Hydrophobicity),
      length(IPDB_matching_proteome$Hydrophobicity)
    )
  )
)

# One box per set.
ggplot(data = hydrophobicity_distribution, mapping = aes(x = set, y = Hydrophobicity)) +
  geom_boxplot() +
  labs(x = "", y = "Hydrophobicity")

# Draw the most recent plot again into a PDF file.
pdf(file = "Hydrophobicity_boxplot.pdf", width = 10, height = 7)
last_plot()
dev.off()
```

## RBD

The RBD annotation is performed using the DAVID online tool. Since DAVID only accepts lists with at most 3000 entries, 10 randomly sampled lists were created.

```{r}
# Draw 10 random accession lists and write each to its own file.
# The seed is fixed and the samples are drawn in order S1..S10, so the draws
# are reproducible.
set.seed(42)
n_lists <- 10
# At most 3000 entries per list; never ask for more accessions than exist,
# which would make sample() fail.
list_size <- min(3000, length(accessions))

for (k in seq_len(n_lists)) {
  list_name <- paste0("accessions_S", k)
  # Keep the per-list objects accessions_S1 ... accessions_S10 available.
  assign(list_name, sample(accessions, list_size))
  write.table(
    get(list_name),
    paste0(list_name, ".tsv"),
    row.names = FALSE,
    col.names = FALSE
  )
}
```


## GO

GO term analysis was performed with the PANTHER online tool using the `accessions` table.

# Master Table

A master table containing identifiers, enrichments, matches and annotations is created.

```{r}
# create vector holding rows of sig_norm_MassSpecData that are in matching_table
# The ID strings are split at ";" and compared with the stored
# Majority.protein.IDs of the matching table.
# NOTE(review): `%in%` is applied to list-like objects here, presumably
# comparing their character forms -- confirm. data.frame() below needs
# MSD_to_MT to select as many rows as matching_table has.
MSD_to_MT <- which(sapply(X = sig_norm_MassSpecData$Majority.protein.IDs, FUN = function(x)unlist(strsplit(as.character(x), split=";"))) %in% matching_table$Majority.protein.IDs)

# Combine identifiers, PTex fold changes and adjusted p-values, reference
# matches and physico-chemical annotations into one table. Columns taken from
# sig_norm_MassSpecData are subset with MSD_to_MT; all other columns come from
# matching_table, accessions and IPDB_matching as they are.
master_table <- data.frame(
  UniProt = accessions,
  Majority.protein.IDs = sig_norm_MassSpecData$Majority.protein.IDs[MSD_to_MT],
  MPID.reviewed = as.character(matching_table$MPIDr),
  Protein.name = sig_norm_MassSpecData$Protein.names[MSD_to_MT],
  Gene.name = matching_table$gene_name,
  FC.PTex_0015.mean = sig_norm_MassSpecData$FC.PTex_0015.mean[MSD_to_MT],
  padj.PTex_0015 = sig_norm_MassSpecData$padj.PTex_0015[MSD_to_MT],
  FC.PTex_015.mean = sig_norm_MassSpecData$FC.PTex_015.mean[MSD_to_MT],
  padj.PTex_015 = sig_norm_MassSpecData$padj.PTex_015[MSD_to_MT],
  FC.PTex_15.mean = sig_norm_MassSpecData$FC.PTex_15.mean[MSD_to_MT],
  padj.PTex_15 = sig_norm_MassSpecData$padj.PTex_15[MSD_to_MT],
  Gerstberger.RBP = matching_table$Gerstberger.RBP,
  Gerstberger.TF = matching_table$Gerstberger.TF,
  Landthaler.mRBP = matching_table$Landthaler.mRBP,
  Preiss = matching_table$Preiss,
  Brannan.annotated = matching_table$Brannan.annotated,
  Brannan.predicted = matching_table$Brannan.predicted,
  Hubstenberger.PBP = matching_table$Hubstenberger.PBP,
  Rick.high = matching_table$Rick.high,
  Rick.low = matching_table$Rick.low,
  Rick.unique = matching_table$Rick.unique,
  Caric = matching_table$Caric,
  IPC_protein = IPDB_matching$IPC_protein,
  MolWeight = IPDB_matching$MolWeight,
  Hydrophobicity = IPDB_matching$Hydrophobicity
)

# Tab-separated export without row names.
write.table(master_table, "master_table.tsv", sep = "\t", row.names = F)
```


# Session Info

```{r}
# Print the R version and the loaded packages for this run.
sessionInfo()
```