## ----include = FALSE---------------------------------------------------------- knitr::opts_chunk$set(collapse=TRUE, comment = "#>") ## ----install, echo=TRUE, eval=FALSE------------------------------------------- # ## if you need to install the packages 'wrMisc','wrProteo' and 'wrGraph' from CRAN : # install.packages("wrMisc") # install.packages("wrProteo") # ## The package 'wrGraph' is not obligatory, but it allows making better graphs # install.packages("wrGraph") # # ## Installation of limma from Bioconductor # if(!requireNamespace("BiocManager", quietly=TRUE)) install.packages("BiocManager") # BiocManager::install("limma") ## ----setup, echo=FALSE, messages=FALSE, warnings=FALSE------------------------ suppressPackageStartupMessages({ library(wrMisc) library(wrProteo) library(wrGraph) library(knitr) library(rmarkdown) }) ## ----setup2------------------------------------------------------------------- ## Let's assume this is a fresh R-session ## Get started by loading the packages library("knitr") library("wrMisc") library("wrProteo") library("wrGraph") # This is wrProteo version no : packageVersion("wrProteo") ## ----Vigenttes1, echo=TRUE, eval=FALSE---------------------------------------- # browseVignettes("wrProteo") ## ----ChemFormMolMass1, echo=TRUE---------------------------------------------- massDeFormula(c("12H12O", "HO", " 2H 1 Se, 6C 2N", "HSeCN", " ", "e")) # Ignore empty/invalid entries massDeFormula(c("12H12O", "HO", " 2H 1 Se, 6C 2N", "HSeCN"), rmEmpty=TRUE) ## ----ChemFormMolMass2, echo=TRUE---------------------------------------------- massDeFormula(c("12H12O", "HO", " 2H 1 Se, 6C 2N", "HSeCN"), massTy="aver") ## ----AAseqMolMass, echo=TRUE-------------------------------------------------- AAmass() ## ----AAseqMolMass2, echo=TRUE------------------------------------------------- ## mass of peptide (or protein) pep1 <- c(aa="AAAA",de="DEFDEF") convAASeq2mass(pep1, seqN=FALSE) ## ----readFasta, echo=TRUE----------------------------------------------------- path1 <- system.file('extdata', package='wrProteo') fiNa <- "conta1.fasta.gz" ## basic reading of Fasta fasta1 <- readFasta2(file.path(path1, fiNa)) str(fasta1) ## now let's read and further separate details in annotation-fields fasta1b <- readFasta2(file.path(path1, fiNa), tableOut=TRUE) str(fasta1b) ## ----treatFasta2, echo=TRUE--------------------------------------------------- dupEntry <- duplicated(fasta1) table(dupEntry) ## ----treatFasta3, echo=TRUE--------------------------------------------------- fasta3 <- fasta1[which(!dupEntry)] length(fasta3) ## ----writeFasta1, echo=TRUE, eval=FALSE--------------------------------------- # writeFasta2(fasta3, fileNa="testWrite.fasta") ## ----readMaxQuant1, fig.height=8, fig.width=9.5, fig.align="center", echo=TRUE---- path1 <- system.file("extdata", package="wrProteo") dataMQ <- readMaxQuantFile(path1, specPref=NULL, normalizeMeth="median") ## number of lines and columns of quantitation data dim(dataMQ$quant) ## ----readMaxQuant2, fig.height=8, fig.width=9.5, fig.align="center", echo=TRUE---- ## The grouping of replicates grp9 <- rep(1:9,each=3) head(grp9) ## special group of proteins (we want to differentiate/ highlight lateron) UPS1ac <- getUPS1acc()$ac specPrefMQ <- list(conta="CON_|LYSC_CHICK", mainSpecies="OS=Saccharomyces cerevisiae", spike=getUPS1acc()$ac) dataMQ <- readMaxQuantFile(path1, specPref=specPrefMQ, suplAnnotFile=TRUE, groupPref=list(lowNumberOfGroups=FALSE), gr=grp9, plotGraph=FALSE) ## the quantifiation data is the same as before dim(dataMQ$quant) ## ----readMaxQuant3, echo=TRUE------------------------------------------------ ## count of tags based on argument specPref table(dataMQ$annot[,"SpecType"]) ## ----readMaxQuant4, echo=TRUE------------------------------------------------ dataMQ <- readMaxQuantFile(path1, specPref=specPrefMQ, sdrf="PXD001819", suplAnnotFile=TRUE, groupPref=list(lowNumberOfGroups=FALSE), plotGraph=FALSE) ## ----exportSdrfDraftMaxQuant5, echo=TRUE------------------------------------- path1 <- system.file("extdata", package="wrProteo") fiNaMQ <- "proteinGroups.txt.gz" dataMQ2 <- readMaxQuantFile(path1, file=fiNaMQ, refLi="mainSpe", sdrf=FALSE, suplAnnotFile=TRUE) ## Here we'll write simply in the current temporary directory of this R-session exportSdrfDraft(dataMQ2, file.path(tempdir(),"testSdrf.tsv")) ## ----readMaxQuantPeptides, echo=TRUE----------------------------------------- MQpepFi1 <- "peptides_tinyMQ.txt.gz" path1 <- system.file("extdata", package="wrProteo") specPref1 <- c(conta="conta|CON_|LYSC_CHICK", mainSpecies="YEAST", spec2="HUMAN") dataMQpep <- readMaxQuantPeptides(path1, file=MQpepFi1, specPref=specPref1, tit="Tiny MaxQuant Peptides") summary(dataMQpep$quant) ## ----readProteomeDiscovererProt1, echo=TRUE---------------------------------- fiNa <- "tinyPD_allProteins.txt.gz" dataPD <- readProteomeDiscovererFile(file=fiNa, path=path1, suplAnnotFile=FALSE, plotGraph=FALSE) summary(dataPD$quant) ## ----readDiaNN1, fig.height=8, fig.width=9.5, fig.align="center", echo=TRUE---- diaNNFi1 <- "tinyDiaNN1.tsv.gz" ## This file contains much less identifications than one may usually obtain path1 <- system.file("extdata", package="wrProteo") ## let's define the main species and allow tagging some contaminants specPref1 <- c(conta="conta|CON_|LYSC_CHICK", mainSpecies="HUMAN") dataNN <- readDiaNNFile(path1, file=diaNNFi1, specPref=specPref1, tit="Tiny DIA-NN Data", plotGraph=FALSE) summary(dataNN$quant) ## ----readProlineProt1, echo=TRUE--------------------------------------------- path1 <- system.file("extdata", package="wrProteo") fiNa <- "exampleProlineABC.csv.gz" # gz compressed data can be read, too dataPL <- readProlineFile(file=fiNa, path=path1, plotGraph=FALSE) summary(dataPL$quant[,1:8]) ## ----readFragpipe1, echo=TRUE------------------------------------------------ FPproFi1 <- "tinyFragpipe1.tsv.gz" ## let's define the main species and allow tagging some contaminants specPref1 <- c(conta="conta|CON_|LYSC_CHICK", mainSpecies="MOUSE") dataFP <- readFragpipeFile(path1, file=FPproFi1, specPref=specPref1, tit="Tiny Fragpipe Example", plotGraph=FALSE) summary(dataFP$quant) ## ----readMassChroq1, echo=TRUE----------------------------------------------- MCproFi1 <- "tinyMC.RData" dataMC <- readMassChroQFile(path1, file=MCproFi1, tit="Tiny MassChroq Example", plotGraph=FALSE) summary(dataMC$quant) ## ----readAlphaPept1, echo=TRUE----------------------------------------------- APproFi1 <- "tinyAlpaPeptide.csv.gz" ## let's define the main species and allow tagging some contaminants specPref1 <- c(conta="conta|CON_|LYSC_CHICK") dataAP <- readAlphaPeptFile(path1, file=APproFi1, specPref=specPref1, tit="Tiny AlphaPept Example", plotGraph=FALSE) summary(dataAP$quant) ## ----readIonbot1, echo=TRUE-------------------------------------------------- path1 <- system.file("extdata", package="wrProteo") fiIonbot <- "tinyIonbotFile1.tsv.gz" datIobPep <- readIonbotPeptides(fiIonbot, path=path1) summary(datIobPep$quant) ## ----readWombarP1, echo=TRUE------------------------------------------------- WBproFi1 <- "tinyWombCompo1.csv.gz" ## let's define the main species and allow tagging some contaminants specPref1 <- c(conta="conta|CON_|LYSC_CHICK", mainSpecies="YEAST") dataWB <- readWombatNormFile(path1, file=WBproFi1, specPref=specPref1, tit="Tiny Wombat-P Example", plotGraph=FALSE) summary(dataWB$quant) ## ----readSampleMetaData2, echo=TRUE------------------------------------------ MQsdrf001819Setup <- readSampleMetaData(quantMeth="MQ", sdrf="PXD001819", path=path1, suplAnnotFile="summary.txt.gz", abund=dataMQ$quant) names(MQsdrf001819Setup) ## ----fuseProteomicsProjects1, echo=TRUE-------------------------------------- path1 <- system.file("extdata", package="wrProteo") dataMQ <- readMaxQuantFile(path1, specPref=NULL, normalizeMeth="median") dataMC <- readMassChroQFile(path1, file="tinyMC.RData", tit="Tiny MassChroq Example", plotGraph=FALSE) dataFused <- fuseProteomicsProjects(dataMQ, dataMC) str(dataFused$quant) ## ----NA_MaxQuant, echo=TRUE--------------------------------------------------- ## Let's inspect NA values as graphic matrixNAinspect(dataMQ$quant, gr=grp9, tit="Histogram of Protein Abundances and NA-Neighbours") ## ----NArepl_MaxQuant, echo=TRUE----------------------------------------------- ## MaxQuant simple NA-imputation (single round) dataMQimp <- matrixNAneighbourImpute(dataMQ$quant, gr=grp9, tit="Histogram of Imputed and Final Data") ## ----testRobustToNAimputation_MQ1, echo=TRUE---------------------------------- ## Impute NA-values repeatedly and run statistical testing after each round of imputations testMQ <- testRobustToNAimputation(dataMQ, gr=grp9) ## Example of the data after repeated NA-imputation head(testMQ$datImp[,1:6]) ## ----PCA1MQ, fig.height=12, fig.width=9.5, fig.align="center", echo=TRUE------ # limit to UPS1 plotPCAw(testMQ$datImp, sampleGrp=grp9, tit="PCA on Protein Abundances (MaxQuant,NAs imputed)", rowTyName="proteins", useSymb2=0) ## ----MAplot1, fig.height=6.5, fig.width=9.5, fig.align="center", echo=TRUE---- # By default this plots at the first of all pairwise questions MAplotW(testMQ) ## ----MAplot2, fig.height=6.5, fig.width=9.5, fig.align="center", echo=TRUE---- res1 <- NULL MAplotW(testMQ, useComp=2, namesNBest="passFC") ## ----VolcanoPlot1MQ, fig.height=6.5, fig.width=9.5, fig.align="center", echo=TRUE---- ## by default the first pairwise comparison is taken ## using the argument 'namesNBest' we can add names from the annotation VolcanoPlotW(testMQ, useComp=2, namesNBest="passFDR") ## ----results1, echo=TRUE------------------------------------------------------ res1 <- extractTestingResults(testMQ, compNo=1, thrsh=0.05, FCthrs=2) ## ----results2, echo=TRUE------------------------------------------------------ knitr::kable(res1[,-1], caption="5%-FDR (BH) Significant results for 1st pairwise set", align="c") ## ----readUCSC1, echo=TRUE----------------------------------------------------- path1 <- system.file("extdata", package="wrProteo") gtfFi <- file.path(path1, "UCSC_hg38_chr11extr.gtf.gz") UcscAnnot1 <- readUCSCtable(gtfFi) # The Ensemble transcript identifyers and their chromosomal locations : head(UcscAnnot1) ## ----readUCSC2, echo=TRUE----------------------------------------------------- # Here we'll redo reading the UCSC table, plus immediatley write the file for UniProt conversion # (in this vignette we write to tempdir() to keep things tidy) expFi <- file.path(tempdir(),"deUcscForUniProt2.txt") UcscAnnot1 <- readUCSCtable(gtfFi, exportFileNa=expFi) ## ----readUniProt1, echo=TRUE-------------------------------------------------- deUniProtFi <- file.path(path1, "deUniProt_hg38chr11extr.tab") deUniPr1 <- readUniProtExport(UniP=deUniProtFi, deUcsc=UcscAnnot1, targRegion="chr11:1-135,086,622") str(deUniPr1) ## ----sessionInfo, echo=FALSE-------------------------------------------------- sessionInfo()