---
title: "Workflow"
output: rmarkdown::html_vignette
vignette: >
  %\VignetteIndexEntry{Workflow}
  %\VignetteEngine{knitr::rmarkdown}
  %\VignetteEncoding{UTF-8}
---

```{r, include = FALSE}
knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>"
)
```

# Introduction

This vignette is a workflow template including import, conversion to a standard format and highlighting inter-software proteinGroup denotation differences with **flowTraceR**.

## Loading R packages

```{r setup, message = FALSE, warning = FALSE}
library(flowTraceR)
library(magrittr)
library(dplyr)
library(tidyr)
library(stringr)
library(tibble)
library(ggplot2)
library(data.table)
library(kableExtra)
```

# Import

## Import your data

Importing the output files from each software can be easily performed with `data.table::fread()`.

```{r import, eval=FALSE, include=TRUE}
# Not evaluated (eval=FALSE): replace DIRECTORY with the folder holding your files.
diann <- data.table::fread("DIRECTORY/dia-nn_file.tsv")
spectronaut <- data.table::fread("DIRECTORY/spectronaut_file.tsv")
mq_evidence <- data.table::fread("DIRECTORY/maxquant_evidence.txt")
mq_proteinGroups <- data.table::fread("DIRECTORY/maxquant_proteinGroups.txt")
pd_psm <- data.table::fread("DIRECTORY/pd_PSMs.txt")
```

## Examples

Some examples are provided to explore the workflow.

```{r get example data}
# DIA-NN
diann <- flowTraceR::get_example("DIA-NN")

# Spectronaut
spectronaut <- flowTraceR::get_example("Spectronaut")

# MaxQuant - the example is fetched once; both tables are elements of the
# same returned list.
mq_example <- flowTraceR::get_example("MaxQuant")
mq_evidence <- mq_example[["evidence"]]
mq_proteinGroups <- mq_example[["proteinGroups"]]

# PD
pd_psm <- flowTraceR::get_example("PD")
```

# Conversion to standardized format

The input data can be converted to a standardized output format on precursor, modified peptide and proteinGroup level. The generated columns with **flowTraceR** are appended to the submitted data without any filtering performed. The generated columns are denoted with the prefix *traceR*.
Note that only the modifications *UniMod:35 (Oxidation)* and *UniMod:4 (Carbamidomethyl)* are supported by **flowTraceR**. A column with the suffix *unknownMods* is generated to potentially filter modifications which are not supported: if TRUE, an unknown modification is detected.

## Precursor

For converting the precursor level use `convert_precursor()`.

```{r precursor}
# One conversion call per software output.
diann_precursor_converted <- convert_precursor(
  input_df = diann,
  software = "DIA-NN"
)
spectronaut_precursor_converted <- convert_precursor(
  input_df = spectronaut,
  software = "Spectronaut"
)
mq_precursor_converted <- convert_precursor(
  input_df = mq_evidence,
  software = "MaxQuant"
)
pd_precursor_converted <- convert_precursor(
  input_df = pd_psm,
  software = "PD"
)
```

## Modified Peptides

For converting the modified peptide level use `convert_modified_peptides()`.

```{r modified peptides}
# Same inputs as the precursor conversion above.
diann_peptides_converted <- convert_modified_peptides(
  input_df = diann,
  software = "DIA-NN"
)
spectronaut_peptides_converted <- convert_modified_peptides(
  input_df = spectronaut,
  software = "Spectronaut"
)
mq_peptides_converted <- convert_modified_peptides(
  input_df = mq_evidence,
  software = "MaxQuant"
)
pd_peptides_converted <- convert_modified_peptides(
  input_df = pd_psm,
  software = "PD"
)
```

## ProteinGroups

For converting the proteinGroup level use `convert_proteingroups()`.

```{r proteinGroup}
# MaxQuant is converted from its proteinGroups table here, not from evidence.
diann_proteinGroups_converted <- convert_proteingroups(
  input_df = diann,
  software = "DIA-NN"
)
spectronaut_proteinGroups_converted <- convert_proteingroups(
  input_df = spectronaut,
  software = "Spectronaut"
)
mq_proteinGroups_converted <- convert_proteingroups(
  input_df = mq_proteinGroups,
  software = "MaxQuant"
)
pd_proteinGroups_converted <- convert_proteingroups(
  input_df = pd_psm,
  software = "PD"
)
```

## All Levels

For converting precursor, modified peptide and proteinGroup level at once use `convert_all_levels()`.
```{r all levels}
diann_all_converted <- convert_all_levels(
  input_df = diann,
  software = "DIA-NN"
)
spectronaut_all_converted <- convert_all_levels(
  input_df = spectronaut,
  software = "Spectronaut"
)
# MaxQuant takes the evidence table plus the proteinGroups table.
mq_all_converted <- convert_all_levels(
  input_df = mq_evidence,
  input_MQ_pg = mq_proteinGroups,
  software = "MaxQuant"
)
pd_all_converted <- convert_all_levels(
  input_df = pd_psm,
  software = "PD"
)
```

## Analyzing Conversion

Since only the modifications UniMod:35 (Oxidation) and UniMod:4 (Carbamidomethyl) are currently supported, **flowTraceR** provides functions to analyze the conversion and shows how many unknown modifications are present in the dataset with `analyze_unknown_mods()`.

```{r analyzing conversion}
# Shown for one software (Proteome Discoverer) - equivalent for the others.

# Reports (plot = FALSE)
pd_precursor_report_unknown_mods <- analyze_unknown_mods(
  input_df = pd_precursor_converted,
  level = "precursor",
  plot = FALSE
)
pd_peptides_report_unknown_mods <- analyze_unknown_mods(
  input_df = pd_peptides_converted,
  level = "modified_peptides",
  plot = FALSE
)

# Plots (plot = TRUE), once with absolute and once with relative values
pd_precursor_plot_unknown_mods <- analyze_unknown_mods(
  input_df = pd_precursor_converted,
  level = "precursor",
  plot = TRUE,
  plot_characteristic = "absolute"
)
pd_peptides_plot_unknown_mods <- analyze_unknown_mods(
  input_df = pd_peptides_converted,
  level = "modified_peptides",
  plot = TRUE,
  plot_characteristic = "relative"
)
```

### Example precursor level

```{r}
kableExtra::kable(pd_precursor_report_unknown_mods)
```
```{r conversion-plot}
# Show the precursor-level unknown-modification plot (absolute values).
pd_precursor_plot_unknown_mods
```
# Tracing inter-software differences

For binary software comparisons **flowTraceR** allows tracing inter-software differences based on the standardized flowTraceR format. Each identification is classified as *common* (identified in both analyses) or as *unique* (specific to one analysis).

## For each individual level

```{r trace individual level}
# Binary comparison - DIA-NN vs. Spectronaut

# ProteinGroup level
traced_proteinGroups <- trace_level(
  input_df1 = diann_all_converted,
  input_df2 = spectronaut_all_converted,
  analysis_name1 = "DIA-NN",
  analysis_name2 = "Spectronaut",
  level = "proteinGroups",
  filter_unknown_mods = TRUE
)

# Peptide level
traced_peptides <- trace_level(
  input_df1 = diann_all_converted,
  input_df2 = spectronaut_all_converted,
  analysis_name1 = "DIA-NN",
  analysis_name2 = "Spectronaut",
  level = "modified_peptides",
  filter_unknown_mods = TRUE
)

# Precursor level
traced_precursor <- trace_level(
  input_df1 = diann_all_converted,
  input_df2 = spectronaut_all_converted,
  analysis_name1 = "DIA-NN",
  analysis_name2 = "Spectronaut",
  level = "precursor",
  filter_unknown_mods = TRUE
)
```

## All levels

```{r trace all levels}
# Binary comparison - DIA-NN vs. Spectronaut
# Trace all levels in one step.
traced_all <- trace_all_levels(
  input_df1 = diann_all_converted,
  input_df2 = spectronaut_all_converted,
  analysis_name1 = "DIA-NN",
  analysis_name2 = "Spectronaut",
  filter_unknown_mods = TRUE
)
```

## Connect traced levels

Combine two levels after categorization into unique and common entries. Possible connections are proteinGroup or modified peptide with precursor categorization.
```{r Connect flowTraceR levels}
# traced_all is indexed by the analysis names given to trace_all_levels().

# ProteinGroup level
DIANN_connected_proteinGroup <- connect_traceR_levels(
  input_df = traced_all[["DIA-NN"]],
  level = "proteinGroups"
)
Spectronaut_connected_proteinGroup <- connect_traceR_levels(
  input_df = traced_all[["Spectronaut"]],
  level = "proteinGroups"
)

# Peptide level
DIANN_connected_peptides <- connect_traceR_levels(
  input_df = traced_all[["DIA-NN"]],
  level = "modified_peptides"
)
Spectronaut_connected_peptides <- connect_traceR_levels(
  input_df = traced_all[["Spectronaut"]],
  level = "modified_peptides"
)
```

## Show software differences for modified peptide/proteinGroup denotations

Generate a report or visualize the output of connecting the flowTraceR levels on *proteinGroup_precursor* or *modified.peptides_precursor* categorization in:

* common_common
* common_unique
* unique_common
* unique_unique

```{r analyze connected levels}
# Example for proteinGroup level

# *Plots*
# Upper level - proteinGroup level - how many proteinGroups have a specific
# categorization
DIANN_plot_proteinGroups_upper <- analyze_connected_levels(
  input_df = DIANN_connected_proteinGroup,
  connected_levels = "proteinGroup_precursor",
  count_level = "upper",
  plot = TRUE,
  plot_characteristic = "absolute"
)
Spectronaut_plot_proteinGroups_upper <- analyze_connected_levels(
  input_df = Spectronaut_connected_proteinGroup,
  connected_levels = "proteinGroup_precursor",
  count_level = "upper",
  plot = TRUE,
  plot_characteristic = "absolute"
)

# Lower level - precursor level - how many precursors have a specific
# categorization
DIANN_plot_proteinGroups_lower <- analyze_connected_levels(
  input_df = DIANN_connected_proteinGroup,
  connected_levels = "proteinGroup_precursor",
  count_level = "lower",
  plot = TRUE,
  plot_characteristic = "absolute"
)
Spectronaut_plot_proteinGroups_lower <- analyze_connected_levels(
  input_df = Spectronaut_connected_proteinGroup,
  connected_levels = "proteinGroup_precursor",
  count_level = "lower",
  plot = TRUE,
  plot_characteristic = "absolute"
)

# *Reports*
# ProteinGroup level - both reports count on the upper (proteinGroup) level so
# that the DIA-NN and Spectronaut tables are directly comparable.
DIANN_report_proteinGroups <- analyze_connected_levels(
  input_df = DIANN_connected_proteinGroup,
  connected_levels = "proteinGroup_precursor",
  count_level = "upper",
  plot = FALSE
)
Spectronaut_report_proteinGroups <- analyze_connected_levels(
  input_df = Spectronaut_connected_proteinGroup,
  connected_levels = "proteinGroup_precursor",
  count_level = "upper",
  plot = FALSE
)
```

### Example proteinGroup level

```{r}
kableExtra::kable(DIANN_report_proteinGroups)
```
```{r proteingroups-connected}
# Show the DIA-NN plot counted on the upper (proteinGroup) level.
DIANN_plot_proteinGroups_upper
```
## Get software difference for proteinGroup denotations

Filter for potential common precursor and unique proteinGroup connections. It is possible to trace differences in proteinGroup denotations for common precursors.

```{r get software difference}
# string_analysis = FALSE: proteinGroups that share a protein denotation stay
# in the output.
Difference_proteinGroup <- trace_unique_common_pg(
  input_df1 = DIANN_connected_proteinGroup,
  input_df2 = Spectronaut_connected_proteinGroup,
  analysis_name1 = "DIA-NN",
  analysis_name2 = "Spectronaut",
  string_analysis = FALSE
)

# string_analysis = TRUE: entries whose protein denotation is mentioned in the
# proteinGroups of both input_df1 and input_df2 are filtered out - only
# distinct protein denotations remain.
Difference_proteinGroup_reduced <- trace_unique_common_pg(
  input_df1 = DIANN_connected_proteinGroup,
  input_df2 = Spectronaut_connected_proteinGroup,
  analysis_name1 = "DIA-NN",
  analysis_name2 = "Spectronaut",
  string_analysis = TRUE
)
```

### Results:

First, string_analysis = FALSE is used. ProteinGroups, which share similar proteins, stay in the output. For example, the common precursor *COMMON2* has a proteinGroup denotation of *EXAMPLE2* in Spectronaut and of *EXAMPLE1;EXAMPLE2* in DIA-NN.
```{r echo=FALSE}
# Table of the unreduced differences (string_analysis = FALSE).
kableExtra::kable(
  Difference_proteinGroup,
  format = "pipe",
  caption = "Difference in proteinGroup denotation - string_analysis = FALSE"
)
```
Second, string_analysis = TRUE is applied. ProteinGroups, which have similar proteins, are filtered out.
```{r echo=FALSE}
# Table of the reduced differences (string_analysis = TRUE).
kableExtra::kable(
  Difference_proteinGroup_reduced,
  format = "pipe",
  caption = "Difference in proteinGroup denotation - string_analysis = TRUE"
)
```