FragPipe 的数据分析一般推荐使用 GUI 完成;但如果需要在集群或超算上批量分析数据,就需要在 shell 下编写脚本并提交到作业队列来执行分析。

下面是官方给出的数据分析 workflow,分为离子淌度数据(TIMS-TOF,也称 4D 蛋白质组学)分析和常规 DDA 数据分析两种。4D 蛋白质组学是在保留时间(retention time)、质荷比(m/z)、离子强度(intensity)这三个分离维度的基础之上,增加了第四个维度——离子淌度(ion mobility)的分离。

1)TIMS-TOF数据分析workflow


#!/bin/bash
#
# TIMS-TOF (.d) command-line workflow:
#   MSFragger -> (optional Crystal-C) -> Philosopher -> IonQuant
#
# Usage: bash <this script> [searchMode]
#   searchMode picks ONE of the mutually exclusive PeptideProphet / filter
#   variants (the original template listed all of them back to back, so an
#   unedited run executed every variant in sequence):
#     closed         closed search (default)
#     open-crystalc  open search, Crystal-C is run on the MSFragger output
#     open           open search, Crystal-C is skipped
#     nonspecific    non-specific closed search

set -xe

# ---- Tool locations and inputs: adjust to your environment ----
dataDirPath="data/"
fastaPath="2020-01-22-decoys-reviewed-contam-UP000005640.fas"

# Download: https://msfragger.arsci.com/upgrader/
# (free for academic use on request; commercial use requires a license)
msfraggerPath="MSFragger.jar"
fraggerParamsPath="fragger.params"

# Download: https://github.com/Nesvilab/philosopher/releases/latest
philosopherPath="philosopher"

# Download: https://github.com/Nesvilab/Crystal-C/releases/latest
crystalcPath="CrystalC.jar"
crystalcParameterPath="crystalc.params"

# Download: https://github.com/Nesvilab/IonQuant/releases/latest
ionquantPath="IonQuant.jar"
# Extra IonQuant command-line options, one array element per word.
# Empty by default; the original template left "<options>" for the user.
ionquantOptions=()

# Decoy prefix used in the FASTA database.
decoyPrefix="rev_"

# ---- Search mode selection ----
searchMode="${1:-closed}"
case "$searchMode" in
  closed | open-crystalc | open | nonspecific) ;;
  *)
    printf 'unknown search mode: %s (expected closed, open-crystalc, open or nonspecific)\n' "$searchMode" >&2
    exit 2
    ;;
esac

# ---- MSFragger ----
# Resolve the spectral inputs up front so that a missing data set fails here
# with a clear message instead of passing an unexpanded glob to MSFragger.
# ${dataDirPath%/} drops the trailing slash to avoid "data//x.d" paths.
spectralFiles=("${dataDirPath%/}"/*.d)
if [[ ! -e "${spectralFiles[0]}" ]]; then
  printf 'no .d spectral files found in %s\n' "$dataDirPath" >&2
  exit 1
fi

# Adjust -Xmx to the memory available on your cluster node.
java -Xmx64G -jar "$msfraggerPath" "$fraggerParamsPath" "${spectralFiles[@]}"

# Move the .pepXML results to the current directory.
mv -- "${dataDirPath%/}"/*.pepXML ./

# Move the MSFragger tsv files to the current directory. The original template
# asked the user to comment this step out when localize_delta_mass = 0 in
# fragger.params; moving only when .tsv files exist makes that edit unnecessary
# and keeps `set -e` from aborting the run on an unmatched glob.
tsvFiles=("${dataDirPath%/}"/*.tsv)
if [[ -e "${tsvFiles[0]}" ]]; then
  mv -- "${tsvFiles[@]}" ./
fi

# ---- Crystal-C (open search only) ----
# NOTE(review): the raw-data workflow in the same article launches Crystal-C as
# `java -cp CrystalC.jar Main ...`; confirm which form your Crystal-C release
# expects. The `-jar` form of this workflow is kept unchanged here.
if [[ "$searchMode" == "open-crystalc" ]]; then
  for pepxmlFile in ./*.pepXML; do
    java -Xmx64G -jar "$crystalcPath" "$crystalcParameterPath" "$pepxmlFile"
  done
fi

# ---- PeptideProphet, ProteinProphet and FDR filtering ----
"$philosopherPath" workspace --clean
"$philosopherPath" workspace --init
"$philosopherPath" database --annotate "$fastaPath" --prefix "$decoyPrefix"

case "$searchMode" in
  closed)
    "$philosopherPath" peptideprophet --nonparam --expectscore --decoyprobs \
      --ppm --accmass --decoy "$decoyPrefix" --database "$fastaPath" ./*.pepXML
    ;;
  open-crystalc)
    # Only the files matching *_c.pepXML are passed on in this mode.
    "$philosopherPath" peptideprophet --nonparam --expectscore --decoyprobs \
      --masswidth 1000.0 --clevel -2 --decoy "$decoyPrefix" --combine \
      --database "$fastaPath" ./*_c.pepXML
    ;;
  open)
    "$philosopherPath" peptideprophet --nonparam --expectscore --decoyprobs \
      --masswidth 1000.0 --clevel -2 --decoy "$decoyPrefix" --combine \
      --database "$fastaPath" ./*.pepXML
    ;;
  nonspecific)
    "$philosopherPath" peptideprophet --nonparam --expectscore --decoyprobs \
      --ppm --accmass --nontt --decoy "$decoyPrefix" --database "$fastaPath" ./*.pepXML
    ;;
esac

"$philosopherPath" proteinprophet --maxppmdiff 2000000 --output combined ./*.pep.xml

case "$searchMode" in
  open | open-crystalc)
    # Open searches filter the single interact.pep.xml file.
    "$philosopherPath" filter --sequential --razor --mapmods --tag "$decoyPrefix" \
      --pepxml ./interact.pep.xml --protxml ./combined.prot.xml
    ;;
  closed | nonspecific)
    "$philosopherPath" filter --sequential --razor --mapmods --tag "$decoyPrefix" \
      --pepxml ./ --protxml ./combined.prot.xml
    ;;
esac

# ---- Report ----
"$philosopherPath" report
"$philosopherPath" workspace --clean

# ---- IonQuant ----
# Argument order follows the original template: options, .d paths, .pepXML paths.
# NOTE(review): the template only said "<path to .pepXML>"; confirm against the
# IonQuant documentation which pepXML files (and options) your version expects.
java -Xmx64G -jar "$ionquantPath" "${ionquantOptions[@]}" "${spectralFiles[@]}" ./*.pepXML

2)常规rawdata数据分析流程


#!/bin/bash
#
# Conventional raw-data (.mzML / .raw) command-line workflow:
#   MSFragger -> (optional Crystal-C) -> Philosopher (incl. freequant)
#
# Usage: bash <this script> [searchMode]
#   searchMode picks ONE of the mutually exclusive PeptideProphet / filter
#   variants (the original template listed all four PeptideProphet commands and
#   both filter commands back to back, so an unedited run executed all of them):
#     closed         closed search (default)
#     open-crystalc  open search, Crystal-C is run on the MSFragger output
#     open           open search, Crystal-C is skipped
#     nonspecific    non-specific closed search

set -xe

# ---- Tool locations and inputs: adjust to your environment ----
dataDirPath="data/"
fastaPath="2020-01-22-decoys-reviewed-contam-UP000005640.fas"
msfraggerPath="MSFragger.jar"
fraggerParamsPath="fragger.params"
philosopherPath="philosopher"
crystalcPath="CrystalC.jar"
crystalcParameterPath="crystalc.params"
# Kept from the original template; this workflow does not invoke IonQuant.
ionquantPath="IonQuant.jar"
# Decoy prefix used in the FASTA database.
decoyPrefix="rev_"

# ---- Search mode selection ----
searchMode="${1:-closed}"
case "$searchMode" in
  closed | open-crystalc | open | nonspecific) ;;
  *)
    printf 'unknown search mode: %s (expected closed, open-crystalc, open or nonspecific)\n' "$searchMode" >&2
    exit 2
    ;;
esac

# ---- MSFragger ----
# Resolve the spectral inputs up front. .mzML is preferred because the original
# template marks it as required for quantification; .raw is the fallback.
# ${dataDirPath%/} drops the trailing slash to avoid "data//x.mzML" paths.
spectralFiles=("${dataDirPath%/}"/*.mzML)
if [[ ! -e "${spectralFiles[0]}" ]]; then
  spectralFiles=("${dataDirPath%/}"/*.raw)
fi
if [[ ! -e "${spectralFiles[0]}" ]]; then
  printf 'no .mzML or .raw spectral files found in %s\n' "$dataDirPath" >&2
  exit 1
fi

# Adjust -Xmx to the memory available on your cluster node.
java -Xmx64G -jar "$msfraggerPath" "$fraggerParamsPath" "${spectralFiles[@]}"

# Move the .pepXML results to the current directory.
mv -- "${dataDirPath%/}"/*.pepXML ./

# Move the MSFragger tsv files to the current directory. The original template
# asked the user to comment this step out when localize_delta_mass = 0 in
# fragger.params; moving only when .tsv files exist makes that edit unnecessary
# and keeps `set -e` from aborting the run on an unmatched glob.
tsvFiles=("${dataDirPath%/}"/*.tsv)
if [[ -e "${tsvFiles[0]}" ]]; then
  mv -- "${tsvFiles[@]}" ./
fi

# ---- Crystal-C (open search only) ----
# NOTE(review): the TIMS-TOF workflow in the same article launches Crystal-C as
# `java -jar CrystalC.jar ...`; confirm which form your Crystal-C release
# expects. The `-cp ... Main` form of this workflow is kept unchanged here.
if [[ "$searchMode" == "open-crystalc" ]]; then
  for pepxmlFile in ./*.pepXML; do
    java -Xmx64G -cp "$crystalcPath" Main "$crystalcParameterPath" "$pepxmlFile"
  done
fi

# ---- PeptideProphet, ProteinProphet and FDR filtering ----
"$philosopherPath" workspace --clean
"$philosopherPath" workspace --init
"$philosopherPath" database --annotate "$fastaPath" --prefix "$decoyPrefix"

case "$searchMode" in
  closed)
    "$philosopherPath" peptideprophet --nonparam --expectscore --decoyprobs \
      --ppm --accmass --decoy "$decoyPrefix" --database "$fastaPath" ./*.pepXML
    ;;
  open-crystalc)
    # Only the files matching *_c.pepXML are passed on in this mode.
    "$philosopherPath" peptideprophet --nonparam --expectscore --decoyprobs \
      --masswidth 1000.0 --clevel -2 --decoy "$decoyPrefix" --combine \
      --database "$fastaPath" ./*_c.pepXML
    ;;
  open)
    "$philosopherPath" peptideprophet --nonparam --expectscore --decoyprobs \
      --masswidth 1000.0 --clevel -2 --decoy "$decoyPrefix" --combine \
      --database "$fastaPath" ./*.pepXML
    ;;
  nonspecific)
    "$philosopherPath" peptideprophet --nonparam --expectscore --decoyprobs \
      --ppm --accmass --nontt --decoy "$decoyPrefix" --database "$fastaPath" ./*.pepXML
    ;;
esac

"$philosopherPath" proteinprophet --maxppmdiff 2000000 --output combined ./*.pep.xml

case "$searchMode" in
  open | open-crystalc)
    # Open searches filter the single interact.pep.xml file.
    "$philosopherPath" filter --sequential --razor --mapmods --tag "$decoyPrefix" \
      --pepxml ./interact.pep.xml --protxml ./combined.prot.xml
    ;;
  closed | nonspecific)
    "$philosopherPath" filter --sequential --razor --mapmods --tag "$decoyPrefix" \
      --pepxml ./ --protxml ./combined.prot.xml
    ;;
esac

# ---- Quantification ----
"$philosopherPath" freequant --dir "$dataDirPath"

# ---- Report ----
"$philosopherPath" report
"$philosopherPath" workspace --clean

参考资料:

1.https://msfragger.nesvilab.org/tutorial_linux.html