FragPipe数据分析一般推荐用GUI分析,但是如果我们需要在集群或者超算上批量分析数据可能就需要在shell下提交队列,执行数据分析了。
下面是官方给出的数据分析Workflow,分为离子淌度(TIMS-TOF)数据分析和常规DDA数据分析两种。
离子淌度数据分析也称4D蛋白组学:在保留时间(retention time)、质荷比(m/z)、离子强度(intensity)这三个维度(3D)分离的基础之上,增加了第四个维度——离子淌度(mobility)的分离。
1)TIMS-TOF数据分析workflow
#!/bin/bash
# timsTOF (ion mobility, .d input) workflow template:
# MSFragger search -> optional Crystal-C -> PeptideProphet / ProteinProphet /
# FDR filtering and report with Philosopher -> IonQuant.
#
# This is a template, not a ready-to-run script: replace every <...>
# placeholder, and wherever several alternative commands are listed keep
# exactly ONE of them and comment out (or delete) the others.
#
# -x traces every command; -e aborts on the first command that fails.
set -xe

# Environment: input data and paths of the executables used below.
dataDirPath="data/"
fastaPath="2020-01-22-decoys-reviewed-contam-UP000005640.fas"
# Download: https://msfragger.arsci.com/upgrader/
# (free for academic use on request; commercial use requires a license)
msfraggerPath="MSFragger.jar"
fraggerParamsPath="fragger.params"
# Download: https://github.com/Nesvilab/philosopher/releases/latest
philosopherPath="philosopher"
# Download: https://github.com/Nesvilab/Crystal-C/releases/latest
crystalcPath="CrystalC.jar"
crystalcParameterPath="crystalc.params"
# Download: https://github.com/Nesvilab/IonQuant/releases/latest
ionquantPath="IonQuant.jar"
# Decoy prefix
decoyPrefix="rev_"

# All expansions below are double-quoted so that paths containing spaces or
# glob characters are passed to the tools as single arguments.

# Run MSFragger. Adjust the -Xmx memory setting to your cluster configuration.
java -Xmx64G -jar "$msfraggerPath" "$fraggerParamsPath" "$dataDirPath"/<spectral files ending with .d>

# Move the .pepXML result files to the current directory.
mv "$dataDirPath"/*.pepXML ./

# Move the MSFragger tsv files to the current directory.
mv "$dataDirPath"/*.tsv ./ # Comment this line if localize_delta_mass = 0 in your fragger.params file.

# Run this step (Crystal-C) only for an open search; otherwise skip it.
for myFile in ./*.pepXML
do
  java -Xmx64G -jar "$crystalcPath" "$crystalcParameterPath" "$myFile"
done

# Run PeptideProphet, ProteinProphet and FDR filtering.
"$philosopherPath" workspace --clean
"$philosopherPath" workspace --init
"$philosopherPath" database --annotate "$fastaPath" --prefix "$decoyPrefix"

# Choose ONE of the following four commands according to your search type.
# Closed search
"$philosopherPath" peptideprophet --nonparam --expectscore --decoyprobs --ppm --accmass --decoy "$decoyPrefix" --database "$fastaPath" ./*.pepXML
# Open search if you ran Crystal-C
"$philosopherPath" peptideprophet --nonparam --expectscore --decoyprobs --masswidth 1000.0 --clevel -2 --decoy "$decoyPrefix" --combine --database "$fastaPath" ./*_c.pepXML
# Open search if you did NOT run Crystal-C
"$philosopherPath" peptideprophet --nonparam --expectscore --decoyprobs --masswidth 1000.0 --clevel -2 --decoy "$decoyPrefix" --combine --database "$fastaPath" ./*.pepXML
# Non-specific closed search
"$philosopherPath" peptideprophet --nonparam --expectscore --decoyprobs --ppm --accmass --nontt --decoy "$decoyPrefix" --database "$fastaPath" ./*.pepXML

"$philosopherPath" proteinprophet --maxppmdiff 2000000 --output combined ./*.pep.xml

# Choose ONE of the following two commands.
# Closed or non-specific closed search
"$philosopherPath" filter --sequential --razor --mapmods --tag "$decoyPrefix" --pepxml ./ --protxml ./combined.prot.xml
# Open search
"$philosopherPath" filter --sequential --razor --mapmods --tag "$decoyPrefix" --pepxml ./interact.pep.xml --protxml ./combined.prot.xml

# Generate the report.
"$philosopherPath" report
"$philosopherPath" workspace --clean

# Run IonQuant.
java -Xmx64G -jar "$ionquantPath" <options> <path to .d> <path to .pepXML>
2)常规rawdata数据分析流程
#!/bin/bash
# Conventional raw-data (.mzML / .raw input) workflow template:
# MSFragger search -> optional Crystal-C -> PeptideProphet / ProteinProphet /
# FDR filtering with Philosopher -> Philosopher freequant -> report.
#
# This is a template, not a ready-to-run script: replace every <...>
# placeholder, and wherever several alternative commands are listed keep
# exactly ONE of them and comment out (or delete) the others.
#
# -x traces every command; -e aborts on the first command that fails.
set -xe

# Environment: input data and paths of the executables used below
# (some of these settings are explained in workflow 1).
dataDirPath="data/"
fastaPath="2020-01-22-decoys-reviewed-contam-UP000005640.fas"
msfraggerPath="MSFragger.jar"
fraggerParamsPath="fragger.params"
philosopherPath="philosopher"
crystalcPath="CrystalC.jar"
crystalcParameterPath="crystalc.params"
ionquantPath="IonQuant.jar"
decoyPrefix="rev_"

# All expansions below are double-quoted so that paths containing spaces or
# glob characters are passed to the tools as single arguments.

# Run MSFragger.
java -Xmx64G -jar "$msfraggerPath" "$fraggerParamsPath" "$dataDirPath"/<spectral files ending with .mzML (required for quantification) or .raw>

# Move the .pepXML result files and the MSFragger tsv files to the current directory.
mv "$dataDirPath"/*.pepXML ./
mv "$dataDirPath"/*.tsv ./ # Comment this line if localize_delta_mass = 0 in your fragger.params file.

# Run this step (Crystal-C) only for an open search; otherwise skip it.
# NOTE(review): this invokes Crystal-C via "-cp <jar> Main", whereas the
# timsTOF workflow uses "-jar <jar>" -- confirm which form matches your
# Crystal-C release.
for myFile in ./*.pepXML
do
  java -Xmx64G -cp "$crystalcPath" Main "$crystalcParameterPath" "$myFile"
done

# Run PeptideProphet, ProteinProphet and FDR filtering.
"$philosopherPath" workspace --clean
"$philosopherPath" workspace --init
"$philosopherPath" database --annotate "$fastaPath" --prefix "$decoyPrefix"

# Choose ONE of the following four commands according to your search type.
# Closed search
"$philosopherPath" peptideprophet --nonparam --expectscore --decoyprobs --ppm --accmass --decoy "$decoyPrefix" --database "$fastaPath" ./*.pepXML
# Open search if you ran Crystal-C
"$philosopherPath" peptideprophet --nonparam --expectscore --decoyprobs --masswidth 1000.0 --clevel -2 --decoy "$decoyPrefix" --combine --database "$fastaPath" ./*_c.pepXML
# Open search if you did NOT run Crystal-C
"$philosopherPath" peptideprophet --nonparam --expectscore --decoyprobs --masswidth 1000.0 --clevel -2 --decoy "$decoyPrefix" --combine --database "$fastaPath" ./*.pepXML
# Non-specific closed search
"$philosopherPath" peptideprophet --nonparam --expectscore --decoyprobs --ppm --accmass --nontt --decoy "$decoyPrefix" --database "$fastaPath" ./*.pepXML

"$philosopherPath" proteinprophet --maxppmdiff 2000000 --output combined ./*.pep.xml

# Choose ONE of the following two commands.
# Closed or non-specific closed search
"$philosopherPath" filter --sequential --razor --mapmods --tag "$decoyPrefix" --pepxml ./ --protxml ./combined.prot.xml
# Open search
"$philosopherPath" filter --sequential --razor --mapmods --tag "$decoyPrefix" --pepxml ./interact.pep.xml --protxml ./combined.prot.xml

# Perform quantification.
"$philosopherPath" freequant --dir "$dataDirPath"

# Generate the report.
"$philosopherPath" report
"$philosopherPath" workspace --clean
参考资料:
1.https://msfragger.nesvilab.org/tutorial_linux.html