view SAINT_preprocessing.xml @ 72:5ec0b997fb13 draft

Uploaded
author bornea
date Sat, 27 Aug 2016 23:26:10 -0400
parents e382e24af909
children a51c0e6d7d99
line wrap: on
line source

<tool id="SAINT_preprocessing_v5" name="SAINT pre-processing">
  <description></description>
  <!--
    Command template (Cheetah). Exactly one of four Python scripts is run,
    chosen by the value of type_select.type:
      Scaffold      : SAINT_preprocessing.py
      MaxQuant      : SAINT_preprocessing_mq_pep.py
      Peptideshaker : Protein_report_processing.py
      mzIdentML     : mzID_process2.py

    All arguments are positional and their order differs between scripts,
    so edits to one branch must not be copied blindly to another.

    The "Bait Create" repeat is rendered inside one double-quoted argument:
    for every repeat entry the values bait1, assign and T_C are emitted in
    that order, separated only by template whitespace.
    NOTE(review): presumably the scripts split this argument on whitespace;
    confirm before changing the indentation inside the quotes.

    \$INSTALL_RUN_PATH is backslash-escaped so that Cheetah leaves it alone
    and the shell expands it at run time; the variable is declared by the
    set_environment requirement of this tool.

    In the Scaffold and MaxQuant branches type_select.bait_file_in is the
    10th argument after the script name.
    NOTE(review): the interpreter attribute is reported as deprecated in
    newer Galaxy tool profiles; confirm against the target Galaxy version.
  -->
  <command interpreter="python">
    #if (str($type_select.type) == 'Scaffold'):
      SAINT_preprocessing.py $type_select.input $type_select.preybool $type_select.fasta_db $Inter_file $Prey_file 
      "
        #for $ba in $bait
         ${ba.bait1}
         ${ba.assign}
         ${ba.T_C}
        #end for
        "
      $Bait_file \$INSTALL_RUN_PATH/ $type_select.bait_bool $type_select.bait_file_in
    #elif (str($type_select.type) == 'MaxQuant'):
      SAINT_preprocessing_mq_pep.py $type_select.input $type_select.preybool $type_select.fasta_db $Inter_file $Prey_file 
        "
        #for $ba in $bait
          ${ba.bait1}
          ${ba.assign}
          ${ba.T_C}
        #end for
        "
      $Bait_file \$INSTALL_RUN_PATH/ $type_select.bait_bool $type_select.bait_file_in
    #elif (str($type_select.type) == 'Peptideshaker'):
      Protein_report_processing.py "$type_select.input" $Bait_file $type_select.use_metric $type_select.fasta_db $type_select.preybool 
        "
        #for $ba in $bait
          ${ba.bait1}
          ${ba.assign}
          ${ba.T_C}
        #end for
        "
      \$INSTALL_RUN_PATH/ $type_select.bait_bool $type_select.bait_file_in $Inter_file $Prey_file
    #elif (str($type_select.type) == 'mzIdentML'):
      mzID_process2.py "$type_select.input" $type_select.bait_file_in $type_select.preybool $type_select.fasta_db \$INSTALL_RUN_PATH/ 
        "
        #for $ba in $bait
          ${ba.bait1}
          ${ba.assign}
          ${ba.T_C}
        #end for
        "
      $type_select.bait_bool $Prey_file $Bait_file $Inter_file 
    #end if
  </command>
  <!--
    Declares that the environment variable INSTALL_RUN_PATH must be set for
    this tool; the command passes its value (with a trailing slash) to every
    script.
    NOTE(review): presumably it points at the directory holding the scripts'
    helper files; confirm in the tool dependency configuration.
  -->
  <requirements>
    <requirement type="set_environment">INSTALL_RUN_PATH</requirement>
  </requirements>
  <!--
    Tool form. "type_select" chooses the data source; every source exposes the
    same prey/FASTA/bait parameters so the command template can address them
    uniformly as $type_select.<name>. The "bait" repeat collects the entries
    that the command renders into its quoted bait argument.
  -->
  <inputs>
    <conditional name="type_select">
      <param name="type" type="select" label="Data Source">
        <option value="MaxQuant">MaxQuant</option>
        <option value="Scaffold">Scaffold</option>
        <option value="Peptideshaker">Peptideshaker</option>
        <option value="mzIdentML">mzIdentML</option>
      </param>

      <!-- Single MaxQuant peptides file -->
      <when value="MaxQuant">
        <param name="input" type="data" format="dat" label="MaxQuant peptides Output"/>
        <param name="preybool" type="boolean" checked="true" label="Create Prey File"/>
        <param name="fasta_db" type="data" format="fasta" optional="true" label="Provide Uniprot Fasta database"/>
        <param name="bait_bool" type="boolean" checked="true" label="Are You Providing Your Own bait file?"/>
        <param name="bait_file_in" type="data" format="dat" optional="true" label="Bait File"/>
      </when>

      <!-- Single Scaffold report -->
      <when value="Scaffold">
        <param name="input" type="data" format="dat" label="Scaffold Output"/>
        <param name="preybool" type="boolean" checked="true" label="Create Prey File"/>
        <param name="fasta_db" type="data" format="fasta" optional="true" label="Provide Uniprot Fasta database"/>
        <param name="bait_bool" type="boolean" checked="true" label="Are You Providing Your Own bait file?"/>
        <param name="bait_file_in" type="data" format="dat" optional="true" label="Bait File"/>
      </when>

      <!-- Several Peptideshaker reports plus the column used for quantification -->
      <when value="Peptideshaker">
        <param name="input" type="data" format="dat" multiple="true" label="Peptideshaker Output"/>
        <param name="use_metric" type="select" label="Select Report File Value for Quantification">
          <option value="Validated_Peptides">#Validated Peptides</option>
          <option value="Peptides">#Peptides</option>
          <option value="Unique">#Unique</option>
          <option value="Validated_PSMs">#Validated PSMs</option>
          <option value="PSMs">#PSMs</option>
        </param>
        <param name="preybool" type="boolean" checked="true" label="Create Prey File"/>
        <param name="fasta_db" type="data" format="fasta" optional="true" label="Provide Uniprot Fasta database"/>
        <param name="bait_bool" type="boolean" checked="true" label="Are You Providing Your Own bait file?"/>
        <param name="bait_file_in" type="data" format="dat" optional="true" label="Bait File"/>
      </when>

      <!-- Several mzIdentML files -->
      <when value="mzIdentML">
        <param name="input" type="data" format="dat" multiple="true" label="mzIdentML Files"/>
        <param name="preybool" type="boolean" checked="true" label="Create Prey File"/>
        <param name="fasta_db" type="data" format="fasta" optional="true" label="Provide Uniprot Fasta database"/>
        <param name="bait_bool" type="boolean" checked="true" label="Are You Providing Your Own bait file?"/>
        <param name="bait_file_in" type="data" format="dat" optional="true" label="Bait File"/>
      </when>
    </conditional>

    <!-- One entry per bait: name, group assignment and control flag -->
    <repeat name="bait" title="Bait Create">
      <param name="bait1" type="text" size="100" label="Bait"/>
      <param name="assign" type="text" size="100" label="Group Assignment"/>
      <param name="T_C" type="boolean" checked="true" label="Is this a Control?"/>
    </repeat>
  </inputs>
  <!-- The three text datasets written by the selected script. -->
  <outputs>
    <data name="Inter_file" format="txt" label="Inter File"/>
    <data name="Prey_file" format="txt" label="Prey File"/>
    <data name="Bait_file" format="txt" label="Bait File"/>
  </outputs>
  <!--
    Job failure detection. Each "match" attribute is a regular expression
    searched in the named stream; a hit at level "fatal" fails the job and
    reports the description.

    The two traceback patterns quote literal Python output, so every regex
    metacharacter in them (square brackets, parentheses, dots) is escaped with
    a backslash. Left unescaped, "[Errno 2]" and "[10]" are character classes
    and "(...)" is a group, so the patterns could never match the text they
    were written for and only the generic "Unknown error" rule would fire.

    The specific rules are listed before the generic catch-all rules.
  -->
  <stdio>
    <regex match="Error: bad bait"
           source="stdout"
           level="fatal"
           description="Error: bad bait"/>
    <regex match="IOError: \[Errno 2\] No such file or directory: '\./tukeys_output\.txt'"
           source="stderr"
           level="fatal"
           description="Error: Scaffold/MaxQuant mismatch. Check job settings."/>
    <regex match="bait_temp_file = open\(sys\.argv\[10\], 'r'\)"
           source="stderr"
           level="fatal"
           description="Error: Bait create settings mismatch. Check job settings."/>
    <regex match="Error|error"
           source="stdout"
           level="fatal"
           description="Unknown error"/>
    <regex match="Error|error"
           source="stderr"
           level="fatal"
           description="Unknown error"/>
  </stdio>

  <!--
    NOTE(review): this test does not appear to belong to this tool. It sets a
    top-level parameter "input" to a FASTA file and checks an output named
    "out_file1", whereas this tool nests "input" inside the "type_select"
    conditional and declares the outputs Inter_file, Prey_file and Bait_file.
    It looks like a placeholder copied from a template; confirm and replace
    it with a test that uses real fixtures for one of the data sources.
  -->
  <tests>
    <test>
      <param name="input" value="fa_gc_content_input.fa"/>
      <output name="out_file1" file="fa_gc_content_output.txt"/>
    </test>
  </tests>
  <help>
Pre-Processing
^^^^^^^^^^^^^^

This tool will read in a Scaffold *Samples Report* file (tab-delimited
txt file), a MaxQuant *peptides.txt* file, multiple Peptideshaker
*Protein Report* files, or multiple mzIdentML files and process them to
generate a *Bait File, Prey File,* and *Inter File* for SAINTexpress
analysis.

--------------

**1) MaxQuant or Scaffold**

APOSTL is able to recognize either a Scaffold *Samples Report* file
(tab-delimited txt file) or the *peptides.txt* file output in the
MaxQuant *txt* output folder. No modifications should be made to these
files. Please designate which one is being provided.

**2) Scaffold or MaxQuant File Input**

Select the corresponding *Samples Report* or *peptides.txt* here.

**3) Create Prey File**

Select whether or not you would like APOSTL to generate a prey file.

When making a prey file, APOSTL queries a user provided FASTA database
(see below) in order to extract protein amino acid lengths and gene
names. This may take several minutes depending on your computer and if
your Galaxy distribution is cluster enabled. Some users may want to run
SAINTexpress using the same data set while changing which baits are
considered test or control. It is useful to toggle **Create Prey File**
off in order to save time by circumventing this step as the same prey
file can be used for both SAINTexpress runs.

**4) Provide Uniprot FASTA database**

Please specify a FASTA file containing the protein sequences. You can
download the latest release of Uniprot's reviewed sequences (Swiss-Prot)
from http://www.uniprot.org/downloads. For computing efficiency,
please limit your FASTA file to your species of interest. It is
generally advised to use the same FASTA database that was used for your
database searches during peptide and protein ID.

**5) Are You Providing Your Own Bait File?**

Users have two options for providing a *Bait* file. If you would like to
provide your own *Bait* file, select Yes and designate the file below. If
you would like to use the **Bait Create** tool to create a *Bait* file
within Galaxy, please select No and proceed to the **Bait Create** tool.

**6) Bait File**

If you selected Yes above, please specify the *Bait* file you would like
to provide. If you selected No above, ignore this argument and proceed
to the **Bait Create** tool.

    **Note:** Individual bait names must exactly match the sample names
    within your Scaffold or MaxQuant output and must contain no
    whitespace characters (e.g. spaces) or dashes.

    **Note:** When using Peptideshaker output, the baits in the *Bait*
    file need to be in the same order as the files selected.

**7) Bait Create**

Using the **Bait Create** tool, you can create your *bait.txt* file.

    **Note:** Individual bait names must exactly match the sample names
    within your Scaffold or MaxQuant output and must contain no
    whitespace characters (e.g. spaces) or dashes.

APOSTL uses this bait file to find the user's baits of interest within a
*Samples Report* or *peptides.txt* file and when preparing the
*inter.txt* file.

--------------

Once your parameters have been finalized, please press the Execute
button to start processing. This may take a few minutes. Once your
process has been completed, you will see your *Bait File, Prey File,*
and *Inter File* on the right hand side of your panel highlighted in
green.
  </help>
</tool>