Mercurial > repos > galaxyp > metanovo
view metanovo.xml @ 5:d6dcd3173bdf draft default tip
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/metanovo commit fd12c123235f5067858bbd22b70fb0082d1e2199
author | galaxyp |
---|---|
date | Sat, 11 May 2024 16:46:46 +0000 |
parents | 7a5ff5359b13 |
children |
line wrap: on
line source
<!--
    Galaxy wrapper for MetaNovo.

    The wrapper links the selected MGF file(s) and the FASTA database into the
    job working directory under sanitized names, renders a config.sh from the
    tool form, appends a THREAD_LIMIT entry, and then runs
    "metanovo.sh config.sh". The two declared outputs are collected from
    metanovo/metanovo.fasta and metanovo/metanovo.csv in the working directory.
-->
<tool id="metanovo" name="MetaNovo" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="20.09">
    <description>Produce targeted databases for mass spectrometry analysis.</description>
    <macros>
        <token name="@TOOL_VERSION@">1.9.4</token>
        <token name="@VERSION_SUFFIX@">4</token>
        <!-- Matches every character that is not a word character, a hyphen or a dot;
             used below to turn dataset names into safe file names. -->
        <token name="@SUBSTITUTION_RX@">[^\w\-\.]</token>
        <!-- Provides the fixed_modifications / variable_modifications macros expanded in the inputs. -->
        <import>macros_modifications.xml</import>
    </macros>
    <xrefs>
        <xref type="bio.tools">metanovo</xref>
    </xrefs>
    <requirements>
        <requirement type="package" version="@TOOL_VERSION@">metanovo</requirement>
    </requirements>
    <!-- Cheetah template for the job script. Directives and "##" comments must stay on
         their own lines: a "##" comment runs to the end of its line. -->
    <command><![CDATA[
        #import re
        #set $mgf_dir = 'mgf_files'
        #set $fasta_dir = 'fasta_file'
        ## sanitize the dataset name so it is safe to use as a file name
        #set fasta_name = re.sub('@SUBSTITUTION_RX@', '_', str($input_fasta.element_identifier))

        mkdir $mgf_dir &&
        mkdir $fasta_dir &&
        ln -s '$input_fasta' '$fasta_dir/$fasta_name' &&

        ## link every MGF input into the folder named by MGF_FOLDER in config.sh
        #if $input_type.type == "collection"
            #for $input_mgf_item in $input_type.input_mgf_collection:
                #set mgf_name = re.sub('@SUBSTITUTION_RX@', '_', str($input_mgf_item.element_identifier))
                ln -s '$input_mgf_item' '$mgf_dir/$mgf_name' &&
            #end for
        #else
            #set mgf_name = re.sub('@SUBSTITUTION_RX@', '_', str($input_type.input_mgf.element_identifier))
            ln -s '$input_type.input_mgf' '$mgf_dir/$mgf_name' &&
        #end if

        ## the number of threads should be number of available threads-1 according to the docs
        threads=\${GALAXY_SLOTS:-3} &&
        if [ \$threads -gt 1 ]; then (( threads-- )); fi &&
        ## config.sh must end with a newline for this append to start a new line
        echo "THREAD_LIMIT=\$threads" >> config.sh &&
        metanovo.sh config.sh
    ]]></command>
    <!-- config.sh is the single argument passed to metanovo.sh. One assignment per line.
         JVM sizes, the dg_* switches, the pepnovo_* values and the novor_* values are
         hard-coded here and are not exposed on the tool form. The content starts at
         column 0 and ends with a newline because the command appends THREAD_LIMIT to it. -->
    <configfiles>
        <configfile filename="config.sh"><![CDATA[#slurp
#import re
MGF_FOLDER=mgf_files
#set fasta_name = re.sub('@SUBSTITUTION_RX@', '_', str($input_fasta.element_identifier))
FASTA_FILE=fasta_file/'$fasta_name'
OUTPUT_FOLDER=.
CHUNKSIZE=$processing_control.CHUNKSIZE
JVM_Xmx='10000M'
JVM_Xms='1024M'
mn_specificity='$metanovo_parameters.mn_specificity'
mn_enzymes='$metanovo_parameters.mn_enzymes'
mn_max_missed_cleavages=$metanovo_parameters.mn_max_missed_cleavages
dg_pepnovo=0
dg_pnovo=0
dg_novor=0
dg_directag=1
prec_tol=$spectrum_matching_parameters.prec_tol
prec_ppm=$spectrum_matching_parameters.prec_ppm
frag_tol=$spectrum_matching_parameters.frag_tol
frag_ppm=$spectrum_matching_parameters.frag_ppm
digestion=$spectrum_matching_parameters.digestion
enzyme='$spectrum_matching_parameters.enzyme'
specificity=$spectrum_matching_parameters.specificity
mc='$spectrum_matching_parameters.mc'
fixed_mods="$spectrum_matching_parameters.fixed_mods"
variable_mods="$spectrum_matching_parameters.variable_mods"
min_charge=$spectrum_matching_parameters.min_charge
max_charge=$spectrum_matching_parameters.max_charge
fi='$spectrum_matching_parameters.fi'
ri='$spectrum_matching_parameters.ri'
min_isotope='$spectrum_matching_parameters.min_isotope'
max_isotope='$spectrum_matching_parameters.max_isotope'
annotation_level=$spectrum_annotation.annotation_level
annotation_high_resolution=$spectrum_annotation.annotation_high_resolution
sequence_index_type=$sequence_matching.sequence_index_type
sequence_matching_type=$sequence_matching.sequence_matching_type
sequence_matching_x=$sequence_matching.sequence_matching_x
import_peptide_length_min=$import_filters.import_peptide_length_min
import_peptide_length_max=$import_filters.import_peptide_length_max
import_precursor_mz_ppm=$import_filters.import_precursor_mz_ppm
exclude_unknown_ptms=$import_filters.exclude_unknown_ptms
ptm_score=$ptm_localization.ptm_score
score_neutral_losses=$ptm_localization.score_neutral_losses
ptm_sequence_matching_type=$ptm_localization.ptm_sequence_matching_type
ptm_alignment=$ptm_localization.ptm_alignment
useGeneMapping=$gene_annotation.useGeneMapping
updateGeneMapping=$gene_annotation.updateGeneMapping
simplify_groups=$protein_inference.simplify_groups
simplify_score=$protein_inference.simplify_score
simplify_enzymaticity=$protein_inference.simplify_enzymaticity
simplify_evidence=$protein_inference.simplify_evidence
simplify_uncharacterized=$protein_inference.simplify_uncharacterized
psm_fdr=$validation_levels.psm_fdr
peptide_fdr=$validation_levels.peptide_fdr
protein_fdr=$validation_levels.protein_fdr
group_psms=$validation_levels.group_psms
group_peptides=$validation_levels.group_peptides
merge_subgroups=$validation_levels.merge_subgroups
protein_fraction_mw_confidence='$fraction_analysis.protein_fraction_mw_confidence'
pepnovo_hitlist_length=1
pepnovo_estimate_charge=1
pepnovo_correct_prec_mass=1
pepnovo_discard_spectra=1
pepnovo_fragmentation_model='CID_IT_TRYP'
pepnovo_generate_blast=0
directag_tic_cutoff=$directag.directag_tic_cutoff
directag_max_peak_count=$directag.directag_max_peak_count
directag_intensity_classes=$directag.directag_intensity_classes
directag_adjust_precursor=$directag.directag_adjust_precursor
directag_min_adjustment='$directag.directag_min_adjustment'
directag_max_adjustment='$directag.directag_max_adjustment'
directag_adjustment_step='$directag.directag_adjustment_step'
directag_charge_states='$directag.directag_charge_states'
directag_ms_charge_state='$directag.directag_ms_charge_state'
directag_duplicate_spectra='$directag.directag_duplicate_spectra'
directag_deisotoping='$directag.directag_deisotoping'
directag_isotope_tolerance='$directag.directag_isotope_tolerance'
directag_complement_tolerance='$directag.directag_complement_tolerance'
directag_tag_length='$directag.directag_tag_length'
directag_max_var_mods='$directag.directag_max_var_mods'
directag_max_tag_count='$directag.directag_max_tag_count'
directag_intensity_weight='$directag.directag_intensity_weight'
directag_fidelity_weight='$directag.directag_fidelity_weight'
directag_complement_weight='$directag.directag_complement_weight'
novor_fragmentation=HCD
novor_mass_analyzer=Trap
]]></configfile>
    </configfiles>
    <inputs>
        <!-- The MGF inputs are required: the command reads their element_identifier
             unconditionally, so they are not marked optional. -->
        <conditional name="input_type">
            <param name="type" type="select" label="MGF Input Type" help="Submit either a single file, or a collection of files.">
                <option selected="true" value="single">Single file</option>
                <option value="collection">Collection</option>
            </param>
            <when value="single">
                <param name="input_mgf" type="data" format="mgf" label="MGF File" />
            </when>
            <when value="collection">
                <param name="input_mgf_collection" type="data_collection" format="mgf" label="MGF Collection" />
            </when>
        </conditional>
        <param name="input_fasta" type="data" format="fasta" label="FASTA File" />
        <section name="processing_control" expanded="False" title="Processing Control">
            <param name="CHUNKSIZE" label="Size to split fasta for parallel processing" value="100000" type="integer" optional="true"/>
        </section>
        <section name="metanovo_parameters" expanded="False" title="MetaNovo Parameters">
            <param argument="-mn_specificity" label="Enzyme Specificity" type="select">
                <option selected="true" value="specific">specific</option>
                <option value="semi-specific">semi-specific</option>
                <option value="unspecific">unspecific</option>
            </param>
            <param argument="-mn_enzymes" label="Enzyme Rule" type="select">
                <option value="Trypsin">Trypsin</option>
                <option selected="true" value="Trypsin, no P rule">Trypsin, no P rule</option>
                <option value="Whole protein">Whole protein</option>
            </param>
            <param argument="-mn_max_missed_cleavages" label="Number of enzymatic missed cleavages" value="2" type="integer" optional="true"/>
        </section>
        <section name="spectrum_matching_parameters" expanded="False" title="Spectrum Matching Parameters">
            <param argument="-prec_tol" label="Precursor ion mass tolerance" value="10.0" type="float" optional="true"/>
            <param argument="-prec_ppm" label="Precursor ion tolerance unit" type="select">
                <option value="0">Da</option>
                <option selected="true" value="1">ppm</option>
            </param>
            <param argument="-frag_tol" label="Fragment ion mass tolerance" value="0.05" type="float" optional="true"/>
            <param argument="-frag_ppm" label="Fragment ion tolerance unit" type="select">
                <option selected="true" value="0">Da</option>
                <option value="1">ppm</option>
            </param>
            <param argument="-digestion" label="Digestion" type="select">
                <option selected="true" value="0">Enzyme</option>
                <option value="1">Unspecific</option>
                <option value="2">Whole Protein</option>
            </param>
            <param argument="-enzyme" label="Enzyme" type="select" multiple="true">
                <option value="Trypsin">Trypsin</option>
                <option selected="true" value="Trypsin (no P rule)">Trypsin (no P rule)</option>
                <option value="Arg-C">Arg-C</option>
                <option value="Arg-C (no P rule)">Arg-C (no P rule)</option>
                <option value="Arg-N">Arg-N</option>
                <option value="Glu-C">Glu-C</option>
                <option value="Lys-C">Lys-C</option>
                <option value="Lys-C (no P rule)">Lys-C (no P rule)</option>
                <option value="Lys-N">Lys-N</option>
                <option value="Asp-N">Asp-N</option>
                <option value="Asp-N (ambic)">Asp-N (ambic)</option>
                <option value="Chymotrypsin">Chymotrypsin</option>
                <option value="Chymotrypsin (no P rule)">Chymotrypsin (no P rule)</option>
                <option value="Pepsin A">Pepsin A</option>
                <option value="CNBr">CNBr</option>
                <option value="Thermolysin">Thermolysin</option>
                <option value="LysargiNase">LysargiNase</option>
            </param>
            <param argument="-specificity" label="Specificity" type="select">
                <option selected="true" value="0">Specific</option>
                <option value="1">Semi-Specific</option>
                <option value="2">N-term Specific</option>
                <option value="3">C-term Specific</option>
            </param>
            <!-- Quotes inside the help attribute must be written as &quot; to keep the XML well-formed. -->
            <param argument="-mc" label="Number of allowed missed cleavages" value="2" type="text" optional="true" help="If more than one enzyme was used, please provide the missed cleavages for every enzyme in the same order, with a comma separated list, e.g. &quot;2, 1&quot;."/>
            <param argument="-fixed_mods" label="Fixed modifications as comma separated list" type="select" multiple="true">
                <expand macro="fixed_modifications"/>
            </param>
            <param argument="-variable_mods" label="Variable modifications as comma separated list" type="select" multiple="true">
                <expand macro="variable_modifications"/>
            </param>
            <param argument="-min_charge" label="Minimal charge to search for" value="2" type="integer" optional="true"/>
            <param argument="-max_charge" label="Maximal charge to search for" value="4" type="integer" optional="true"/>
            <param argument="-fi" label="Type of forward ion searched" value="b" type="text" optional="true"/>
            <param argument="-ri" label="Type of rewind ion searched" value="y" type="text" optional="true"/>
            <param argument="-min_isotope" label="Minimum precursor isotope" value="0" type="integer" optional="true"/>
            <param argument="-max_isotope" label="Maximum precursor isotope" value="1" type="integer" optional="true"/>
        </section>
        <section name="spectrum_annotation" expanded="False" title="Spectrum Annotation">
            <param argument="-annotation_level" label="The intensity threshold to consider for annotation" value="0.75" type="float" optional="true" help="Using percentiles, 0.75 means that the 25% most intense peaks will be annotated."/>
            <param argument="-annotation_high_resolution" label="If true the most accurate peak will be selected within the m/z tolerance." truevalue="1" falsevalue="0" type="boolean" checked="true"/>
        </section>
        <section name="sequence_matching" expanded="False" title="Sequence Matching">
            <param argument="-sequence_index_type" label="sequence_index_type (deprecated)" value="0" type="integer" optional="true"/>
            <param argument="-sequence_matching_type" label="The peptide to protein sequence matching type" type="select">
                <option value="0">Character Sequence</option>
                <option value="1">Amino Acids</option>
                <option selected="true" value="2">Indistinguishable Amino Acids</option>
            </param>
            <param argument="-sequence_matching_x" label="The maximal share of Xs in a sequence, 0.25 means 25% of X's" value="0.25" type="float" optional="true"/>
        </section>
        <section name="import_filters" expanded="False" title="Import Filters">
            <param argument="-import_peptide_length_min" label="The minimal peptide length to consider when importing identification files" value="8" type="integer" optional="true"/>
            <param argument="-import_peptide_length_max" label="The maximal peptide length to consider when importing identification files" value="30" type="integer" optional="true"/>
            <param argument="-import_precursor_mz_ppm" label="Maximal precursor ion deviation unit" type="select">
                <option selected="true" value="0">Da</option>
                <option value="1">ppm</option>
            </param>
            <param argument="-exclude_unknown_ptms" label="Peptides presenting unrecognized PTMs will be excluded" truevalue="1" falsevalue="0" type="boolean" checked="true"/>
        </section>
        <section name="ptm_localization" expanded="False" title="PTM Localization">
            <param argument="-ptm_score" label="The PTM probabilistic score to use for modification localization" type="select">
                <option value="0">A-score</option>
                <option selected="true" value="1">PhosphoRS</option>
                <option value="2">None</option>
            </param>
            <param argument="-score_neutral_losses" label="Include neutral losses in spectrum annotation of the PTM score" truevalue="1" falsevalue="0" type="boolean" checked="false"/>
            <param argument="-ptm_sequence_matching_type" label="The modification to peptide sequence matching type" type="select">
                <option value="0">Character Sequence</option>
                <option selected="true" value="1">Amino Acids</option>
                <option value="2">Indistinguishable Amino Acids</option>
            </param>
            <param argument="-ptm_alignment" label="Align peptide ambiguously localized PTMs on confident sites" truevalue="1" falsevalue="0" type="boolean" checked="true"/>
        </section>
        <section name="gene_annotation" expanded="False" title="Gene Annotation">
            <param argument="-useGeneMapping" label="Use and save gene mappings along with the project" truevalue="1" falsevalue="0" type="boolean" checked="true" help="UniProt databases only"/>
            <param argument="-updateGeneMapping" label="Update gene mappings automatically from Ensembl" truevalue="1" falsevalue="0" type="boolean" checked="true" help="UniProt databases only"/>
        </section>
        <section name="protein_inference" expanded="False" title="Protein Inference">
            <param argument="-simplify_groups" label="Simplify protein groups" truevalue="1" falsevalue="0" type="boolean" checked="true"/>
            <param argument="-simplify_score" label="Simplify protein groups based on the PeptideShaker target/decoy score" truevalue="1" falsevalue="0" type="boolean" checked="true"/>
            <param argument="-simplify_enzymaticity" label="Simplify protein groups based on the peptide enzymaticity" truevalue="1" falsevalue="0" type="boolean" checked="true"/>
            <param argument="-simplify_evidence" label="Simplify protein groups based on the UniProt protein evidence" truevalue="1" falsevalue="0" type="boolean" checked="true"/>
            <param argument="-simplify_uncharacterized" label="Simplify protein groups based on the protein characterization" truevalue="1" falsevalue="0" type="boolean" checked="true"/>
        </section>
        <section name="validation_levels" expanded="False" title="Validation Levels">
            <param argument="-psm_fdr" label="FDR at the PSM level in percent" value="1" type="integer" optional="true"/>
            <param argument="-peptide_fdr" label="FDR at the peptide level in percent" value="1" type="integer" optional="true"/>
            <param argument="-protein_fdr" label="FDR at the protein level in percent" value="1" type="integer" optional="true"/>
            <param argument="-group_psms" label="Group PSMs by charge for scoring and validation" truevalue="1" falsevalue="0" type="boolean" checked="true"/>
            <param argument="-group_peptides" label="Group peptides by modification status for scoring and validation" truevalue="1" falsevalue="0" type="boolean" checked="true"/>
            <param argument="-merge_subgroups" label="Merge small PSM and peptide groups for scoring and validation" truevalue="1" falsevalue="0" type="boolean" checked="true"/>
        </section>
        <section name="fraction_analysis" expanded="False" title="Fraction Analysis">
            <param argument="-protein_fraction_mw_confidence" label="Minimum confidence required for a protein in the fraction MW plot" value="95.0" type="float" optional="true"/>
        </section>
        <section name="directag" expanded="False" title="DirecTag">
            <param argument="-directag_tic_cutoff" label="TIC cutoff in percent" value="85" type="integer" optional="true"/>
            <param argument="-directag_max_peak_count" label="Max peak count" value="400" type="integer" optional="true"/>
            <param argument="-directag_intensity_classes" label="Number of intensity classes" value="3" type="integer" optional="true"/>
            <param argument="-directag_adjust_precursor" label="Adjust precursor" truevalue="1" falsevalue="0" type="boolean" checked="false"/>
            <param argument="-directag_min_adjustment" label="Minimum precursor adjustment" value="-2.5" type="float" optional="true"/>
            <param argument="-directag_max_adjustment" label="Maximum precursor adjustment" value="2.5" type="float" optional="true"/>
            <param argument="-directag_adjustment_step" label="Precursor adjustment step" value="0.1" type="float" optional="true"/>
            <param argument="-directag_charge_states" label="Number of charge states considered" value="3" type="integer" optional="true"/>
            <param argument="-directag_ms_charge_state" label="Use charge state from M spectrum" truevalue="1" falsevalue="0" type="boolean" checked="false"/>
            <param argument="-directag_duplicate_spectra" label="Duplicate spectra per charge" truevalue="1" falsevalue="0" type="boolean" checked="true"/>
            <param argument="-directag_deisotoping" label="Deisotoping mode" type="select">
                <option selected="true" value="0">No deisotoping</option>
                <option value="1">Precursor only</option>
                <option value="2">Precursor and candidate</option>
            </param>
            <param argument="-directag_isotope_tolerance" label="Isotope mz tolerance" value="0.25" type="float" optional="true"/>
            <param argument="-directag_complement_tolerance" label="Complement mz tolerance" value="0.5" type="float" optional="true"/>
            <param argument="-directag_tag_length" label="Tag length" value="4" type="integer" optional="true"/>
            <param argument="-directag_max_var_mods" label="Maximum variable modifications per sequence" value="2" type="integer" optional="true"/>
            <param argument="-directag_max_tag_count" label="Maximum tag count" value="5" type="integer" optional="true"/>
            <param argument="-directag_intensity_weight" label="Intensity score weight" value="1.0" type="float" optional="true"/>
            <param argument="-directag_fidelity_weight" label="Fidelity score weight" value="1.0" type="float" optional="true"/>
            <param argument="-directag_complement_weight" label="Complement score weight" value="1.0" type="float" optional="true"/>
        </section>
    </inputs>
    <!-- Both outputs are picked up from the metanovo/ folder in the job working directory. -->
    <outputs>
        <data name="output_fasta" format="fasta" from_work_dir="metanovo/metanovo.fasta" label="${tool.name} on ${on_string}: FASTA"/>
        <data name="output_csv" format="csv" from_work_dir="metanovo/metanovo.csv" label="${tool.name} on ${on_string}: CSV"/>
    </outputs>
    <tests>
        <!-- Single MGF file (default input type). -->
        <test expect_num_outputs="2">
            <param name="input_mgf" value="sample_data_1.mgf" ftype="mgf"/>
            <param name="input_fasta" value="sample_fasta_single.fasta" ftype="fasta"/>
            <output name="output_csv" ftype="csv">
                <assert_contents>
                    <!-- Check header. -->
                    <has_text text=",index,Accession,Record,ID,PeptideCount,Peptides,ScanCount,Scans,Organism,Length,File,Sample sample_data_1 (msms),SAF sample_data_1,NSAF sample_data_1,Summed_NSAF,Protein_Prob,Organism_Prob,MSMS_Percent,Combined_Prob"/>
                </assert_contents>
            </output>
            <output name="output_fasta" ftype="fasta" file="sample_output_single.fasta"/>
        </test>
        <!-- List collection of two MGF files processed in one job. -->
        <test expect_num_outputs="2">
            <param name="type" value="collection"/>
            <param name="input_mgf_collection">
                <collection type="list">
                    <element name="sample_data_1.mgf" value="sample_data_1.mgf" ftype="mgf"/>
                    <element name="sample_data_2.mgf" value="sample_data_2.mgf" ftype="mgf"/>
                </collection>
            </param>
            <param name="input_fasta" value="sample_fasta_collection.fasta" ftype="fasta"/>
            <output name="output_csv" ftype="csv">
                <assert_contents>
                    <!-- Check header. -->
                    <has_text text=",index,Accession,File,ID,Length,Organism,PeptideCount,Peptides,Record,SAF sample_data_1,SAF sample_data_2,Sample sample_data_1 (msms),Sample sample_data_2 (msms),ScanCount,Scans,NSAF sample_data_1,NSAF sample_data_2,Summed_NSAF,Protein_Prob,Organism_Prob,MSMS_Percent,Combined_Prob"/>
                </assert_contents>
            </output>
            <output name="output_fasta" ftype="fasta" file="sample_output_collection.fasta"/>
        </test>
    </tests>
    <help><![CDATA[
**MetaNovo**

MetaNovo searches MS/MS data against a FASTA database of known proteins.

Two outputs are produced:

- MetaNovo Output FASTA: the matching proteins produced by the search.
- MetaNovo Output CSV: information about the job and other useful metadata.

Two inputs are required: an MGF file or files and a FASTA database file.

Two different input types are available for the MGF input. The correct input configuration depends on the desired use case, as outlined below:

=======================================================  =============
Use case                                                 Configuration
=======================================================  =============
Single input MGF file, single output FASTA file          **Single file** input with **Single dataset** selected
Multiple input MGF files, multiple output FASTA files\*  **Single file** input with **Multiple datasets** OR **Dataset collection** selected
Multiple input MGF files, single output FASTA file       **Collection** input
=======================================================  =============

**\*** One for each MGF file.

In the second use case, a separate MetaNovo job is spawned for each input MGF. In the third use case, a single MetaNovo job runs with all MGF files in the collection as input.

If the third use case fails due to memory limitations, users are recommended to use the second option. The multiple output FASTA databases may be merged to generate a reduced, compact database.
    ]]></help>
    <citations>
        <citation type="doi">10.1101/605550</citation>
    </citations>
</tool>