Mercurial > repos > edward-kirton > roche454_toolsuite
changeset 0:f036c7107601
Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
author | edward-kirton |
---|---|
date | Tue, 07 Jun 2011 17:50:32 -0400 |
parents | |
children | 368a6ebebdde |
files | roche454/README roche454/runAssembly.xml roche454/runAssembly_cDNA.xml roche454/runAssembly_cDNA_wrapper.pl roche454/runAssembly_wrapper.pl roche454/runMapping.xml roche454/runMapping_cDNA.xml roche454/runMapping_cDNA_wrapper.pl roche454/runMapping_wrapper.pl roche454/sff_to_fastq.xml roche454/sff_to_fastq_converter.pl roche454/sfffile.xml roche454/suite_config.xml |
diffstat | 13 files changed, 1416 insertions(+), 0 deletions(-) [+] |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/roche454/README Tue Jun 07 17:50:32 2011 -0400 @@ -0,0 +1,6 @@ +- Newbler requires a license and the executables are not included here +- the Newbler tool config files include options to support multiple versions of Newbler; you'll need to add the paths to +the XML file if you wish to use this feature. Or, just remove this parameter entirely. Note the wrapper is expecting the +executable as the first argument. +- the SFF->Fastq converter requires that the executable "fasta_qual_to_fastq" be installed; replace it with whatever converter +you have installed in the perl script.
<!--
  roche454/runAssembly.xml
  Galaxy tool definition for de novo assembly of Roche/454 reads with Newbler
  (runAssembly), executed through runAssembly_wrapper.pl.
-->
<tool id="runAssembly" name="runAssembly" version="1.0.0" force_history_refresh='True'>
<description>De novo assembly of Roche/454 reads using Newbler</description>
<!--
  Command layout: the wrapper script is followed by the job's extra-files
  directory and the output dataset paths (their order must match the order in
  which runAssembly_wrapper.pl shifts them off its argument list), then the
  Newbler executable ($newbler_exe) and its options. The wrapper drops any
  option whose value is the literal string None.
-->
<command interpreter='perl'>runAssembly_wrapper.pl
$newbler_metrics.extra_files_path
$newbler_metrics
$read_status
$trimmed_reads_fasta
$trimmed_reads_qual
$alignment_info
$all_contigs_fasta
$all_contigs_qual
$contigs_ace
$contigs_consed_ace
$contig_graph
$pair_align
$pair_status
$scaffolds_fasta
$scaffolds_qual
$scaffolds_agp
$tag_pair_align
$trim_status
$large_contigs_fasta
$large_contigs_qual
$newbler_exe -o $newbler_metrics.extra_files_path
-cpu 8
$rip
-e $e
-mi $mi
-ml $ml
-minlen $minlen
$large
$pair
$info
$notrim
$tr
$ace
$no
$qo
$nor
$ud
-ss $ss
-sl $sl
-sc $sc
-ais $ais
-a $a
-mcf $mcf
-vs $vs
-vt $vt
-fi $fi
-fe $fe
-l $l
#for $i in $sff_paired_inputs
-p ${i.sff_paired_input}
#end for
#for $i in $sanger_paired_inputs
-p ${i.sanger_paired_input}
#end for
#for $i in $sff_inputs
${i.sff_input}
#end for
#for $i in $sanger_inputs
${i.sanger_input}
#end for
</command>
<inputs>
    <!-- NEWBLER VERSION -->
    <param name='newbler_exe' type='select' display='radio' label='Newbler version'>
        <option value='runAssembly' selected='true'>default</option>
        <!-- OTHER VERSIONS MAY BE INCLUDED HERE; OR SIMPLY EDIT TO REMOVE THIS ENTIRE SECTION
        <option value='/jgi/tools/454/rig-DataProcessing_2.3/bin/runAssembly'>2.3</option>
        <option value='/jgi/tools/454/rig-DataProcessing_2.4pre-20091204/bin/runAssembly'>2.4</option>
        <option value='/home/copeland/local/x86_64/newbler/v2.5p1-internal-10Jun23-1/runAssembly'>2.5</option>
        -->
    </param>

    <!-- READSEQ INFILES -->
    <repeat name="sff_inputs" title="Unpaired Reads Sff Files">
        <param name="sff_input" type="data" format="sff" label="SE Sff file"/>
    </repeat>
    <repeat name="sanger_inputs" title="Unpaired Reads Fasta Files">
        <param name="sanger_input" type="data" format="fasta" label="SE Fasta file"/>
    </repeat>
    <repeat name="sff_paired_inputs" title="Paired Reads Sff Files">
        <param name="sff_paired_input" type="data" format="sff" label="PE Sff file"/>
    </repeat>
    <repeat name="sanger_paired_inputs" title="Paired Reads Fasta Files">
        <param name="sanger_paired_input" type="data" format="fasta" label="PE Fasta file"/>
    </repeat>
    <!-- paired_reads is not passed to Newbler; it is only read by the output filters below -->
    <param name='paired_reads' type='select' display='radio' label='[-paired_reads] If supplying paired reads (above), do you want paired-read info?'>
        <option value='false'>no</option>
        <option value='true'>[-paired_reads] yes</option>
    </param>
    <param name='pair' type='select' display='radio' label='[-pair] Output pairwise overlaps'>
        <option value=''>no</option>
        <option value='-pair'>[-pair] yes</option>
    </param>

    <param name='l' type="integer" value='500' label="[-l] This option sets the minimum length for a contig to appear in the 454LargeContigs.fna file"/>

    <!-- OPTIONAL ARGUMENTS -->
    <param name='mcf' type='data' format='tabular' optional='true' label='[-mcf] Specify non-default MID config file' />
    <param name='fi' type='data' format='txt' optional='true' label='[-fi] Include filter file to be specified' />
    <param name='fe' type='data' format='txt' optional='true' label='[-fe] Exclude filter file to be specified' />
    <param name='vt' type='data' format='fasta' optional='true' label="[-vt] This option specifies a vector trimming database, or FASTA file of sequences to be used to trim the ends of input reads (for cloning vectors, primers, adapters or other end sequences)" />
    <param name='vs' type='data' format='fasta' optional='true' label="[-vs] This option specifies a vector screening database, or FASTA file of sequences to be used to screen the input reads for contaminants. Reads that completely align against the screening database are trimmed completely (so that it is not used in the computation), but otherwise the read trimpoints are not changed" />

    <!-- READ TRIMMING -->
    <param name='minlen' type='integer' value='20' label='[-minlen] Minimum length of reads to use (15-45 allowed)'/>
    <param name='notrim' type='boolean' truevalue='-notrim' falsevalue='' checked='false' label='[-notrim] Do not perform default quality and primer trimming of input reads'/>
    <param name='tr' type='select' display='radio' label='[-tr] Output trimmed reads'>
        <option value=''>no</option>
        <option value='-tr'>[-tr] yes</option>
    </param>
    <param name='nor' type='boolean' truevalue='-nor' falsevalue='' label='[-nor] Turn off the automatic rescore function for read quality scores'/>
    <param name='ud' type='boolean' truevalue='-ud' falsevalue='' label='[-ud] Treat each read separately, with no grouping of duplicates'/>

    <!-- ALIGNMENT PARAMETERS -->
    <param name='ss' type='integer' value='12' label='[-ss] Seed step parameter - The number of bases between seed generation locations used in the exact k-mer matching part of the overlap detection. Allow values: 1 or greater'/>
    <param name='sl' type='integer' value='16' label='[-sl] Seed length parameter - The number of bases used for each seed in the exact k-mer matching part of the overlap detection (i.e. the "k" value of the k-mer matching). Allowed values: 6-16'/>
    <param name='sc' type='integer' value='1' label='[-sc] Seed count parameter - The number of seeds required in a window before an extension is made. Allowed values: 1 or greater'/>
    <param name='ml' type="text" value='40' label="[-ml] Minimum overlap length - The minimum length of overlaps used for the pairwise alignment step. The value can either be a minimum length in bases or a percentage of read length. In the case of a percentage, simply include '%' immediately following the numeric value. Allowed values: 1 or greater"/>
    <param name='mi' type="integer" value='90' label="[-mi] Minimum overlap identity - The percent identity of overlaps used for the pairwise alignment step. Allowed values: 0 or greater"/>
    <param name='ais' type='integer' value='2' label='[-ais] Alignment identity score - When multiple overlaps are found, the per-overlap column identity score used to sort the overlaps for use in the progressive alignment. Allowed values: 0 or greater'/>

    <!-- ASSEMBLY OPTIONS -->
    <param name='e' type="integer" value='0' label="[-e] This option tells the assembler that the expected depth of the data is at a certain level. The assembler has been optimized for datasets in the 10-50x oversampling size, and this option helps the assembler with datasets that have a higher oversampling level. A value of 0 resets the assembler computation to use its default algorithms"/>
    <param name='large' type='boolean' truevalue='-large' falsevalue='' checked='false' label='[-large] Check if large or complex genome'/>

    <!-- OUTPUT OPTIONS -->
    <param name='no' type='select' display='radio' label='[-no] Do complete assembly'>
        <option value=''>do complete assembly</option>
        <option value='-no'>[-no] do not assemble; do alignments only</option>
    </param>
    <!-- truevalue carries the flag: ticking the box adds -qo to the command line, leaving it unticked adds nothing -->
    <param name='qo' type='boolean' truevalue='-qo' falsevalue='' checked='false' label='[-qo] Generate quick output for mapping and assembly. Disables signal distribution computation for calling consensus sequences and can decrease accuracy'/>
    <param name='a' type="integer" value='100' label="[-a] This option sets the minimum length for a contig to appear in the 454AllContigs.fna file."/>
    <!-- truevalue carries the flag: ticking the box adds -rip to the command line, leaving it unticked adds nothing -->
    <param name='rip' type='boolean' truevalue='-rip' falsevalue='' checked='false' label='[-rip] Output each read in only one contig'/>
    <param name='info' type='select' display='radio' label='Output Alignment Info'>
        <option value='-info'>[-info] yes</option>
        <option value='-infoall'>[-infoall] yes, including 0-coverage positions</option>
    </param>
    <param name='ace' type='select' display='radio' label='Produce Ace assembly file'>
        <option value=''>no</option>
        <option value='-ace'>[-ace] yes</option>
        <option value='-ace -consed'>[-consed] yes, in consed dir</option>
    </param>
</inputs>

<!-- Each filter below decides, from the chosen input values, whether that output dataset is created -->
<outputs>
    <data name='newbler_metrics' format='txt' />
    <data name='read_status' format='tabular' label='Read Status'/>
    <data name='trimmed_reads_fasta' format='fasta' label='Trimmed Reads (Fasta)'>
        <filter>tr == '-tr'</filter>
    </data>
    <data name='trimmed_reads_qual' format='qual454' label='Trimmed Reads (Qual)'>
        <filter>tr == '-tr'</filter>
    </data>
    <!-- the following produced only if no != '-no' -->
    <data name='alignment_info' format='tabular' label='Alignment Info'>
        <filter>no != '-no'</filter>
    </data>
    <data name='all_contigs_fasta' format='fasta' label='All Contigs (Fasta)'>
        <filter>no != '-no'</filter>
    </data>
    <data name='all_contigs_qual' format='qual454' label='All Contigs (Qual454)'>
        <filter>no != '-no'</filter>
    </data>
    <data name='contigs_ace' format='ace' label='Contigs (Ace)'>
        <filter>ace == '-ace' and no != '-no'</filter>
    </data>
    <data name='contigs_consed_ace' format='ace' label='Contigs (Consed/Ace)'>
        <filter>ace == '-ace -consed' and no != '-no'</filter>
    </data>

    <data name='contig_graph' format='txt' label='Contig Graph'/>
    <data name='large_contigs_fasta' format='fasta' label='Large Contigs (Fasta)'>
        <filter>no != '-no'</filter>
    </data>
    <data name='large_contigs_qual' format='qual454' label='Large Contigs (Qual454)'>
        <filter>no != '-no'</filter>
    </data>
    <data name='pair_align' format='txt' label='Pairwise Alignments'>
        <filter>pair == '-pair' and no != '-no'</filter>
    </data>
    <data name='pair_status' format='tabular' label='Paired-End Read Status'>
        <filter>paired_reads == 'true' and no != '-no'</filter>
    </data>
    <data name='scaffolds_fasta' format='fasta' label='Scaffolds (Fasta)'>
        <filter>paired_reads == 'true' and no != '-no'</filter>
    </data>
    <data name='scaffolds_qual' format='qual454' label='Scaffolds (Qual454)'>
        <filter>paired_reads == 'true' and no != '-no'</filter>
    </data>
    <data name='scaffolds_agp' format='tabular' label='Scaffolds (Agp)'>
        <filter>paired_reads == 'true' and no != '-no'</filter>
    </data>
    <data name='tag_pair_align' format='txt' label='Tag Pair Alignments'>
        <filter>pair == '-pair' and paired_reads == 'true' and no != '-no'</filter>
    </data>
    <data name='trim_status' format='tabular' label='Trim Status'/>

</outputs>
<help>

**What it does**

Assemble (Roche/454) reads using Newbler.

Download the manual here: http://galaxy.jgi-psf.org/static/manuals/GSFLXSystemSoftwareManual_PartC_Assembler-Mapper-SFFTools.pdf

.. class:: warningmark

**Fasta Header Format** Fasta input must provide any pairing information in the header using the expected key=value format. Use the 'Sanger tab to Newbler Fasta' tool.

</help>
</tool>
<!--
  roche454/runAssembly_cDNA.xml
  Galaxy tool definition for de novo assembly of Roche/454 cDNA reads with
  Newbler (runAssembly with the -cdna option), executed through
  runAssembly_cDNA_wrapper.pl.
-->
<tool id="runAssembly_cDNA" name="runAssembly cDNA" version="1.0.0" force_history_refresh='True'>
<description>De novo assembly of Roche/454 cDNA reads using Newbler</description>
<!--
  Command layout: the wrapper script is followed by the job's extra-files
  directory and the output dataset paths (their order must match the order in
  which runAssembly_cDNA_wrapper.pl shifts them off its argument list), then
  the Newbler executable ($newbler_exe) and its options. The wrapper drops any
  option whose value is the literal string None.
-->
<command interpreter='perl'>runAssembly_cDNA_wrapper.pl
$newbler_metrics.extra_files_path
$newbler_metrics
$read_status
$trimmed_reads_fasta
$trimmed_reads_qual
$alignment_info
$all_contigs_fasta
$all_contigs_qual
$contigs_ace
$contigs_consed_ace
$contig_graph
$pair_align
$pair_status
$scaffolds_fasta
$scaffolds_qual
$scaffolds_agp
$tag_pair_align
$trim_status
$isotigs_ace
$isotigs_fasta
$isotigs_qual
$isotigs_agp
$isotigs_layout
$newbler_exe -o $newbler_metrics.extra_files_path
-cpu 8
$rip
-e $e
-mi $mi
-ml $ml
-minlen $minlen
$large
$pair
$info
$notrim
$tr
$ace
$no
$qo
$nor
$ud
-ss $ss
-sl $sl
-sc $sc
-ais $ais
-a $a
-mcf $mcf
-vs $vs
-vt $vt
-fi $fi
-fe $fe
-cdna
-ig $ig
-it $it
-icc $icc
-icl $icl
#for $i in $sff_paired_inputs
-p ${i.sff_paired_input}
#end for
#for $i in $sanger_paired_inputs
-p ${i.sanger_paired_input}
#end for
#for $i in $sff_inputs
${i.sff_input}
#end for
#for $i in $sanger_inputs
${i.sanger_input}
#end for
</command>
<inputs>
    <!-- NEWBLER VERSION -->
    <param name='newbler_exe' type='select' display='radio' label='Newbler version'>
        <option value='runAssembly' selected='true'>default</option>
        <!-- OTHER VERSIONS MAY BE INCLUDED HERE; OR SIMPLY EDIT TO REMOVE THIS ENTIRE SECTION
        <option value='/jgi/tools/454/rig-DataProcessing_2.3/bin/runAssembly'>2.3</option>
        <option value='/jgi/tools/454/rig-DataProcessing_2.4pre-20091204/bin/runAssembly'>2.4</option>
        <option value='/home/copeland/local/x86_64/newbler/v2.5p1-internal-10Jun23-1/runAssembly'>2.5</option>
        -->
    </param>

    <!-- READSEQ INFILES -->
    <repeat name="sff_inputs" title="Unpaired Reads Sff Files">
        <param name="sff_input" type="data" format="sff" label="SE Sff file"/>
    </repeat>
    <repeat name="sanger_inputs" title="Unpaired Reads Fasta Files">
        <param name="sanger_input" type="data" format="fasta" label="SE Fasta file"/>
    </repeat>
    <repeat name="sff_paired_inputs" title="Paired Reads Sff Files">
        <param name="sff_paired_input" type="data" format="sff" label="PE Sff file"/>
    </repeat>
    <repeat name="sanger_paired_inputs" title="Paired Reads Fasta Files">
        <param name="sanger_paired_input" type="data" format="fasta" label="PE Fasta file"/>
    </repeat>
    <!-- paired_reads is not passed to Newbler; it is only read by the output filters below -->
    <param name='paired_reads' type='select' display='radio' label='[-paired_reads] If supplying paired reads (above), do you want paired-read info?'>
        <option value='false'>no</option>
        <option value='true'>[-paired_reads] yes</option>
    </param>
    <param name='pair' type='select' display='radio' label='[-pair] Output pairwise overlaps'>
        <option value=''>no</option>
        <option value='-pair'>[-pair] yes</option>
    </param>

    <!-- ISOTIG / ISOGROUP LIMITS (cDNA mode) -->
    <param name='it' type='integer' value='100' label='[-it] Specify the maximum number of isotigs in an isogroup. Maximum is 10,000.'/>
    <param name='ig' type='integer' value='500' label='[-ig] Specify the maximum number of contigs in an isogroup.'/>
    <param name='icc' type='integer' value='100' label='[-icc] Specify the maximum number of contigs in an isotig. Maximum is 200 and corresponds to the recursion depth during graph traversal'/>
    <param name='icl' type='integer' value='3' label='[-icl] Specify the minimum length a contig must be to be part of an isotig. Minimum is 3bp.'/>

    <!-- OPTIONAL ARGUMENTS -->
    <param name='mcf' type='data' format='tabular' optional='true' label='[-mcf] Specify non-default MID config file' />
    <param name='fi' type='data' format='txt' optional='true' label='[-fi] Include filter file to be specified' />
    <param name='fe' type='data' format='txt' optional='true' label='[-fe] Exclude filter file to be specified' />
    <param name='vt' type='data' format='fasta' optional='true' label="[-vt] This option specifies a vector trimming database, or FASTA file of sequences to be used to trim the ends of input reads (for cloning vectors, primers, adapters or other end sequences)" />
    <param name='vs' type='data' format='fasta' optional='true' label="[-vs] This option specifies a vector screening database, or FASTA file of sequences to be used to screen the input reads for contaminants. Reads that completely align against the screening database are trimmed completely (so that it is not used in the computation), but otherwise the read trimpoints are not changed" />

    <!-- READ TRIMMING -->
    <param name='minlen' type='integer' value='20' label='[-minlen] Minimum length of reads to use (15-45 allowed)'/>
    <param name='notrim' type='boolean' truevalue='-notrim' falsevalue='' checked='false' label='[-notrim] Do not perform default quality and primer trimming of input reads'/>
    <param name='tr' type='select' display='radio' label='[-tr] Output trimmed reads'>
        <option value=''>no</option>
        <option value='-tr'>[-tr] yes</option>
    </param>
    <param name='nor' type='boolean' truevalue='-nor' falsevalue='' label='[-nor] Turn off the automatic rescore function for read quality scores'/>
    <param name='ud' type='boolean' truevalue='-ud' falsevalue='' label='[-ud] Treat each read separately, with no grouping of duplicates'/>

    <!-- ALIGNMENT PARAMETERS -->
    <param name='ss' type='integer' value='12' label='[-ss] Seed step parameter - The number of bases between seed generation locations used in the exact k-mer matching part of the overlap detection. Allow values: 1 or greater'/>
    <param name='sl' type='integer' value='16' label='[-sl] Seed length parameter - The number of bases used for each seed in the exact k-mer matching part of the overlap detection (i.e. the "k" value of the k-mer matching). Allowed values: 6-16'/>
    <param name='sc' type='integer' value='1' label='[-sc] Seed count parameter - The number of seeds required in a window before an extension is made. Allowed values: 1 or greater'/>
    <param name='ml' type="text" value='40' label="[-ml] Minimum overlap length - The minimum length of overlaps used for the pairwise alignment step. The value can either be a minimum length in bases or a percentage of read length. In the case of a percentage, simply include '%' immediately following the numeric value. Allowed values: 1 or greater"/>
    <param name='mi' type="integer" value='90' label="[-mi] Minimum overlap identity - The percent identity of overlaps used for the pairwise alignment step. Allowed values: 0 or greater"/>
    <param name='ais' type='integer' value='2' label='[-ais] Alignment identity score - When multiple overlaps are found, the per-overlap column identity score used to sort the overlaps for use in the progressive alignment. Allowed values: 0 or greater'/>

    <!-- ASSEMBLY OPTIONS -->
    <param name='e' type="integer" value='0' label="[-e] This option tells the assembler that the expected depth of the data is at a certain level. The assembler has been optimized for datasets in the 10-50x oversampling size, and this option helps the assembler with datasets that have a higher oversampling level. A value of 0 resets the assembler computation to use its default algorithms"/>
    <param name='large' type='boolean' truevalue='-large' falsevalue='' checked='false' label='[-large] Check if large or complex genome'/>

    <!-- OUTPUT OPTIONS -->
    <param name='no' type='select' display='radio' label='[-no] Do complete assembly'>
        <option value=''>do complete assembly</option>
        <option value='-no'>[-no] do not assemble; do alignments only</option>
    </param>
    <!-- truevalue carries the flag: ticking the box adds -qo to the command line, leaving it unticked adds nothing -->
    <param name='qo' type='boolean' truevalue='-qo' falsevalue='' checked='false' label='[-qo] Generate quick output for mapping and assembly. Disables signal distribution computation for calling consensus sequences and can decrease accuracy'/>
    <param name='a' type="integer" value='100' label="[-a] This option sets the minimum length for a contig to appear in the 454AllContigs.fna file."/>
    <!-- truevalue carries the flag: ticking the box adds -rip to the command line, leaving it unticked adds nothing -->
    <param name='rip' type='boolean' truevalue='-rip' falsevalue='' checked='false' label='[-rip] Output each read in only one contig'/>
    <param name='info' type='select' display='radio' label='Output Alignment Info'>
        <option value='-info'>[-info] yes</option>
        <option value='-infoall'>[-infoall] yes, including 0-coverage positions</option>
    </param>
    <param name='ace' type='select' display='radio' label='Produce Ace assembly file'>
        <option value=''>no</option>
        <option value='-ace'>[-ace] yes</option>
        <option value='-ace -consed'>[-consed] yes, in consed dir</option>
    </param>
</inputs>

<!-- Each filter below decides, from the chosen input values, whether that output dataset is created -->
<outputs>
    <data name='newbler_metrics' format='txt' />
    <data name='read_status' format='tabular' label='Read Status'/>
    <data name='trimmed_reads_fasta' format='fasta' label='Trimmed Reads (Fasta)'>
        <filter>tr == '-tr'</filter>
    </data>
    <data name='trimmed_reads_qual' format='qual454' label='Trimmed Reads (Qual)'>
        <filter>tr == '-tr'</filter>
    </data>
    <!-- the following produced only if no != '-no' -->
    <data name='alignment_info' format='tabular' label='Alignment Info'>
        <filter>no != '-no'</filter>
    </data>
    <data name='all_contigs_fasta' format='fasta' label='All Contigs (Fasta)'>
        <filter>no != '-no'</filter>
    </data>
    <data name='all_contigs_qual' format='qual454' label='All Contigs (Qual454)'>
        <filter>no != '-no'</filter>
    </data>
    <data name='contigs_ace' format='ace' label='Contigs (Ace)'>
        <filter>ace == '-ace' and no != '-no'</filter>
    </data>
    <data name='contigs_consed_ace' format='ace' label='Contigs (Consed/Ace)'>
        <filter>ace == '-ace -consed' and no != '-no'</filter>
    </data>

    <data name='contig_graph' format='txt' label='Contig Graph'/>
    <data name='pair_align' format='txt' label='Pairwise Alignments'>
        <filter>pair == '-pair' and no != '-no'</filter>
    </data>
    <data name='pair_status' format='tabular' label='Paired-End Read Status'>
        <filter>paired_reads == 'true' and no != '-no'</filter>
    </data>
    <data name='scaffolds_fasta' format='fasta' label='Scaffolds (Fasta)'>
        <filter>paired_reads == 'true' and no != '-no'</filter>
    </data>
    <data name='scaffolds_qual' format='qual454' label='Scaffolds (Qual454)'>
        <filter>paired_reads == 'true' and no != '-no'</filter>
    </data>
    <data name='scaffolds_agp' format='tabular' label='Scaffolds (Agp)'>
        <filter>paired_reads == 'true' and no != '-no'</filter>
    </data>
    <data name='tag_pair_align' format='txt' label='Tag Pair Alignments'>
        <filter>pair == '-pair' and paired_reads == 'true' and no != '-no'</filter>
    </data>
    <data name='trim_status' format='tabular' label='Trim Status'/>

    <data name='isotigs_ace' format='ace' label='Isotigs (Ace)'>
        <filter>ace != '' and no != '-no'</filter>
    </data>
    <data name='isotigs_fasta' format='fasta' label='Isotigs (Fasta)'>
        <filter>no != '-no'</filter>
    </data>
    <data name='isotigs_qual' format='qual454' label='Isotigs (Qual454)'>
        <filter>no != '-no'</filter>
    </data>
    <data name='isotigs_agp' format='tabular' label='Isotigs (Agp)'>
        <filter>no != '-no'</filter>
    </data>
    <data name='isotigs_layout' format='txt' label='Isotig Layout'>
        <filter>no != '-no'</filter>
    </data>

</outputs>
<help>

**What it does**

Assemble (Roche/454) reads using Newbler.

Download the manual here: http://galaxy.jgi-psf.org/static/manuals/GSFLXSystemSoftwareManual_PartC_Assembler-Mapper-SFFTools.pdf

.. class:: warningmark

**Fasta Header Format** Fasta input must provide any pairing information in the header using the expected key=value format. Use the 'Sanger tab to Newbler Fasta' tool.

</help>
</tool>
#!/usr/bin/env perl
# roche454/runAssembly_cDNA_wrapper.pl
#
# Wrapper around Newbler's runAssembly (cDNA mode).
#
# Usage: runAssembly_cDNA_wrapper.pl OUTDIR OUTFILE{22} EXECUTABLE [OPTIONS...]
#
# The first 23 arguments are the Newbler output directory followed by 22
# destination paths, any of which may be the literal string 'None'. Everything
# after them is the command to run; its first word is expected to be the
# executable. After a successful run, each Newbler result file that exists is
# moved from OUTDIR to its destination path.

use warnings;
use strict;
use File::Copy;

# EXPECT 23 LEADING PATH ARGUMENTS (OUTPUT DIR + 22 OUTPUT FILES), SOME OF WHICH MAY BE 'None'
my $outdir=shift @ARGV;
my $newbler_metrics=shift @ARGV;
my $read_status=shift @ARGV;
my $trimmed_reads_fasta=shift @ARGV;
my $trimmed_reads_qual=shift @ARGV;
my $alignment_info=shift @ARGV;
my $all_contigs_fasta=shift @ARGV;
my $all_contigs_qual=shift @ARGV;
my $contigs_ace=shift @ARGV;
my $contigs_consed_ace=shift @ARGV;
my $contig_graph=shift @ARGV;
my $pair_align=shift @ARGV;
my $pair_status=shift @ARGV;
my $scaffolds_fasta=shift @ARGV;
my $scaffolds_qual=shift @ARGV;
my $scaffolds_agp=shift @ARGV;
my $tag_pair_align=shift @ARGV;
my $trim_status=shift @ARGV;
my $isotigs_ace=shift @ARGV;
my $isotigs_fasta=shift @ARGV;
my $isotigs_qual=shift @ARGV;
my $isotigs_agp=shift @ARGV;
my $isotigs_layout=shift @ARGV;

# REMOVE PARAMETERS FOR OPTIONAL FILES WHICH WERE NOT PROVIDED

my @cmd=removeUnusedOptions(@ARGV);
die("No command to run\n") unless @cmd;

# RUN COMMAND
# NOTE: FIRST ARG EXPECTED TO BE EXECUTABLE
# Backticks never die on a non-zero exit, so failure has to be detected from
# the wait status in $? (it is -1 when the command could not be started).
my $output=`@cmd 2>&1`;
my ($status, $oserr)=($?, "$!");
if ( $status != 0 ) {
    $output='' unless defined($output);
    print STDERR "Newbler ERROR: $output\n";
    # Relay Newbler's progress log, when there is one, to help diagnosis
    my $progress="$outdir/assembly/454NewblerProgress.txt";
    if ( -f $progress and open(my $fh, '<', $progress) ) {
        print STDERR $_ while <$fh>;
        close($fh);
    }
    die( $status == -1
        ? "Unable to run '$cmd[0]': $oserr\n"
        : sprintf("'%s' failed (wait status %d, exit code %d)\n", $cmd[0], $status, $status >> 8) );
}

get_outfile("$outdir/454NewblerMetrics.txt", $newbler_metrics);
get_outfile("$outdir/454ReadStatus.txt", $read_status);
get_outfile("$outdir/454TrimmedReads.fna", $trimmed_reads_fasta);
get_outfile("$outdir/454TrimmedReads.qual", $trimmed_reads_qual);
get_outfile("$outdir/454AlignmentInfo.tsv", $alignment_info);
get_outfile("$outdir/454AllContigs.fna", $all_contigs_fasta);
get_outfile("$outdir/454AllContigs.qual", $all_contigs_qual);
get_outfile("$outdir/454Contigs.ace", $contigs_ace);
get_outfile("$outdir/consed/edit_dir/454Contigs.ace.1", $contigs_consed_ace);
get_outfile("$outdir/454ContigGraph.txt", $contig_graph);
get_outfile("$outdir/454PairAlign.txt", $pair_align);
get_outfile("$outdir/454PairStatus.txt", $pair_status);
get_outfile("$outdir/454Scaffolds.fna", $scaffolds_fasta);
get_outfile("$outdir/454Scaffolds.qual", $scaffolds_qual);
get_outfile("$outdir/454Scaffolds.txt", $scaffolds_agp);
get_outfile("$outdir/454TagPairAlign.txt", $tag_pair_align);
get_outfile("$outdir/454TrimStatus.txt", $trim_status);
get_outfile("$outdir/454Isotigs.ace", $isotigs_ace);
get_outfile("$outdir/454Isotigs.fna", $isotigs_fasta);
get_outfile("$outdir/454Isotigs.qual", $isotigs_qual);
get_outfile("$outdir/454Isotigs.txt", $isotigs_agp);
get_outfile("$outdir/454IsotigsLayout.txt", $isotigs_layout);
exit;

# EVERY 'None' ARG AND ITS PRECEDING OPTION TAG ARE DISCARDED
# Takes the raw argument list and returns it with each 'None' value removed
# together with the argument immediately before it. Each argument is held back
# in $prev for one step so that it can still be dropped if 'None' follows.
sub removeUnusedOptions {
    my @cmd=();
    my $prev;
    foreach (@_) {
        unless ($_ eq 'None') {
            push @cmd, $prev if defined($prev);
            $prev=$_;
        } else {
            $prev=undef;
        }
    }
    push @cmd, $prev if defined($prev);
    return @cmd;
}

# Move a Newbler result file ($src) to its destination path ($dest).
# Nothing happens when $dest is empty/undefined or 'None', or when $src is not
# an existing regular file; a move that is attempted but fails is reported.
sub get_outfile {
    my ($src, $dest)=@_;
    # make sure dest defined and src exist; skip if dest is 'None'
    if ( $dest and $dest ne 'None' and $src and -f $src ) {
        move($src,$dest) or warn("Unable to move $src to $dest: $!\n");
    }
}

__END__
#!/usr/bin/env perl
# roche454/runAssembly_wrapper.pl
#
# Wrapper around Newbler's runAssembly.
#
# Usage: runAssembly_wrapper.pl OUTDIR OUTFILE{19} EXECUTABLE [OPTIONS...]
#
# The first 20 arguments are the Newbler output directory followed by 19
# destination paths, any of which may be the literal string 'None'. Everything
# after them is the command to run; its first word is expected to be the
# executable. After a successful run, each Newbler result file that exists is
# moved from OUTDIR to its destination path.

use warnings;
use strict;
use File::Copy;

# EXPECT 20 LEADING PATH ARGUMENTS (OUTPUT DIR + 19 OUTPUT FILES), SOME OF WHICH MAY BE 'None'
my $outdir=shift @ARGV;
my $newbler_metrics=shift @ARGV;
my $read_status=shift @ARGV;
my $trimmed_reads_fasta=shift @ARGV;
my $trimmed_reads_qual=shift @ARGV;
my $alignment_info=shift @ARGV;
my $all_contigs_fasta=shift @ARGV;
my $all_contigs_qual=shift @ARGV;
my $contigs_ace=shift @ARGV;
my $contigs_consed_ace=shift @ARGV;
my $contig_graph=shift @ARGV;
my $pair_align=shift @ARGV;
my $pair_status=shift @ARGV;
my $scaffolds_fasta=shift @ARGV;
my $scaffolds_qual=shift @ARGV;
my $scaffolds_agp=shift @ARGV;
my $tag_pair_align=shift @ARGV;
my $trim_status=shift @ARGV;
my $large_contigs_fasta=shift @ARGV;
my $large_contigs_qual=shift @ARGV;

# REMOVE PARAMETERS FOR OPTIONAL FILES WHICH WERE NOT PROVIDED

my @cmd=removeUnusedOptions(@ARGV);
die("No command to run\n") unless @cmd;

# RUN COMMAND
# NOTE: FIRST ARG EXPECTED TO BE EXECUTABLE
# Backticks never die on a non-zero exit, so failure has to be detected from
# the wait status in $? (it is -1 when the command could not be started).
my $output=`@cmd 2>&1`;
my ($status, $oserr)=($?, "$!");
if ( $status != 0 ) {
    $output='' unless defined($output);
    print STDERR "Newbler ERROR: $output\n";
    # Relay Newbler's progress log, when there is one, to help diagnosis
    my $progress="$outdir/assembly/454NewblerProgress.txt";
    if ( -f $progress and open(my $fh, '<', $progress) ) {
        print STDERR $_ while <$fh>;
        close($fh);
    }
    die( $status == -1
        ? "Unable to run '$cmd[0]': $oserr\n"
        : sprintf("'%s' failed (wait status %d, exit code %d)\n", $cmd[0], $status, $status >> 8) );
}

get_outfile("$outdir/454NewblerMetrics.txt", $newbler_metrics);
get_outfile("$outdir/454ReadStatus.txt", $read_status);
get_outfile("$outdir/454TrimmedReads.fna", $trimmed_reads_fasta);
get_outfile("$outdir/454TrimmedReads.qual", $trimmed_reads_qual);
get_outfile("$outdir/454AlignmentInfo.tsv", $alignment_info);
get_outfile("$outdir/454AllContigs.fna", $all_contigs_fasta);
get_outfile("$outdir/454AllContigs.qual", $all_contigs_qual);
get_outfile("$outdir/454Contigs.ace", $contigs_ace);
get_outfile("$outdir/consed/edit_dir/454Contigs.ace.1", $contigs_consed_ace);
get_outfile("$outdir/454ContigGraph.txt", $contig_graph);
get_outfile("$outdir/454PairAlign.txt", $pair_align);
get_outfile("$outdir/454PairStatus.txt", $pair_status);
get_outfile("$outdir/454Scaffolds.fna", $scaffolds_fasta);
get_outfile("$outdir/454Scaffolds.qual", $scaffolds_qual);
get_outfile("$outdir/454Scaffolds.txt", $scaffolds_agp);
get_outfile("$outdir/454TagPairAlign.txt", $tag_pair_align);
get_outfile("$outdir/454TrimStatus.txt", $trim_status);
get_outfile("$outdir/454LargeContigs.fna", $large_contigs_fasta);
get_outfile("$outdir/454LargeContigs.qual", $large_contigs_qual);
exit;

# EVERY 'None' ARG AND ITS PRECEDING OPTION TAG ARE DISCARDED
# Takes the raw argument list and returns it with each 'None' value removed
# together with the argument immediately before it. Each argument is held back
# in $prev for one step so that it can still be dropped if 'None' follows.
sub removeUnusedOptions {
    my @cmd=();
    my $prev;
    foreach (@_) {
        unless ($_ eq 'None') {
            push @cmd, $prev if defined($prev);
            $prev=$_;
        } else {
            $prev=undef;
        }
    }
    push @cmd, $prev if defined($prev);
    return @cmd;
}

# Move a Newbler result file ($src) to its destination path ($dest).
# Nothing happens when $dest is empty/undefined or 'None', or when $src is not
# an existing regular file; a move that is attempted but fails is reported.
sub get_outfile {
    my ($src, $dest)=@_;
    # make sure dest defined and src exist; skip if dest is 'None'
    if ( $dest and $dest ne 'None' and $src and -f $src ) {
        move($src,$dest) or warn("Unable to move $src to $dest: $!\n");
    }
}

__END__
<!-- roche454/runMapping.xml

     Galaxy tool definition for Newbler's runMapping (genomic mapping).

     The command template first hands the wrapper the directory Newbler
     writes into, then 20 output dataset paths in the exact order that
     runMapping_wrapper.pl shifts them off its argument list, and finally the
     Newbler executable with its options. Optional datasets that are unset
     and outputs that are filtered out render as the literal word None; the
     wrapper drops each None together with the option tag preceding it.

     Do not reorder the leading positional arguments without changing the
     wrapper as well. -->
<tool id="runMapping" name="runMapping" version="1.0.0">
<description>Map Roche/454 reads to a reference using Newbler</description>
<command interpreter='perl'>runMapping_wrapper.pl
$newbler_metrics.extra_files_path
$alignment_info
$all_contigs_fasta
$all_contigs_qual
$all_diffs
$all_struct_vars
$hc_diff
$hc_struct_vars
$mapping_qc
$newbler_metrics
$pair_align
$read_status
$ref_status
$tag_pair_align
$trim_status
$trimmed_reads_fasta
$trimmed_reads_qual
$contigs_ace
$large_contigs_fasta
$large_contigs_qual
$gene_status
$newbler_exe -o $newbler_metrics.extra_files_path
-cpu 8
-a $a
-e $e
-mi $mi
-ml $ml
-minlen $minlen
$pair
$info
$notrim
$tr
$ace
$no
$qo
$nor
$ud
-ss $ss
-sl $sl
-sc $sc
-ais $ais
-rst $rst
-hsl $hsl
-mcf $mcf
-vs $vs
-vt $vt
-fi $fi
-fe $fe
-l $l
$srv
$ref_type
-ref
#for $i in $ref_inputs
${i.ref_input}
#end for
-read
#for $i in $sff_paired_inputs
-p ${i.sff_paired_input}
#end for
#for $i in $sanger_paired_inputs
-p ${i.sanger_paired_input}
#end for
#for $i in $sff_inputs
${i.sff_input}
#end for
#for $i in $sanger_inputs
${i.sanger_input}
#end for
</command>
<inputs>
    <!-- NEWBLER VERSION -->
    <param name='newbler_exe' type='select' display='radio' label='Newbler version'>
        <option value='runMapping' selected='true'>default</option>
        <!-- OTHER VERSIONS MAY BE INCLUDED HERE; OR SIMPLY EDIT TO REMOVE THIS ENTIRE SECTION
        <option value='/jgi/tools/454/rig-DataProcessing_2.3/bin/runMapping'>2.3</option>
        <option value='/jgi/tools/454/rig-DataProcessing_2.4pre-20091204/bin/runMapping'>2.4</option>
        <option value='/home/copeland/local/x86_64/newbler/v2.5p1-internal-10Jun23-1/runMapping'>2.5</option>
        -->
    </param>

    <!-- READSEQ INFILES -->
    <repeat name="sff_inputs" title="Unpaired Reads Sff Files">
        <param name="sff_input" type="data" format="sff" label="SE Sff file"/>
    </repeat>
    <repeat name="sanger_inputs" title="Unpaired Reads Fasta Files">
        <param name="sanger_input" type="data" format="fasta" label="SE Fasta file"/>
    </repeat>
    <repeat name="sff_paired_inputs" title="Paired Reads Sff Files">
        <param name="sff_paired_input" type="data" format="sff" label="PE Sff file"/>
    </repeat>
    <repeat name="sanger_paired_inputs" title="Paired Reads Fasta Files">
        <param name="sanger_paired_input" type="data" format="fasta" label="PE Fasta file"/>
    </repeat>
    <!-- paired_reads is not placed on the command line; it is only read by the output filters below. -->
    <param name='paired_reads' type='select' display='radio' label='[-paired_reads] If supplying paired reads (above), do you want paired-read info?'>
        <option value='false'>no</option>
        <option value='true'>[-paired_reads] yes</option>
    </param>
    <param name='pair' type='select' display='radio' label='[-pair] Output pairwise overlaps'>
        <option value=''>no</option>
        <option value='-pair'>[-pair] yes</option>
    </param>

    <!-- SOURCE DNA TYPE -->
    <param name='l' type="integer" value='500' label="[-l] This option sets the minimum length for a contig to appear in the 454LargeContigs.fna file"/>

    <!-- INPUTS SPECIFIC TO MAPPING -->
    <repeat name='ref_inputs' title='Reference Sequence'>
        <param name='ref_input' type='data' format='fasta' label='Fasta file'/>
    </repeat>
    <param name='ref_type' type='select' display='radio' label='Reference type'>
        <option value='-gref'>[-gref] Genomic reference sequence</option>
        <option value='-cref'>[-cref] cDNA reference sequence</option>
    </param>
    <param name='rst' type='integer' value='12' label='[-rst] Repeat score threshold parameter. Allowed values: 0 or greater'/>
    <param name='hsl' type='integer' value='70' label='[-hsl] Hit-per-seed limit parameter'/>
    <!-- srv was previously declared but never referenced by the command template; it is now passed through. -->
    <param name='srv' type='boolean' truevalue='-srv' falsevalue='' checked='false' label='[-srv] Single read variant output'/>

    <!-- OPTIONAL ARGUMENTS -->
    <!-- NYI
    <param name='accno' type='data' format='tabular' optional='true' label='[-accno] Specify annotation data. Required only if reference sequence headers do not contain gene=NAME pairs' />
    <param name='annot' type='data' format='tabular' optional='true' label='[-annot] Supply gene, transcript, and protein information' />
    -->
    <param name='mcf' type='data' format='tabular' optional='true' label='[-mcf] Specify non-default MID config file' />
    <param name='fi' type='data' format='txt' optional='true' label='[-fi] Include filter file to be specified' />
    <param name='fe' type='data' format='txt' optional='true' label='[-fe] Exclude filter file to be specified' />
    <param name='vt' type='data' format='fasta' optional='true' label="[-vt] This option specifies a vector trimming database, or FASTA file of sequences to be used to trim the ends of input reads (for cloning vectors, primers, adapters or other end sequences)" />
    <param name='vs' type='data' format='fasta' optional='true' label="[-vs] This option specifies a vector screening database, or FASTA file of sequences to be used to screen the input reads for contaminants. Reads that completely align against the screening database are trimmed completely (so that it is not used in the computation), but otherwise the read trimpoints are not changed" />

    <!-- READ TRIMMING -->
    <param name='minlen' type='integer' value='20' label='[-minlen] Minimum length of reads to use (15-45 allowed)'/>
    <param name='notrim' type='boolean' truevalue='-notrim' falsevalue='' checked='false' label='[-notrim] Do not perform default quality and primer trimming of input reads'/>
    <param name='tr' type='select' display='radio' label='[-tr] Output trimmed reads'>
        <option value=''>no</option>
        <option value='-tr'>[-tr] yes</option>
    </param>
    <param name='nor' type='boolean' truevalue='-nor' falsevalue='' label='[-nor] Turn off the automatic rescore function for read quality scores'/>
    <param name='ud' type='boolean' truevalue='-ud' falsevalue='' label='[-ud] Treat each read separately, with no grouping of duplicates'/>

    <!-- ALIGNMENT PARAMETERS -->
    <param name='ss' type='integer' value='12' label='[-ss] Seed step parameter - The number of bases between seed generation locations used in the exact k-mer matching part of the overlap detection. Allow values: 1 or greater'/>
    <param name='sl' type='integer' value='16' label='[-sl] Seed length parameter - The number of bases used for each seed in the exact k-mer matching part of the overlap detection (i.e. the "k" value of the k-mer matching). Allowed values: 6-16'/>
    <param name='sc' type='integer' value='1' label='[-sc] Seed count parameter - The number of seeds required in a window before an extension is made. Allowed values: 1 or greater'/>
    <param name='ml' type="text" value='40' label="[-ml] Minimum overlap length - The minimum length of overlaps used for the pairwise alignment step. The value can either be a minimum length in bases or a percentage of read length. In the case of a percentage, simply include '%' immediately following the numeric value. Allowed values: 1 or greater"/>
    <param name='mi' type="integer" value='90' label="[-mi] Minimum overlap identity - The percent identity of overlaps used for the pairwise alignment step. Allowed values: 0 or greater"/>
    <param name='ais' type='integer' value='2' label='[-ais] Alignment identity score - When multiple overlaps are found, the per-overlap column identity score used to sort the overlaps for use in the progressive alignment. Allowed values: 0 or greater'/>

    <!-- ASSEMBLY OPTIONS -->
    <param name='e' type="integer" value='0' label="[-e] This option tells the assembler that the expected depth of the data is at a certain level. The assembler has been optimized for datasets in the 10-50x oversampling size, and this option helps the assembler with datasets that have a higher oversampling level. A value of 0 resets the assembler computation to use its default algorithms"/>

    <!-- OUTPUT OPTIONS -->
    <param name='no' type='select' display='radio' label='[-no] Do complete assembly'>
        <option value=''>do complete assembly</option>
        <option value='-no'>[-no] do not assemble; do alignments only</option>
    </param>
    <!-- truevalue/falsevalue were swapped, which sent -qo whenever the box was left unchecked (the default). -->
    <param name='qo' type='boolean' truevalue='-qo' falsevalue='' checked='false' label='[-qo] Generate quick output for mapping and assembly. Disables signal distribution computation for calling consensus sequences and can decrease accuracy'/>
    <param name='a' type="integer" value='100' label="[-a] This option sets the minimum length for a contig to appear in the 454AllContigs.fna file."/>
    <param name='info' type='select' display='radio' label='Output Alignment Info'>
        <option value='-info'>[-info] yes</option>
        <option value='-infoall'>[-infoall] yes, including 0-coverage positions</option>
    </param>
    <param name='ace' type='select' display='radio' label='Produce Ace assembly file'>
        <option value=''>no</option>
        <option value='-ace'>[-ace] yes</option>
    </param>
</inputs>

<outputs>
    <!-- the following are common to runMapping and runAssembly -->
    <data name='newbler_metrics' format='txt' />
    <data name='read_status' format='tabular' label='Read Status'/>
    <data name='trimmed_reads_fasta' format='fasta' label='Trimmed Reads (Fasta)'>
        <filter>tr == '-tr'</filter>
    </data>
    <data name='trimmed_reads_qual' format='qual454' label='Trimmed Reads (Qual)'>
        <filter>tr == '-tr'</filter>
    </data>
    <!-- the following produced only if no != '-no' -->
    <data name='alignment_info' format='tabular' label='Alignment Info'/>
    <data name='all_contigs_fasta' format='fasta' label='All Contigs (Fasta)'>
        <filter>no != '-no'</filter>
    </data>
    <data name='all_contigs_qual' format='qual454' label='All Contigs (Qual454)'>
        <filter>no != '-no'</filter>
    </data>
    <data name='contigs_ace' format='ace' label='Contigs (Ace)'>
        <filter>ace == '-ace' and no != '-no'</filter>
    </data>
    <data name='large_contigs_fasta' format='fasta' label='Large Contigs (Fasta)'>
        <filter>no != '-no'</filter>
    </data>
    <data name='large_contigs_qual' format='qual454' label='Large Contigs (Qual454)'>
        <filter>no != '-no'</filter>
    </data>
    <data name='pair_align' format='txt' label='Pairwise Alignments'>
        <filter>pair == '-pair' and no != '-no'</filter>
    </data>
    <!-- NOTE(review): pair_status and the three scaffolds outputs below are declared
         here but are not among the paths handed to the wrapper by the command
         template, so nothing in this tool fills them. Confirm whether they
         should be wired up or removed. -->
    <data name='pair_status' format='tabular' label='Paired-End Read Status'>
        <filter>paired_reads == 'true' and no != '-no'</filter>
    </data>
    <data name='scaffolds_fasta' format='fasta' label='Scaffolds (Fasta)'>
        <filter>paired_reads == 'true' and no != '-no'</filter>
    </data>
    <data name='scaffolds_qual' format='qual454' label='Scaffolds (Qual454)'>
        <filter>paired_reads == 'true' and no != '-no'</filter>
    </data>
    <data name='scaffolds_agp' format='tabular' label='Scaffolds (Agp)'>
        <filter>paired_reads == 'true' and no != '-no'</filter>
    </data>
    <data name='tag_pair_align' format='txt' label='Tag Pair Alignments'>
        <filter>pair == '-pair' and paired_reads == 'true' and no != '-no'</filter>
    </data>
    <data name='trim_status' format='tabular' label='Trim Status'/>

    <!-- THE FOLLOWING ARE LIMITED TO MAPPING -->
    <data name='all_diffs' format='tabular' label='All Diffs'/>
    <data name='all_struct_vars' format='tabular' label='All Struct Vars'/>
    <data name='hc_diff' format='tabular' label='High Confidence Diff'/>
    <data name='hc_struct_vars' format='tabular' label='High Confidence Struct Vars'/>
    <data name='gene_status' format='tabular' label='Gene Status'/>
    <data name='mapping_qc' format='xls' label='Mapping QC (Excel)'/>
    <data name='ref_status' format='tabular' label='Ref Status'/>

</outputs>
<help>

**What it does**

Map (Roche/454) reads to a reference using Newbler.

Download the manual here: http://galaxy.jgi-psf.org/static/manuals/GSFLXSystemSoftwareManual_PartC_Assembler-Mapper-SFFTools.pdf

.. class:: warningmark

**Fasta Header Format** Fasta input must provide any pairing information in the header using the expected key=value format. Use the 'Sanger tab to Newbler Fasta' tool.

</help>
</tool>
<!-- roche454/runMapping_cDNA.xml

     Galaxy tool definition for Newbler's runMapping in cDNA mode (-cdna).

     The command template first hands the wrapper the directory Newbler
     writes into, then 23 output dataset paths in the exact order that
     runMapping_cDNA_wrapper.pl shifts them off its argument list, and finally
     the Newbler executable with its options. Optional datasets that are
     unset and outputs that are filtered out render as the literal word None;
     the wrapper drops each None together with the option tag preceding it.

     Do not reorder the leading positional arguments without changing the
     wrapper as well. -->
<tool id="runMapping_cDNA" name="runMapping cDNA" version="1.0.0">
<description>Map Roche/454 reads to a reference using Newbler</description>
<command interpreter='perl'>runMapping_cDNA_wrapper.pl
$newbler_metrics.extra_files_path
$alignment_info
$all_contigs_fasta
$all_contigs_qual
$all_diffs
$all_struct_vars
$hc_diff
$hc_struct_vars
$mapping_qc
$newbler_metrics
$pair_align
$read_status
$ref_status
$tag_pair_align
$trim_status
$trimmed_reads_fasta
$trimmed_reads_qual
$contigs_ace
$gene_status
$isotigs_ace
$isotigs_fasta
$isotigs_qual
$isotigs_agp
$isotigs_layout
$newbler_exe -o $newbler_metrics.extra_files_path
-cpu 8
-a $a
-e $e
-mi $mi
-ml $ml
-minlen $minlen
$pair
$info
$notrim
$tr
$ace
$no
$qo
$nor
$ud
-ss $ss
-sl $sl
-sc $sc
-ais $ais
-rst $rst
-hsl $hsl
-mcf $mcf
-vs $vs
-vt $vt
-fi $fi
-fe $fe
-cdna
$srv
-ig $ig
-it $it
-icc $icc
-icl $icl
$ref_type
-ref
#for $i in $ref_inputs
${i.ref_input}
#end for
-read
#for $i in $sff_paired_inputs
-p ${i.sff_paired_input}
#end for
#for $i in $sanger_paired_inputs
-p ${i.sanger_paired_input}
#end for
#for $i in $sff_inputs
${i.sff_input}
#end for
#for $i in $sanger_inputs
${i.sanger_input}
#end for
</command>
<inputs>
    <!-- NEWBLER VERSION -->
    <param name='newbler_exe' type='select' display='radio' label='Newbler version'>
        <option value='runMapping' selected='true'>default</option>
        <!-- OTHER VERSIONS MAY BE INCLUDED HERE; OR SIMPLY EDIT TO REMOVE THIS ENTIRE SECTION
        <option value='/jgi/tools/454/rig-DataProcessing_2.3/bin/runMapping'>2.3</option>
        <option value='/jgi/tools/454/rig-DataProcessing_2.4pre-20091204/bin/runMapping'>2.4</option>
        <option value='/home/copeland/local/x86_64/newbler/v2.5p1-internal-10Jun23-1/runMapping'>2.5</option>
        -->
    </param>

    <!-- READSEQ INFILES -->
    <repeat name="sff_inputs" title="Unpaired Reads Sff Files">
        <param name="sff_input" type="data" format="sff" label="SE Sff file"/>
    </repeat>
    <repeat name="sanger_inputs" title="Unpaired Reads Fasta Files">
        <param name="sanger_input" type="data" format="fasta" label="SE Fasta file"/>
    </repeat>
    <repeat name="sff_paired_inputs" title="Paired Reads Sff Files">
        <param name="sff_paired_input" type="data" format="sff" label="PE Sff file"/>
    </repeat>
    <repeat name="sanger_paired_inputs" title="Paired Reads Fasta Files">
        <param name="sanger_paired_input" type="data" format="fasta" label="PE Fasta file"/>
    </repeat>
    <!-- paired_reads is not placed on the command line; it is only read by the output filters below. -->
    <param name='paired_reads' type='select' display='radio' label='[-paired_reads] If supplying paired reads (above), do you want paired-read info?'>
        <option value='false'>no</option>
        <option value='true'>[-paired_reads] yes</option>
    </param>
    <param name='pair' type='select' display='radio' label='[-pair] Output pairwise overlaps'>
        <option value=''>no</option>
        <option value='-pair'>[-pair] yes</option>
    </param>

    <!-- SOURCE DNA TYPE -->
    <param name='it' type='integer' value='100' label='[-it] Specify the maximum number of isotigs in an isogroup. Maximum is 10,000.'/>
    <param name='ig' type='integer' value='500' label='[-ig] Specify the maximum number of contigs in an isogroup.'/>
    <param name='icc' type='integer' value='100' label='[-icc] Specify the maximum number of contigs in an isotig. Maximum is 200 and corresponds to the recursion depth during graph traversal'/>
    <param name='icl' type='integer' value='3' label='[-icl] Specify the minimum length a contig must be to be part of an isotig. Minimum is 3bp.'/>

    <!-- INPUTS SPECIFIC TO MAPPING -->
    <repeat name='ref_inputs' title='Reference Sequence'>
        <param name='ref_input' type='data' format='fasta' label='Fasta file'/>
    </repeat>
    <param name='ref_type' type='select' display='radio' label='Reference type'>
        <option value='-gref'>[-gref] Genomic reference sequence</option>
        <option value='-cref'>[-cref] cDNA reference sequence</option>
    </param>
    <param name='rst' type='integer' value='12' label='[-rst] Repeat score threshold parameter. Allowed values: 0 or greater'/>
    <param name='hsl' type='integer' value='70' label='[-hsl] Hit-per-seed limit parameter'/>
    <param name='srv' type='boolean' truevalue='-srv' falsevalue='' checked='false' label='[-srv] Single read variant output'/>

    <!-- OPTIONAL ARGUMENTS -->
    <!-- NYI
    <param name='accno' type='data' format='tabular' optional='true' label='[-accno] Specify annotation data. Required only if reference sequence headers do not contain gene=NAME pairs' />
    <param name='annot' type='data' format='tabular' optional='true' label='[-annot] Supply gene, transcript, and protein information' />
    -->
    <param name='mcf' type='data' format='tabular' optional='true' label='[-mcf] Specify non-default MID config file' />
    <param name='fi' type='data' format='txt' optional='true' label='[-fi] Include filter file to be specified' />
    <param name='fe' type='data' format='txt' optional='true' label='[-fe] Exclude filter file to be specified' />
    <param name='vt' type='data' format='fasta' optional='true' label="[-vt] This option specifies a vector trimming database, or FASTA file of sequences to be used to trim the ends of input reads (for cloning vectors, primers, adapters or other end sequences)" />
    <param name='vs' type='data' format='fasta' optional='true' label="[-vs] This option specifies a vector screening database, or FASTA file of sequences to be used to screen the input reads for contaminants. Reads that completely align against the screening database are trimmed completely (so that it is not used in the computation), but otherwise the read trimpoints are not changed" />

    <!-- READ TRIMMING -->
    <param name='minlen' type='integer' value='20' label='[-minlen] Minimum length of reads to use (15-45 allowed)'/>
    <param name='notrim' type='boolean' truevalue='-notrim' falsevalue='' checked='false' label='[-notrim] Do not perform default quality and primer trimming of input reads'/>
    <param name='tr' type='select' display='radio' label='[-tr] Output trimmed reads'>
        <option value=''>no</option>
        <option value='-tr'>[-tr] yes</option>
    </param>
    <param name='nor' type='boolean' truevalue='-nor' falsevalue='' label='[-nor] Turn off the automatic rescore function for read quality scores'/>
    <param name='ud' type='boolean' truevalue='-ud' falsevalue='' label='[-ud] Treat each read separately, with no grouping of duplicates'/>

    <!-- ALIGNMENT PARAMETERS -->
    <param name='ss' type='integer' value='12' label='[-ss] Seed step parameter - The number of bases between seed generation locations used in the exact k-mer matching part of the overlap detection. Allow values: 1 or greater'/>
    <param name='sl' type='integer' value='16' label='[-sl] Seed length parameter - The number of bases used for each seed in the exact k-mer matching part of the overlap detection (i.e. the "k" value of the k-mer matching). Allowed values: 6-16'/>
    <param name='sc' type='integer' value='1' label='[-sc] Seed count parameter - The number of seeds required in a window before an extension is made. Allowed values: 1 or greater'/>
    <param name='ml' type="text" value='40' label="[-ml] Minimum overlap length - The minimum length of overlaps used for the pairwise alignment step. The value can either be a minimum length in bases or a percentage of read length. In the case of a percentage, simply include '%' immediately following the numeric value. Allowed values: 1 or greater"/>
    <param name='mi' type="integer" value='90' label="[-mi] Minimum overlap identity - The percent identity of overlaps used for the pairwise alignment step. Allowed values: 0 or greater"/>
    <param name='ais' type='integer' value='2' label='[-ais] Alignment identity score - When multiple overlaps are found, the per-overlap column identity score used to sort the overlaps for use in the progressive alignment. Allowed values: 0 or greater'/>

    <!-- ASSEMBLY OPTIONS -->
    <param name='e' type="integer" value='0' label="[-e] This option tells the assembler that the expected depth of the data is at a certain level. The assembler has been optimized for datasets in the 10-50x oversampling size, and this option helps the assembler with datasets that have a higher oversampling level. A value of 0 resets the assembler computation to use its default algorithms"/>

    <!-- OUTPUT OPTIONS -->
    <param name='no' type='select' display='radio' label='[-no] Do complete assembly'>
        <option value=''>do complete assembly</option>
        <option value='-no'>[-no] do not assemble; do alignments only</option>
    </param>
    <!-- truevalue/falsevalue were swapped, which sent -qo whenever the box was left unchecked (the default). -->
    <param name='qo' type='boolean' truevalue='-qo' falsevalue='' checked='false' label='[-qo] Generate quick output for mapping and assembly. Disables signal distribution computation for calling consensus sequences and can decrease accuracy'/>
    <param name='a' type="integer" value='100' label="[-a] This option sets the minimum length for a contig to appear in the 454AllContigs.fna file."/>
    <param name='info' type='select' display='radio' label='Output Alignment Info'>
        <option value='-info'>[-info] yes</option>
        <option value='-infoall'>[-infoall] yes, including 0-coverage positions</option>
    </param>
    <param name='ace' type='select' display='radio' label='Produce Ace assembly file'>
        <option value=''>no</option>
        <option value='-ace'>[-ace] yes</option>
    </param>
</inputs>

<outputs>
    <!-- the following are common to runMapping and runAssembly -->
    <data name='newbler_metrics' format='txt' />
    <data name='read_status' format='tabular' label='Read Status'/>
    <data name='trimmed_reads_fasta' format='fasta' label='Trimmed Reads (Fasta)'>
        <filter>tr == '-tr'</filter>
    </data>
    <data name='trimmed_reads_qual' format='qual454' label='Trimmed Reads (Qual)'>
        <filter>tr == '-tr'</filter>
    </data>
    <!-- the following produced only if no != '-no' -->
    <data name='alignment_info' format='tabular' label='Alignment Info'/>
    <data name='all_contigs_fasta' format='fasta' label='All Contigs (Fasta)'>
        <filter>no != '-no'</filter>
    </data>
    <data name='all_contigs_qual' format='qual454' label='All Contigs (Qual454)'>
        <filter>no != '-no'</filter>
    </data>
    <data name='contigs_ace' format='ace' label='Contigs (Ace)'>
        <filter>ace == '-ace' and no != '-no'</filter>
    </data>
    <data name='pair_align' format='txt' label='Pairwise Alignments'>
        <filter>pair == '-pair' and no != '-no'</filter>
    </data>
    <!-- NOTE(review): pair_status and the three scaffolds outputs below are declared
         here but are not among the paths handed to the wrapper by the command
         template, so nothing in this tool fills them. Confirm whether they
         should be wired up or removed. -->
    <data name='pair_status' format='tabular' label='Paired-End Read Status'>
        <filter>paired_reads == 'true' and no != '-no'</filter>
    </data>
    <data name='scaffolds_fasta' format='fasta' label='Scaffolds (Fasta)'>
        <filter>paired_reads == 'true' and no != '-no'</filter>
    </data>
    <data name='scaffolds_qual' format='qual454' label='Scaffolds (Qual454)'>
        <filter>paired_reads == 'true' and no != '-no'</filter>
    </data>
    <data name='scaffolds_agp' format='tabular' label='Scaffolds (Agp)'>
        <filter>paired_reads == 'true' and no != '-no'</filter>
    </data>
    <data name='tag_pair_align' format='txt' label='Tag Pair Alignments'>
        <filter>pair == '-pair' and paired_reads == 'true' and no != '-no'</filter>
    </data>
    <data name='trim_status' format='tabular' label='Trim Status'/>

    <!-- THE FOLLOWING ARE LIMITED TO MAPPING -->
    <data name='all_diffs' format='tabular' label='All Diffs'/>
    <data name='all_struct_vars' format='tabular' label='All Struct Vars'/>
    <data name='hc_diff' format='tabular' label='High Confidence Diff'/>
    <data name='hc_struct_vars' format='tabular' label='High Confidence Struct Vars'/>
    <data name='gene_status' format='tabular' label='Gene Status'/>
    <data name='mapping_qc' format='xls' label='Mapping QC (Excel)'/>
    <data name='ref_status' format='tabular' label='Ref Status'/>

    <!-- ISOTIGS -->
    <data name='isotigs_ace' format='ace' label='Isotigs (Ace)'>
        <filter>ace != '' and no != '-no'</filter>
    </data>
    <data name='isotigs_fasta' format='fasta' label='Isotigs (Fasta)'>
        <filter>no != '-no'</filter>
    </data>
    <data name='isotigs_qual' format='qual454' label='Isotigs (Qual454)'>
        <filter>no != '-no'</filter>
    </data>
    <data name='isotigs_agp' format='tabular' label='Isotigs (Agp)'>
        <filter>no != '-no'</filter>
    </data>
    <data name='isotigs_layout' format='txt' label='Isotig Layout'>
        <filter>no != '-no'</filter>
    </data>

</outputs>
<help>

**What it does**

Map (Roche/454) reads to a reference using Newbler.

Download the manual here: http://galaxy.jgi-psf.org/static/manuals/GSFLXSystemSoftwareManual_PartC_Assembler-Mapper-SFFTools.pdf

.. class:: warningmark

**Fasta Header Format** Fasta input must provide any pairing information in the header using the expected key=value format. Use the 'Sanger tab to Newbler Fasta' tool.

</help>
</tool>
#!/usr/bin/env perl
# roche454/runMapping_cDNA_wrapper.pl
#
# Galaxy wrapper around Newbler's runMapping in cDNA mode.
#
# Usage:
#   runMapping_cDNA_wrapper.pl OUTDIR DEST_1 .. DEST_23 EXECUTABLE [ARGS...]
#
#   OUTDIR      directory Newbler writes its 454* files into
#   DEST_n      destination path for the n-th file of @NEWBLER_OUTPUTS, or the
#               literal string 'None' when that output was not requested
#   EXECUTABLE  the Newbler program to run, followed by its arguments; any
#               'None' argument is dropped together with the option before it
#
# The command is run through the shell with stderr merged into stdout. On a
# non-zero exit status the captured output and the Newbler progress log are
# written to STDERR and the script dies; otherwise every output file that
# exists is moved to its destination.

use warnings;
use strict;
use File::Copy;

# Newbler output files, in the order their destinations appear on the command
# line. This order must match the <command> block of runMapping_cDNA.xml.
my @NEWBLER_OUTPUTS = qw(
    454AlignmentInfo.tsv
    454AllContigs.fna
    454AllContigs.qual
    454AllDiffs.txt
    454AllStructVars.txt
    454HCDiff.txt
    454HCStructVars.txt
    454MappingQC.xls
    454NewblerMetrics.txt
    454PairAlign.txt
    454ReadStatus.txt
    454RefStatus.txt
    454TagPairAlign.txt
    454TrimStatus.txt
    454TrimmedReads.fna
    454TrimmedReads.qual
    454Contigs.ace
    454GeneStatus.txt
    454Isotigs.ace
    454Isotigs.fna
    454Isotigs.qual
    454Isotigs.txt
    454IsotigsLayout.txt
);

# Need OUTDIR, one destination per output file, and at least the executable.
if ( @ARGV < @NEWBLER_OUTPUTS + 2 ) {
    die "Expected an output dir, " . scalar(@NEWBLER_OUTPUTS)
      . " output paths and a command; got " . scalar(@ARGV) . " args\n";
}

my $outdir = shift @ARGV;
my @dests  = splice( @ARGV, 0, scalar @NEWBLER_OUTPUTS );

# REMOVE PARAMETERS FOR OPTIONAL FILES WHICH WERE NOT PROVIDED
my @cmd = removeUnusedOptions(@ARGV);

# RUN COMMAND
# NOTE: FIRST ARG EXPECTED TO BE EXECUTABLE
# Backticks never die on failure, so the child status in $? is what tells us
# whether Newbler succeeded.
my $output = `@cmd 2>&1`;
my ( $status, $oserr ) = ( $?, "$!" );
if ( $status != 0 ) {
    $output = '' unless defined $output;
    print STDERR "Newbler ERROR: $output\n";
    print_progress_log($outdir);
    die "Failed to execute '$cmd[0]': $oserr\n" if $status == -1;
    die "'$cmd[0]' failed (exit code " . ( $status >> 8 )
      . ", wait status $status)\n";
}

for my $i ( 0 .. $#NEWBLER_OUTPUTS ) {
    get_outfile( "$outdir/$NEWBLER_OUTPUTS[$i]", $dests[$i] );
}
exit;

# Return the argument list with every 'None' argument, and the argument
# immediately preceding it (its option tag), removed.
sub removeUnusedOptions {
    my @cmd = ();
    my $prev;    # last argument seen, held back until we know it is not a tag for 'None'
    foreach (@_) {
        unless ( $_ eq 'None' ) {
            push @cmd, $prev if defined($prev);
            $prev = $_;
        } else {
            $prev = undef;
        }
    }
    push @cmd, $prev if defined($prev);
    return @cmd;
}

# Move $src to $dest when a destination was requested (defined, non-empty and
# not 'None') and $src is an existing plain file. A failed move is reported on
# STDERR instead of being silently ignored.
sub get_outfile {
    my ( $src, $dest ) = @_;
    if ( $dest and $dest ne 'None' and $src and -f $src ) {
        move( $src, $dest )
          or warn "Could not move $src to $dest: $!\n";
    }
}

# Copy the first Newbler progress log found under $dir to STDERR.
# The output files are collected directly from $dir, so the log is looked for
# there first; the sub-directory locations are kept as fallbacks.
sub print_progress_log {
    my ($dir) = @_;
    foreach my $log (
        "$dir/454NewblerProgress.txt",
        "$dir/mapping/454NewblerProgress.txt",
        "$dir/assembly/454NewblerProgress.txt"
      )
    {
        next unless -f $log;
        copy( $log, \*STDERR );
        return;
    }
}

__END__
#!/usr/bin/env perl
# roche454/runMapping_wrapper.pl
#
# Galaxy wrapper around Newbler's runMapping.
#
# Usage:
#   runMapping_wrapper.pl OUTDIR DEST_1 .. DEST_20 EXECUTABLE [ARGS...]
#
#   OUTDIR      directory Newbler writes its 454* files into
#   DEST_n      destination path for the n-th file of @NEWBLER_OUTPUTS, or the
#               literal string 'None' when that output was not requested
#   EXECUTABLE  the Newbler program to run, followed by its arguments; any
#               'None' argument is dropped together with the option before it
#
# The command is run through the shell with stderr merged into stdout. On a
# non-zero exit status the captured output and the Newbler progress log are
# written to STDERR and the script dies; otherwise every output file that
# exists is moved to its destination.

use strict;
use warnings;
use File::Copy;

# Newbler output files, in the order their destinations appear on the command
# line. This order must match the <command> block of runMapping.xml.
my @NEWBLER_OUTPUTS = qw(
    454AlignmentInfo.tsv
    454AllContigs.fna
    454AllContigs.qual
    454AllDiffs.txt
    454AllStructVars.txt
    454HCDiff.txt
    454HCStructVars.txt
    454MappingQC.xls
    454NewblerMetrics.txt
    454PairAlign.txt
    454ReadStatus.txt
    454RefStatus.txt
    454TagPairAlign.txt
    454TrimStatus.txt
    454TrimmedReads.fna
    454TrimmedReads.qual
    454Contigs.ace
    454LargeContigs.fna
    454LargeContigs.qual
    454GeneStatus.txt
);

# Need OUTDIR, one destination per output file, and at least the executable.
if ( @ARGV < @NEWBLER_OUTPUTS + 2 ) {
    die "Expected an output dir, " . scalar(@NEWBLER_OUTPUTS)
      . " output paths and a command; got " . scalar(@ARGV) . " args\n";
}

my $outdir = shift @ARGV;
my @dests  = splice( @ARGV, 0, scalar @NEWBLER_OUTPUTS );

# REMOVE PARAMETERS FOR OPTIONAL FILES WHICH WERE NOT PROVIDED
my @cmd = removeUnusedOptions(@ARGV);

# RUN COMMAND
# NOTE: FIRST ARG EXPECTED TO BE EXECUTABLE
# Backticks never die on failure, so the child status in $? is what tells us
# whether Newbler succeeded.
my $output = `@cmd 2>&1`;
my ( $status, $oserr ) = ( $?, "$!" );
if ( $status != 0 ) {
    $output = '' unless defined $output;
    print STDERR "Newbler ERROR: $output\n";
    print_progress_log($outdir);
    die "Failed to execute '$cmd[0]': $oserr\n" if $status == -1;
    die "'$cmd[0]' failed (exit code " . ( $status >> 8 )
      . ", wait status $status)\n";
}

for my $i ( 0 .. $#NEWBLER_OUTPUTS ) {
    get_outfile( "$outdir/$NEWBLER_OUTPUTS[$i]", $dests[$i] );
}
exit;

# Return the argument list with every 'None' argument, and the argument
# immediately preceding it (its option tag), removed.
sub removeUnusedOptions {
    my @cmd = ();
    my $prev;    # last argument seen, held back until we know it is not a tag for 'None'
    foreach (@_) {
        unless ( $_ eq 'None' ) {
            push @cmd, $prev if defined($prev);
            $prev = $_;
        } else {
            $prev = undef;
        }
    }
    push @cmd, $prev if defined($prev);
    return @cmd;
}

# Move $src to $dest when a destination was requested (defined, non-empty and
# not 'None') and $src is an existing plain file. A failed move is reported on
# STDERR instead of being silently ignored.
sub get_outfile {
    my ( $src, $dest ) = @_;
    if ( $dest and $dest ne 'None' and $src and -f $src ) {
        move( $src, $dest )
          or warn "Could not move $src to $dest: $!\n";
    }
}

# Copy the first Newbler progress log found under $dir to STDERR.
# The output files are collected directly from $dir, so the log is looked for
# there first; the sub-directory locations are kept as fallbacks.
sub print_progress_log {
    my ($dir) = @_;
    foreach my $log (
        "$dir/454NewblerProgress.txt",
        "$dir/mapping/454NewblerProgress.txt",
        "$dir/assembly/454NewblerProgress.txt"
      )
    {
        next unless -f $log;
        copy( $log, \*STDERR );
        return;
    }
}

__END__
<!-- roche454/sff_to_fastq.xml

     Galaxy tool definition for the SFF to FASTQ converter. The command hands
     sff_to_fastq_converter.pl three arguments: the input SFF dataset, the
     output dataset's extra-files directory, and the output dataset path. -->
<tool id="sff_to_fastq" name="Sff to Fastq Converter" version="1.0.0">
    <description>Convert SFF to Fastq</description>
    <command interpreter="perl">sff_to_fastq_converter.pl $input $output.extra_files_path $output</command>
    <inputs>
        <param name="input"
               type="data"
               format="sff"
               label="Roche/454 Sff"/>
    </inputs>
    <outputs>
        <data name="output" format="fastq"/>
    </outputs>
    <help>Convert Sff to Fastq</help>
</tool>
#!/usr/bin/env perl
# roche454/sff_to_fastq_converter.pl
#
# Convert an SFF file to FASTQ.
#
# Usage:
#   sff_to_fastq_converter.pl INPUT.sff EXTRA_FILES_DIR OUTPUT.fastq
#
# Steps:
#   1. sffinfo -seq  INPUT.sff  ->  EXTRA_FILES_DIR/<basename>.fasta
#   2. sffinfo -qual INPUT.sff  ->  EXTRA_FILES_DIR/<basename>.qual
#   3. fasta_qual_to_fastq <fasta> <qual> OUTPUT.fastq
#   4. remove the two intermediate files
#
# Every external command is run without a shell (so paths need no quoting)
# and its exit status is checked; the script dies on the first failure.

use warnings;
use strict;
use Getopt::Long;
use File::Basename;

# VALIDATE INPUT
die("Expected 3 args") unless @ARGV == 3;
my ($sff, $extra_files_path, $fastq) = @ARGV;

# DEFINE PATHS
unless ( -d $extra_files_path ) {
    mkdir($extra_files_path)
      or die("ERROR: cannot create directory $extra_files_path: $!\n");
}
my $base  = basename($sff);
my $fasta = "$extra_files_path/$base.fasta";
my $qual  = "$extra_files_path/$base.qual";

# GENERATE FASTA, QUAL, FASTQ
run_to_file( $fasta, 'sffinfo', '-seq',  $sff );
run_to_file( $qual,  'sffinfo', '-qual', $sff );
run_checked( 'fasta_qual_to_fastq', $fasta, $qual, $fastq );
unlink($fasta, $qual);
exit 0;

# Run a command (list form, no shell), letting its output pass straight
# through, and die if it cannot be started or exits non-zero.
sub run_checked {
    my @cmd = @_;
    my $rc = system(@cmd);
    if ( $rc == -1 ) {
        die("ERROR: cannot run '@cmd': $!\n");
    }
    elsif ( $rc != 0 ) {
        die("ERROR: '@cmd' failed (exit code " . ( $rc >> 8 ) . ", wait status $rc)\n");
    }
}

# Run a command (list form, no shell) and write its stdout to $outfile.
# Dies if the command cannot be started, the file cannot be written, or the
# command exits non-zero.
sub run_to_file {
    my ( $outfile, @cmd ) = @_;
    open( my $in, '-|', @cmd )
      or die("ERROR: cannot run '@cmd': $!\n");
    open( my $out, '>', $outfile )
      or die("ERROR: cannot write $outfile: $!\n");
    while ( my $line = <$in> ) {
        print {$out} $line;
    }
    close($out)
      or die("ERROR: cannot finish writing $outfile: $!\n");
    # close() on a pipe waits for the child and returns false on a non-zero exit.
    close($in)
      or die("ERROR: '@cmd' failed (exit code " . ( $? >> 8 ) . ", wait status $?)\n");
}
<!-- roche454/sfffile.xml

     Galaxy tool definition for Newbler's sfffile utility. The command runs
     sfffile with -i (include) or -e (exclude) followed by the read-ID list,
     writes to the output dataset, and takes every dataset in the "inputs"
     repeat as an input SFF file. -->
<tool id="sfffile" name="Sff File" version="1.0.0">
<description>Select reads to include or exclude from one or more input Sff files</description>
<command>sfffile -$include_or_exclude $list_file -o $output
#for $i in $inputs
${i.input}
#end for
</command>
<inputs>
    <repeat name="inputs" title="Input Files">
        <param name="input" type="data" format="sff" label="Sff file"/>
    </repeat>
    <!-- format was "text"; the other tools in this suite use "txt" for plain-text inputs. -->
    <param name="list_file" type="data" format="txt" label="List of read IDs"/>
    <param name="include_or_exclude" type="select" label="Include or exclude the named reads?">
        <option value='i' selected='true'>include</option>
        <option value='e'>exclude</option>
    </param>
</inputs>

<outputs>
    <data name="output" format="sff"/>
</outputs>

<help>

**What it does**

This tool creates an Sff file, either including or excluding named reads.
</help>
</tool>
<!-- roche454/suite_config.xml

     Suite manifest listing every tool in the roche454 tool suite. Each
     tool entry repeats the id, name and version declared in that tool's own
     XML file. -->
<suite id="roche454_toolsuite" name="Suite of Newbler tools" version="1.0.0">
    <description>This suite contains Roche/454's Newbler and sff-fastq converter</description>
    <tool id="runAssembly" name="runAssembly" version="1.0.0">
        <description>Assemble 454 gDNA reads</description>
    </tool>
    <tool id="runAssembly_cDNA" name="runAssembly cDNA" version="1.0.0">
        <description>Assemble 454 cDNA reads</description>
    </tool>
    <!-- Was "Map 454 cDNA reads to reference", identical to the cDNA entry below;
         changed to parallel the runAssembly / runAssembly_cDNA pair. -->
    <tool id="runMapping" name="runMapping" version="1.0.0">
        <description>Map 454 gDNA reads to reference</description>
    </tool>
    <tool id="runMapping_cDNA" name="runMapping cDNA" version="1.0.0">
        <description>Map 454 cDNA reads to reference</description>
    </tool>
    <tool id="sff_to_fastq" name="Sff to Fastq Converter" version="1.0.0">
        <description>Convert SFF to Fastq</description>
    </tool>
    <tool id="sfffile" name="Sff File" version="1.0.0">
        <description>Utility to make Sff files</description>
    </tool>
</suite>