Miscellaneous |
Version lineage of this tool (guids ordered most recent to oldest) |
toolshed.g2.bx.psu.edu/repos/iuc/stacks2_gstacks/stacks2_gstacks/2.55+galaxy4 |
toolshed.g2.bx.psu.edu/repos/iuc/stacks2_gstacks/stacks2_gstacks/2.55+galaxy3 (this tool) |
toolshed.g2.bx.psu.edu/repos/iuc/stacks2_gstacks/stacks2_gstacks/2.55+galaxy2 |
toolshed.g2.bx.psu.edu/repos/iuc/stacks2_gstacks/stacks2_gstacks/2.55+galaxy1 |
toolshed.g2.bx.psu.edu/repos/iuc/stacks2_gstacks/stacks2_gstacks/2.55+galaxy0 |
toolshed.g2.bx.psu.edu/repos/iuc/stacks2_gstacks/stacks2_gstacks/2.53+galaxy0 |
toolshed.g2.bx.psu.edu/repos/iuc/stacks2_gstacks/stacks2_gstacks/2.4+galaxy1 |
toolshed.g2.bx.psu.edu/repos/iuc/stacks2_gstacks/stacks2_gstacks/2.4+galaxy0 |
stacks2_gstacks |
Requirements (dependencies defined in the <requirements> tag set) |
name | version | type |
stacks | 2.55 | package |
python | 3.7 | package |
findutils | 4.6.0 | package |
samtools | 1.13 | package |
Additional information about this tool |
#from os.path import splitext
#import re

## clean_ext: repeatedly strip known read-number / sequence / compression /
## alignment suffixes from a data set identifier and emit the remaining stem.
## NOTE: the body is kept unindented on purpose, the def *emits* its result as
## text (no #return), so any stray whitespace would become part of the name.
#def clean_ext($identifier)
#while $identifier.endswith(('.1', '.2', '.fa', '.fq', '.fasta', '.fastq', '.gz', '.gzip', '.sam', '.bam'))
#set $identifier = splitext($identifier)[0]
#end while
$identifier#slurp
#end def

## fastq_input_foo: determine link command, link path, sample name, and stacks
## input type for a single data set (or one direction of a paired collection)
##
## inputs
## - sample          data set or paired collection
## - read_direction  key of the collection element to use ("forward"/"reverse"),
##                   only evaluated if the sample is a collection
## - infix           string inserted between the sample name and the extension
## return (link_command, data_path, name, inputype)
#def fastq_input_foo( $sample, $read_direction="", $infix="" )
    #set $name = $clean_ext($sample.element_identifier)
    #if $sample.is_collection:
        #set $cur_sample=$sample[$read_direction]
    #else:
        #set $cur_sample=$sample
    #end if
    #if $cur_sample.is_of_type('fastqsanger')
        #set $ext = "fastq"
        #set $inputype = "fastq"
    #else if $cur_sample.is_of_type('fastqsanger.gz')
        #set $ext = "fastq.gz"
        #set $inputype = "gzfastq"
    #else if $cur_sample.is_of_type('fasta')
        #set $ext = "fasta"
        #set $inputype = "fasta"
    #else if $cur_sample.is_of_type('fasta.gz')
        #set $ext = "fasta.gz"
        #set $inputype = "gzfasta"
    #else
        ## NOTE(review): $ext is not assigned in this branch, so building
        ## $data_path below cannot succeed for an unsupported data type;
        ## presumably the tool's input format restrictions keep this
        ## branch from being reached - confirm
        #set $inputype = "UNKNOWN"
    #end if
    #set $data_path = "stacks_inputs/"+$name+$infix+"."+$ext
    #set $link_cmd = "ln -s '%s' '%s' &&" % ($cur_sample, $data_path)
    #return ($link_cmd, $data_path, $name, $inputype)
#end def

## fastq_input_batch determine link command, access path(s), and input type
## for batch tools
##
## inputs
## - sample data set / pair
## - type "single" / "paired"
## return (link_command, fwd_path, rev_path, inputype)
## - link_command bash command(s) to link the data sets
## - fwd_path file name of the link to the forward data set
## - rev_path file name of the link to the reverse data set (if type=paired,
##   empty string otherwise)
## - inputype input type as used in stacks ([gz]fast(a|q))
#def fastq_input_batch($sample, $type)
    #if $type == "single"
        #set ($link_cmd, $path, $name, $inputype) = $fastq_input_foo($sample, "", "")
        #return ($link_cmd, $path, "", $inputype)
    #else:
        #set ($fwd_link_cmd, $fwd_path, $name, $inputype) = $fastq_input_foo($sample, "forward", ".1")
        #set ($rev_link_cmd, $rev_path, $name, $inputype) = $fastq_input_foo($sample, "reverse", ".2")
        #return ( $fwd_link_cmd+$rev_link_cmd, $fwd_path, $rev_path, $inputype)
    #end if
#end def

## fastq_input_nonbatch determine link command, access path(s), and input type
## for non-batch tools (procrad, shortreads, denovomap the former need R[12]_
## and the latter needs .[12])
##
## inputs
## - samples list of data set / pair
## - type "single" / "paired"
## - infix_pattern pattern for the infix of the files (needs to contain %d which is replaced by 1/2)
## return (link_command, inputype)
## - link_command bash command(s) to link the data sets
## - inputype input type as used in stacks ([gz]fast(a|q)); this is the
##   type determined for the last processed sample
#def fastq_input_nonbatch( $samples, $type, $infix_pattern )
    #set $link_command = ""
    #for $sample in $samples
        #if $type == "single"
            #set ($lc, $path, $name, $inputype) = $fastq_input_foo($sample, "", "")
            #set $link_command += $lc
        #else:
            #set ($lc, $path, $name, $inputype) = $fastq_input_foo($sample, "forward", $infix_pattern % (1))
            #set $link_command += $lc
            #set ($lc, $path, $name, $inputype) = $fastq_input_foo($sample, "reverse", $infix_pattern % (2))
            #set $link_command += $lc
        #end if
    #end for
    #return ($link_command, $inputype)
#end def

mkdir bam_inputs stacks_outputs &&

## annoyingly gstacks creates stacks_outputs/gstacks.log
## instead of just writing to stderr as the other tools
## hence we do not use the tokens: the Galaxy log output is linked to
## stacks_outputs/gstacks.log so that gstacks writes into it directly
#if $output_log
    ln -s '$output_log' stacks_outputs/gstacks.log &&
#end if

#if $mode_cond.mode_select == "denovo" and not $popmap:
    ## denovo mode without population map: a single bam is linked as catalog.bam
    ## since collections have no len .. yet
    #try:
        #set count = len($input_bam)
    #except:
        #set count = len($input_bam.keys())
    #end try
    #if count == 1:
        #for $bam in $input_bam:
            ln -s '$bam' bam_inputs/catalog.bam &&
        #end for
    #else
        >&2 echo "exactly one (merged) bam file is needed in denovo mode if no population map is given" && exit 1 &&
    #end if
#else
    ## link every bam input under its cleaned identifier and collect the
    ## corresponding -B arguments (used in reference based mode without popmap)
    #set $bamlist = ""
    #for $bam in $input_bam:
        #if $bam.is_of_type('bam')
            #set $filename = $clean_ext($bam.element_identifier)+".bam"
            ## the link target is quoted: identifiers may contain whitespace or
            ## shell metacharacters and the -B argument below is quoted as well
            ln -s '$bam' 'bam_inputs/$filename' &&
            #set $bamlist += " -B 'bam_inputs/"+$filename+"'"
        #end if
    #end for
#end if

gstacks
#if $mode_cond.mode_select == "denovo":
    -P bam_inputs
    $mode_cond.ignore_pe_reads
    #if $mode_cond.advanced_cond.advanced_select == "yes":
        --kmer-length $mode_cond.advanced_cond.kmer_length
        --max-debruijn-reads $mode_cond.advanced_cond.max_debruijn_reads
        --min-kmer-cov $mode_cond.advanced_cond.min_kmer_cov
        $mode_cond.advanced_cond.write_alignments
    #end if
#else:
    #if $popmap
        -I bam_inputs
    #else
        $bamlist
    #end if
    #if $mode_cond.paired_cond.paired_select == ''
        $mode_cond.paired_cond.rm_unpaired_reads
        $mode_cond.paired_cond.rm_pcr_duplicates
    #else:
        $mode_cond.paired_cond.paired_select
    #end if
    #if $mode_cond.advanced_cond.advanced_select == "yes":
        --min-mapq $mode_cond.advanced_cond.min_mapq
        --max-clipped $mode_cond.advanced_cond.max_clipped
        --max-insert-len $mode_cond.advanced_cond.max_insert_len
        $mode_cond.advanced_cond.details
        --phasing-cooccurrences-thr-range $mode_cond.advanced_cond.phasing_cooccurrences_thr_min,$mode_cond.advanced_cond.phasing_cooccurrences_thr_max
        $mode_cond.advanced_cond.phasing_dont_prune_hets
    #end if
#end if
#if $popmap
    -M '$popmap'
#end if
-O stacks_outputs
-t \${GALAXY_SLOTS:-1}
##Model options:
--model $model_cond.model
--var-alpha $model_cond.var_alpha
--gt-alpha $model_cond.gt_alpha

## the bam files generated by gstacks (--write-alignments) are seemingly buggy
## (https://groups.google.com/d/msg/stacks-users/CazwJY1DPGA/7vuahiB2GgAJ)
## so we fix them temporarily by piping them through samtools view (disabling all
## exit codes and stderr output) this adds the samtools requirement
## for later versions where this is fixed the output bam files could just be moved
## to stacks_outputs if this is still necessary
#if $mode_cond.mode_select == "denovo" and $mode_cond.advanced_cond.advanced_select == "yes" and $mode_cond.advanced_cond.write_alignments
    #if $popmap:
        && for b in stacks_outputs/*alns.bam; do (samtools view --no-PG -b "\$b" || true) 2> /dev/null > tmp && mv tmp "\$b"; done
    #else
        && (samtools view --no-PG -b stacks_outputs/alignments.bam || true) 2> /dev/null > tmp && mv tmp stacks_outputs/alignments.bam
    #end if
#end if

## the catalog.calls output is a gzip-ed vcf extract it
## to make it usable in Galaxy (with the downside that we
## need to gzip it again for downstream calls like populations)
&& gunzip -c stacks_outputs/catalog.calls > stacks_outputs/catalog.calls.vcf

## TODO extract individual distributions from stacks_outputs/gstacks.log.distribs
## alternative extra tool
## for i in \$(stacks-dist-extract stacks_outputs/gstacks.log.distribs)
## do
## stacks-dist-extract stacks_outputs/gstacks.log.distribs \$i > stacks_outputs/gstacks.log.\$i.tsv
## done
## TODO make optional output collection
Functional tests |
name | inputs | outputs | required files |
Test-1 |
input_bam: ['tsv2bam/PopA_01.matches.bam', 'tsv2bam/PopA_02.matches.bam'] popmap: denovo_map/popmap_cstacks.tsv mode_cond|mode_select: denovo add_log_distribs: True add_log: True |
name: value name: value |
tsv2bam/PopA_01.matches.bam tsv2bam/PopA_02.matches.bam denovo_map/popmap_cstacks.tsv value |
Test-2 |
input_bam: ['tsv2bam/PopA_01.matches.bam', 'tsv2bam/PopA_02.matches.bam'] popmap: denovo_map/popmap_cstacks.tsv mode_cond|advanced_cond|write_alignments: True mode_cond|advanced_cond|advanced_select: yes mode_cond|mode_select: denovo add_log: True |
name: value |
tsv2bam/PopA_01.matches.bam tsv2bam/PopA_02.matches.bam denovo_map/popmap_cstacks.tsv value |
Test-3 |
input_bam: tsv2bam/PopA_01.matches.bam mode_cond|ignore_pe_reads: True mode_cond|advanced_cond|kmer_length: 23 mode_cond|advanced_cond|max_debruijn_reads: 666 mode_cond|advanced_cond|min_kmer_cov: 3 mode_cond|advanced_cond|write_alignments: True mode_cond|advanced_cond|advanced_select: yes mode_cond|mode_select: denovo model_cond|var_alpha: 0.1 model_cond|gt_alpha: 0.1 model_cond|model: marukihigh add_log: True |
name: value name: value |
tsv2bam/PopA_01.matches.bam value |
Test-4 |
input_bam: ['tsv2bam/PopA_01.bam', 'tsv2bam/PopA_02.bam'] mode_cond|paired_cond|rm_pcr_duplicates: True mode_cond|paired_cond|paired_select: mode_cond|mode_select: refbased add_log: True |
tsv2bam/PopA_01.bam tsv2bam/PopA_02.bam |
|
Test-5 |
input_bam: ['tsv2bam/PopA_01.bam', 'tsv2bam/PopA_02.bam'] popmap: denovo_map/popmap_cstacks.tsv mode_cond|paired_cond|paired_select: --unpaired mode_cond|advanced_cond|min_mapq: 23 mode_cond|advanced_cond|max_clipped: 0.23 mode_cond|advanced_cond|max_insert_len: 666 mode_cond|advanced_cond|details: True mode_cond|advanced_cond|phasing_cooccurrences_thr_min: 2 mode_cond|advanced_cond|phasing_cooccurrences_thr_max: 3 mode_cond|advanced_cond|phasing_dont_prune_hets: True mode_cond|advanced_cond|advanced_select: yes mode_cond|mode_select: refbased model_cond|var_alpha: 0.1 model_cond|gt_alpha: 0.1 model_cond|model: snp add_log: True |
name: value |
tsv2bam/PopA_01.bam tsv2bam/PopA_02.bam denovo_map/popmap_cstacks.tsv value |
Test-6 |
input_bam: ['tsv2bam/PopA_01.bam', 'tsv2bam/PopA_02.bam'] mode_cond|paired_cond|paired_select: --ignore-pe-reads mode_cond|mode_select: refbased add_log: True |
name: value |
tsv2bam/PopA_01.bam tsv2bam/PopA_02.bam value |