| Miscellaneous |
| Version lineage of this tool (guids ordered most recent to oldest) |
| toolshed.g2.bx.psu.edu/repos/devteam/samtools_stats/samtools_stats/2.0.9 (this tool) |
| toolshed.g2.bx.psu.edu/repos/devteam/samtools_stats/samtools_stats/2.0.8 |
| toolshed.g2.bx.psu.edu/repos/devteam/samtools_stats/samtools_stats/2.0.7 |
| toolshed.g2.bx.psu.edu/repos/devteam/samtools_stats/samtools_stats/2.0.6 |
| toolshed.g2.bx.psu.edu/repos/devteam/samtools_stats/samtools_stats/2.0.5 |
| toolshed.g2.bx.psu.edu/repos/devteam/samtools_stats/samtools_stats/2.0.4 |
| toolshed.g2.bx.psu.edu/repos/devteam/samtools_stats/samtools_stats/2.0.3 |
| toolshed.g2.bx.psu.edu/repos/devteam/samtools_stats/samtools_stats/2.0.2+galaxy2 |
| toolshed.g2.bx.psu.edu/repos/devteam/samtools_stats/samtools_stats/2.0.2+galaxy1 |
| toolshed.g2.bx.psu.edu/repos/devteam/samtools_stats/samtools_stats/2.0.2 |
| toolshed.g2.bx.psu.edu/repos/devteam/samtools_stats/samtools_stats/2.0.1 |
| toolshed.g2.bx.psu.edu/repos/devteam/samtools_stats/samtools_stats/2.0 |
| samtools_stats |
| Requirements (dependencies defined in the <requirements> tag set) |
| name | version | type |
| samtools | 1.22 | package |
| Additional information about this tool |
## Work out how many EXTRA threads samtools may use (handed over later via -@):
## take the slot count from GALAXY_SLOTS (1 if unset) and subtract one.
addthreads=\${GALAXY_SLOTS:-1} && (( addthreads-- )) &&
## Expose the input dataset under the fixed name "infile" and place an
## index next to it: build one with samtools when the dataset carries no
## index metadata, otherwise link the existing one. Inputs that are neither
## BAM nor CRAM get no index.
ln -s '${input}' infile &&
#if $input.is_of_type("bam"):
#if str($input.metadata.bam_index) == "None":
samtools index infile infile.bai &&
#else:
ln -s '${input.metadata.bam_index}' infile.bai &&
#end if
#elif $input.is_of_type("cram"):
#if str($input.metadata.cram_index) == "None":
samtools index infile infile.crai &&
#else:
ln -s '${input.metadata.cram_index}' infile.crai &&
#end if
#end if
## Make the user-selected reference genome, if any, accessible through
## a shell variable $reffa, index the reference if necessary, and make
## the fai-index file available through a shell variable $reffai.
## For a cached genome, this simply sets the shell variables to point to
## the genome file and its precalculated index.
## For a genome from the user's history, if that genome is a plain
## fasta file, the code creates a symlink in the pwd, creates the fai
## index file next to it, then sets the shell variables to point to the
## symlink and its index.
## For a fasta.gz dataset from the user's history, it tries the same,
## but this will only succeed if the file was compressed with bgzip.
## For a regular gzipped file, samtools faidx will fail, in which case
## the code falls back to decompressing to plain fasta before
## reattempting the indexing.
## Indexing of a bgzipped file produces a regular fai index file *and*
## a compressed gzi file. The former is identical to the fai index of
## the uncompressed fasta.
## If the user has not selected a reference (it's an optional parameter
## in some samtools wrappers), a cheetah boolean use_ref is set to
## False to encode that fact.
## Start by assuming a reference is in use; only the final #else branch
## below resets the flag.
#set use_ref=True
#if $addref_cond.addref_select == "history":
#if $addref_cond.ref.is_of_type('fasta'):
## Plain fasta from the history: link it into the pwd and index it.
reffa="reference.fa" &&
ln -s '${addref_cond.ref}' \$reffa &&
samtools faidx \$reffa &&
#else:
## Any other history datatype is handled as compressed fasta.
reffa="reference.fa.gz" &&
ln -s '${addref_cond.ref}' \$reffa &&
## The outer braces group "index, or else decompress and index" so that
## the trailing && applies to the outcome of the whole attempt.
{
samtools faidx \$reffa ||
{
echo "Failed to index compressed reference. Trying decompressed ..." 1>&2 &&
gzip -dc \$reffa > reference.fa &&
## From here on $reffa refers to the decompressed copy.
reffa="reference.fa" &&
samtools faidx \$reffa;
}
} &&
#end if
## The index sits next to whichever file $reffa ended up pointing to.
reffai=\$reffa.fai &&
#elif $addref_cond.addref_select == "cached":
## For a cached genome the absolute path is used, which allows reading
## a CRAM file without specifying the reference.
reffa='${addref_cond.ref.fields.path}' &&
reffai=\$reffa.fai &&
#else
## No reference selected: record that so later code can skip
## reference-dependent options.
#set use_ref=False
#end if
## Assemble the samtools stats call. Each optional switch is emitted only
## when the corresponding tool parameter is set; the result is redirected
## into the main output dataset.
samtools stats
#if $coverage_cond.coverage_select == "yes":
--coverage ${coverage_cond.coverage_min},${coverage_cond.coverage_max},${coverage_cond.coverage_step}
#end if
$remove_dups
#if str($filter_by_flags.filter_flags) == "filter":
## Each flag selection is split on commas and its integer values are summed
## into a single number; an empty selection yields 0.
#set $required_mask = 0
#if $filter_by_flags.require_flags:
#set $required_mask = sum(map(int, str($filter_by_flags.require_flags).split(",")))
#end if
#set $excluded_mask = 0
#if $filter_by_flags.exclude_flags:
#set $excluded_mask = sum(map(int, str($filter_by_flags.exclude_flags).split(",")))
#end if
--required-flag ${required_mask}
--filtering-flag ${excluded_mask}
#end if
#if str($gc_depth) != "":
--GC-depth $gc_depth
#end if
#if str($insert_size) != "":
--insert-size $insert_size
#end if
## Read-group restriction is disabled for now:
## #if $read_group
## -I '$read_group'
## #end if
#if str($read_length) != "":
--read-length $read_length
#end if
#if str($most_inserts) != "":
--most-inserts $most_inserts
#end if
#if str($trim_quality) != "":
--trim-quality $trim_quality
#end if
## use_ref is the cheetah boolean set while preparing the reference.
#if $use_ref:
--ref-seq "\$reffa"
#end if
## Not implemented in Galaxy at the moment:
## writes STR_VALUE.bamstat where STR comes from -P and VALUE is a value of the TAG given by -S;
## this would need some dataset discovery on the Galaxy side.
## -P, --split-prefix STR
## -S, --split TAG
${sparse}
#if $cond_region.select_region == "tab":
-t '${cond_region.targetregions}'
#end if
${remove_overlaps}
#if str($cov_threshold) != "":
-g ${cov_threshold}
#end if
-@ \$addthreads
infile
## Regions typed in by the user follow the input file, one quoted argument each.
#if $cond_region.select_region == "text":
#for $entry in $cond_region.regions_repeat:
'${entry.region}'
#end for
#end if
> '${output}'
## Optionally split the combined stats report into one tabular file per
## selected section, written to the directory "split".
#if $split_output_cond.split_output_selector == "yes":
## Section tags chosen by the user, taken from a comma-separated selection.
#set outputs_to_split = str($split_output_cond.generate_tables).split(',')
&& mkdir split
#for s in $outputs_to_split:
## The section's header comment is the line containing a literal "^<tag>".
## The text before its first "." (minus the leading "# ") becomes the file name.
&& name=`grep '\^$s' '$output' | cut -d'.' -f 1 | sed 's/^# //'`
## Print from that header up to (not including) the next line containing a
## literal "^", drop the "Use `grep ...`" hint from the header, and strip the
## tag column. The last sed is anchored so only a leading "<tag><tab>" is
## removed, never a match further along the line.
&& awk '/\^/{out=0} /\^$s/{out=1} {if(out==1){print $0}}' '$output' | sed 's/Use `grep .*` to extract this part.//' | sed 's/^$s\t//' > "split/\$name.tab"
#end for
#end if
| Functional tests |
| name | inputs | outputs | required files |
| Test-1 |
input: 1_map_cigar.sam addref_cond|ref: test.fa addref_cond|addref_select: history |
name: value |
1_map_cigar.sam test.fa value |
| Test-2 |
input: 2_equal_cigar_full_seq.sam addref_cond|ref: test addref_cond|addref_select: cached |
name: value |
2_equal_cigar_full_seq.sam value |
| Test-3 |
input: 5_insert_cigar.sam insert_size: 0 addref_cond|ref: test.fa addref_cond|addref_select: history |
name: value |
5_insert_cigar.sam test.fa value |
| Test-4 |
input: 11_target.sam addref_cond|addref_select: no cond_region|targetregions: 11.stats.targets cond_region|select_region: tab |
name: value |
11_target.sam 11.stats.targets value |
| Test-5 |
input: 11_target.bam addref_cond|addref_select: no cond_region|regions_repeat_0|region: ref1:10-24 cond_region|regions_repeat_1|region: ref1:30-46 cond_region|regions_repeat_2|region: ref1:39-56 cond_region|select_region: text |
name: value |
11_target.bam value |
| Test-6 |
input: 11_target.sam addref_cond|addref_select: no cond_region|targetregions: 11.stats.targets cond_region|select_region: tab cov_threshold: 4 |
name: value |
11_target.sam 11.stats.targets value |
| Test-7 |
input: 11_target.bam addref_cond|addref_select: no cond_region|regions_repeat_0|region: ref1:10-24 cond_region|regions_repeat_1|region: ref1:30-46 cond_region|regions_repeat_2|region: ref1:39-56 cond_region|select_region: text cov_threshold: 4 |
name: value |
11_target.bam value |
| Test-8 |
input: 12_overlaps.bam addref_cond|addref_select: no cond_region|targetregions: 12_3reads.bed cond_region|select_region: tab |
name: value |
12_overlaps.bam 12_3reads.bed value |
| Test-9 |
input: 12_overlaps.bam addref_cond|addref_select: no cond_region|targetregions: 12_3reads.bed cond_region|select_region: tab remove_overlaps: True |
name: value |
12_overlaps.bam 12_3reads.bed value |
| Test-10 |
input: 12_overlaps.bam addref_cond|addref_select: no cond_region|targetregions: 12_2reads.bed cond_region|select_region: tab |
name: value |
12_overlaps.bam 12_2reads.bed value |
| Test-11 |
input: 12_overlaps.bam addref_cond|addref_select: no cond_region|targetregions: 12_2reads.bed cond_region|select_region: tab remove_overlaps: True |
name: value |
12_overlaps.bam 12_2reads.bed value |
| Test-12 |
input: samtools_stats_input.bam addref_cond|ref: samtools_stats_ref.fa addref_cond|addref_select: history |
name: value |
samtools_stats_input.bam samtools_stats_ref.fa value |
| Test-13 |
input: samtools_stats_input.bam split_output_cond|generate_tables: ['SN', 'MPC', 'GCC'] split_output_cond|split_output_selector: yes addref_cond|ref: samtools_stats_ref.fa addref_cond|addref_select: history |
samtools_stats_input.bam samtools_stats_ref.fa |