0
|
<tool id="clc_mapper" name="CLC Mapper" version="0.0.2">
    <description>Maps reads giving a SAM/BAM file</description>
    <!-- External programs invoked by the command template below. -->
    <requirements>
        <requirement type="binary">clc_mapper</requirement>
        <requirement type="binary">clc_cas_to_sam</requirement>
        <requirement type="binary">samtools</requirement>
        <requirement type="package" version="0.1.19">samtools</requirement>
    </requirements>
    <version_command>\${CLC_ASSEMBLY_CELL:-/mnt/apps/clcBio/clc-assembly-cell-4.1.0-linux_64/}clc_mapper | grep -i version</version_command>
    <!--
      Command pipeline: clc_mapper writes temp_job.cas, clc_cas_to_sam converts
      it to temp_job.bam, then samtools sorts and indexes the result into the
      out_bam dataset. Both CLC binaries are located via the CLC_ASSEMBLY_CELL
      environment variable, falling back to the same default install directory.
      The template is wrapped in CDATA so the shell operators need no escaping.
    -->
    <command><![CDATA[echo Mapping reads with clc_mapper...
&& \${CLC_ASSEMBLY_CELL:-/mnt/apps/clcBio/clc-assembly-cell-4.1.0-linux_64/}clc_mapper
## One -d per reference sequence; -z is added for circular references.
#for $ref in $references
#if str($ref.ref_type)=="circular"
-d -z "$ref.ref_file"
#else
-d "$ref.ref_file"
#end if
#end for
## One -p/-q group per read group, depending on how the reads are supplied.
#for $rg in $read_group
##--------------------------------------
#if str($rg.segments.type) == "paired"
-p $rg.segments.placement $rg.segments.dist_mode $rg.segments.min_size $rg.segments.max_size -q -i "$rg.segments.filename1" "$rg.segments.filename2"
#end if
##--------------------------------------
#if str($rg.segments.type) == "interleaved"
-p $rg.segments.placement $rg.segments.dist_mode $rg.segments.min_size $rg.segments.max_size -q "$rg.segments.filename"
#end if
##--------------------------------------
#if str($rg.segments.type) == "none"
-p no -q
#for $f in $rg.segments.filenames
"$f"
#end for
#end if
##--------------------------------------
#end for
-o "temp_job.cas"
--cpus \${GALAXY_SLOTS:-4}
## TODO - filtering out the progress lines seems to mess up the multiple commands
## | grep -v "^Progress: "
##===========================================
## TODO - I've required all the input in Sanger FASTQ format (or FASTA) so can
## use the offset 33, rather then the CLCbio default of 64 which is only for
## obsolete Illumina FASTQ files. Really need this option per input file...
&& echo Converting CAS file to BAM with clc_cas_to_sam...
&& \${CLC_ASSEMBLY_CELL:-/mnt/apps/clcBio/clc-assembly-cell-4.1.0-linux_64/}clc_cas_to_sam --cas "temp_job.cas" -o "temp_job.bam" --no-progress --qualityoffset 33
&& rm "temp_job.cas"
##===========================================
&& echo Sorting BAM file with samtools...
## The sorted output is picked up as temp_sorted.bam by the mv that follows.
&& samtools sort "temp_job.bam" "temp_sorted"
&& mv "temp_sorted.bam" "$out_bam"
&& echo Indexing BAM file with samtools...
&& samtools index "$out_bam"]]></command>
    <stdio>
        <!-- Assume anything other than zero is an error -->
        <exit_code range="1:" />
        <exit_code range=":-1" />
    </stdio>
    <!-- Job splitting with merge via clc_join_mappings? -->
    <inputs>
        <!-- Support linear and circular references (-z) -->
        <repeat name="references" title="Reference Sequence" min="1">
            <param name="ref_file" type="data" format="fasta" required="true" label="Reference sequence(s) (FASTA)" />
            <param name="ref_type" type="select" label="Reference type">
                <option value="linear">Linear (e.g. most chromosomes)</option>
                <option value="circular">Circular (e.g. bacterial chromosomes, mitochondria)</option>
            </param>
        </repeat>
        <!-- Each read group is paired (two files), paired (interleaved), or unpaired. -->
        <repeat name="read_group" title="Read Group" min="1">
            <conditional name="segments">
                <param name="type" type="select" label="Are these paired reads?">
                    <option value="paired">Paired reads (as two files)</option>
                    <option value="interleaved">Paired reads (as one interleaved file)</option>
                    <option value="none">Unpaired reads (single or orphan reads)</option>
                </param>
                <when value="paired">
                    <!-- Arrow labels are entity-escaped so the document stays well-formed. -->
                    <param name="placement" type="select" label="Pairing type (segment placing)">
                        <option value="fb">---&gt; &lt;--- (e.g. Sanger capillary or Solexa/Illumina paired-end library)</option>
                        <option value="bf">&lt;--- ---&gt; (e.g. Solexa/Illumina mate-pair library)</option>
                        <option value="ff">---&gt; ---&gt;</option>
                        <option value="bb">&lt;--- &lt;---</option>
                    </param>
                    <param name="dist_mode" type="select" label="How is the fragment distance measured?">
                        <option value="ss">Start to start (e.g. Sanger capillary or Solexa/Illumina libraries)</option>
                        <option value="se">Start to end</option>
                        <option value="es">End to start</option>
                        <option value="ee">End to end</option>
                    </param>
                    <!-- TODO - min/max validation done via the <code> tag? -->
                    <param name="min_size" type="integer" optional="false" min="0" value=""
                           label="Minimum size of 'good' DNA templates in the library preparation" />
                    <param name="max_size" type="integer" optional="false" min="0" value=""
                           label="Maximum size of 'good' DNA templates in the library preparation" />
                    <param name="filename1" type="data" format="fastqsanger,fasta" required="true" label="Read file one"
                           help="FASTA or Sanger FASTQ accepted." />
                    <param name="filename2" type="data" format="fastqsanger,fasta" required="true" label="Read file two"
                           help="FASTA or Sanger FASTQ accepted." />
                </when>
                <when value="interleaved">
                    <!-- Same pairing options as the two-file case, kept in step with it. -->
                    <param name="placement" type="select" label="Pairing type (segment placing)">
                        <option value="fb">---&gt; &lt;--- (e.g. Sanger capillary or Solexa/Illumina paired-end library)</option>
                        <option value="bf">&lt;--- ---&gt; (e.g. Solexa/Illumina mate-pair library)</option>
                        <option value="ff">---&gt; ---&gt;</option>
                        <option value="bb">&lt;--- &lt;---</option>
                    </param>
                    <param name="dist_mode" type="select" label="How is the fragment distance measured?">
                        <option value="ss">Start to start (e.g. Sanger capillary or Solexa/Illumina libraries)</option>
                        <option value="se">Start to end</option>
                        <option value="es">End to start</option>
                        <option value="ee">End to end</option>
                    </param>
                    <!-- TODO - min/max validation done via the <code> tag? -->
                    <param name="min_size" type="integer" optional="false" min="0" value=""
                           label="Minimum size of 'good' DNA templates in the library preparation" />
                    <param name="max_size" type="integer" optional="false" min="0" value=""
                           label="Maximum size of 'good' DNA templates in the library preparation" />
                    <param name="filename" type="data" format="fastqsanger,fasta" required="true" label="Interleaved read file"
                           help="FASTA or Sanger FASTQ accepted."/>
                </when>
                <when value="none">
                    <param name="filenames" type="data" format="fastqsanger,fasta" multiple="true" required="true" label="Read file(s)"
                           help="Multiple files allowed, for example several files of orphan reads. FASTA or Sanger FASTQ accepted." />
                </when>
            </conditional>
        </repeat>
        <!-- Length fraction (-l), default 0.5 -->
        <!-- Similarity (-s), default 0.8 -->
        <!-- Option for unmapped reads via clc_unmapped_reads ? -->
    </inputs>
    <outputs>
        <data name="out_bam" format="bam" label="CLCbio mapping (BAM)" />
    </outputs>
    <tests>
        <!-- CLC's SAM header @PG and @RG lines include filenames so will change -->
        <test>
            <param name="ref_file" value="NC_010642.fna" ftype="fasta" />
            <param name="ref_type" value="circular" />
            <param name="read_group_0|segments|type" value="interleaved" />
            <param name="read_group_0|segments|placement" value="fb" />
            <param name="read_group_0|segments|dist_mode" value="ss" />
            <param name="read_group_0|segments|min_size" value="1" />
            <param name="read_group_0|segments|max_size" value="1000" />
            <param name="read_group_0|segments|filename" value="SRR639755_mito_pairs.fastq.gz" ftype="fastqsanger" />
            <!-- The output name must match the data element declared in the outputs section. -->
            <output name="out_bam" file="SRR639755_mito_pairs_vs_NC_010642_clc.bam" ftype="bam" lines_diff="4"/>
        </test>
    </tests>
    <help>

**What it does**

Runs the CLCbio tool ``clc_mapper`` which produces a proprietary binary
CAS format file, which is immediately processed using ``clc_cas_to_sam``
to generate a self-contained standard BAM file, which is then sorted
and indexed using ``samtools``.


**Citation**

If you use this Galaxy tool in work leading to a scientific publication please
cite this wrapper as:

Peter J.A. Cock (2013), Galaxy wrapper for the CLC Assembly Cell suite from CLCbio
http://toolshed.g2.bx.psu.edu/view/peterjc/clc_assembly_cell

This wrapper is available to install into other Galaxy Instances via the Galaxy
Tool Shed at http://toolshed.g2.bx.psu.edu/view/peterjc/clc_assembly_cell
    </help>
</tool>
|