Mercurial > repos > artbio > repenrich2
comparison repenrich2.xml @ 0:4905a332a094 draft
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich2 commit 73721d980c1f422dc880d80f61e44d270992e537
author | artbio |
---|---|
date | Sat, 20 Apr 2024 11:56:53 +0000 |
parents | |
children | 6d59fbca2db4 |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:4905a332a094 |
---|---|
1 <tool id="repenrich2" name="RepEnrich" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@"> | |
2 <description>Repeat Element Profiling</description> | |
3 <macros> | |
4 <import>macros.xml</import> | |
5 </macros> | |
6 <expand macro="repenrich_requirements"/> | |
7 <stdio> | |
8 <exit_code range="1:" level="fatal" description="Tool exception" /> | |
9 </stdio> | |
10 <command detect_errors="exit_code"><![CDATA[ | |
11 #import re | |
12 ## uncompress fastq.gz or fastqsanger.gz is not required with bowtie2 | |
13 #if $seq_method.seq_method_list == "single-read": | |
14 ln -f -s '$seq_method.input_fastq' 'input.fastq' && | |
15 #elif $seq_method.seq_method_list == 'paired_collection': | |
16 ln -f -s '$seq_method.input_fastq.forward' 'input.fastq' && | |
17 ln -f -s '$seq_method.input_fastq.reverse' 'input_2.fastq' && | |
18 #else: | |
19 ln -f -s '$seq_method.input_fastq' 'input.fastq' && | |
20 ln -f -s '$seq_method.input2_fastq' 'input_2.fastq' && | |
21 #end if | |
22 ## history: index the fasta with bowtie2-build; built-in: the select parameter named genome gives the bowtie2 index path and bowtie2-inspect regenerates genome.fa | |
23 #if $refGenomeSource.genomeSource == "history": | |
24 bowtie2-build --threads \${GALAXY_SLOTS:-4} -f '$refGenomeSource.genome' genome 1>/dev/null && | |
25 ln -s -f '$refGenomeSource.genome' 'genome.fa' && | |
26 #set index_path = 'genome' | |
27 #else: | |
28 #set index_path = $refGenomeSource.genome.fields.path | |
29 bowtie2-inspect '$index_path' > genome.fa && | |
30 #end if | |
31 ## setup step: RepEnrich2_setup.py is given the annotation file, genome.fa and the thread count | |
32 python '$__tool_directory__/RepEnrich2_setup.py' | |
33 --annotation_file '$repeatmasker' | |
34 --genomefasta 'genome.fa' | |
35 --cpus "\${GALAXY_SLOTS:-4}" && | |
36 ## alignments with MAPQ >= 38 go to unique.bam; NOTE(review): samtools view -U expects a FILE argument, so the -U -b sequence below may be parsed as a file named -b - confirm which reads reach bedtools bamtofastq | |
37 #if $seq_method.seq_method_list == "single-read": | |
38 bowtie2 -x '$index_path' -p \${GALAXY_SLOTS:-4} input.fastq | |
39 | samtools sort -@ "\${GALAXY_SLOTS:-4}" -T tmp -O bam -o aligned.bam 2>&1 && | |
40 samtools view -@ "\${GALAXY_SLOTS:-4}" -F 4 -b -q 38 aligned.bam -o unique.bam && | |
41 samtools view -@ "\${GALAXY_SLOTS:-4}" -h -F 4 -b aligned.bam \ | |
42 | samtools view -@ "\${GALAXY_SLOTS:-4}" -U -b -q 38 - \ | |
43 | bedtools bamtofastq -i /dev/stdin -fq multimap.fastq && | |
44 #else: | |
45 bowtie2 -x '$index_path' -p \${GALAXY_SLOTS:-4} -1 input.fastq -2 input_2.fastq | |
46 | samtools sort -@ "\${GALAXY_SLOTS:-4}" -T tmp -O bam -o aligned.bam 2>&1 && | |
47 samtools view -@ "\${GALAXY_SLOTS:-4}" -f 3 -b -q 38 aligned.bam -o unique.bam && | |
48 samtools view -@ "\${GALAXY_SLOTS:-4}" -f 3 -b aligned.bam \ | |
49 | samtools view -@ "\${GALAXY_SLOTS:-4}" -U -b -q 38 - \ | |
50 | samtools sort -@ "\${GALAXY_SLOTS:-4}" -n - - | |
51 | bedtools bamtofastq -i /dev/stdin -fq multimap_1.fastq -fq2 multimap_2.fastq && | |
52 #end if | |
53 samtools index unique.bam && | |
54 ## main step: RepEnrich2.py receives the annotation file, unique.bam and the multimapper fastq file(s) | |
55 | |
56 python '$__tool_directory__/RepEnrich2.py' | |
57 --annotation_file '$repeatmasker' | |
58 --alignment_bam unique.bam | |
59 --cpus "\${GALAXY_SLOTS:-4}" | |
60 #if $seq_method.seq_method_list == "single-read": | |
61 --fastqfile multimap.fastq | |
62 #else: | |
63 --fastqfile multimap_1.fastq | |
64 --fastqfile2 multimap_2.fastq | |
65 #end if | |
66 ]]></command> | |
67 <!-- basic error handling --> | |
68 <inputs> <!-- three inputs: read layout (seq_method), reference genome source (refGenomeSource), RepeatMasker annotation (repeatmasker) --> | |
69 <conditional name="seq_method"> | |
70 <param help="Paired-end or single-read sequencing" label="Sequencing method" name="seq_method_list" type="select"> | |
71 <option selected="True" value="single-read">Single-read sequencing</option> | |
72 <option value="paired-end">Paired-end sequencing</option> | |
73 <option value="paired_collection">Paired-end Dataset Collection</option> | |
74 </param> | |
75 <when value="single-read"> | |
76 <param format="fastq,fastqsanger,fastq.gz,fastqsanger.gz" label="Single-reads" name="input_fastq" type="data" help="accepted formats: fastq, fastqsanger, fastq.gz, fastqsanger.gz" /> | |
77 </when> | |
78 <when value="paired-end"> | |
79 <param format="fastq,fastqsanger,fastq.gz,fastqsanger.gz" label="1st paired-end sequencing dataset" name="input_fastq" type="data" help="accepted formats: fastq, fastqsanger, fastq.gz, fastqsanger.gz" /> | |
80 <param format="fastq,fastqsanger,fastq.gz,fastqsanger.gz" label="2nd paired-end sequencing dataset" name="input2_fastq" type="data" help="accepted formats: fastq, fastqsanger, fastq.gz, fastqsanger.gz" /> | |
81 </when> | |
82 <when value="paired_collection"> | |
83 <param name="input_fastq" format="fastq,fastqsanger,fastq.gz,fastqsanger.gz" type="data_collection" collection_type="paired" label="Paired Collection" help="accepted formats: fastq, fastqsanger, fastq.gz, fastqsanger.gz" /> | |
84 </when> | |
85 </conditional> | |
86 <conditional name="refGenomeSource"> | |
87 <param help="Built-ins were indexed using default options" label="Will you select a reference genome from your history or use a built-in index?" name="genomeSource" type="select"> | |
88 <option value="indexed">Use a built-in index</option> | |
89 <option value="history">Use one from the history</option> | |
90 </param> | |
91 <when value="indexed"> | |
92 <param help="if your genome of interest is not listed - contact instance administrator" label="Select a DNA reference index" name="genome" type="select"> | |
93 <options from_data_table="bowtie2_indexes" /> | |
94 </param> | |
95 </when> | |
96 <when value="history"> | |
97 <param format="fasta" label="Select a fasta file, to serve as index reference" name="genome" type="data" /> | |
98 </when> | |
99 </conditional> | |
100 <!-- the command section passes this dataset as annotation_file to both RepEnrich2_setup.py and RepEnrich2.py --> | |
101 <param format="txt" label="RepeatMasker description file" name="repeatmasker" type="data" help="see help section"/> | |
102 </inputs> | |
103 | |
104 <outputs> <!-- three tabular count tables collected from the job working directory --> | |
105 <data name="class_fraction_counts" format="tabular" from_work_dir="class_fraction_counts.tsv" label="RepEnrich on ${on_string}: class fraction counts" /> | |
106 <data name="family_fraction_counts" format="tabular" from_work_dir="family_fraction_counts.tsv" label="RepEnrich on ${on_string}: family fraction counts" /> | |
107 <data name="fraction_counts" format="tabular" from_work_dir="fraction_counts.tsv" label="RepEnrich on ${on_string}: fraction counts" /> | |
108 </outputs> | |
109 | |
110 <tests> <!-- NOTE(review): both tests use a history fasta; no test here covers the paired_collection option or the built-in index branch --> | |
111 <test> <!-- single-read input, genome supplied from the history --> | |
112 <param name="seq_method_list" value="single-read"/> | |
113 <param name="input_fastq" value="chrY-500k.R1.fastqsanger.gz" ftype="fastq.gz"/> | |
114 <param name="genomeSource" value="history"/> | |
115 <param name="genome" value="chrY-1-500k.fa" ftype="fasta"/> | |
116 <param name="repeatmasker" value="chrY-1-500k.fa.out" ftype="txt"/> | |
117 <output name="class_fraction_counts" file="chrY_single_class_fraction_counts.tab" ftype="tabular"/> | |
118 <output name="family_fraction_counts" file="chrY_single_family_fraction_counts.tab" ftype="tabular"/> | |
119 <output name="fraction_counts" file="chrY_single_fraction_counts.tab" ftype="tabular"/> | |
120 </test> | |
121 <test> <!-- paired-end input as two separate datasets, genome supplied from the history --> | |
122 <param name="seq_method_list" value="paired-end"/> | |
123 <param name="input_fastq" value="chrY-500k.R1.fastqsanger.gz" ftype="fastq.gz"/> | |
124 <param name="input2_fastq" value="chrY-500k.R2.fastqsanger.gz" ftype="fastq.gz"/> | |
125 <param name="genomeSource" value="history"/> | |
126 <param name="genome" value="chrY-1-500k.fa" ftype="fasta"/> | |
127 <param name="repeatmasker" value="chrY-1-500k.fa.out" ftype="txt"/> | |
128 <output name="class_fraction_counts" file="chrY_paired_class_fraction_counts.tab" ftype="tabular"/> | |
129 <output name="family_fraction_counts" file="chrY_paired_family_fraction_counts.tab" ftype="tabular"/> | |
130 <output name="fraction_counts" file="chrY_paired_fraction_counts.tab" ftype="tabular"/> | |
131 </test> | |
132 </tests> | |
133 | |
134 <help> | |
135 | |
136 **What it does** | |
137 | |
138 Reads are mapped to the genome using the Bowtie2 aligner. Reads mapping uniquely to the | |
139 genome are assigned to subfamilies of repetitive elements based on their degree of overlap | |
140 to RepeatMasker annotated genomic instances of each repetitive element subfamily. | |
141 | |
142 Reads mapping to multiple locations are separately mapped to repetitive element assemblies | |
143 – referred to as repetitive element pseudogenomes – built from RepeatMasker annotated | |
144 genomic instances of repetitive element subfamilies. | |
145 | |
146 RepEnrich then returns tables of counts merged from both strategies, which can be further | |
147 processed in statistical analysis for differential expression. For information on the method, | |
148 see the `original publication`_. | |
149 | |
150 .. _original publication: https://bmcgenomics.biomedcentral.com/articles/10.1186/1471-2164-15-583 | |
151 | |
152 **Inputs** | |
153 | |
154 *Reference genome* : reference genome in fasta format | |
155 | |
156 *Sequencing dataset*: Single-reads or Paired-end sequencing datasets in fastq format. | |
157 | |
158 *RepeatMasker description file*: a txt repeatmasker file which can be downloaded from | |
159 https://www.repeatmasker.org/genomicDatasets/RMGenomicDatasets.html | |
160 | |
161 This file looks like: | |
162 | |
163 <![CDATA[ | |
164 | |
165 SW perc perc perc query position in query matching repeat position in repeat | |
166 | |
167 score div. del. ins. sequence begin end (left) repeat class/family begin end (left) ID | |
168 | |
169 16 20.2 5.9 0.0 chrM 1211 1261 (18263) + (TTTTA)n Simple_repeat 1 54 (0) 84486 | |
170 | |
171 13 23.9 2.2 2.2 chrM 2014 2059 (17465) + (TTA)n Simple_repeat 1 46 (0) 84487 | |
172 | |
173 24 18.8 5.3 2.6 chrM 3924 3999 (15525) + (TAT)n Simple_repeat 1 78 (0) 84488 | |
174 | |
175 18 4.5 0.0 0.0 chrM 5961 5983 (13541) + (AT)n Simple_repeat 1 23 (0) 84489 | |
176 | |
177 13 25.9 4.0 4.0 chrM 6247 6320 (13204) + (ATTTAT)n Simple_repeat 1 74 (0) 84490 | |
178 | |
179 11 14.6 7.5 2.4 chrM 8783 8822 (10702) + (CTAATT)n Simple_repeat 1 42 (0) 84491 | |
180 | |
181 17 19.0 0.0 8.6 chrM 9064 9126 (10398) + A-rich Low_complexity 1 58 (0) 84492 | |
182 | |
183 13 21.0 5.9 1.9 chrM 11723 11773 (7751) + (ATA)n Simple_repeat 1 53 (0) 84493 | |
184 | |
185 66 20.4 12.3 12.3 chrM 12823 13001 (6523) C LSU-rRNA_Cel rRNA (1) 2431 2253 84494 | |
186 | |
187 16 16.6 0.0 2.9 chrM 14361 14396 (5128) + (ATT)n Simple_repeat 1 35 (0) 84495 | |
188 | |
189 44 2.4 0.0 0.0 chrM 15966 16007 (3517) + (TA)n Simple_repeat 1 42 (0) 84496 | |
190 | |
191 35 5.3 0.0 0.0 chrM 16559 16597 (2927) + (AT)n Simple_repeat 1 39 (0) 84497 | |
192 | |
193 36 2.9 0.0 0.0 chrM 16922 16956 (2568) + (AT)n Simple_repeat 1 35 (0) 84498 | |
194 | |
195 37 0.0 0.0 0.0 chrM 17040 17071 (2453) + (TA)n Simple_repeat 1 32 (0) 84499 | |
196 | |
197 20 4.3 0.0 0.0 chrM 17417 17440 (2084) + (T)n Simple_repeat 1 24 (0) 84500 | |
198 | |
199 31 6.9 6.3 1.5 chrM 17451 17513 (2011) + (TA)n Simple_repeat 1 66 (0) 84501 | |
200 | |
201 26 17.0 0.0 0.0 chrM 19469 19514 (10) + A-rich Low_complexity 1 46 (0) 84502 | |
202 | |
203 ]]> | |
204 | |
205 Users may filter this file so that it contains only desired items (for instance only satellites, repeats and transposons) | |
206 | |
207 **Outputs** | |
208 | |
209 (1) Fraction counts, (2) Family fraction counts and (3) Class fraction counts are returned | |
210 in tabular format for further statistical tests, differential expression analysis or graphics. | |
211 | |
212 **RepEnrich2** | |
213 | |
214 .. class:: warningmark | |
215 | |
216 the repenrich2 Galaxy wrapper was derived from the repenrich Galaxy wrapper | |
217 | |
218 repenrich2 uses bowtie2 for all alignment operations. We refer exclusively to our | |
219 `GitHub repository`_ for code review. | |
220 | |
221 .. _GitHub repository: https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich2 | |
222 | |
223 **Execution time** | |
224 | |
225 .. class:: warningmark | |
226 | |
227 This tool includes time-consuming steps to index the reference genome, index repeat | |
228 sequences and to align reads to these indexes. | |
229 | |
230 </help> | |
231 | |
232 <citations> | |
233 <citation type="doi">10.1186/1471-2164-15-583</citation> | |
234 </citations> | |
235 </tool> |