comparison umi-tools_whitelist.xml @ 10:3adbf2fa0928 draft

"planemo upload commit 28e58376e1d70e38276873a7d5e2ab44db88c2c0"
author iuc
date Tue, 27 Aug 2019 17:11:52 -0400
parents 095c349b4343
children 262026eb36a5
comparison
equal deleted inserted replaced
9:0c721837cbcf 10:3adbf2fa0928
1 <tool id="umi_tools_whitelist" name="UMI-tools whitelist" version="@VERSION@.1"> 1 <tool id="umi_tools_whitelist" name="UMI-tools whitelist" version="@VERSION@.0">
2 <description>Extract cell barcodes from FASTQ files</description> 2 <description>Extract cell barcodes from FASTQ files</description>
3 <macros> 3 <macros>
4 <import>macros.xml</import> 4 <import>macros.xml</import>
5 </macros> 5 </macros>
6 <expand macro="requirements" /> 6 <expand macro="requirements" />
7 <command detect_errors="exit_code"><![CDATA[ 7 <command detect_errors="exit_code"><![CDATA[
8 @COMMAND_LINK@ 8 @COMMAND_LINK@
9 9
10 umi_tools whitelist 10 umi_tools whitelist
11 --bc-pattern='$bc_pattern' 11 --bc-pattern='$bc_pattern'
12 --extract-method='$extract_method'
12 --subset-reads='$subset_reads' 13 --subset-reads='$subset_reads'
13 #if $input_type.type == 'single': 14 #if $input_type.type == 'single':
14 #if $gz: 15 #if $gz:
15 --stdin=input_single.gz 16 --stdin=input_single.gz
16 #else 17 #else
68 <param name="bc_pattern" argument="--bc-pattern" type="text" label="Barcode pattern for first read" 69 <param name="bc_pattern" argument="--bc-pattern" type="text" label="Barcode pattern for first read"
69 help="Use this option to specify the format of the UMI/barcode. Use Ns to 70 help="Use this option to specify the format of the UMI/barcode. Use Ns to
70 represent the random positions and Xs to indicate the bc positions. 71 represent the random positions and Xs to indicate the bc positions.
71 Bases with Ns will be extracted and added to the read name. Remaining 72 Bases with Ns will be extracted and added to the read name. Remaining
72 bases, marked with an X will be reattached to the read."> 73 bases, marked with an X will be reattached to the read.">
74 <expand macro="barcode_sanitizer" />
75 </param>
76 <param name="extract_method" argument="--extract-method" type="select" label="Barcode Extraction Method"
77 help="If bracketed expressions are used in the above barcode pattern, then set this to 'regex'. Otherwise leave as 'string'" >
78 <option value="string" selected="true" />
79 <option value="regex" />
73 </param> 80 </param>
74 <param name="method" argument="--method" type="select" label="Count reads or UMIs" 81 <param name="method" argument="--method" type="select" label="Count reads or UMIs"
75 help="Many published protocols rank CBs by the number of reads the CBs appear in. However you could also use the number of unique UMIs a CB is associated with. Note that this is still an approximation to the number of transcripts captured because the same UMI could be associated with two different transcripts and be counted as independent." > 82 help="Many published protocols rank CBs by the number of reads the CBs appear in. However you could also use the number of unique UMIs a CB is associated with. Note that this is still an approximation to the number of transcripts captured because the same UMI could be associated with two different transcripts and be counted as independent." >
76 <option value="reads" selected="true" /> 83 <option value="reads" selected="true" />
77 <option value="umis" /> 84 <option value="umis" />
78 </param> 85 </param>
79
80 <param argument="--3prime" name="prime3" type="boolean" label="Is barcode on 3' end of the read?" 86 <param argument="--3prime" name="prime3" type="boolean" label="Is barcode on 3' end of the read?"
81 truevalue="--3prime" falsevalue="" 87 truevalue="--3prime" falsevalue=""
82 help="By default the barcode is assumed to be on the 5' end of the read, but 88 help="By default the barcode is assumed to be on the 5' end of the read, but
83 use this option to specify that it is on the 3' end instead." /> 89 use this option to specify that it is on the 3' end instead." />
84 <param name="subset_reads" argument="--subset-reads" type="integer" min="0" value="0" label="Use the first N reads to automatically identify the true cell barcodes." /> 90 <param name="subset_reads" argument="--subset-reads" type="integer" min="0" value="0" label="Use the first N reads to automatically identify the true cell barcodes." />
89 </param> 95 </param>
90 <when value="defaults"/> 96 <when value="defaults"/>
91 <when value="advanced"> 97 <when value="advanced">
92 <param name="set_cell_number" type="integer" min="0" value="0" label="Specify the number of cell barcodes to accept" /> 98 <param name="set_cell_number" type="integer" min="0" value="0" label="Specify the number of cell barcodes to accept" />
93 <param name="expect_cells" type="integer" min="0" value="0" label="Prior expectation on the upper limit on the number of cells sequenced" /> 99 <param name="expect_cells" type="integer" min="0" value="0" label="Prior expectation on the upper limit on the number of cells sequenced" />
94 <param name="error_correct_thresh" type="integer" min="0" value="0" label="Hamming distance for correction of barcodes to whilelist barcodes. Set to zero to generate no error correcting metrics." /> 100 <param name="error_correct_thresh" type="integer" min="0" value="0" label="Hamming distance for correction of barcodes to whitelist barcodes. Set to zero to generate no error correcting metrics." />
95 </when> 101 </when>
96 </conditional> 102 </conditional>
97 <param argument="--log" type="boolean" label="Output log?" truevalue="--log" falsevalue="" 103 <param argument="--log" type="boolean" label="Output log?" truevalue="--log" falsevalue=""
98 help="Choose if you want to generate a text file containing logging information." /> 104 help="Choose if you want to generate a text file containing logging information." />
99
100 </inputs> 105 </inputs>
101 <outputs> 106 <outputs>
102 <data name="out_whitelist" format="tabular" label="${tool.name} on ${on_string}: Whitelist"/> 107 <data name="out_whitelist" format="tabular" label="${tool.name} on ${on_string}: Whitelist"/>
103 <data name="out_log" format="txt" label="${tool.name} on ${on_string}: logfile" > 108 <data name="out_log" format="txt" label="${tool.name} on ${on_string}: logfile" >
104 <filter>log</filter> 109 <filter>log</filter>
106 <data name="out_html_report" format="html" label="${tool.name} on ${on_string}: Webpage" /> 111 <data name="out_html_report" format="html" label="${tool.name} on ${on_string}: Webpage" />
107 <data name="out_thresh" format="tabular" label="${tool.name} on ${on_string}: TSV Cell Thresholds" /> 112 <data name="out_thresh" format="tabular" label="${tool.name} on ${on_string}: TSV Cell Thresholds" />
108 </outputs> 113 </outputs>
109 <tests> 114 <tests>
110 <test expect_num_outputs="3"> 115 <test expect_num_outputs="3">
111 <param name="type" value="single" /> 116 <conditional name="input_type" >
112 <param name="input_single" value="t_R2.fastq.gz" ftype="fastq" /> 117 <param name="type" value="single" />
118 <param name="input_single" value="t_R2.fastq.gz" ftype="fastq.gz" />
119 </conditional>
113 <param name="bc_pattern" value="CCCCCCCCNNNNNNNN" /> 120 <param name="bc_pattern" value="CCCCCCCCNNNNNNNN" />
114 <param name="method" value="reads" /> 121 <param name="method" value="reads" />
115 <param name="prime3" value="true" /> 122 <param name="prime3" value="true" />
116 <output name="out_whitelist" file="out_wl_single.txt" lines_diff="40" /> 123 <output name="out_whitelist" file="out_wl_single.txt" lines_diff="40" />
117 <output name="out_thresh" file="out_wl_single.tresh.tab" /> 124 <output name="out_thresh" file="out_wl_single.tresh.tab" />
118 <output name="out_html_report" file="out_wl_single.html" /> 125 <output name="out_html_report" file="out_wl_single.html" />
119 </test> 126 </test>
120 <test expect_num_outputs="4"> 127 <test expect_num_outputs="4">
121 <param name="type" value="paired" /> 128 <conditional name="input_type" >
122 <param name="input_read1" value="t_R1.fastq.gz" ftype="fastq" /> 129 <param name="type" value="paired" />
123 <param name="input_read2" value="t_R2.fastq.gz" ftype="fastq" /> 130 <param name="input_read1" value="t_R1.fastq.gz" ftype="fastq.gz" />
131 <param name="input_read2" value="t_R2.fastq.gz" ftype="fastq.gz" />
132 </conditional>
124 <param name="barcode_select" value="first_read_only" /> 133 <param name="barcode_select" value="first_read_only" />
125 <param name="bc_pattern" value="CCCNNNNNNNNXXXXX" /> 134 <param name="bc_pattern" value="CCCNNNNNNNNXXXXX" />
126 <param name="bc_pattern2" value="CCCCCCCCNNNNNNNN" /> 135 <param name="bc_pattern2" value="CCCCCCCCNNNNNNNN" />
127 <param name="method" value="reads" /> 136 <param name="method" value="reads" />
128 <param name="prime3" value="false" /> 137 <param name="prime3" value="false" />
134 <output name="out_log" file="out_wl_paired.log" lines_diff="40" /> 143 <output name="out_log" file="out_wl_paired.log" lines_diff="40" />
135 <output name="out_html_report" file="out_wl_paired.html" /> 144 <output name="out_html_report" file="out_wl_paired.html" />
136 <output name="out_thresh" file="out_wl_paired.tresh.tab" /> 145 <output name="out_thresh" file="out_wl_paired.tresh.tab" />
137 </test> 146 </test>
138 <test expect_num_outputs="4"> <!-- As previous, identical outputs but paired collection input --> 147 <test expect_num_outputs="4"> <!-- As previous, identical outputs but paired collection input -->
139 <param name="type" value="paired_collection" /> 148 <conditional name="input_type" >
140 <param name="input_readpair" > 149 <param name="type" value="paired_collection" />
141 <collection type="paired"> 150 <param name="input_readpair" >
142 <element name="forward" ftype="fastq.gz" value="t_R1.fastq.gz" /> 151 <collection type="paired">
143 <element name="reverse" ftype="fastq.gz" value="t_R2.fastq.gz" /> 152 <element name="forward" ftype="fastq.gz" value="t_R1.fastq.gz" />
144 </collection> 153 <element name="reverse" ftype="fastq.gz" value="t_R2.fastq.gz" />
145 </param> 154 </collection>
155 </param>
156 </conditional>
146 <param name="barcode_select" value="first_read_only" /> 157 <param name="barcode_select" value="first_read_only" />
147 <param name="bc_pattern" value="CCCNNNNNNNNXXXXX" /> 158 <param name="bc_pattern" value="CCCNNNNNNNNXXXXX" />
148 <param name="bc_pattern2" value="CCCCCCCCNNNNNNNN" /> 159 <param name="bc_pattern2" value="CCCCCCCCNNNNNNNN" />
149 <param name="method" value="reads" /> 160 <param name="method" value="reads" />
150 <param name="prime3" value="false" /> 161 <param name="prime3" value="false" />
155 <output name="out_whitelist" file="out_wl_paired.txt" /> 166 <output name="out_whitelist" file="out_wl_paired.txt" />
156 <output name="out_log" file="out_wl_paired.log" lines_diff="40" /> 167 <output name="out_log" file="out_wl_paired.log" lines_diff="40" />
157 <output name="out_html_report" file="out_wl_paired.html" /> 168 <output name="out_html_report" file="out_wl_paired.html" />
158 <output name="out_thresh" file="out_wl_paired.tresh.tab" /> 169 <output name="out_thresh" file="out_wl_paired.tresh.tab" />
159 </test> 170 </test>
171 <!-- Error report on not accepting regex and lt and gt symbols -->
172 <test expect_num_outputs="3">
173 <conditional name="input_type" >
174 <param name="type" value="single" />
175 <param name="input_single" value="testYYY.40k.fastq.gz" ftype="fastq.gz" />
176 </conditional>
177 <param name="bc_pattern" value="(?P&#60;cell_1&#62;.{8,10})(?P&#60;discard_1&#62;ACTGGCCTGCGA){s&#60;=3}(?P&#60;cell_2&#62;.{9})(?P&#60;discard_2&#62;GGTAGCGGTGACA){s&#60;=3}(?P&#60;cell_3&#62;.{9})(?P&#60;umi_1&#62;.{8})T{3}.*" />
178 <param name="extract_method" value="regex" />
179 <param name="method" value="umis" />
180 <param name="prime3" value="true" />
181 <output name="out_whitelist" file="out_wl_user.single.txt" />
182 <output name="out_thresh" file="out_wl_user.single.tresh.tab" />
183 <output name="out_html_report" file="out_wl_user.single.html" />
184 </test>
160 </tests> 185 </tests>
161 <help><![CDATA[ 186 <help><![CDATA[
162 187
163 188
164 UMI-tools whitelist - Extract barcodes from fastq 189 UMI-tools whitelist - Extract barcodes from fastq