annotate cutadapt.xml @ 0:8b064ea16722

Initial version with multiple adapter support
author Lance Parsons <lparsons@princeton.edu>
date Fri, 13 May 2011 15:54:01 -0400
parents
children f6b94b76d16b
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
8b064ea16722 Initial version with multiple adapter support
Lance Parsons <lparsons@princeton.edu>
parents:
diff changeset
1 <tool id="cutadapt" name="Remove adapter sequences" version="0.9.3">
8b064ea16722 Initial version with multiple adapter support
Lance Parsons <lparsons@princeton.edu>
parents:
diff changeset
2 <description>from high-throughput sequence data</description>
8b064ea16722 Initial version with multiple adapter support
Lance Parsons <lparsons@princeton.edu>
parents:
diff changeset
3 <requirements>
8b064ea16722 Initial version with multiple adapter support
Lance Parsons <lparsons@princeton.edu>
parents:
diff changeset
4 <requirement type="python-module">cutadapt</requirement>
8b064ea16722 Initial version with multiple adapter support
Lance Parsons <lparsons@princeton.edu>
parents:
diff changeset
5 </requirements>
8b064ea16722 Initial version with multiple adapter support
Lance Parsons <lparsons@princeton.edu>
parents:
diff changeset
6
8b064ea16722 Initial version with multiple adapter support
Lance Parsons <lparsons@princeton.edu>
parents:
diff changeset
7 <command interpreter="python">cutadapt_galaxy_wrapper.py
8b064ea16722 Initial version with multiple adapter support
Lance Parsons <lparsons@princeton.edu>
parents:
diff changeset
8 #if $input.extension.startswith( "fastq"):
8b064ea16722 Initial version with multiple adapter support
Lance Parsons <lparsons@princeton.edu>
parents:
diff changeset
9 --format=fastq
8b064ea16722 Initial version with multiple adapter support
Lance Parsons <lparsons@princeton.edu>
parents:
diff changeset
10 #else
8b064ea16722 Initial version with multiple adapter support
Lance Parsons <lparsons@princeton.edu>
parents:
diff changeset
11 --format=$input.extension
8b064ea16722 Initial version with multiple adapter support
Lance Parsons <lparsons@princeton.edu>
parents:
diff changeset
12 #end if
8b064ea16722 Initial version with multiple adapter support
Lance Parsons <lparsons@princeton.edu>
parents:
diff changeset
13 #for $a in $adapters
8b064ea16722 Initial version with multiple adapter support
Lance Parsons <lparsons@princeton.edu>
parents:
diff changeset
14 -a '${a.adapter_source.adapter}'
8b064ea16722 Initial version with multiple adapter support
Lance Parsons <lparsons@princeton.edu>
parents:
diff changeset
15 #end for
8b064ea16722 Initial version with multiple adapter support
Lance Parsons <lparsons@princeton.edu>
parents:
diff changeset
16 #for $aa in $anywhere_adapters
8b064ea16722 Initial version with multiple adapter support
Lance Parsons <lparsons@princeton.edu>
parents:
diff changeset
17 -b '${aa.anywhere_adapter_source.anywhere_adapter}'
8b064ea16722 Initial version with multiple adapter support
Lance Parsons <lparsons@princeton.edu>
parents:
diff changeset
18 #end for
8b064ea16722 Initial version with multiple adapter support
Lance Parsons <lparsons@princeton.edu>
parents:
diff changeset
19 -e $error_rate
8b064ea16722 Initial version with multiple adapter support
Lance Parsons <lparsons@princeton.edu>
parents:
diff changeset
20 -n $count
8b064ea16722 Initial version with multiple adapter support
Lance Parsons <lparsons@princeton.edu>
parents:
diff changeset
21 -O $overlap
8b064ea16722 Initial version with multiple adapter support
Lance Parsons <lparsons@princeton.edu>
parents:
diff changeset
22 #if str($min) not in ('', 'None', '0'):
8b064ea16722 Initial version with multiple adapter support
Lance Parsons <lparsons@princeton.edu>
parents:
diff changeset
23 -m $min
8b064ea16722 Initial version with multiple adapter support
Lance Parsons <lparsons@princeton.edu>
parents:
diff changeset
24 #end if
8b064ea16722 Initial version with multiple adapter support
Lance Parsons <lparsons@princeton.edu>
parents:
diff changeset
25 #if str($max) not in ('', 'None', '0'):
8b064ea16722 Initial version with multiple adapter support
Lance Parsons <lparsons@princeton.edu>
parents:
diff changeset
26 -M $max
8b064ea16722 Initial version with multiple adapter support
Lance Parsons <lparsons@princeton.edu>
parents:
diff changeset
27 #end if
8b064ea16722 Initial version with multiple adapter support
Lance Parsons <lparsons@princeton.edu>
parents:
diff changeset
28 --input='$input'
8b064ea16722 Initial version with multiple adapter support
Lance Parsons <lparsons@princeton.edu>
parents:
diff changeset
29 --output='$output'
8b064ea16722 Initial version with multiple adapter support
Lance Parsons <lparsons@princeton.edu>
parents:
diff changeset
30 > $report
8b064ea16722 Initial version with multiple adapter support
Lance Parsons <lparsons@princeton.edu>
parents:
diff changeset
31 </command>
8b064ea16722 Initial version with multiple adapter support
Lance Parsons <lparsons@princeton.edu>
parents:
diff changeset
32 <inputs>
8b064ea16722 Initial version with multiple adapter support
Lance Parsons <lparsons@princeton.edu>
parents:
diff changeset
33 <param format="fastqsanger, fasta" name="input" type="data" optional="false" label="Fastq file to trim" length="100"/>
8b064ea16722 Initial version with multiple adapter support
Lance Parsons <lparsons@princeton.edu>
parents:
diff changeset
34
8b064ea16722 Initial version with multiple adapter support
Lance Parsons <lparsons@princeton.edu>
parents:
diff changeset
35 <repeat name="adapters" title="3' Adapters">
8b064ea16722 Initial version with multiple adapter support
Lance Parsons <lparsons@princeton.edu>
parents:
diff changeset
36 <conditional name="adapter_source">
8b064ea16722 Initial version with multiple adapter support
Lance Parsons <lparsons@princeton.edu>
parents:
diff changeset
37 <param name="adapter_source_list" type="select" label="Source" >
8b064ea16722 Initial version with multiple adapter support
Lance Parsons <lparsons@princeton.edu>
parents:
diff changeset
38 <option value="prebuilt" selected="true">Standard (select from the list below)</option>
8b064ea16722 Initial version with multiple adapter support
Lance Parsons <lparsons@princeton.edu>
parents:
diff changeset
39 <option value="user">Enter custom sequence</option>
8b064ea16722 Initial version with multiple adapter support
Lance Parsons <lparsons@princeton.edu>
parents:
diff changeset
40 </param>
8b064ea16722 Initial version with multiple adapter support
Lance Parsons <lparsons@princeton.edu>
parents:
diff changeset
41
8b064ea16722 Initial version with multiple adapter support
Lance Parsons <lparsons@princeton.edu>
parents:
diff changeset
42 <when value="user">
8b064ea16722 Initial version with multiple adapter support
Lance Parsons <lparsons@princeton.edu>
parents:
diff changeset
43 <param name="adapter" size="30" label="Enter custom 3' adapter sequence" type="text" value="AATTGGCC" help="Sequence of an adapter that was ligated to the 3' end. The adapter itself and anything that follows is trimmed. If multiple adapters are specified, only the best matching adapter is trimmed."/>
8b064ea16722 Initial version with multiple adapter support
Lance Parsons <lparsons@princeton.edu>
parents:
diff changeset
44 </when>
8b064ea16722 Initial version with multiple adapter support
Lance Parsons <lparsons@princeton.edu>
parents:
diff changeset
45
8b064ea16722 Initial version with multiple adapter support
Lance Parsons <lparsons@princeton.edu>
parents:
diff changeset
46 <when value="prebuilt">
8b064ea16722 Initial version with multiple adapter support
Lance Parsons <lparsons@princeton.edu>
parents:
diff changeset
47 <param name="adapter" type="select" label="Choose 3' adapter" help="Sequence of an adapter that was ligated to the 3' end. The adapter itself and anything that follows is trimmed. If multiple adapters are specified, only the best matching adapter is trimmed.">
8b064ea16722 Initial version with multiple adapter support
Lance Parsons <lparsons@princeton.edu>
parents:
diff changeset
48 <options from_file="fastx_clipper_sequences.txt">
8b064ea16722 Initial version with multiple adapter support
Lance Parsons <lparsons@princeton.edu>
parents:
diff changeset
49 <column name="name" index="1"/>
8b064ea16722 Initial version with multiple adapter support
Lance Parsons <lparsons@princeton.edu>
parents:
diff changeset
50 <column name="value" index="0"/>
8b064ea16722 Initial version with multiple adapter support
Lance Parsons <lparsons@princeton.edu>
parents:
diff changeset
51 </options>
8b064ea16722 Initial version with multiple adapter support
Lance Parsons <lparsons@princeton.edu>
parents:
diff changeset
52 </param>
8b064ea16722 Initial version with multiple adapter support
Lance Parsons <lparsons@princeton.edu>
parents:
diff changeset
53 </when>
8b064ea16722 Initial version with multiple adapter support
Lance Parsons <lparsons@princeton.edu>
parents:
diff changeset
54 </conditional>
8b064ea16722 Initial version with multiple adapter support
Lance Parsons <lparsons@princeton.edu>
parents:
diff changeset
55 </repeat>
8b064ea16722 Initial version with multiple adapter support
Lance Parsons <lparsons@princeton.edu>
parents:
diff changeset
56
8b064ea16722 Initial version with multiple adapter support
Lance Parsons <lparsons@princeton.edu>
parents:
diff changeset
57 <repeat name="anywhere_adapters" title="5' or 3' (Anywhere) Adapters" help="Sequence of an adapter that was ligated to the 5' or 3' end. If the adapter is found within the read or overlapping the 3' end of the read, the behavior is the same as for the -a option. If the adapter overlaps the 5' end (beginning of the read), the initial portion of the read matching the adapter is trimmed, but anything that follows is kept. If multiple -a or -b options are given, only the best matching adapter is trimmed.">
8b064ea16722 Initial version with multiple adapter support
Lance Parsons <lparsons@princeton.edu>
parents:
diff changeset
58 <conditional name="anywhere_adapter_source">
8b064ea16722 Initial version with multiple adapter support
Lance Parsons <lparsons@princeton.edu>
parents:
diff changeset
59 <param name="anywhere_adapter_source_list" type="select" label="Source">
8b064ea16722 Initial version with multiple adapter support
Lance Parsons <lparsons@princeton.edu>
parents:
diff changeset
60 <option value="prebuilt" selected="true">Standard (select from the list below)</option>
8b064ea16722 Initial version with multiple adapter support
Lance Parsons <lparsons@princeton.edu>
parents:
diff changeset
61 <option value="user">Enter custom sequence</option>
8b064ea16722 Initial version with multiple adapter support
Lance Parsons <lparsons@princeton.edu>
parents:
diff changeset
62 </param>
8b064ea16722 Initial version with multiple adapter support
Lance Parsons <lparsons@princeton.edu>
parents:
diff changeset
63
8b064ea16722 Initial version with multiple adapter support
Lance Parsons <lparsons@princeton.edu>
parents:
diff changeset
64 <when value="user">
8b064ea16722 Initial version with multiple adapter support
Lance Parsons <lparsons@princeton.edu>
parents:
diff changeset
65 <param name="anywhere_adapter" size="30" label="Enter custom 5' or 3' adapter sequence" type="text" value="AATTGGCC" help="Sequence of an adapter that was ligated to the 5' or 3' end. If the adapter is found within the read or overlapping the 3' end of the read, the behavior is the same as for the -a option. If the adapter overlaps the 5' end (beginning of the read), the initial portion of the read matching the adapter is trimmed, but anything that follows is kept. If multiple -a or -b options are given, only the best matching adapter is trimmed."/>
8b064ea16722 Initial version with multiple adapter support
Lance Parsons <lparsons@princeton.edu>
parents:
diff changeset
66 </when>
8b064ea16722 Initial version with multiple adapter support
Lance Parsons <lparsons@princeton.edu>
parents:
diff changeset
67 <when value="prebuilt">
8b064ea16722 Initial version with multiple adapter support
Lance Parsons <lparsons@princeton.edu>
parents:
diff changeset
68 <param name="anywhere_adapter" type="select" label="Choose 5' or 3' adapter" help="Sequence of an adapter that was ligated to the 5' or 3' end. If the adapter is found within the read or overlapping the 3' end of the read, the behavior is the same as for the -a option. If the adapter overlaps the 5' end (beginning of the read), the initial portion of the read matching the adapter is trimmed, but anything that follows is kept. If multiple -a or -b options are given, only the best matching adapter is trimmed.">
8b064ea16722 Initial version with multiple adapter support
Lance Parsons <lparsons@princeton.edu>
parents:
diff changeset
69 <options from_file="fastx_clipper_sequences.txt">
8b064ea16722 Initial version with multiple adapter support
Lance Parsons <lparsons@princeton.edu>
parents:
diff changeset
70 <column name="name" index="1"/>
8b064ea16722 Initial version with multiple adapter support
Lance Parsons <lparsons@princeton.edu>
parents:
diff changeset
71 <column name="value" index="0"/>
8b064ea16722 Initial version with multiple adapter support
Lance Parsons <lparsons@princeton.edu>
parents:
diff changeset
72 </options>
8b064ea16722 Initial version with multiple adapter support
Lance Parsons <lparsons@princeton.edu>
parents:
diff changeset
73 </param>
8b064ea16722 Initial version with multiple adapter support
Lance Parsons <lparsons@princeton.edu>
parents:
diff changeset
74 </when>
8b064ea16722 Initial version with multiple adapter support
Lance Parsons <lparsons@princeton.edu>
parents:
diff changeset
75 </conditional>
8b064ea16722 Initial version with multiple adapter support
Lance Parsons <lparsons@princeton.edu>
parents:
diff changeset
76 </repeat>
8b064ea16722 Initial version with multiple adapter support
Lance Parsons <lparsons@princeton.edu>
parents:
diff changeset
77
8b064ea16722 Initial version with multiple adapter support
Lance Parsons <lparsons@princeton.edu>
parents:
diff changeset
78 <param name="error_rate" type="float" min="0" max="1" value="0.1" label="Maximum error rate" help="Maximum allowed error rate (no. of errors divided by the length of the matching region)." />
8b064ea16722 Initial version with multiple adapter support
Lance Parsons <lparsons@princeton.edu>
parents:
diff changeset
79 <param name="count" type="integer" min="1" value="1" label="Match times" help="Try to remove adapters at most COUNT times. Useful when an adapter gets appended multiple times." />
8b064ea16722 Initial version with multiple adapter support
Lance Parsons <lparsons@princeton.edu>
parents:
diff changeset
80 <param name="overlap" type="integer" min="1" value="3" label="Minimum overlap length" help="Minimum overlap length. If the overlap between the adapter and the sequence is shorter than LENGTH, the read is not modified." />
8b064ea16722 Initial version with multiple adapter support
Lance Parsons <lparsons@princeton.edu>
parents:
diff changeset
81 <!--<param name="discard" type="boolean" checked="false" label="Discard Trimmed Reads" help="Discard reads that contain the adapter instead of trimming them. Use the 'Minimum overlap length' option in order to avoid throwing away too many randomly matching reads!" />-->
8b064ea16722 Initial version with multiple adapter support
Lance Parsons <lparsons@princeton.edu>
parents:
diff changeset
82 <param name="min" type="integer" min="0" optional="true" value="0" label="Minimum length" help="Discard trimmed reads that are shorter than LENGTH. Reads that are too short even before adapter removal are also discarded. In colorspace, an initial primer is not counted. Value of 0 means no minimum length." />
8b064ea16722 Initial version with multiple adapter support
Lance Parsons <lparsons@princeton.edu>
parents:
diff changeset
83 <param name="max" type="integer" min="0" optional="true" value="0" label="Maximum length" help="Discard trimmed reads that are longer than LENGTH. Reads that are too long even before adapter removal are also discarded. In colorspace, an initial primer is not counted. Value of 0 means no maximum length." />
8b064ea16722 Initial version with multiple adapter support
Lance Parsons <lparsons@princeton.edu>
parents:
diff changeset
84 </inputs>
8b064ea16722 Initial version with multiple adapter support
Lance Parsons <lparsons@princeton.edu>
parents:
diff changeset
85 <outputs>
8b064ea16722 Initial version with multiple adapter support
Lance Parsons <lparsons@princeton.edu>
parents:
diff changeset
86 <data format="txt" name="report" label="${tool.name} on ${on_string} (Report)" />
8b064ea16722 Initial version with multiple adapter support
Lance Parsons <lparsons@princeton.edu>
parents:
diff changeset
87 <data format="input" name="output" metadata_source="input"/>
8b064ea16722 Initial version with multiple adapter support
Lance Parsons <lparsons@princeton.edu>
parents:
diff changeset
88 </outputs>
8b064ea16722 Initial version with multiple adapter support
Lance Parsons <lparsons@princeton.edu>
parents:
diff changeset
89
8b064ea16722 Initial version with multiple adapter support
Lance Parsons <lparsons@princeton.edu>
parents:
diff changeset
90 <tests>
8b064ea16722 Initial version with multiple adapter support
Lance Parsons <lparsons@princeton.edu>
parents:
diff changeset
91 <test>
8b064ea16722 Initial version with multiple adapter support
Lance Parsons <lparsons@princeton.edu>
parents:
diff changeset
92 <param name="input" value="fa_gc_content_input.fa"/>
8b064ea16722 Initial version with multiple adapter support
Lance Parsons <lparsons@princeton.edu>
parents:
diff changeset
93 <output name="output" file="fa_gc_content_output.txt"/> <!-- NOTE(review): output name now matches the declared "output" dataset; the fixture file names look borrowed from another tool, confirm they hold cutadapt data -->
8b064ea16722 Initial version with multiple adapter support
Lance Parsons <lparsons@princeton.edu>
parents:
diff changeset
94 </test>
8b064ea16722 Initial version with multiple adapter support
Lance Parsons <lparsons@princeton.edu>
parents:
diff changeset
95 </tests>
8b064ea16722 Initial version with multiple adapter support
Lance Parsons <lparsons@princeton.edu>
parents:
diff changeset
96
8b064ea16722 Initial version with multiple adapter support
Lance Parsons <lparsons@princeton.edu>
parents:
diff changeset
97 <help>
8b064ea16722 Initial version with multiple adapter support
Lance Parsons <lparsons@princeton.edu>
parents:
diff changeset
98 This tool removes adapter sequences from DNA high-throughput
8b064ea16722 Initial version with multiple adapter support
Lance Parsons <lparsons@princeton.edu>
parents:
diff changeset
99 sequencing data. This is usually necessary when the read length of the
8b064ea16722 Initial version with multiple adapter support
Lance Parsons <lparsons@princeton.edu>
parents:
diff changeset
100 machine is longer than the molecule that is sequenced, such as in
8b064ea16722 Initial version with multiple adapter support
Lance Parsons <lparsons@princeton.edu>
parents:
diff changeset
101 microRNA data.
8b064ea16722 Initial version with multiple adapter support
Lance Parsons <lparsons@princeton.edu>
parents:
diff changeset
102
8b064ea16722 Initial version with multiple adapter support
Lance Parsons <lparsons@princeton.edu>
parents:
diff changeset
103 The tool is based on the open-source cutadapt_ tool.
8b064ea16722 Initial version with multiple adapter support
Lance Parsons <lparsons@princeton.edu>
parents:
diff changeset
104
8b064ea16722 Initial version with multiple adapter support
Lance Parsons <lparsons@princeton.edu>
parents:
diff changeset
105 -----
8b064ea16722 Initial version with multiple adapter support
Lance Parsons <lparsons@princeton.edu>
parents:
diff changeset
106
8b064ea16722 Initial version with multiple adapter support
Lance Parsons <lparsons@princeton.edu>
parents:
diff changeset
107 **Algorithm**
8b064ea16722 Initial version with multiple adapter support
Lance Parsons <lparsons@princeton.edu>
parents:
diff changeset
108
8b064ea16722 Initial version with multiple adapter support
Lance Parsons <lparsons@princeton.edu>
parents:
diff changeset
109 cutadapt uses a simple semi-global alignment algorithm, without any special optimizations.
8b064ea16722 Initial version with multiple adapter support
Lance Parsons <lparsons@princeton.edu>
parents:
diff changeset
110 For speed, the algorithm is implemented as a Python extension module in calignmodule.c.
8b064ea16722 Initial version with multiple adapter support
Lance Parsons <lparsons@princeton.edu>
parents:
diff changeset
111
8b064ea16722 Initial version with multiple adapter support
Lance Parsons <lparsons@princeton.edu>
parents:
diff changeset
112 The program is sufficiently fast for my purposes, but speedups should be simple to achieve.
8b064ea16722 Initial version with multiple adapter support
Lance Parsons <lparsons@princeton.edu>
parents:
diff changeset
113
8b064ea16722 Initial version with multiple adapter support
Lance Parsons <lparsons@princeton.edu>
parents:
diff changeset
114
8b064ea16722 Initial version with multiple adapter support
Lance Parsons <lparsons@princeton.edu>
parents:
diff changeset
115 **Partial adapter matches**
8b064ea16722 Initial version with multiple adapter support
Lance Parsons <lparsons@princeton.edu>
parents:
diff changeset
116
8b064ea16722 Initial version with multiple adapter support
Lance Parsons <lparsons@princeton.edu>
parents:
diff changeset
117 Cutadapt correctly deals with partial adapter matches. As an example, suppose
8b064ea16722 Initial version with multiple adapter support
Lance Parsons <lparsons@princeton.edu>
parents:
diff changeset
118 your adapter sequence is "ADAPTER" (specified via 3' Adapters parameter).
8b064ea16722 Initial version with multiple adapter support
Lance Parsons <lparsons@princeton.edu>
parents:
diff changeset
119 If you have these input sequences:
8b064ea16722 Initial version with multiple adapter support
Lance Parsons <lparsons@princeton.edu>
parents:
diff changeset
120
8b064ea16722 Initial version with multiple adapter support
Lance Parsons <lparsons@princeton.edu>
parents:
diff changeset
121 ::
8b064ea16722 Initial version with multiple adapter support
Lance Parsons <lparsons@princeton.edu>
parents:
diff changeset
122
8b064ea16722 Initial version with multiple adapter support
Lance Parsons <lparsons@princeton.edu>
parents:
diff changeset
123 MYSEQUENCEADAPTER
8b064ea16722 Initial version with multiple adapter support
Lance Parsons <lparsons@princeton.edu>
parents:
diff changeset
124 MYSEQUENCEADAP
8b064ea16722 Initial version with multiple adapter support
Lance Parsons <lparsons@princeton.edu>
parents:
diff changeset
125 MYSEQUENCEADAPTERSOMETHINGELSE
8b064ea16722 Initial version with multiple adapter support
Lance Parsons <lparsons@princeton.edu>
parents:
diff changeset
126
8b064ea16722 Initial version with multiple adapter support
Lance Parsons <lparsons@princeton.edu>
parents:
diff changeset
127 All of them will be trimmed to "MYSEQUENCE". If the sequence starts with an
8b064ea16722 Initial version with multiple adapter support
Lance Parsons <lparsons@princeton.edu>
parents:
diff changeset
128 adapter, like this:
8b064ea16722 Initial version with multiple adapter support
Lance Parsons <lparsons@princeton.edu>
parents:
diff changeset
129
8b064ea16722 Initial version with multiple adapter support
Lance Parsons <lparsons@princeton.edu>
parents:
diff changeset
130 ::
8b064ea16722 Initial version with multiple adapter support
Lance Parsons <lparsons@princeton.edu>
parents:
diff changeset
131
8b064ea16722 Initial version with multiple adapter support
Lance Parsons <lparsons@princeton.edu>
parents:
diff changeset
132 ADAPTERSOMETHING
8b064ea16722 Initial version with multiple adapter support
Lance Parsons <lparsons@princeton.edu>
parents:
diff changeset
133
8b064ea16722 Initial version with multiple adapter support
Lance Parsons <lparsons@princeton.edu>
parents:
diff changeset
134 It will be empty after trimming.
8b064ea16722 Initial version with multiple adapter support
Lance Parsons <lparsons@princeton.edu>
parents:
diff changeset
135
8b064ea16722 Initial version with multiple adapter support
Lance Parsons <lparsons@princeton.edu>
parents:
diff changeset
136 When the allowed error rate is sufficiently high, errors in
8b064ea16722 Initial version with multiple adapter support
Lance Parsons <lparsons@princeton.edu>
parents:
diff changeset
137 the adapter sequence are allowed. For example, ADABTER (1 mismatch), ADAPTR (1 deletion),
8b064ea16722 Initial version with multiple adapter support
Lance Parsons <lparsons@princeton.edu>
parents:
diff changeset
138 and ADAPPTER (1 insertion) will all be recognized if the error rate is set to 0.15.
8b064ea16722 Initial version with multiple adapter support
Lance Parsons <lparsons@princeton.edu>
parents:
diff changeset
139
8b064ea16722 Initial version with multiple adapter support
Lance Parsons <lparsons@princeton.edu>
parents:
diff changeset
140
8b064ea16722 Initial version with multiple adapter support
Lance Parsons <lparsons@princeton.edu>
parents:
diff changeset
141 **Allowing adapters anywhere**
8b064ea16722 Initial version with multiple adapter support
Lance Parsons <lparsons@princeton.edu>
parents:
diff changeset
142
8b064ea16722 Initial version with multiple adapter support
Lance Parsons <lparsons@princeton.edu>
parents:
diff changeset
143 Cutadapt assumes that any adapter specified via the *3' Adapters* parameter
8b064ea16722 Initial version with multiple adapter support
Lance Parsons <lparsons@princeton.edu>
parents:
diff changeset
144 was ligated to the 3' end of the sequence. This is the correct assumption for
8b064ea16722 Initial version with multiple adapter support
Lance Parsons <lparsons@princeton.edu>
parents:
diff changeset
145 at least the SOLiD and Illumina small RNA protocols and probably others.
8b064ea16722 Initial version with multiple adapter support
Lance Parsons <lparsons@princeton.edu>
parents:
diff changeset
146
8b064ea16722 Initial version with multiple adapter support
Lance Parsons <lparsons@princeton.edu>
parents:
diff changeset
147 If, on the other hand, your adapter can also be ligated to the 5' end (on
8b064ea16722 Initial version with multiple adapter support
Lance Parsons <lparsons@princeton.edu>
parents:
diff changeset
148 purpose or by accident), you should tell cutadapt so by using the *5' or 3' (Anywhere)
8b064ea16722 Initial version with multiple adapter support
Lance Parsons <lparsons@princeton.edu>
parents:
diff changeset
149 Adapters* parameter. It will then use a different alignment algorithm and
8b064ea16722 Initial version with multiple adapter support
Lance Parsons <lparsons@princeton.edu>
parents:
diff changeset
150 correctly trim adapters that appear in the beginning of a read. An adapter
8b064ea16722 Initial version with multiple adapter support
Lance Parsons <lparsons@princeton.edu>
parents:
diff changeset
151 specified this way will also be found if it appears only partially in the
8b064ea16722 Initial version with multiple adapter support
Lance Parsons <lparsons@princeton.edu>
parents:
diff changeset
152 beginning of a read. For example, these sequences
8b064ea16722 Initial version with multiple adapter support
Lance Parsons <lparsons@princeton.edu>
parents:
diff changeset
153
8b064ea16722 Initial version with multiple adapter support
Lance Parsons <lparsons@princeton.edu>
parents:
diff changeset
154 ::
8b064ea16722 Initial version with multiple adapter support
Lance Parsons <lparsons@princeton.edu>
parents:
diff changeset
155
8b064ea16722 Initial version with multiple adapter support
Lance Parsons <lparsons@princeton.edu>
parents:
diff changeset
156 ADAPTERMYSEQUENCE
8b064ea16722 Initial version with multiple adapter support
Lance Parsons <lparsons@princeton.edu>
parents:
diff changeset
157 PTERMYSEQUENCE
8b064ea16722 Initial version with multiple adapter support
Lance Parsons <lparsons@princeton.edu>
parents:
diff changeset
158
8b064ea16722 Initial version with multiple adapter support
Lance Parsons <lparsons@princeton.edu>
parents:
diff changeset
159 will be trimmed to "MYSEQUENCE". Note that the regular algorithm would trim
8b064ea16722 Initial version with multiple adapter support
Lance Parsons <lparsons@princeton.edu>
parents:
diff changeset
160 the first read to an empty sequence.
8b064ea16722 Initial version with multiple adapter support
Lance Parsons <lparsons@princeton.edu>
parents:
diff changeset
161
8b064ea16722 Initial version with multiple adapter support
Lance Parsons <lparsons@princeton.edu>
parents:
diff changeset
162 This parameter currently does not work with color space data.
8b064ea16722 Initial version with multiple adapter support
Lance Parsons <lparsons@princeton.edu>
parents:
diff changeset
163
8b064ea16722 Initial version with multiple adapter support
Lance Parsons <lparsons@princeton.edu>
parents:
diff changeset
164
8b064ea16722 Initial version with multiple adapter support
Lance Parsons <lparsons@princeton.edu>
parents:
diff changeset
165 .. _cutadapt: http://code.google.com/p/cutadapt/
8b064ea16722 Initial version with multiple adapter support
Lance Parsons <lparsons@princeton.edu>
parents:
diff changeset
166 </help>
8b064ea16722 Initial version with multiple adapter support
Lance Parsons <lparsons@princeton.edu>
parents:
diff changeset
167
8b064ea16722 Initial version with multiple adapter support
Lance Parsons <lparsons@princeton.edu>
parents:
diff changeset
168 </tool>