Repository 'cutadapt'
hg clone https://toolshed.g2.bx.psu.edu/repos/lparsons/cutadapt

Changeset 39:fe74900d6dc7 (2024-05-17)
Previous changeset 38:b1c926deaa2d (2024-04-15) Next changeset 40:aa784cb3810d (2024-07-08)
Commit message:
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/cutadapt commit a5b6cb44f81abe57a4269bded1fa4d41f462f9d5
modified:
cutadapt.xml
macros.xml
added:
test-data/cutadapt_out1_pair_adapters.fq.gz
test-data/cutadapt_out2_pair_adapters.fq.gz
test-data/cutadapt_rest_json.txt
removed:
test-data/cutadapt_rest.json
b
diff -r b1c926deaa2d -r fe74900d6dc7 cutadapt.xml
--- a/cutadapt.xml Mon Apr 15 06:31:32 2024 +0000
+++ b/cutadapt.xml Fri May 17 13:32:03 2024 +0000
[
b'@@ -7,17 +7,18 @@\n     <expand macro=\'xrefs\'/>\n     <expand macro=\'requirements\' />\n     <version_command>cutadapt --version</version_command>\n-\n     <command detect_errors="exit_code"><![CDATA[\n-## Link in the input and output files, so Cutadapt can tell their type\n-\n #import re\n-#set read1 = "input_f"\n-#set read2 = "input_r"\n-#set paired = False\n+\n+## set things up for handling inputs and outputs in single- vs paired-end modes\n #set library_type = str($library.type)\n+#if $library_type == \'single\':\n+    #set paired = False\n+#else:\n+    #set paired = True\n+#end if\n+\n #if $library_type == \'paired\':\n-    #set paired = True\n     #set input_1 = $library.input_1\n     #set input_2 = $library.input_2\n     ## Avoid the paired read input files sharing the same name, else the program still runs but \n@@ -28,8 +29,7 @@\n         #set read1 = read1 + "_1"\n         #set read2 = read2 + "_2"\n     #end if\n-#else if $library_type == \'paired_collection\'\n-    #set paired = True\n+#elif $library_type == \'paired_collection\'\n     #set input_1 = $library.input_1.forward\n     #set input_2 = $library.input_1.reverse\n     #set read1 = re.sub(\'[^\\w\\-\\s]\', \'_\', str($library.input_1.name)) + "_1"\n@@ -50,7 +50,6 @@\n     #set ext=ext+".bz2"\n #end if\n \n-\n #set read1 = $read1 + $ext\n #set out1 = "out1" + $ext\n #set rest_output = "rest_output" + $ext\n@@ -58,7 +57,6 @@\n #set too_short_output = "too_short_output" + $ext\n #set too_long_output = "too_long_output" + $ext\n #set untrimmed_output = "untrimmed_output" + $ext\n-ln -f -s \'${input_1}\' \'$read1\' &&\n \n #if $paired:\n     #if $input_2.is_of_type("fastq", "fastq.gz", "fastq.bz2"):\n@@ -76,35 +74,52 @@\n     #set too_short_paired_output = "too_short_paired_output" + $ext2\n     #set too_long_paired_output = "too_long_paired_output" + $ext2\n     #set untrimmed_paired_output = "untrimmed_paired_output" + $ext2\n-    ln -f -s \'${input_2}\' \'$read2\' &&\n #end if\n \n-## Run Cutadapt\n-\n+## Link in the input and output files, so Cutadapt can tell their type\n+ln -f -s \'$input_1\' \'$read1\' &&\n+#if $paired:\n+    ln -f -s \'$input_2\' \'$read2\' &&\n+#end if\n+## Create dedicated output folder if needed\n #if \'multiple_output\' in $output_selector:\n     mkdir split &&\n #end if\n \n+## Run Cutadapt\n cutadapt\n \n -j=\\${GALAXY_SLOTS:-4}\n \n-#if \'json_stats\' in $output_selector:\n-    --json stats.json\n-#end if\n+## Read1 trimming\n+#set ADAPTER_ARGUMENT="-a"\n+#for $a in $library.r1.adapters\n+    @adapter_cli@\n+#end for\n+#set ADAPTER_ARGUMENT="-b"\n+#for $a in $library.r1.anywhere_adapters\n+    @adapter_cli@\n+#end for\n+#set ADAPTER_ARGUMENT="-g"\n+#for $a in $library.r1.front_adapters\n+    @adapter_cli@\n+#end for\n \n-#if str( $library.type ) == "single":\n-    @read1_options@\n-    #if \'multiple_output\' in $output_selector:\n-        --output=\'split/{name}.${input_1.ext}\'\n-    #else:\n-        --output=\'$out1\'\n-    #end if\n-#else:\n-    @read1_options@\n-    @read2_options@\n-    --output=\'$out1\'\n-    --paired-output=\'$out2\'\n+#if $paired:\n+    ## Read2 trimming\n+    #set ADAPTER_ARGUMENT="-A"\n+    #for $a in $library.r2.adapters2\n+        @adapter_cli@\n+    #end for\n+    #set ADAPTER_ARGUMENT="-B"\n+    #for $a in $library.r2.anywhere_adapters2\n+        @adapter_cli@\n+    #end for\n+    #set ADAPTER_ARGUMENT="-G"\n+    #for $a in $library.r2.front_adapters2\n+        @adapter_cli@\n+    #end for\n+    $library.pair_adapters\n #end if\n \n --error-rate=$adapter_options.error_rate\n@@ -116,53 +131,64 @@\n --action=$adapter_options.action\n $adapter_options.revcomp\n \n+#if $other_trimming_options.cut != 0:\n+    --cut=$other_trimming_options.cut\n+#end if\n+#if $paired and $other_trimming_options.cut2 != 0:\n+    -U $other_trimming_options.cut2\n+#end if\n+#if str($other_trimming_options.quality_cutoff) != \'0\':\n+    --quality-cutoff=$other_trimming_options.quality_cutoff\n+#end if\n+#if $paired and str($other_trimming_options.quality_cutoff2) != \'\':\n+    -Q $other_trimming_options.quality_cutoff2\n+#end if\n+#if str($other_trimming_options.nextseq_trim) != \'0\':\n+    --nextseq-trim=$othe'..b'o on. The adapters are then always removed in pairs from a read pair.\n+\n+For example, if you specify the following two 3\'-end adapters for the R1 reads:\n+\n+- ``AAAAA``\n+- ``GGGGG``\n+\n+and these two 3\'-end adapters for the R2 reads:\n+\n+- ``CCCC``\n+- ``TTTT``\n \n------------\n+then, with this option enabled, the tool will trim a pair of reads only if:\n+\n+- either ``AAAAA`` is found in R1 and ``CCCCC`` is found in R2,\n+- or ``GGGG`` is found in R1 and ``TTTT`` is found in R2.\n+\n+Two limitations exist in this mode:\n+\n+1. You need to provide equal numbers of R1 and R2 adapters of each type to allow pair formation, or  the tool run will fail.\n+2. The algorithm identifies the best-matching R1 adapter first and then checks whether it can find its corresponding R2 adapter. If not, the read pair remains unchanged, even though it is, in theory, possible that a different R1 adapter that does not fit as well would have had a corresponding R2 adapter present, i.e., some legitimate adapter pairs might remain unhandled.\n+\n+This mode is useful, for example, for `demultiplexing Illumina unique dual indices (UDIs)`_.\n+\n+-----\n+\n+Outputs\n+*******\n \n - Trimmed reads\n \n@@ -1429,10 +1584,13 @@\n     * Report\n     * Info file\n \n+-----------\n \n **Report**\n \n-Cutadapt can output per-adapter statistics if you select to output the report above.\n+-----------\n+\n+Cutadapt can output per-adapter statistics if you select to generate the report above.\n \n Example:\n \n@@ -1461,9 +1619,12 @@\n         Read 1:        24,090 bp\n         Read 2:        24,081 bp\n \n+-----------\n \n **Info file**\n \n+-----------\n+\n The info file contains information about the found adapters. The output is a tab-separated text file. Each line corresponds to one read of the input file.\n \n Columns contain the following data:\n@@ -1493,13 +1654,10 @@\n \n If the --times option is used and greater than 1, each read can appear more than once in the info file. There will be one line for each found adapter, all with identical read names. Only for the first of those lines will the concatenation of columns 5-7 be identical to the original read sequence (and accordingly for columns 9-11). For subsequent lines, the shown sequence are the ones that were used in subsequent rounds of adapter trimming, that is, they get successively shorter.\n \n-\n --------------------\n \n-**Rename Reads**\n-\n---------------------\n-\n+Renaming Reads\n+**************\n \n The --rename option expects a template string such as {id} extra_info {adapter_name} as a parameter. It can contain regular text and placeholders that consist of a name enclosed in curly braces ({placeholdername}).\n \n@@ -1523,27 +1681,17 @@\n In addition, it is possible to write a placeholder as {r1.placeholdername} or {r2.placeholdername}, which always takes the replacement value from R1 or R2, respectively.\n The {r1.placeholder} and {r2.placeholder} notation is available for all placeholders except {rn} and {id} because the read ID needs to be identical for both reads.\n \n---------------------\n-\n-**More Information**\n-\n---------------------\n-\n-See the excellent `Cutadapt documentation`_\n-\n-.. _Cutadapt: https://cutadapt.readthedocs.io/en/stable/\n-.. _`Cutadapt documentation`: https://cutadapt.readthedocs.io/en/latest/index.html\n-.. _`Illumina TruSeq Adapters De-Mystified`: http://tucf-genomics.tufts.edu/documents/protocols/TUCF_Understanding_Illumina_TruSeq_Adapters.pdf\n-\n-\n---------------------\n+-----\n \n **Galaxy Wrapper Development**\n \n---------------------\n+Original author: Lance Parsons <lparsons@princeton.edu>\n+\n+-----\n \n-Author: Lance Parsons <lparsons@princeton.edu>\n-\n+.. _`Cutadapt documentation`: https://cutadapt.readthedocs.io\n+.. _`Illumina TruSeq Adapters De-Mystified`: http://tucf-genomics.tufts.edu/documents/protocols/TUCF_Understanding_Illumina_TruSeq_Adapters.pdf\n+.. _`demultiplexing Illumina unique dual indices (UDIs)`: https://cutadapt.readthedocs.io/en/stable/guide.html#unique-dual-indices\n     ]]></help>\n     <expand macro="citations" />\n </tool>\n'
b
diff -r b1c926deaa2d -r fe74900d6dc7 macros.xml
--- a/macros.xml Mon Apr 15 06:31:32 2024 +0000
+++ b/macros.xml Fri May 17 13:32:03 2024 +0000
[
b'@@ -1,9 +1,9 @@\n <macros>\n     <token name="@TOOL_VERSION@">4.8</token>\n-    <token name="@VERSION_SUFFIX@">0</token>\n+    <token name="@VERSION_SUFFIX@">1</token>\n     <token name="@FASTQ_TYPES@">fastq.bz2,fastq.gz,fastq,fasta.bz2,fasta.gz,fasta</token>\n     <xml name="edam_ontology">\n-        <edam_topics>                                                                                  \n+        <edam_topics>\n             <edam_topic>topic_0632</edam_topic>\n         </edam_topics>\n         <edam_operations>\n@@ -25,237 +25,127 @@\n             <xref type=\'bio.tools\'>cutadapt</xref>\n         </xrefs>\n     </xml>\n-\n     <!-- parametrized token - you need to set `$ADAPTER_ARGUMENT` -->\n     <token name="@adapter_cli@">\n         #if $a.adapter_source.adapter_source_list == \'builtin\':\n             $ADAPTER_ARGUMENT \'${a.adapter_source.adapter.fields.name}\'=\'${a.adapter_source.adapter}${a.single_noindels}\'\n-        #else if $a.adapter_source.adapter_source_list == \'file\':\n+        #elif $a.adapter_source.adapter_source_list == \'file\':\n             $ADAPTER_ARGUMENT file:\'${a.adapter_source.adapter_file}${a.single_noindels}\'\n-        #else if str($a.adapter_source.adapter_name) != "":\n+        #elif str($a.adapter_source.adapter_name) != "":\n             $ADAPTER_ARGUMENT \'${a.adapter_source.adapter_name}\'=\'${a.adapter_source.adapter}${a.single_noindels}\'\n-        #else\n+        #else:\n             $ADAPTER_ARGUMENT \'${a.adapter_source.adapter}${a.single_noindels}\'\n-         #end if\n-    </token>\n-\n-     <token name="@read1_options@"><![CDATA[\n-\n-        ## Read1 trimming\n-\n-        #set ADAPTER_ARGUMENT="-a"\n-        #for $a in $library.r1.adapters\n-            @adapter_cli@\n-        #end for\n-        \n-        #set ADAPTER_ARGUMENT="-b"\n-        #for $a in $library.r1.anywhere_adapters\n-            @adapter_cli@\n-        #end for\n-        \n-        #set ADAPTER_ARGUMENT="-g"\n-        #for $a in $library.r1.front_adapters\n-            @adapter_cli@\n-        #end for\n-\n-        #if str($cut) != \'0\':\n-            -u $cut\n-        #end if\n-\n-        ## Additional Outputs\n-\n-        #if \'info_file\' in $output_selector:\n-            --info-file=$info_file\n-        #end if\n-        #if \'rest_file\' in $output_selector:\n-            --rest-file=\'${rest_output}\'\n-        #end if\n-        #if \'wildcard_file\' in $output_selector:\n-            --wildcard-file=\'${wild_output}\'\n-        #end if\n-        #if \'too_short_file\' in $output_selector:\n-            --too-short-output=\'${too_short_output}\'\n-        #end if\n-        #if \'too_long_file\' in $output_selector:\n-            --too-long-output=\'${too_long_output}\'\n-        #end if\n-        #if \'untrimmed_file\' in $output_selector:\n-            --untrimmed-output=\'${untrimmed_output}\'\n-        #end if\n-\n-    ]]></token>\n-\n-    <token name="@read2_options@"><![CDATA[\n-\n-        ## Read2 trimming\n-\n-        #set ADAPTER_ARGUMENT="-A"\n-        #for $a in $library.r2.adapters2\n-            @adapter_cli@\n-        #end for\n-        #set ADAPTER_ARGUMENT="-B"\n-        #for $a in $library.r2.anywhere_adapters2\n-            @adapter_cli@\n-        #end for\n-        #set ADAPTER_ARGUMENT="-G"\n-        #for $a in $library.r2.front_adapters2\n-            @adapter_cli@\n-        #end for\n-\n-        #if str($library.r2.cut2) != \'0\':\n-            -U $library.r2.cut2\n         #end if\n-\n-        ## Additional Outputs - Read 2\n-\n-        #if \'too_short_file\' in $output_selector:\n-            --too-short-paired-output=\'${too_short_paired_output}\'\n-        #end if\n-        #if \'too_long_file\' in $output_selector:\n-            --too-long-paired-output=\'${too_long_paired_output}\'\n-        #end if\n-        #if \'untrimmed_file\' in $output_selector:\n-            --untrimmed-paired-output=\'${untrimmed_paired_output}\'\n-        #end if\n-\n-    ]]></token>\n-\n-        <xml name="adapter_sanitizer">\n-            <sanitizer>\n-                <valid initial="string.digits">\n-                    <add value="A"/><!--standard nucleoti'..b'f a \'^\' character is prepended (\'anchoring\'), the adapter is only found if it is a prefix of the read. To search for a linked adapter, separate the 2 sequences with 3 dots (ADAPTER1...ADAPTER2), see Help below.">\n-                    <expand macro="adapter_conditional" argument="-G" adapter_type="5\'"/>\n-                </repeat>\n-                <repeat name="anywhere_adapters2" title="5\' or 3\' (Anywhere) Adapters" help="Sequence of an adapter that may be ligated to the 5\' or 3\' end of the second read in each pair. Both types of matches as described under under 3\' und 5\' Adapters are allowed. If the first base of the read is part of the match, the behavior is as with 5\' Adapters, otherwise as with 3\' Adapters. This option is mostly for rescuing failed library preparations - do not use if you know which end your adapter was ligated to!">\n-                    <expand macro="adapter_conditional" argument="-B" adapter_type="5\' or 3\'"/>\n-                </repeat>\n-\n-                <!-- read modification -->\n-                <param name="cut2" argument="-U" type="integer" value="0" optional="true" label="Cut bases from the second read in each pair." help="Remove bases from the beginning or end of each read before trimming adapters. If positive, the bases are removed from the beginning of each read. If negative, the bases are removed from the end of each read." />\n-                <param name="quality_cutoff2" argument="-Q" type="text" optional="true" label="Optional separate quality cutoff for Read 2" help="Trim low-quality bases from 5\' and/or 3\' ends of each read before adapter removal. If one value is given, only the 3\' end is trimmed. If two comma-separated cutoffs are given, the 5\' end is trimmed with the first cutoff, the 3\' end with the second. Leave blank to use the same value as for Read 1">\n-                    <sanitizer>\n-                        <valid initial="string.digits"><add value="," /></valid>\n-                    </sanitizer>\n-                    <validator type="regex">[0-9]+(,[0-9])?</validator>\n-                </param>\n-                <!-- read filtering-->\n-                <param name="minimum_length2" type="integer" min="0" value="" optional="true" label="Minimum length (R2)" />\n-                <param name="maximum_length2" type="integer" min="0" value="" optional="true" label="Maximum length (R2)" />\n-            </section>\n-        </xml>\n-\n-        <xml name="inherit_format_1">\n-            <actions>\n-                <conditional name="library.type">\n-                    <when value="single">\n-                        <action type="format">\n-                            <option type="from_param" name="library.input_1" param_attribute="ext" />\n-                        </action>\n-                    </when>\n-                    <when value="paired">\n-                        <action type="format">\n-                            <option type="from_param" name="library.input_1" param_attribute="ext" />\n-                        </action>\n-                    </when>\n-                </conditional>\n-            </actions>\n-        </xml>\n-\n-        <xml name="inherit_format_2">\n-            <actions>\n-                <conditional name="library.type">\n-                    <when value="paired">\n-                        <action type="format">\n-                            <option type="from_param" name="library.input_2" param_attribute="ext" />\n-                        </action>\n-                    </when>\n-                </conditional>\n-            </actions>\n-        </xml>\n-\n+        </actions>\n+    </xml>\n+    <xml name="inherit_format_2">\n+        <actions>\n+            <conditional name="library.type">\n+                <when value="paired">\n+                    <action type="format">\n+                        <option type="from_param" name="library.input_2" param_attribute="ext" />\n+                    </action>\n+                </when>\n+            </conditional>\n+        </actions>\n+    </xml>\n </macros>\n'
b
diff -r b1c926deaa2d -r fe74900d6dc7 test-data/cutadapt_out1_pair_adapters.fq.gz
b
Binary file test-data/cutadapt_out1_pair_adapters.fq.gz has changed
b
diff -r b1c926deaa2d -r fe74900d6dc7 test-data/cutadapt_out2_pair_adapters.fq.gz
b
Binary file test-data/cutadapt_out2_pair_adapters.fq.gz has changed
b
diff -r b1c926deaa2d -r fe74900d6dc7 test-data/cutadapt_rest.json
--- a/test-data/cutadapt_rest.json Mon Apr 15 06:31:32 2024 +0000
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
@@ -1,90 +0,0 @@
-{
-  "tag": "Cutadapt report",
-  "schema_version": [0, 1],
-  "cutadapt_version": "3.5",
-  "python_version": "3.9.7",
-  "command_line_arguments": [
-    "-j=1",
-    "--json",
-    "stats.json",
-    "-a",
-    "AAAGATG",
-    "--rest-file=rest_output.fa",
-    "--output=out1.fa",
-    "--error-rate=0.1",
-    "--times=1",
-    "--overlap=3",
-    "--action=trim",
-    "cutadapt_rest_fa.fa"
-  ],
-  "cores": 1,
-  "input": {
-    "path1": "cutadapt_rest_fa.fa",
-    "path2": null,
-    "paired": false
-  },
-  "read_counts": {
-    "input": 5,
-    "filtered": {
-      "too_short": null,
-      "too_long": null,
-      "too_many_n": null,
-      "too_many_expected_errors": null,
-      "casava_filtered": null,
-      "discard_trimmed": null,
-      "discard_untrimmed": null
-    },
-    "output": 5,
-    "reverse_complemented": null,
-    "read1_with_adapter": 5,
-    "read2_with_adapter": null
-  },
-  "basepair_counts": {
-    "input": 97,
-    "input_read1": 97,
-    "input_read2": null,
-    "quality_trimmed": null,
-    "quality_trimmed_read1": null,
-    "quality_trimmed_read2": null,
-    "poly_a_trimmed": null,
-    "poly_a_trimmed_read1": null,
-    "poly_a_trimmed_read2": null,
-    "output": 35,
-    "output_read1": 35,
-    "output_read2": null
-  },
-  "adapters_read1": [
-    {
-      "name": "1",
-      "total_matches": 5,
-      "on_reverse_complement": null,
-      "linked": false,
-      "five_prime_end": null,
-      "three_prime_end": {
-        "type": "regular_three_prime",
-        "sequence": "AAAGATG",
-        "error_rate": 0.1,
-        "indels": true,
-        "error_lengths": [7],
-        "matches": 5,
-        "adjacent_bases": {
-          "A": 0,
-          "C": 0,
-          "G": 5,
-          "T": 0,
-          "": 0
-        },
-        "dominant_adjacent_base": null,
-        "trimmed_lengths": [
-          {"len": 7, "expect": 0.0, "counts": [1]},
-          {"len": 12, "expect": 0.0, "counts": [1]},
-          {"len": 14, "expect": 0.0, "counts": [2]},
-          {"len": 15, "expect": 0.0, "counts": [1]}
-        ]
-      }
-    }
-  ],
-  "adapters_read2": null,
-  "poly_a_trimmed_read1": null,
-  "poly_a_trimmed_read2": null
-}
b
diff -r b1c926deaa2d -r fe74900d6dc7 test-data/cutadapt_rest_json.txt
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/cutadapt_rest_json.txt Fri May 17 13:32:03 2024 +0000
[
@@ -0,0 +1,69 @@
+  "input": {
+    "path1": "cutadapt_rest_fa.fa",
+    "path2": null,
+    "paired": false
+  },
+  "read_counts": {
+    "input": 5,
+    "filtered": {
+      "too_short": 0,
+      "too_long": null,
+      "too_many_n": null,
+      "too_many_expected_errors": null,
+      "casava_filtered": null,
+      "discard_trimmed": null,
+      "discard_untrimmed": null
+    },
+    "output": 5,
+    "reverse_complemented": null,
+    "read1_with_adapter": 5,
+    "read2_with_adapter": null
+  },
+  "basepair_counts": {
+    "input": 97,
+    "input_read1": 97,
+    "input_read2": null,
+    "quality_trimmed": null,
+    "quality_trimmed_read1": null,
+    "quality_trimmed_read2": null,
+    "poly_a_trimmed": null,
+    "poly_a_trimmed_read1": null,
+    "poly_a_trimmed_read2": null,
+    "output": 35,
+    "output_read1": 35,
+    "output_read2": null
+  },
+  "adapters_read1": [
+    {
+      "name": "1",
+      "total_matches": 5,
+      "on_reverse_complement": null,
+      "linked": false,
+      "five_prime_end": null,
+      "three_prime_end": {
+        "type": "regular_three_prime",
+        "sequence": "AAAGATG",
+        "error_rate": 0.1,
+        "indels": true,
+        "error_lengths": [7],
+        "matches": 5,
+        "adjacent_bases": {
+          "A": 0,
+          "C": 0,
+          "G": 5,
+          "T": 0,
+          "": 0
+        },
+        "dominant_adjacent_base": null,
+        "trimmed_lengths": [
+          {"len": 7, "expect": 0.0, "counts": [1]},
+          {"len": 12, "expect": 0.0, "counts": [1]},
+          {"len": 14, "expect": 0.0, "counts": [2]},
+          {"len": 15, "expect": 0.0, "counts": [1]}
+        ]
+      }
+    }
+  ],
+  "adapters_read2": null,
+  "poly_a_trimmed_read1": null,
+  "poly_a_trimmed_read2": null