Repository 'read_it_and_keep'
hg clone https://toolshed.g2.bx.psu.edu/repos/iuc/read_it_and_keep

Changeset 0:554aa2a63f04 (2022-01-28)
Next changeset 1:1563b58905f4 (2022-03-17)
Commit message:
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/read-it-and-keep commit 4b41e2742ba5f9c957e13a188ca49e60e16ae13b"
added:
all_fasta.loc.sample
read-it-and-keep.xml
test-data/output_test1.fastq.gz
test-data/output_test2.fastq.gz
test-data/output_test3.fastq.gz
test-data/reference.fasta
test-data/test1.fastq.gz
test-data/test2.fastq.gz
test-data/test3.fastq.gz
tool_data_table_conf.xml.sample
trim_reference.py
b
diff -r 000000000000 -r 554aa2a63f04 all_fasta.loc.sample
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/all_fasta.loc.sample Fri Jan 28 18:47:34 2022 +0000
b
@@ -0,0 +1,18 @@
+#This file lists the locations and dbkeys of all the fasta files
+#under the "genome" directory (a directory that contains a directory
+#for each build). The script extract_fasta.py will generate the file
+#all_fasta.loc. This file has the format (white space characters are
+#TAB characters):
+#
+#<unique_build_id> <dbkey> <display_name> <file_path>
+#
+#So, all_fasta.loc could look something like this:
+#
+#apiMel3 apiMel3 Honeybee (Apis mellifera): apiMel3 /path/to/genome/apiMel3/apiMel3.fa
+#hg19canon hg19 Human (Homo sapiens): hg19 Canonical /path/to/genome/hg19/hg19canon.fa
+#hg19full hg19 Human (Homo sapiens): hg19 Full /path/to/genome/hg19/hg19full.fa
+#
+#Your all_fasta.loc file should contain an entry for each individual
+#fasta file. So there will be multiple fasta files for each build,
+#such as with hg19 above.
+#
b
diff -r 000000000000 -r 554aa2a63f04 read-it-and-keep.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/read-it-and-keep.xml Fri Jan 28 18:47:34 2022 +0000
[
b'@@ -0,0 +1,162 @@\n+<tool id="read_it_and_keep" name="Read It and Keep" version="@TOOL_VERSION@+galaxy0" profile="20.09">\n+    <macros>\n+        <token name="@FASTQ_FORMATS@">fastq,fastq.gz,fastqsanger,fastqsanger.gz</token>\n+        <token name="@TOOL_VERSION@">0.1.0</token>\n+    </macros>\n+    <requirements>\n+        <requirement type="package" version="@TOOL_VERSION@">read-it-and-keep</requirement>\n+        <requirement type="package">python</requirement>\n+    </requirements>\n+    <command detect_errors="exit_code"><![CDATA[\n+        #if $ref_source.source == "history"\n+            ln -s \'$ref_source.ref_fasta\' ref${trim_reference}.fasta &&\n+        #elif $ref_source.source == "builtin"\n+            ln -s \'$ref_source.ref_fasta_builtin.path\' ref${trim_reference}.fasta &&\n+        #end if\n+        #if $trim_reference\n+            python \'$__tool_directory__/trim_reference.py\' ref${trim_reference}.fasta ref.fasta &&\n+        #end if\n+        #if $reads.read_type == "paired"\n+            ln -s \'$reads.read1\' read1.fastq &&\n+            ln -s \'$reads.read2\' read2.fastq &&\n+        #elif $reads.read_type == \'paired_collection\'\n+            ln -s \'$reads.paired_reads.forward\' read1.fastq &&\n+            ln -s \'$reads.paired_reads.reverse\' read2.fastq &&\n+        #elif $reads.read_type == \'single\'\n+            ln -s \'$reads.single_read1\' read1.fastq &&\n+        #end if\n+        readItAndKeep\n+            --tech $sequencing_tech\n+            --ref_fasta ref.fasta\n+            --min_map_length $adv.min_map_length\n+            --min_map_length_pc $adv.min_map_length_pc\n+            --reads1 read1.fastq\n+            #if $reads.read_type != "single"\n+                --reads2 read2.fastq\n+            #end if\n+            -o output\n+            #if $reads.read_type == \'single\'\n+                && mv output.reads.fastq.gz output.reads_1.fastq.gz\n+            #end if\n+    ]]></command>\n+    <inputs>\n+        <conditional name="reads">\n+            <param type="select" label="Read type" name="read_type">\n+                <option value="paired" selected="true">Paired end</option>\n+                <option value="paired_collection">Paired collection</option>\n+                <option value="single">Single ended</option>\n+            </param>\n+            <when value="paired">\n+                <param type="data" format="@FASTQ_FORMATS@" name="read1" label="Read1" />\n+                <param type="data" format="@FASTQ_FORMATS@" name="read2" label="Read2" />\n+            </when>\n+            <when value="paired_collection">\n+                <param type="data_collection" collection_type="paired" format="@FASTQ_FORMATS@" name="paired_reads" label="Reads" />\n+            </when>\n+            <when value="single">\n+                <param type="data" format="@FASTQ_FORMATS@" name="single_read1" label="Read1" />\n+            </when>\n+        </conditional>\n+        <conditional name="ref_source">\n+            <param type="select" label="Reference genome source" name="source">\n+                <option value="history" selected="true">History</option>\n+                <option value="builtin">Built-in</option>\n+            </param>\n+            <when value="history">\n+                <param type="data" format="fasta" name="ref_fasta" label="Reference genome" />\n+            </when>\n+            <when value="builtin">\n+                <param type="select" name="ref_fasta_builtin" label="Reference genome">\n+                    <options from_data_table="all_fasta" />\n+                </param>\n+            </when>\n+        </conditional>\n+        <param type="boolean" name="trim_reference" label="Trim trailing As from the reference sequence" checked="true" truevalue="_untrimmed" falsevalue="" help="Remove all As at the end of the reference sequence to ensure that the reference has no poly-A tail (see Note in the general help section below)" />\n+        <param type="select" argument="--tech" name="sequencing_tech" label="Sequencing technology">\n+            <opti'..b' or reads["read_type"] == "paired"</filter>\n+        </data>\n+        <data name="output_reads2" format="fastqsanger.gz" label="Filtered reads ${on_string} - reads2" from_work_dir="output.reads_2.fastq.gz">\n+            <filter>reads["read_type"] == "paired"</filter>\n+        </data>\n+        <collection type="paired" format="fastqsanger.gz" name="output_collection" label="Filtered reads ${on_string}">\n+            <filter>reads["read_type"] == "paired_collection"</filter>\n+            <data name="forward" format="fastqsanger.gz" from_work_dir="output.reads_1.fastq.gz" />\n+            <data name="reverse" format="fastqsanger.gz" from_work_dir="output.reads_2.fastq.gz" />\n+        </collection>\n+    </outputs>\n+    <tests>\n+        <test expect_num_outputs="2">\n+            <conditional name="reads">\n+                <param name="read_type" value="paired" />\n+                <param name="read1" value="test1.fastq.gz" ftype="fastqsanger.gz" />\n+                <param name="read2" value="test2.fastq.gz" ftype="fastqsanger.gz" />\n+            </conditional>\n+            <conditional name="ref_source">\n+                <param name="source" value="history" />\n+                <param name="ref_fasta" value="reference.fasta" ftype="fasta" />\n+            </conditional>\n+            <param name="sequencing_tech" value="illumina" />\n+            <output name="output_reads1" value="output_test1.fastq.gz" />\n+            <output name="output_reads2" value="output_test2.fastq.gz" />\n+        </test>\n+        <test expect_num_outputs="3">\n+            <conditional name="reads">\n+                <param name="read_type" value="paired_collection" />\n+                <param name="paired_reads">\n+                    <collection type="paired">\n+                        <element name="forward" value="test1.fastq.gz" ftype="fastqsanger.gz" />\n+                        <element name="reverse" value="test2.fastq.gz" ftype="fastqsanger.gz" />    \n+                    </collection>\n+                </param>\n+            </conditional>\n+            <conditional name="ref_source">\n+                <param name="source" value="history" />\n+                <param name="ref_fasta" value="reference.fasta" ftype="fasta" />\n+            </conditional>\n+            <param name="sequencing_tech" value="illumina" />\n+            <output_collection name="output_collection">\n+                <element name="forward" value="output_test1.fastq.gz" />\n+                <element name="reverse" value="output_test2.fastq.gz" />\n+            </output_collection>\n+        </test>\n+        <test expect_num_outputs="1">\n+            <conditional name="reads">\n+                <param name="read_type" value="single" />\n+                <param name="single_read1" value="test3.fastq.gz" ftype="fastqsanger.gz"/>\n+            </conditional>\n+            <conditional name="ref_source">\n+                <param name="source" value="history" />\n+                <param name="ref_fasta" value="reference.fasta" ftype="fasta" />\n+            </conditional>\n+            <param name="sequencing_tech" value="ont" />\n+            <output name="output_reads1" value="output_test3.fastq.gz" />\n+        </test>\n+    </tests>\n+    <help><![CDATA[\n+ReadItAndKeep\n+-------------\n+\n+ReadItAndKeep is a tool for filtering viral sequence data to remove host reads, developed for cleaning\n+SARS-CoV-2 sequencing data. It maps reads against the SARS-CoV-2 viral genome (with the poly-A tail removed)\n+and only keeps those that map well.\n+\n+**Note**: If the reference genome supplied contains a poly-A tail, reads that contain part of a poly-A tail will map\n+to the refence, no matter what species they originate from. If you are not sure if the reference you are using has had\n+trailing A\'s trimmed, enable the `Trim trailing As` option.\n+\n+Input can be either Illumina or Oxford Nanopore reads.\n+    ]]></help>\n+    <citations>\n+        <citation type="doi">10.1101/2022.01.21.477194</citation>\n+    </citations>\n+</tool>\n'
b
diff -r 000000000000 -r 554aa2a63f04 test-data/output_test1.fastq.gz
b
Binary file test-data/output_test1.fastq.gz has changed
b
diff -r 000000000000 -r 554aa2a63f04 test-data/output_test2.fastq.gz
b
Binary file test-data/output_test2.fastq.gz has changed
b
diff -r 000000000000 -r 554aa2a63f04 test-data/output_test3.fastq.gz
b
Binary file test-data/output_test3.fastq.gz has changed
b
diff -r 000000000000 -r 554aa2a63f04 test-data/reference.fasta
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/reference.fasta Fri Jan 28 18:47:34 2022 +0000
b
b'@@ -0,0 +1,430 @@\n+>MN908947.3 Severe acute respiratory syndrome coronavirus 2 isolate Wuhan-Hu-1, complete genome\n+ATTAAAGGTTTATACCTTCCCAGGTAACAAACCAACCAACTTTCGATCTCTTGTAGATCTGTTCTCTAAA\n+CGAACTTTAAAATCTGTGTGGCTGTCACTCGGCTGCATGCTTAGTGCACTCACGCAGTATAATTAATAAC\n+TAATTACTGTCGTTGACAGGACACGAGTAACTCGTCTATCTTCTGCAGGCTGCTTACGGTTTCGTCCGTG\n+TTGCAGCCGATCATCAGCACATCTAGGTTTCGTCCGGGTGTGACCGAAAGGTAAGATGGAGAGCCTTGTC\n+CCTGGTTTCAACGAGAAAACACACGTCCAACTCAGTTTGCCTGTTTTACAGGTTCGCGACGTGCTCGTAC\n+GTGGCTTTGGAGACTCCGTGGAGGAGGTCTTATCAGAGGCACGTCAACATCTTAAAGATGGCACTTGTGG\n+CTTAGTAGAAGTTGAAAAAGGCGTTTTGCCTCAACTTGAACAGCCCTATGTGTTCATCAAACGTTCGGAT\n+GCTCGAACTGCACCTCATGGTCATGTTATGGTTGAGCTGGTAGCAGAACTCGAAGGCATTCAGTACGGTC\n+GTAGTGGTGAGACACTTGGTGTCCTTGTCCCTCATGTGGGCGAAATACCAGTGGCTTACCGCAAGGTTCT\n+TCTTCGTAAGAACGGTAATAAAGGAGCTGGTGGCCATAGTTACGGCGCCGATCTAAAGTCATTTGACTTA\n+GGCGACGAGCTTGGCACTGATCCTTATGAAGATTTTCAAGAAAACTGGAACACTAAACATAGCAGTGGTG\n+TTACCCGTGAACTCATGCGTGAGCTTAACGGAGGGGCATACACTCGCTATGTCGATAACAACTTCTGTGG\n+CCCTGATGGCTACCCTCTTGAGTGCATTAAAGACCTTCTAGCACGTGCTGGTAAAGCTTCATGCACTTTG\n+TCCGAACAACTGGACTTTATTGACACTAAGAGGGGTGTATACTGCTGCCGTGAACATGAGCATGAAATTG\n+CTTGGTACACGGAACGTTCTGAAAAGAGCTATGAATTGCAGACACCTTTTGAAATTAAATTGGCAAAGAA\n+ATTTGACACCTTCAATGGGGAATGTCCAAATTTTGTATTTCCCTTAAATTCCATAATCAAGACTATTCAA\n+CCAAGGGTTGAAAAGAAAAAGCTTGATGGCTTTATGGGTAGAATTCGATCTGTCTATCCAGTTGCGTCAC\n+CAAATGAATGCAACCAAATGTGCCTTTCAACTCTCATGAAGTGTGATCATTGTGGTGAAACTTCATGGCA\n+GACGGGCGATTTTGTTAAAGCCACTTGCGAATTTTGTGGCACTGAGAATTTGACTAAAGAAGGTGCCACT\n+ACTTGTGGTTACTTACCCCAAAATGCTGTTGTTAAAATTTATTGTCCAGCATGTCACAATTCAGAAGTAG\n+GACCTGAGCATAGTCTTGCCGAATACCATAATGAATCTGGCTTGAAAACCATTCTTCGTAAGGGTGGTCG\n+CACTATTGCCTTTGGAGGCTGTGTGTTCTCTTATGTTGGTTGCCATAACAAGTGTGCCTATTGGGTTCCA\n+CGTGCTAGCGCTAACATAGGTTGTAACCATACAGGTGTTGTTGGAGAAGGTTCCGAAGGTCTTAATGACA\n+ACCTTCTTGAAATACTCCAAAAAGAGAAAGTCAACATCAATATTGTTGGTGACTTTAAACTTAATGAAGA\n+GATCGCCATTATTTTGGCATCTTTTTCTGCTTCCACAAGTGCTTTTGTGGAAACTGTGAAAGGTTTGGAT\n+TATAAAGCATTCAAACAAATTGTTGAATCCTGTGGTAATTTTAAAGTTACAAAAGGAAAAGCTAAAAAAG\n+GTGCCTGGAATATTGGTGAACAGAAATCAATACTGAGTCCTCTTTATGCATTTGCATCAGAGGCTGCTCG\n+TGTTGTACGATCAATTTTCTCCCGCACTCTTGAAACTGCTCAAAATTCTGTGCGTGTTTTACAGAAGGCC\n+GCTATAACAATACTAGATGGAATTTCACAGTATTCACTGAGACTCATTGATGCTATGATGTTCACATCTG\n+ATTTGGCTACTAACAATCTAGTTGTAATGGCCTACATTACAGGTGGTGTTGTTCAGTTGACTTCGCAGTG\n+GCTAACTAACATCTTTGGCACTGTTTATGAAAAACTCAAACCCGTCCTTGATTGGCTTGAAGAGAAGTTT\n+AAGGAAGGTGTAGAGTTTCTTAGAGACGGTTGGGAAATTGTTAAATTTATCTCAACCTGTGCTTGTGAAA\n+TTGTCGGTGGACAAATTGTCACCTGTGCAAAGGAAATTAAGGAGAGTGTTCAGACATTCTTTAAGCTTGT\n+AAATAAATTTTTGGCTTTGTGTGCTGACTCTATCATTATTGGTGGAGCTAAACTTAAAGCCTTGAATTTA\n+GGTGAAACATTTGTCACGCACTCAAAGGGATTGTACAGAAAGTGTGTTAAATCCAGAGAAGAAACTGGCC\n+TACTCATGCCTCTAAAAGCCCCAAAAGAAATTATCTTCTTAGAGGGAGAAACACTTCCCACAGAAGTGTT\n+AACAGAGGAAGTTGTCTTGAAAACTGGTGATTTACAACCATTAGAACAACCTACTAGTGAAGCTGTTGAA\n+GCTCCATTGGTTGGTACACCAGTTTGTATTAACGGGCTTATGTTGCTCGAAATCAAAGACACAGAAAAGT\n+ACTGTGCCCTTGCACCTAATATGATGGTAACAAACAATACCTTCACACTCAAAGGCGGTGCACCAACAAA\n+GGTTACTTTTGGTGATGACACTGTGATAGAAGTGCAAGGTTACAAGAGTGTGAATATCACTTTTGAACTT\n+GATGAAAGGATTGATAAAGTACTTAATGAGAAGTGCTCTGCCTATACAGTTGAACTCGGTACAGAAGTAA\n+ATGAGTTCGCCTGTGTTGTGGCAGATGCTGTCATAAAAACTTTGCAACCAGTATCTGAATTACTTACACC\n+ACTGGGCATTGATTTAGATGAGTGGAGTATGGCTACATACTACTTATTTGATGAGTCTGGTGAGTTTAAA\n+TTGGCTTCACATATGTATTGTTCTTTCTACCCTCCAGATGAGGATGAAGAAGAAGGTGATTGTGAAGAAG\n+AAGAGTTTGAGCCATCAACTCAATATGAGTATGGTACTGAAGATGATTACCAAGGTAAACCTTTGGAATT\n+TGGTGCCACTTCTGCTGCTCTTCAACCTGAAGAAGAGCAAGAAGAAGATTGGTTAGATGATGATAGTCAA\n+CAAACTGTTGGTCAACAAGACGGCAGTGAGGACAATCAGACAACTACTATTCAAACAATTGTTGAGGTTC\n+AACCTCAATTAGAGATGGAACTTACACCAGTTGTTCAGACTATTGAAGTGAATAGTTTTAGTGGTTATTT\n+AAAACTTACTGACAATGTATACATTAAAAATGCAGACATTGTGGAAGAAGCTAAAAAGGTAAAACCAACA\n+GTGGTTGTTAATGCAGCCAATGTTTACCTTAAACATGGAGGAGGTGTTGCAGGAGCCTTAAATAAGGCTA\n+CTAACAATGCCATGCAAGTTGAATCTGATGATTACATAGCTACTAATGGACCACTTAAAGTGGGTGGTAG\n+TTGTGTTTTAAGCGGACACAATCTTGCTAAACACTGTCTTCATGTTGTCGGCCCAAATGTTAACAAAGGT\n+GAAGACATTCAACTTCTTAAGAGTGCTTATGAAAATTTTAATCAGCACGAAGTTCTACTTGCACCATTAT\n+TATCAGCTGGTATTTTTGGTGCTGACCCTATACATTCTTTAAGAGTTTGTGTAGATACTGTTCGCAC'..b'GACTATTACCAGCTGTACTCA\n+ACTCAATTGAGTACAGACACTGGTGTTGAACATGTTACCTTCTTCATCTACAATAAAATTGTTGATGAGC\n+CTGAAGAACATGTCCAAATTCACACAATCGACGGTTCATCCGGAGTTGTTAATCCAGTAATGGAACCAAT\n+TTATGATGAACCGACGACGACTACTAGCGTGCCTTTGTAAGCACAAGCTGATGAGTACGAACTTATGTAC\n+TCATTCGTTTCGGAAGAGACAGGTACGTTAATAGTTAATAGCGTACTTCTTTTTCTTGCTTTCGTGGTAT\n+TCTTGCTAGTTACACTAGCCATCCTTACTGCGCTTCGATTGTGTGCGTACTGCTGCAATATTGTTAACGT\n+GAGTCTTGTAAAACCTTCTTTTTACGTTTACTCTCGTGTTAAAAATCTGAATTCTTCTAGAGTTCCTGAT\n+CTTCTGGTCTAAACGAACTAAATATTATATTAGTTTTTCTGTTTGGAACTTTAATTTTAGCCATGGCAGA\n+TTCCAACGGTACTATTACCGTTGAAGAGCTTAAAAAGCTCCTTGAACAATGGAACCTAGTAATAGGTTTC\n+CTATTCCTTACATGGATTTGTCTTCTACAATTTGCCTATGCCAACAGGAATAGGTTTTTGTATATAATTA\n+AGTTAATTTTCCTCTGGCTGTTATGGCCAGTAACTTTAGCTTGTTTTGTGCTTGCTGCTGTTTACAGAAT\n+AAATTGGATCACCGGTGGAATTGCTATCGCAATGGCTTGTCTTGTAGGCTTGATGTGGCTCAGCTACTTC\n+ATTGCTTCTTTCAGACTGTTTGCGCGTACGCGTTCCATGTGGTCATTCAATCCAGAAACTAACATTCTTC\n+TCAACGTGCCACTCCATGGCACTATTCTGACCAGACCGCTTCTAGAAAGTGAACTCGTAATCGGAGCTGT\n+GATCCTTCGTGGACATCTTCGTATTGCTGGACACCATCTAGGACGCTGTGACATCAAGGACCTGCCTAAA\n+GAAATCACTGTTGCTACATCACGAACGCTTTCTTATTACAAATTGGGAGCTTCGCAGCGTGTAGCAGGTG\n+ACTCAGGTTTTGCTGCATACAGTCGCTACAGGATTGGCAACTATAAATTAAACACAGACCATTCCAGTAG\n+CAGTGACAATATTGCTTTGCTTGTACAGTAAGTGACAACAGATGTTTCATCTCGTTGACTTTCAGGTTAC\n+TATAGCAGAGATATTACTAATTATTATGAGGACTTTTAAAGTTTCCATTTGGAATCTTGATTACATCATA\n+AACCTCATAATTAAAAATTTATCTAAGTCACTAACTGAGAATAAATATTCTCAATTAGATGAAGAGCAAC\n+CAATGGAGATTGATTAAACGAACATGAAAATTATTCTTTTCTTGGCACTGATAACACTCGCTACTTGTGA\n+GCTTTATCACTACCAAGAGTGTGTTAGAGGTACAACAGTACTTTTAAAAGAACCTTGCTCTTCTGGAACA\n+TACGAGGGCAATTCACCATTTCATCCTCTAGCTGATAACAAATTTGCACTGACTTGCTTTAGCACTCAAT\n+TTGCTTTTGCTTGTCCTGACGGCGTAAAACACGTCTATCAGTTACGTGCCAGATCAGTTTCACCTAAACT\n+GTTCATCAGACAAGAGGAAGTTCAAGAACTTTACTCTCCAATTTTTCTTATTGTTGCGGCAATAGTGTTT\n+ATAACACTTTGCTTCACACTCAAAAGAAAGACAGAATGATTGAACTTTCATTAATTGACTTCTATTTGTG\n+CTTTTTAGCCTTTCTGCTATTCCTTGTTTTAATTATGCTTATTATCTTTTGGTTCTCACTTGAACTGCAA\n+GATCATAATGAAACTTGTCACGCCTAAACGAACATGAAATTTCTTGTTTTCTTAGGAATCATCACAACTG\n+TAGCTGCATTTCACCAAGAATGTAGTTTACAGTCATGTACTCAACATCAACCATATGTAGTTGATGACCC\n+GTGTCCTATTCACTTCTATTCTAAATGGTATATTAGAGTAGGAGCTAGAAAATCAGCACCTTTAATTGAA\n+TTGTGCGTGGATGAGGCTGGTTCTAAATCACCCATTCAGTACATCGATATCGGTAATTATACAGTTTCCT\n+GTTTACCTTTTACAATTAATTGCCAGGAACCTAAATTGGGTAGTCTTGTAGTGCGTTGTTCGTTCTATGA\n+AGACTTTTTAGAGTATCATGACGTTCGTGTTGTTTTAGATTTCATCTAAACGAACAAACTAAAATGTCTG\n+ATAATGGACCCCAAAATCAGCGAAATGCACCCCGCATTACGTTTGGTGGACCCTCAGATTCAACTGGCAG\n+TAACCAGAATGGAGAACGCAGTGGGGCGCGATCAAAACAACGTCGGCCCCAAGGTTTACCCAATAATACT\n+GCGTCTTGGTTCACCGCTCTCACTCAACATGGCAAGGAAGACCTTAAATTCCCTCGAGGACAAGGCGTTC\n+CAATTAACACCAATAGCAGTCCAGATGACCAAATTGGCTACTACCGAAGAGCTACCAGACGAATTCGTGG\n+TGGTGACGGTAAAATGAAAGATCTCAGTCCAAGATGGTATTTCTACTACCTAGGAACTGGGCCAGAAGCT\n+GGACTTCCCTATGGTGCTAACAAAGACGGCATCATATGGGTTGCAACTGAGGGAGCCTTGAATACACCAA\n+AAGATCACATTGGCACCCGCAATCCTGCTAACAATGCTGCAATCGTGCTACAACTTCCTCAAGGAACAAC\n+ATTGCCAAAAGGCTTCTACGCAGAAGGGAGCAGAGGCGGCAGTCAAGCCTCTTCTCGTTCCTCATCACGT\n+AGTCGCAACAGTTCAAGAAATTCAACTCCAGGCAGCAGTAGGGGAACTTCTCCTGCTAGAATGGCTGGCA\n+ATGGCGGTGATGCTGCTCTTGCTTTGCTGCTGCTTGACAGATTGAACCAGCTTGAGAGCAAAATGTCTGG\n+TAAAGGCCAACAACAACAAGGCCAAACTGTCACTAAGAAATCTGCTGCTGAGGCTTCTAAGAAGCCTCGG\n+CAAAAACGTACTGCCACTAAAGCATACAATGTAACACAAGCTTTCGGCAGACGTGGTCCAGAACAAACCC\n+AAGGAAATTTTGGGGACCAGGAACTAATCAGACAAGGAACTGATTACAAACATTGGCCGCAAATTGCACA\n+ATTTGCCCCCAGCGCTTCAGCGTTCTTCGGAATGTCGCGCATTGGCATGGAAGTCACACCTTCGGGAACG\n+TGGTTGACCTACACAGGTGCCATCAAATTGGATGACAAAGATCCAAATTTCAAAGATCAAGTCATTTTGC\n+TGAATAAGCATATTGACGCATACAAAACATTCCCACCAACAGAGCCTAAAAAGGACAAAAAGAAGAAGGC\n+TGATGAAACTCAAGCCTTACCGCAGAGACAGAAGAAACAGCAAACTGTGACTCTTCTTCCTGCTGCAGAT\n+TTGGATGATTTCTCCAAACAATTGCAACAATCCATGAGCAGTGCTGACTCAACTCAGGCCTAAACTCATG\n+CAGACCACACAAGGCAGATGGGCTATATAAACGTTTTCGCTTTTCCGTTTACGATATATAGTCTACTCTT\n+GTGCAGAATGAATTCTCGTAACTACATAGCACAAGTAGATGTAGTTAACTTTAATCTCACATAGCAATCT\n+TTAATCAGTGTGTAACATTAGGGAGGACTTGAAAGAGCCACCACATTTTCACCGAGGCCACGCGGAGTAC\n+GATCGAGTGTACAGTGAACAATGCTAGGGAGAGCTGCCTATATGGAAGAGCCCTAATGTGTAAAATTAAT\n+TTTAGTAGTGCTATCCCCATGTGATTTTAATAGCTTCTTAGGAGAATGACAAAAAAAAAAAAAAAAAAAA\n+AAAAAAAAAAAAA\n+\n'
b
diff -r 000000000000 -r 554aa2a63f04 test-data/test1.fastq.gz
b
Binary file test-data/test1.fastq.gz has changed
b
diff -r 000000000000 -r 554aa2a63f04 test-data/test2.fastq.gz
b
Binary file test-data/test2.fastq.gz has changed
b
diff -r 000000000000 -r 554aa2a63f04 test-data/test3.fastq.gz
b
Binary file test-data/test3.fastq.gz has changed
b
diff -r 000000000000 -r 554aa2a63f04 tool_data_table_conf.xml.sample
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tool_data_table_conf.xml.sample Fri Jan 28 18:47:34 2022 +0000
b
@@ -0,0 +1,7 @@
+<tables>
+    <!-- Locations of all fasta files under genome directory -->
+    <table name="all_fasta" comment_char="#" allow_duplicate_entries="False">
+        <columns>value, dbkey, name, path</columns>
+        <file path="tool-data/all_fasta.loc" />
+    </table>
+</tables>
b
diff -r 000000000000 -r 554aa2a63f04 trim_reference.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/trim_reference.py Fri Jan 28 18:47:34 2022 +0000
[
@@ -0,0 +1,28 @@
+#!/usr/bin/env python
+
+from __future__ import print_function
+
+import argparse
+import sys
+
+if __name__ == '__main__':
+    parser = argparse.ArgumentParser()
+    parser.add_argument('input_file', type=argparse.FileType())
+    parser.add_argument('output_file', type=argparse.FileType('w'), nargs='?', default=sys.stdout)
+    args = parser.parse_args()
+    lines = args.input_file.readlines()
+    i = len(lines) - 1
+    trimmed = False
+    # step backwards through the lines, removing all As until we find a non-A nucleotide
+    while not trimmed:
+        line = lines[i].upper().rstrip()
+        for j in range(len(line) - 1, -1, -1):
+            # walk backwards through the line, checking for a non-A (and non-space) character
+            if line[j] not in ['A', ' ']:
+                lines[i] = line[:j + 1] + '\n'
+                trimmed = True
+                break
+        else:
+            # we processed the whole line - all As - so we don't include this line in the output
+            i -= 1
+    args.output_file.write(''.join(lines[:i + 1]))