Next changeset 1:1563b58905f4 (2022-03-17) |
Commit message:
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/read-it-and-keep commit 4b41e2742ba5f9c957e13a188ca49e60e16ae13b" |
added:
all_fasta.loc.sample read-it-and-keep.xml test-data/output_test1.fastq.gz test-data/output_test2.fastq.gz test-data/output_test3.fastq.gz test-data/reference.fasta test-data/test1.fastq.gz test-data/test2.fastq.gz test-data/test3.fastq.gz tool_data_table_conf.xml.sample trim_reference.py |
b |
diff -r 000000000000 -r 554aa2a63f04 all_fasta.loc.sample --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/all_fasta.loc.sample Fri Jan 28 18:47:34 2022 +0000 |
b |
@@ -0,0 +1,18 @@ +#This file lists the locations and dbkeys of all the fasta files +#under the "genome" directory (a directory that contains a directory +#for each build). The script extract_fasta.py will generate the file +#all_fasta.loc. This file has the format (white space characters are +#TAB characters): +# +#<unique_build_id> <dbkey> <display_name> <file_path> +# +#So, all_fasta.loc could look something like this: +# +#apiMel3 apiMel3 Honeybee (Apis mellifera): apiMel3 /path/to/genome/apiMel3/apiMel3.fa +#hg19canon hg19 Human (Homo sapiens): hg19 Canonical /path/to/genome/hg19/hg19canon.fa +#hg19full hg19 Human (Homo sapiens): hg19 Full /path/to/genome/hg19/hg19full.fa +# +#Your all_fasta.loc file should contain an entry for each individual +#fasta file. So there will be multiple fasta files for each build, +#such as with hg19 above. +# |
b |
diff -r 000000000000 -r 554aa2a63f04 read-it-and-keep.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/read-it-and-keep.xml Fri Jan 28 18:47:34 2022 +0000 |
[ |
b'@@ -0,0 +1,162 @@\n+<tool id="read_it_and_keep" name="Read It and Keep" version="@TOOL_VERSION@+galaxy0" profile="20.09">\n+ <macros>\n+ <token name="@FASTQ_FORMATS@">fastq,fastq.gz,fastqsanger,fastqsanger.gz</token>\n+ <token name="@TOOL_VERSION@">0.1.0</token>\n+ </macros>\n+ <requirements>\n+ <requirement type="package" version="@TOOL_VERSION@">read-it-and-keep</requirement>\n+ <requirement type="package">python</requirement>\n+ </requirements>\n+ <command detect_errors="exit_code"><![CDATA[\n+ #if $ref_source.source == "history"\n+ ln -s \'$ref_source.ref_fasta\' ref${trim_reference}.fasta &&\n+ #elif $ref_source.source == "builtin"\n+ ln -s \'$ref_source.ref_fasta_builtin.path\' ref${trim_reference}.fasta &&\n+ #end if\n+ #if $trim_reference\n+ python \'$__tool_directory__/trim_reference.py\' ref${trim_reference}.fasta ref.fasta &&\n+ #end if\n+ #if $reads.read_type == "paired"\n+ ln -s \'$reads.read1\' read1.fastq &&\n+ ln -s \'$reads.read2\' read2.fastq &&\n+ #elif $reads.read_type == \'paired_collection\'\n+ ln -s \'$reads.paired_reads.forward\' read1.fastq &&\n+ ln -s \'$reads.paired_reads.reverse\' read2.fastq &&\n+ #elif $reads.read_type == \'single\'\n+ ln -s \'$reads.single_read1\' read1.fastq &&\n+ #end if\n+ readItAndKeep\n+ --tech $sequencing_tech\n+ --ref_fasta ref.fasta\n+ --min_map_length $adv.min_map_length\n+ --min_map_length_pc $adv.min_map_length_pc\n+ --reads1 read1.fastq\n+ #if $reads.read_type != "single"\n+ --reads2 read2.fastq\n+ #end if\n+ -o output\n+ #if $reads.read_type == \'single\'\n+ && mv output.reads.fastq.gz output.reads_1.fastq.gz\n+ #end if\n+ ]]></command>\n+ <inputs>\n+ <conditional name="reads">\n+ <param type="select" label="Read type" name="read_type">\n+ <option value="paired" selected="true">Paired end</option>\n+ <option value="paired_collection">Paired collection</option>\n+ <option value="single">Single ended</option>\n+ </param>\n+ <when value="paired">\n+ <param type="data" format="@FASTQ_FORMATS@" name="read1" 
label="Read1" />\n+ <param type="data" format="@FASTQ_FORMATS@" name="read2" label="Read2" />\n+ </when>\n+ <when value="paired_collection">\n+ <param type="data_collection" collection_type="paired" format="@FASTQ_FORMATS@" name="paired_reads" label="Reads" />\n+ </when>\n+ <when value="single">\n+ <param type="data" format="@FASTQ_FORMATS@" name="single_read1" label="Read1" />\n+ </when>\n+ </conditional>\n+ <conditional name="ref_source">\n+ <param type="select" label="Reference genome source" name="source">\n+ <option value="history" selected="true">History</option>\n+ <option value="builtin">Built-in</option>\n+ </param>\n+ <when value="history">\n+ <param type="data" format="fasta" name="ref_fasta" label="Reference genome" />\n+ </when>\n+ <when value="builtin">\n+ <param type="select" name="ref_fasta_builtin" label="Reference genome">\n+ <options from_data_table="all_fasta" />\n+ </param>\n+ </when>\n+ </conditional>\n+ <param type="boolean" name="trim_reference" label="Trim trailing As from the reference sequence" checked="true" truevalue="_untrimmed" falsevalue="" help="Remove all As at the end of the reference sequence to ensure that the reference has no poly-A tail (see Note in the general help section below)" />\n+ <param type="select" argument="--tech" name="sequencing_tech" label="Sequencing technology">\n+ <opti'..b' or reads["read_type"] == "paired"</filter>\n+ </data>\n+ <data name="output_reads2" format="fastqsanger.gz" label="Filtered reads ${on_string} - reads2" from_work_dir="output.reads_2.fastq.gz">\n+ <filter>reads["read_type"] == "paired"</filter>\n+ </data>\n+ <collection type="paired" format="fastqsanger.gz" name="output_collection" label="Filtered reads ${on_string}">\n+ <filter>reads["read_type"] == "paired_collection"</filter>\n+ <data name="forward" format="fastqsanger.gz" from_work_dir="output.reads_1.fastq.gz" />\n+ <data name="reverse" format="fastqsanger.gz" from_work_dir="output.reads_2.fastq.gz" />\n+ </collection>\n+ 
</outputs>\n+ <tests>\n+ <test expect_num_outputs="2">\n+ <conditional name="reads">\n+ <param name="read_type" value="paired" />\n+ <param name="read1" value="test1.fastq.gz" ftype="fastqsanger.gz" />\n+ <param name="read2" value="test2.fastq.gz" ftype="fastqsanger.gz" />\n+ </conditional>\n+ <conditional name="ref_source">\n+ <param name="source" value="history" />\n+ <param name="ref_fasta" value="reference.fasta" ftype="fasta" />\n+ </conditional>\n+ <param name="sequencing_tech" value="illumina" />\n+ <output name="output_reads1" value="output_test1.fastq.gz" />\n+ <output name="output_reads2" value="output_test2.fastq.gz" />\n+ </test>\n+ <test expect_num_outputs="3">\n+ <conditional name="reads">\n+ <param name="read_type" value="paired_collection" />\n+ <param name="paired_reads">\n+ <collection type="paired">\n+ <element name="forward" value="test1.fastq.gz" ftype="fastqsanger.gz" />\n+ <element name="reverse" value="test2.fastq.gz" ftype="fastqsanger.gz" /> \n+ </collection>\n+ </param>\n+ </conditional>\n+ <conditional name="ref_source">\n+ <param name="source" value="history" />\n+ <param name="ref_fasta" value="reference.fasta" ftype="fasta" />\n+ </conditional>\n+ <param name="sequencing_tech" value="illumina" />\n+ <output_collection name="output_collection">\n+ <element name="forward" value="output_test1.fastq.gz" />\n+ <element name="reverse" value="output_test2.fastq.gz" />\n+ </output_collection>\n+ </test>\n+ <test expect_num_outputs="1">\n+ <conditional name="reads">\n+ <param name="read_type" value="single" />\n+ <param name="single_read1" value="test3.fastq.gz" ftype="fastqsanger.gz"/>\n+ </conditional>\n+ <conditional name="ref_source">\n+ <param name="source" value="history" />\n+ <param name="ref_fasta" value="reference.fasta" ftype="fasta" />\n+ </conditional>\n+ <param name="sequencing_tech" value="ont" />\n+ <output name="output_reads1" value="output_test3.fastq.gz" />\n+ </test>\n+ </tests>\n+ 
<help><![CDATA[\n+ReadItAndKeep\n+-------------\n+\n+ReadItAndKeep is a tool for filtering viral sequence data to remove host reads, developed for cleaning\n+SARS-CoV-2 sequencing data. It maps reads against the SARS-CoV-2 viral genome (with the poly-A tail removed)\n+and only keeps those that map well.\n+\n+**Note**: If the reference genome supplied contains a poly-A tail, reads that contain part of a poly-A tail will map\n+to the reference, no matter what species they originate from. If you are not sure if the reference you are using has had\n+trailing A\'s trimmed, enable the `Trim trailing As` option.\n+\n+Input can be either Illumina or Oxford Nanopore reads.\n+ ]]></help>\n+ <citations>\n+ <citation type="doi">10.1101/2022.01.21.477194</citation>\n+ </citations>\n+</tool>\n' |
b |
diff -r 000000000000 -r 554aa2a63f04 test-data/output_test1.fastq.gz |
b |
Binary file test-data/output_test1.fastq.gz has changed |
b |
diff -r 000000000000 -r 554aa2a63f04 test-data/output_test2.fastq.gz |
b |
Binary file test-data/output_test2.fastq.gz has changed |
b |
diff -r 000000000000 -r 554aa2a63f04 test-data/output_test3.fastq.gz |
b |
Binary file test-data/output_test3.fastq.gz has changed |
b |
diff -r 000000000000 -r 554aa2a63f04 test-data/reference.fasta --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/reference.fasta Fri Jan 28 18:47:34 2022 +0000 |
b |
b'@@ -0,0 +1,430 @@\n+>MN908947.3 Severe acute respiratory syndrome coronavirus 2 isolate Wuhan-Hu-1, complete genome\n+ATTAAAGGTTTATACCTTCCCAGGTAACAAACCAACCAACTTTCGATCTCTTGTAGATCTGTTCTCTAAA\n+CGAACTTTAAAATCTGTGTGGCTGTCACTCGGCTGCATGCTTAGTGCACTCACGCAGTATAATTAATAAC\n+TAATTACTGTCGTTGACAGGACACGAGTAACTCGTCTATCTTCTGCAGGCTGCTTACGGTTTCGTCCGTG\n+TTGCAGCCGATCATCAGCACATCTAGGTTTCGTCCGGGTGTGACCGAAAGGTAAGATGGAGAGCCTTGTC\n+CCTGGTTTCAACGAGAAAACACACGTCCAACTCAGTTTGCCTGTTTTACAGGTTCGCGACGTGCTCGTAC\n+GTGGCTTTGGAGACTCCGTGGAGGAGGTCTTATCAGAGGCACGTCAACATCTTAAAGATGGCACTTGTGG\n+CTTAGTAGAAGTTGAAAAAGGCGTTTTGCCTCAACTTGAACAGCCCTATGTGTTCATCAAACGTTCGGAT\n+GCTCGAACTGCACCTCATGGTCATGTTATGGTTGAGCTGGTAGCAGAACTCGAAGGCATTCAGTACGGTC\n+GTAGTGGTGAGACACTTGGTGTCCTTGTCCCTCATGTGGGCGAAATACCAGTGGCTTACCGCAAGGTTCT\n+TCTTCGTAAGAACGGTAATAAAGGAGCTGGTGGCCATAGTTACGGCGCCGATCTAAAGTCATTTGACTTA\n+GGCGACGAGCTTGGCACTGATCCTTATGAAGATTTTCAAGAAAACTGGAACACTAAACATAGCAGTGGTG\n+TTACCCGTGAACTCATGCGTGAGCTTAACGGAGGGGCATACACTCGCTATGTCGATAACAACTTCTGTGG\n+CCCTGATGGCTACCCTCTTGAGTGCATTAAAGACCTTCTAGCACGTGCTGGTAAAGCTTCATGCACTTTG\n+TCCGAACAACTGGACTTTATTGACACTAAGAGGGGTGTATACTGCTGCCGTGAACATGAGCATGAAATTG\n+CTTGGTACACGGAACGTTCTGAAAAGAGCTATGAATTGCAGACACCTTTTGAAATTAAATTGGCAAAGAA\n+ATTTGACACCTTCAATGGGGAATGTCCAAATTTTGTATTTCCCTTAAATTCCATAATCAAGACTATTCAA\n+CCAAGGGTTGAAAAGAAAAAGCTTGATGGCTTTATGGGTAGAATTCGATCTGTCTATCCAGTTGCGTCAC\n+CAAATGAATGCAACCAAATGTGCCTTTCAACTCTCATGAAGTGTGATCATTGTGGTGAAACTTCATGGCA\n+GACGGGCGATTTTGTTAAAGCCACTTGCGAATTTTGTGGCACTGAGAATTTGACTAAAGAAGGTGCCACT\n+ACTTGTGGTTACTTACCCCAAAATGCTGTTGTTAAAATTTATTGTCCAGCATGTCACAATTCAGAAGTAG\n+GACCTGAGCATAGTCTTGCCGAATACCATAATGAATCTGGCTTGAAAACCATTCTTCGTAAGGGTGGTCG\n+CACTATTGCCTTTGGAGGCTGTGTGTTCTCTTATGTTGGTTGCCATAACAAGTGTGCCTATTGGGTTCCA\n+CGTGCTAGCGCTAACATAGGTTGTAACCATACAGGTGTTGTTGGAGAAGGTTCCGAAGGTCTTAATGACA\n+ACCTTCTTGAAATACTCCAAAAAGAGAAAGTCAACATCAATATTGTTGGTGACTTTAAACTTAATGAAGA\n+GATCGCCATTATTTTGGCATCTTTTTCTGCTTCCACAAGTGCTTTTGTGGAAACTGTGAAAGGTTTGGAT\n+TATAAAGCATTCAAACAAATTGTTGAATCCTGTGGTAATTTTAAAGTTACAAAAG
GAAAAGCTAAAAAAG\n+GTGCCTGGAATATTGGTGAACAGAAATCAATACTGAGTCCTCTTTATGCATTTGCATCAGAGGCTGCTCG\n+TGTTGTACGATCAATTTTCTCCCGCACTCTTGAAACTGCTCAAAATTCTGTGCGTGTTTTACAGAAGGCC\n+GCTATAACAATACTAGATGGAATTTCACAGTATTCACTGAGACTCATTGATGCTATGATGTTCACATCTG\n+ATTTGGCTACTAACAATCTAGTTGTAATGGCCTACATTACAGGTGGTGTTGTTCAGTTGACTTCGCAGTG\n+GCTAACTAACATCTTTGGCACTGTTTATGAAAAACTCAAACCCGTCCTTGATTGGCTTGAAGAGAAGTTT\n+AAGGAAGGTGTAGAGTTTCTTAGAGACGGTTGGGAAATTGTTAAATTTATCTCAACCTGTGCTTGTGAAA\n+TTGTCGGTGGACAAATTGTCACCTGTGCAAAGGAAATTAAGGAGAGTGTTCAGACATTCTTTAAGCTTGT\n+AAATAAATTTTTGGCTTTGTGTGCTGACTCTATCATTATTGGTGGAGCTAAACTTAAAGCCTTGAATTTA\n+GGTGAAACATTTGTCACGCACTCAAAGGGATTGTACAGAAAGTGTGTTAAATCCAGAGAAGAAACTGGCC\n+TACTCATGCCTCTAAAAGCCCCAAAAGAAATTATCTTCTTAGAGGGAGAAACACTTCCCACAGAAGTGTT\n+AACAGAGGAAGTTGTCTTGAAAACTGGTGATTTACAACCATTAGAACAACCTACTAGTGAAGCTGTTGAA\n+GCTCCATTGGTTGGTACACCAGTTTGTATTAACGGGCTTATGTTGCTCGAAATCAAAGACACAGAAAAGT\n+ACTGTGCCCTTGCACCTAATATGATGGTAACAAACAATACCTTCACACTCAAAGGCGGTGCACCAACAAA\n+GGTTACTTTTGGTGATGACACTGTGATAGAAGTGCAAGGTTACAAGAGTGTGAATATCACTTTTGAACTT\n+GATGAAAGGATTGATAAAGTACTTAATGAGAAGTGCTCTGCCTATACAGTTGAACTCGGTACAGAAGTAA\n+ATGAGTTCGCCTGTGTTGTGGCAGATGCTGTCATAAAAACTTTGCAACCAGTATCTGAATTACTTACACC\n+ACTGGGCATTGATTTAGATGAGTGGAGTATGGCTACATACTACTTATTTGATGAGTCTGGTGAGTTTAAA\n+TTGGCTTCACATATGTATTGTTCTTTCTACCCTCCAGATGAGGATGAAGAAGAAGGTGATTGTGAAGAAG\n+AAGAGTTTGAGCCATCAACTCAATATGAGTATGGTACTGAAGATGATTACCAAGGTAAACCTTTGGAATT\n+TGGTGCCACTTCTGCTGCTCTTCAACCTGAAGAAGAGCAAGAAGAAGATTGGTTAGATGATGATAGTCAA\n+CAAACTGTTGGTCAACAAGACGGCAGTGAGGACAATCAGACAACTACTATTCAAACAATTGTTGAGGTTC\n+AACCTCAATTAGAGATGGAACTTACACCAGTTGTTCAGACTATTGAAGTGAATAGTTTTAGTGGTTATTT\n+AAAACTTACTGACAATGTATACATTAAAAATGCAGACATTGTGGAAGAAGCTAAAAAGGTAAAACCAACA\n+GTGGTTGTTAATGCAGCCAATGTTTACCTTAAACATGGAGGAGGTGTTGCAGGAGCCTTAAATAAGGCTA\n+CTAACAATGCCATGCAAGTTGAATCTGATGATTACATAGCTACTAATGGACCACTTAAAGTGGGTGGTAG\n+TTGTGTTTTAAGCGGACACAATCTTGCTAAACACTGTCTTCATGTTGTCGGCCCAAATGTTAACAAAGGT\n+GAAGACATTCAACTTCTTAAGAGTGCTTATGAAAATTTTAATCAGCACGAAGTTCTACTTGCACCATTAT\n+TATCAGCTGGT
ATTTTTGGTGCTGACCCTATACATTCTTTAAGAGTTTGTGTAGATACTGTTCGCAC'..b'GACTATTACCAGCTGTACTCA\n+ACTCAATTGAGTACAGACACTGGTGTTGAACATGTTACCTTCTTCATCTACAATAAAATTGTTGATGAGC\n+CTGAAGAACATGTCCAAATTCACACAATCGACGGTTCATCCGGAGTTGTTAATCCAGTAATGGAACCAAT\n+TTATGATGAACCGACGACGACTACTAGCGTGCCTTTGTAAGCACAAGCTGATGAGTACGAACTTATGTAC\n+TCATTCGTTTCGGAAGAGACAGGTACGTTAATAGTTAATAGCGTACTTCTTTTTCTTGCTTTCGTGGTAT\n+TCTTGCTAGTTACACTAGCCATCCTTACTGCGCTTCGATTGTGTGCGTACTGCTGCAATATTGTTAACGT\n+GAGTCTTGTAAAACCTTCTTTTTACGTTTACTCTCGTGTTAAAAATCTGAATTCTTCTAGAGTTCCTGAT\n+CTTCTGGTCTAAACGAACTAAATATTATATTAGTTTTTCTGTTTGGAACTTTAATTTTAGCCATGGCAGA\n+TTCCAACGGTACTATTACCGTTGAAGAGCTTAAAAAGCTCCTTGAACAATGGAACCTAGTAATAGGTTTC\n+CTATTCCTTACATGGATTTGTCTTCTACAATTTGCCTATGCCAACAGGAATAGGTTTTTGTATATAATTA\n+AGTTAATTTTCCTCTGGCTGTTATGGCCAGTAACTTTAGCTTGTTTTGTGCTTGCTGCTGTTTACAGAAT\n+AAATTGGATCACCGGTGGAATTGCTATCGCAATGGCTTGTCTTGTAGGCTTGATGTGGCTCAGCTACTTC\n+ATTGCTTCTTTCAGACTGTTTGCGCGTACGCGTTCCATGTGGTCATTCAATCCAGAAACTAACATTCTTC\n+TCAACGTGCCACTCCATGGCACTATTCTGACCAGACCGCTTCTAGAAAGTGAACTCGTAATCGGAGCTGT\n+GATCCTTCGTGGACATCTTCGTATTGCTGGACACCATCTAGGACGCTGTGACATCAAGGACCTGCCTAAA\n+GAAATCACTGTTGCTACATCACGAACGCTTTCTTATTACAAATTGGGAGCTTCGCAGCGTGTAGCAGGTG\n+ACTCAGGTTTTGCTGCATACAGTCGCTACAGGATTGGCAACTATAAATTAAACACAGACCATTCCAGTAG\n+CAGTGACAATATTGCTTTGCTTGTACAGTAAGTGACAACAGATGTTTCATCTCGTTGACTTTCAGGTTAC\n+TATAGCAGAGATATTACTAATTATTATGAGGACTTTTAAAGTTTCCATTTGGAATCTTGATTACATCATA\n+AACCTCATAATTAAAAATTTATCTAAGTCACTAACTGAGAATAAATATTCTCAATTAGATGAAGAGCAAC\n+CAATGGAGATTGATTAAACGAACATGAAAATTATTCTTTTCTTGGCACTGATAACACTCGCTACTTGTGA\n+GCTTTATCACTACCAAGAGTGTGTTAGAGGTACAACAGTACTTTTAAAAGAACCTTGCTCTTCTGGAACA\n+TACGAGGGCAATTCACCATTTCATCCTCTAGCTGATAACAAATTTGCACTGACTTGCTTTAGCACTCAAT\n+TTGCTTTTGCTTGTCCTGACGGCGTAAAACACGTCTATCAGTTACGTGCCAGATCAGTTTCACCTAAACT\n+GTTCATCAGACAAGAGGAAGTTCAAGAACTTTACTCTCCAATTTTTCTTATTGTTGCGGCAATAGTGTTT\n+ATAACACTTTGCTTCACACTCAAAAGAAAGACAGAATGATTGAACTTTCATTAATTGACTTCTATTTGTG\n+CTTTTTAGCCTTTCTGCTATTCCTTGTTTTAATTATGCTTATTATCTTTTGGTTCTCACTTGAACTGCAA\n+GATCATAATGAAACTTG
TCACGCCTAAACGAACATGAAATTTCTTGTTTTCTTAGGAATCATCACAACTG\n+TAGCTGCATTTCACCAAGAATGTAGTTTACAGTCATGTACTCAACATCAACCATATGTAGTTGATGACCC\n+GTGTCCTATTCACTTCTATTCTAAATGGTATATTAGAGTAGGAGCTAGAAAATCAGCACCTTTAATTGAA\n+TTGTGCGTGGATGAGGCTGGTTCTAAATCACCCATTCAGTACATCGATATCGGTAATTATACAGTTTCCT\n+GTTTACCTTTTACAATTAATTGCCAGGAACCTAAATTGGGTAGTCTTGTAGTGCGTTGTTCGTTCTATGA\n+AGACTTTTTAGAGTATCATGACGTTCGTGTTGTTTTAGATTTCATCTAAACGAACAAACTAAAATGTCTG\n+ATAATGGACCCCAAAATCAGCGAAATGCACCCCGCATTACGTTTGGTGGACCCTCAGATTCAACTGGCAG\n+TAACCAGAATGGAGAACGCAGTGGGGCGCGATCAAAACAACGTCGGCCCCAAGGTTTACCCAATAATACT\n+GCGTCTTGGTTCACCGCTCTCACTCAACATGGCAAGGAAGACCTTAAATTCCCTCGAGGACAAGGCGTTC\n+CAATTAACACCAATAGCAGTCCAGATGACCAAATTGGCTACTACCGAAGAGCTACCAGACGAATTCGTGG\n+TGGTGACGGTAAAATGAAAGATCTCAGTCCAAGATGGTATTTCTACTACCTAGGAACTGGGCCAGAAGCT\n+GGACTTCCCTATGGTGCTAACAAAGACGGCATCATATGGGTTGCAACTGAGGGAGCCTTGAATACACCAA\n+AAGATCACATTGGCACCCGCAATCCTGCTAACAATGCTGCAATCGTGCTACAACTTCCTCAAGGAACAAC\n+ATTGCCAAAAGGCTTCTACGCAGAAGGGAGCAGAGGCGGCAGTCAAGCCTCTTCTCGTTCCTCATCACGT\n+AGTCGCAACAGTTCAAGAAATTCAACTCCAGGCAGCAGTAGGGGAACTTCTCCTGCTAGAATGGCTGGCA\n+ATGGCGGTGATGCTGCTCTTGCTTTGCTGCTGCTTGACAGATTGAACCAGCTTGAGAGCAAAATGTCTGG\n+TAAAGGCCAACAACAACAAGGCCAAACTGTCACTAAGAAATCTGCTGCTGAGGCTTCTAAGAAGCCTCGG\n+CAAAAACGTACTGCCACTAAAGCATACAATGTAACACAAGCTTTCGGCAGACGTGGTCCAGAACAAACCC\n+AAGGAAATTTTGGGGACCAGGAACTAATCAGACAAGGAACTGATTACAAACATTGGCCGCAAATTGCACA\n+ATTTGCCCCCAGCGCTTCAGCGTTCTTCGGAATGTCGCGCATTGGCATGGAAGTCACACCTTCGGGAACG\n+TGGTTGACCTACACAGGTGCCATCAAATTGGATGACAAAGATCCAAATTTCAAAGATCAAGTCATTTTGC\n+TGAATAAGCATATTGACGCATACAAAACATTCCCACCAACAGAGCCTAAAAAGGACAAAAAGAAGAAGGC\n+TGATGAAACTCAAGCCTTACCGCAGAGACAGAAGAAACAGCAAACTGTGACTCTTCTTCCTGCTGCAGAT\n+TTGGATGATTTCTCCAAACAATTGCAACAATCCATGAGCAGTGCTGACTCAACTCAGGCCTAAACTCATG\n+CAGACCACACAAGGCAGATGGGCTATATAAACGTTTTCGCTTTTCCGTTTACGATATATAGTCTACTCTT\n+GTGCAGAATGAATTCTCGTAACTACATAGCACAAGTAGATGTAGTTAACTTTAATCTCACATAGCAATCT\n+TTAATCAGTGTGTAACATTAGGGAGGACTTGAAAGAGCCACCACATTTTCACCGAGGCCACGCGGAGTAC\n+GATCGAGTGTACAGTGAACAATGCTAGGGAGAGCTGCCTATATGGA
AGAGCCCTAATGTGTAAAATTAAT\n+TTTAGTAGTGCTATCCCCATGTGATTTTAATAGCTTCTTAGGAGAATGACAAAAAAAAAAAAAAAAAAAA\n+AAAAAAAAAAAAA\n+\n' |
b |
diff -r 000000000000 -r 554aa2a63f04 test-data/test1.fastq.gz |
b |
Binary file test-data/test1.fastq.gz has changed |
b |
diff -r 000000000000 -r 554aa2a63f04 test-data/test2.fastq.gz |
b |
Binary file test-data/test2.fastq.gz has changed |
b |
diff -r 000000000000 -r 554aa2a63f04 test-data/test3.fastq.gz |
b |
Binary file test-data/test3.fastq.gz has changed |
b |
diff -r 000000000000 -r 554aa2a63f04 tool_data_table_conf.xml.sample --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tool_data_table_conf.xml.sample Fri Jan 28 18:47:34 2022 +0000 |
b |
@@ -0,0 +1,7 @@ +<tables> + <!-- Locations of all fasta files under genome directory --> + <table name="all_fasta" comment_char="#" allow_duplicate_entries="False"> + <columns>value, dbkey, name, path</columns> + <file path="tool-data/all_fasta.loc" /> + </table> +</tables> |
b |
diff -r 000000000000 -r 554aa2a63f04 trim_reference.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/trim_reference.py Fri Jan 28 18:47:34 2022 +0000 |
[ |
#!/usr/bin/env python
"""Remove the poly-A tail from the end of a FASTA reference.

Usage: trim_reference.py input_file [output_file]

The input is read completely, trailing ``A`` nucleotides are removed from the
end of the file, and the result is written to ``output_file`` (standard
output when omitted).  Only the tail of the *last* record in the file is
trimmed; earlier records of a multi-FASTA file are passed through unchanged.
"""

from __future__ import print_function

import argparse
import sys


def trim_poly_a_tail(lines):
    """Return a copy of *lines* with the trailing poly-A run removed.

    *lines* is a sequence of text lines as produced by ``readlines()``.
    Working backwards from the end:

    * lines that are blank or consist only of ``A``/``a``/space characters
      are dropped entirely;
    * the first line that contains any other character is cut just after
      its last such character, stripped of trailing whitespace and
      terminated with a single newline;
    * a FASTA header line (starting with ``>``) stops the walk and is never
      modified, so a record made up solely of ``A`` keeps its header.

    The comparison is case-insensitive, but the case of every retained
    character is preserved.  An empty input, or one containing nothing but
    ``A`` and whitespace, yields an empty list instead of raising.
    """
    kept = list(lines)
    while kept:
        last = kept[-1]
        if last.startswith('>'):
            # Never eat into (or rewrite) a header line.
            break
        # First drop the line terminator / trailing whitespace, then the
        # run of As and spaces that ends the line.
        remainder = last.rstrip().rstrip('Aa ')
        if remainder:
            kept[-1] = remainder + '\n'
            break
        # The whole line was poly-A (or blank): leave it out of the output.
        kept.pop()
    return kept


def main(argv=None):
    """Command line entry point: trim *input_file* into *output_file*.

    *argv* defaults to ``sys.argv[1:]`` (argparse behaviour when ``None``).
    """
    parser = argparse.ArgumentParser(
        description='Trim trailing As (poly-A tail) from a FASTA reference')
    parser.add_argument('input_file', type=argparse.FileType())
    parser.add_argument('output_file', type=argparse.FileType('w'), nargs='?', default=sys.stdout)
    args = parser.parse_args(argv)
    try:
        lines = args.input_file.readlines()
        args.output_file.write(''.join(trim_poly_a_tail(lines)))
    finally:
        # argparse.FileType does not close what it opens; do it here but
        # leave the interpreter's standard streams alone.
        if args.input_file is not sys.stdin:
            args.input_file.close()
        if args.output_file is not sys.stdout:
            args.output_file.close()


if __name__ == '__main__':
    main()