Repository 'tn93_readreduce'
hg clone https://toolshed.g2.bx.psu.edu/repos/iuc/tn93_readreduce

Changeset 2:1d2ec0b0a0a7 (2022-04-20)
Previous changeset 1:84849140a3bc (2021-04-23)
Next changeset 3:c176164dc8a5 (2024-09-28)
Commit message:
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/tn93/ commit 98c0d716cbd1237ae735ce83e0153ee246abd5d8"
modified:
macros.xml
readreduce.xml
test-data/filter-out1.fasta
tn93_cluster.py
tn93_filter.py
diff -r 84849140a3bc -r 1d2ec0b0a0a7 macros.xml
--- a/macros.xml Fri Apr 23 03:04:45 2021 +0000
+++ b/macros.xml Wed Apr 20 16:59:27 2022 +0000
@@ -1,6 +1,12 @@
 <?xml version="1.0"?>
 <macros>
-    <token name="@VERSION@">1.0.6</token>
+    <token name="@TOOL_VERSION@">1.0.6</token>
+    <xml name="requirements">
+        <requirements>
+            <requirement type="package" version="@TOOL_VERSION@">tn93</requirement>
+            <yield />
+        </requirements>
+    </xml>
     <xml name="citations">
         <citations>
             <citation type="bibtex">
diff -r 84849140a3bc -r 1d2ec0b0a0a7 readreduce.xml
--- a/readreduce.xml Fri Apr 23 03:04:45 2021 +0000
+++ b/readreduce.xml Wed Apr 20 16:59:27 2022 +0000
@@ -1,11 +1,9 @@
-<tool id="tn93_readreduce" name="Merge matching reads" version="@VERSION@">
+<tool id="tn93_readreduce" name="Merge matching reads" version="@TOOL_VERSION@+galaxy1">
     <description>into clusters with TN-93</description>
     <macros>
         <import>macros.xml</import>
     </macros>
-    <requirements>
-        <requirement type="package" version="@VERSION@">tn93</requirement>
-    </requirements>
+    <expand macro="requirements" />
     <version_command><![CDATA[tn93 --version]]></version_command>
     <command detect_errors="exit_code"><![CDATA[
     readreduce
diff -r 84849140a3bc -r 1d2ec0b0a0a7 test-data/filter-out1.fasta
--- a/test-data/filter-out1.fasta Fri Apr 23 03:04:45 2021 +0000
+++ b/test-data/filter-out1.fasta Wed Apr 20 16:59:27 2022 +0000
@@ -13,3 +13,8 @@
[byte-encoded FASTA diff truncated in this view: the existing >gb_MW518841 SARS-CoV-2 record appears as unchanged context; the hunk adds new records, including >epi_isl_1041403 and a mostly gap-padded >REFERENCE sequence ending in GAGACG, with no newline at end of file]
diff -r 84849140a3bc -r 1d2ec0b0a0a7 tn93_cluster.py
--- a/tn93_cluster.py Fri Apr 23 03:04:45 2021 +0000
+++ b/tn93_cluster.py Wed Apr 20 16:59:27 2022 +0000
@@ -2,7 +2,6 @@
 import json
 import os
 import shlex
-import shutil
 import subprocess
 import sys
 
@@ -41,27 +40,22 @@
 def main(arguments):
     threshold = arguments.threshold
     step = threshold * 0.25
-    shutil.copy(arguments.input, os.path.join(os.getcwd(), 'reference_msa.fa'))
-    shutil.copy(arguments.input, os.path.join(os.getcwd(), 'reference_msa.fa.bak'))
     with open(arguments.reference) as fh:
         for line in fh:
             if line[0] == '>':
                 _ref_seq_name = line[1:].split(' ')[0].strip()
                 break
-    while True and threshold <= 1:
-        command = 'tn93-cluster -o clusters.json -t %g -a %s -c %s -m json -l %d -g %f reference_msa.fa' % (threshold, arguments.ambigs, arguments.cluster_type, arguments.overlap, arguments.fraction)
+    while threshold <= 1:
+        command = 'tn93-cluster -o clusters.json -t %g -a %s -c %s -m json -l %d -g %f %s' % (threshold, arguments.ambigs, arguments.cluster_type, arguments.overlap, arguments.fraction, arguments.input)
         return_code = run_command(command)
         if return_code != 0:
             return return_code
-        input_stamp, cluster_count = cluster_to_fasta('clusters.json', 'reference_msa.fa.bak', _ref_seq_name)
-        if cluster_count <= arguments.cluster_count or threshold == 1:
+        input_stamp, cluster_count = cluster_to_fasta('clusters.json', 'clusters.fa', _ref_seq_name)
+        if cluster_count <= arguments.cluster_count:
             break
         else:
             threshold += step
         print('Found %d clusters at threshold %f' % (cluster_count, threshold))
-    shutil.copy('reference_msa.fa.bak', arguments.compressed)
-    shutil.copy('clusters.json', arguments.output)
-    os.remove('reference_msa.fa.bak')
     return 0
 
 
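For context on the tn93_cluster.py change above: the loop now runs tn93-cluster on the input alignment directly (the reference_msa.fa working copies are gone, and the shutil.copy calls that wrote arguments.compressed and arguments.output are removed from this function), raising the distance threshold by a quarter of its starting value each round until the cluster count drops to the requested maximum or the threshold reaches 1. Below is a minimal sketch of that strategy, not the script itself; the count_clusters helper and the direct subprocess.call standing in for the script's run_command wrapper are assumptions for illustration.

import shlex
import subprocess


def cluster_until_small_enough(input_fasta, ambigs, cluster_type, overlap,
                               fraction, threshold, max_clusters, count_clusters):
    # Step size is fixed at a quarter of the starting threshold, as in the script.
    step = threshold * 0.25
    while threshold <= 1:
        # Cluster the original input in place; no temporary copies are made.
        command = ('tn93-cluster -o clusters.json -t %g -a %s -c %s -m json -l %d -g %f %s'
                   % (threshold, ambigs, cluster_type, overlap, fraction, input_fasta))
        return_code = subprocess.call(shlex.split(command))
        if return_code != 0:
            return return_code
        # count_clusters is a hypothetical helper that parses clusters.json
        # and returns the number of clusters it contains.
        cluster_count = count_clusters('clusters.json')
        if cluster_count <= max_clusters:
            break
        threshold += step
        print('Found %d clusters at threshold %f' % (cluster_count, threshold))
    return 0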
diff -r 84849140a3bc -r 1d2ec0b0a0a7 tn93_filter.py
--- a/tn93_filter.py Fri Apr 23 03:04:45 2021 +0000
+++ b/tn93_filter.py Wed Apr 20 16:59:27 2022 +0000
@@ -1,5 +1,6 @@
 import argparse
 import csv
+import random
 
 from Bio import SeqIO
 
@@ -8,15 +9,22 @@
 arguments.add_argument('-f', '--reference', help='Reference sequence', required=True, type=str)
 arguments.add_argument('-d', '--distances', help='Calculated pairwise distances', required=True, type=str)
 arguments.add_argument('-r', '--reads', help='Output file for filtered reads', required=True, type=str)
-arguments.add_argument('-q', '--clusters', help='Compressed clusters', required=True, type=str)
+arguments.add_argument('-q', '--clusters', help='Compressed background clusters', required=True, type=str)
 settings = arguments.parse_args()
 
 reference_name = 'REFERENCE'
 reference_seq = ''
 
+
+def unique_id(new_id, existing_ids):
+    while new_id in existing_ids:
+        new_id += '_' + ''.join(random.choices('0123456789abcdef', k=10))
+    return new_id
+
+
 with open(settings.reference) as seq_fh:
     for seq_record in SeqIO.parse(seq_fh, 'fasta'):
-        reference_name = seq_record.name
+        reference_name = seq_record.name.split(' ')[0]
         reference_seq = seq_record.seq
         break
 
@@ -27,17 +35,19 @@
     for line in reader:
         if line[1] not in seqs_to_filter:
             seqs_to_filter.add(line[1])
+        else:
+            seqs_to_filter.add(unique_id(line[1], seqs_to_filter))
     if reference_name in seqs_to_filter:
         seqs_to_filter.remove(reference_name)
 
 with open(settings.reads, "a+") as fh:
     seqs_filtered = list()
     for seq_record in SeqIO.parse(settings.clusters, "fasta"):
+        if seq_record.name.split(' ')[0] == reference_name:
+            continue
         if seq_record.name not in seqs_to_filter:
-            if seq_record.name == reference_name:
-                if seq_record.name not in seqs_filtered:
-                    seqs_filtered.append(seq_record.name)
-                else:
-                    continue
+            unique_name = unique_id(seq_record.name, seqs_filtered)
+            fh.write('\n>%s\n%s' % (unique_name, seq_record.seq))
+            seqs_filtered.append(unique_name)
     if reference_name not in seqs_filtered:
         fh.write('\n>REFERENCE\n%s' % reference_seq)
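Taken together, the tn93_filter.py changes mean that the reference sequence is always skipped when cluster reads are copied, name collisions are resolved by appending random hexadecimal suffixes rather than by dropping reads, and a single REFERENCE record is appended if none was written. A minimal sketch of that behaviour follows, assuming Biopython SeqIO records as in the script; the function and argument names here are illustrative, not the script's command-line interface.

import random

from Bio import SeqIO


def unique_id(new_id, existing_ids):
    # Append random hex suffixes until new_id no longer collides.
    while new_id in existing_ids:
        new_id += '_' + ''.join(random.choices('0123456789abcdef', k=10))
    return new_id


def append_cluster_reads(clusters_fasta, reads_path, reference_name,
                         reference_seq, seqs_to_filter):
    # Append non-reference cluster sequences under collision-free names,
    # then make sure a REFERENCE record is present (simplified sketch).
    with open(reads_path, 'a+') as fh:
        seqs_filtered = []
        for seq_record in SeqIO.parse(clusters_fasta, 'fasta'):
            if seq_record.name.split(' ')[0] == reference_name:
                continue
            if seq_record.name not in seqs_to_filter:
                unique_name = unique_id(seq_record.name, seqs_filtered)
                fh.write('\n>%s\n%s' % (unique_name, seq_record.seq))
                seqs_filtered.append(unique_name)
        if reference_name not in seqs_filtered:
            fh.write('\n>REFERENCE\n%s' % reference_seq)

The split(' ')[0] guard mirrors the script; with Biopython's FASTA parser, seq_record.name is already the first whitespace-delimited token of the header, so the split is defensive rather than strictly required.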