changeset 4:c5bb2f9af708 draft

planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich2 commit 6b3b1194af0de793a1b4892c5973da835f5c0902
author artbio
date Sat, 20 Apr 2024 23:23:40 +0000
parents 0efb0ee6a7e9
children 08e50af788f7
files RepEnrich2_setup.py macros.xml repenrich2.xml
diffstat 3 files changed, 7 insertions(+), 20 deletions(-) [+]
line wrap: on
line diff
--- a/RepEnrich2_setup.py	Sat Apr 20 15:45:33 2024 +0000
+++ b/RepEnrich2_setup.py	Sat Apr 20 23:23:40 2024 +0000
@@ -1,7 +1,6 @@
 #!/usr/bin/env python
 import argparse
 import csv
-import os
 import shlex
 import subprocess
 import sys
@@ -48,15 +47,6 @@
 genomefasta = args.genomefasta
 cpus = args.cpus
 
-# check that the programs we need are available
-try:
-    subprocess.call(shlex.split("bowtie2 --version"),
-                    stdout=open(os.devnull, 'wb'),
-                    stderr=open(os.devnull, 'wb'))
-except OSError:
-    print("Error: Bowtie2 not available in the path")
-    raise
-
 
 def starts_with_numerical(list):
     try:
@@ -68,7 +58,7 @@
         return False
 
 
-# define a text importer for .out/.txt format of repbase
+# text import function for .out/.txt format of repbase
 def import_text(filename, separator):
     csv.field_size_limit(sys.maxsize)
     file = csv.reader(open(filename), delimiter=separator,
@@ -81,7 +71,7 @@
 genome = defaultdict(dict)
 
 for chr in g.keys():
-    genome[chr]['sequence'] = g[chr].seq
+    genome[chr]['sequence'] = str(g[chr].seq)
     genome[chr]['length'] = len(g[chr].seq)
 
 # Build a bedfile of repeatcoordinates to use by RepEnrich region_sorter
@@ -110,7 +100,7 @@
 
 # generate metagenomes and save them to FASTA files for bowtie build
 for repname in rep_coords:
-    metagenome = ''
+    genomes_list = []
     # iterating coordinate list by block of 3 (chr, start, end)
     block = 3
     for i in range(0, len(rep_coords[repname]) - block + 1, block):
@@ -119,11 +109,8 @@
         start = max(int(batch[1]) - flankingl, 0)
         end = min(int(batch[2]) + flankingl,
                   int(genome[chromosome]['length'])-1) + 1
-        metagenome = (
-            f"{metagenome}{spacer}"
-            f"{genome[chromosome]['sequence'][start:end]}"
-            )
-
+        genomes_list.append(genome[chromosome]['sequence'][start:end])
+    metagenome = spacer.join(genomes_list)
     # Create Fasta of repeat pseudogenome
     fastafilename = f"{repname}.fa"
     record = SeqRecord(Seq(metagenome), id=repname, name='', description='')
--- a/macros.xml	Sat Apr 20 15:45:33 2024 +0000
+++ b/macros.xml	Sat Apr 20 23:23:40 2024 +0000
@@ -1,6 +1,6 @@
 <macros>
     <token name="@TOOL_VERSION@">2.31.1</token>
-    <token name="@VERSION_SUFFIX@">3</token>
+    <token name="@VERSION_SUFFIX@">4</token>
     <token name="@PROFILE@">23.0</token>
 
     <xml name="repenrich_requirements">
--- a/repenrich2.xml	Sat Apr 20 15:45:33 2024 +0000
+++ b/repenrich2.xml	Sat Apr 20 23:23:40 2024 +0000
@@ -26,7 +26,7 @@
             #set index_path = 'genome'
         #else:
             #set index_path = $refGenomeSource.genome.fields.path
-            bowtie-inspect $index_path > genome.fa &&
+            ln -s '${index_path}.fa' 'genome.fa' &&
         #end if
 
         python $__tool_directory__/RepEnrich2_setup.py