changeset 2:8fdd3e06e1ec draft

planemo upload commit eb7c6866b218a41d9b22255ee6afb83551a5ff40
author yating-l
date Mon, 25 Jun 2018 15:29:18 -0400
parents 0d13e4410c3d
children 9529a207d704
files rename.py rename_scaffold.xml test-data/Dbia3_index.csv
diffstat 3 files changed, 5 insertions(+), 11 deletions(-) [+]
line wrap: on
line diff
--- a/rename.py	Mon Jun 18 18:36:00 2018 -0400
+++ b/rename.py	Mon Jun 25 15:29:18 2018 -0400
@@ -3,10 +3,8 @@
 """
 import sys
 import csv
-from collections import OrderedDict
 
 def rename(inputfile, outputfile, writer):
-    namemap = OrderedDict()
     with open(outputfile, 'w') as out:
         with open(inputfile, 'r') as rf:
             lines = rf.readlines()
@@ -15,20 +13,17 @@
                 if ">" in line:
                     oldname = line[1:].rstrip()
                     newname = "scaffold_" + str(i)
-                    line = ">" + newname
+                    line = ">" + newname + "\n"
                     i = i+1
                     writer.writerow([oldname, newname])
-                #TODO: Add line breaks to chromosome sequences that are in a single line
-                out.write(line.rstrip() + "\n")
+                out.write(line)
 
 def main():
     inputfile = str(sys.argv[1])
     outputfile = str(sys.argv[2])
     indexfile = str(sys.argv[3])
     csvfile = open(indexfile, 'w')
-    fieldnames = ['Original sequence name', 'Renamed sequence name']
     writer = csv.writer(csvfile)
-    writer.writerow(fieldnames)
     rename(inputfile, outputfile, writer)
 
 if __name__ == "__main__":
--- a/rename_scaffold.xml	Mon Jun 18 18:36:00 2018 -0400
+++ b/rename_scaffold.xml	Mon Jun 25 15:29:18 2018 -0400
@@ -1,5 +1,5 @@
-<tool id="rename_scaffold" name="rename the scaffolds" version="1.1">
-    <description>Rename the scaffolds so that they won't exceed 31 characters</description>
+<tool id="rename_scaffold" name="rename the scaffolds" version="1.2">
+    <description>a Galaxy tool to rename the scaffolds in the reference genome so that they won't exceed 31 characters</description>
 <stdio>
     <exit_code range="1:" />
 </stdio>
@@ -22,7 +22,7 @@
     </test>
 </tests>
 <help><![CDATA[
-This tool is to rename scaffolds in reference genome so that the sequence names are less than 31 characters. Rename all scaffolds to scaffold_1, scaffold_2, ..., scaffold_N and provide a name mapping file
+This tool renames the scaffolds in the reference genome so that the sequence names are less than 31 characters long. All scaffolds are renamed to scaffold_1, scaffold_2, ..., scaffold_N, and a name mapping file is also output.
 
 ]]></help>
 <citations>
--- a/test-data/Dbia3_index.csv	Mon Jun 18 18:36:00 2018 -0400
+++ b/test-data/Dbia3_index.csv	Mon Jun 25 15:29:18 2018 -0400
@@ -1,4 +1,3 @@
-Original sequence name,Renamed sequence name
 contig1,scaffold_1
 contig2,scaffold_2
 contig3,scaffold_3