diff rename.py @ 1:0d13e4410c3d draft

planemo upload commit 68b7e251486853b77396932b588c20f389c366d2
author yating-l
date Mon, 18 Jun 2018 18:36:00 -0400
parents 65c9ce351343
children 8fdd3e06e1ec
line wrap: on
line diff
--- a/rename.py	Fri Jan 20 17:06:52 2017 -0500
+++ b/rename.py	Mon Jun 18 18:36:00 2018 -0400
@@ -2,9 +2,10 @@
 Call rename to rename scaffolds in reference genome so that the sequence names are less than 31 characters. Rename all scaffolds to scaffold_1, scaffold_2, ..., scaffold_N and provide a name mapping file
 """
 import sys
+import csv
 from collections import OrderedDict
 
-def rename(inputfile, outputfile, indexfile):
+def rename(inputfile, outputfile, writer):
     namemap = OrderedDict()
     with open(outputfile, 'w') as out:
         with open(inputfile, 'r') as rf:
@@ -16,21 +17,19 @@
                     newname = "scaffold_" + str(i)
                     line = ">" + newname
                     i = i+1
-                    namemap[oldname] = newname
+                    writer.writerow([oldname, newname])
                 #TODO: Add line breaks to chromosome sequences that are in a single line
-                #else:
-                    #if (len(line) > 50):
-                        #for 
                 out.write(line.rstrip() + "\n")
-    with open(indexfile, 'w') as index:
-        for k in namemap:
-            index.write(k + "\t" + namemap[k] + "\n")
 
 def main():
     inputfile = str(sys.argv[1])
     outputfile = str(sys.argv[2])
     indexfile = str(sys.argv[3])
-    rename(inputfile, outputfile, indexfile)
+    fieldnames = ['Original sequence name', 'Renamed sequence name']
+    with open(indexfile, 'w') as csvfile:  # closed (and flushed) on exit, even if rename() fails
+        writer = csv.writer(csvfile)
+        writer.writerow(fieldnames)  # header row precedes the per-scaffold mapping rows
+        rename(inputfile, outputfile, writer)
 
 if __name__ == "__main__":
     main()