# HG changeset patch
# User yating-l
# Date 1529361360 14400
# Node ID 0d13e4410c3da457f1053931a5ee56cbe0ef1611
# Parent 65c9ce351343ac57ab26b6e9ee2d4a40a898b54e
planemo upload commit 68b7e251486853b77396932b588c20f389c366d2
diff -r 65c9ce351343 -r 0d13e4410c3d rename.py
--- a/rename.py Fri Jan 20 17:06:52 2017 -0500
+++ b/rename.py Mon Jun 18 18:36:00 2018 -0400
@@ -2,9 +2,10 @@
Call rename to rename scaffolds in reference genome so that the sequence names are less than 31 characters. Rename all scaffolds to scaffold_1, scaffold_2, ..., scaffold_N and provide a name mapping file
"""
import sys
+import csv
from collections import OrderedDict
-def rename(inputfile, outputfile, indexfile):
+def rename(inputfile, outputfile, writer):
namemap = OrderedDict()
with open(outputfile, 'w') as out:
with open(inputfile, 'r') as rf:
@@ -16,21 +17,19 @@
newname = "scaffold_" + str(i)
line = ">" + newname
i = i+1
- namemap[oldname] = newname
+ writer.writerow([oldname, newname])
#TODO: Add line breaks to chromosome sequences that are in a single line
- #else:
- #if (len(line) > 50):
- #for
out.write(line.rstrip() + "\n")
- with open(indexfile, 'w') as index:
- for k in namemap:
- index.write(k + "\t" + namemap[k] + "\n")
def main():
inputfile = str(sys.argv[1])
outputfile = str(sys.argv[2])
indexfile = str(sys.argv[3])
- rename(inputfile, outputfile, indexfile)
+ csvfile = open(indexfile, 'w')
+ fieldnames = ['Original sequence name', 'Renamed sequence name']
+ writer = csv.writer(csvfile)
+ writer.writerow(fieldnames)
+ rename(inputfile, outputfile, writer)
if __name__ == "__main__":
main()
diff -r 65c9ce351343 -r 0d13e4410c3d rename_scaffold.iml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/rename_scaffold.iml Mon Jun 18 18:36:00 2018 -0400
@@ -0,0 +1,9 @@
+
+
+
+
+
+
+
+
+
\ No newline at end of file
diff -r 65c9ce351343 -r 0d13e4410c3d rename_scaffold.xml
--- a/rename_scaffold.xml Fri Jan 20 17:06:52 2017 -0500
+++ b/rename_scaffold.xml Mon Jun 18 18:36:00 2018 -0400
@@ -1,4 +1,4 @@
-
+
Rename the scaffolds so that they won't exceed 31 characters
@@ -11,14 +11,14 @@
-
+
-
+