Mercurial > repos > yating-l > rename_scaffolds
changeset 2:8fdd3e06e1ec draft
planemo upload commit eb7c6866b218a41d9b22255ee6afb83551a5ff40
author | yating-l |
---|---|
date | Mon, 25 Jun 2018 15:29:18 -0400 |
parents | 0d13e4410c3d |
children | 9529a207d704 |
files | rename.py rename_scaffold.xml test-data/Dbia3_index.csv |
diffstat | 3 files changed, 5 insertions(+), 11 deletions(-) [+] |
line wrap: on
line diff
--- a/rename.py Mon Jun 18 18:36:00 2018 -0400
+++ b/rename.py Mon Jun 25 15:29:18 2018 -0400
@@ -3,10 +3,8 @@
 """
 import sys
 import csv
-from collections import OrderedDict
 
 def rename(inputfile, outputfile, writer):
-    namemap = OrderedDict()
     with open(outputfile, 'w') as out:
         with open(inputfile, 'r') as rf:
             lines = rf.readlines()
@@ -15,20 +13,17 @@
                 if ">" in line:
                     oldname = line[1:].rstrip()
                     newname = "scaffold_" + str(i)
-                    line = ">" + newname
+                    line = ">" + newname + "\n"
                     i = i+1
                     writer.writerow([oldname, newname])
-                #TODO: Add line breaks to chromosome sequences that are in a single line
-                out.write(line.rstrip() + "\n")
+                out.write(line)
 
 def main():
     inputfile = str(sys.argv[1])
     outputfile = str(sys.argv[2])
     indexfile = str(sys.argv[3])
     csvfile = open(indexfile, 'w')
-    fieldnames = ['Original sequence name', 'Renamed sequence name']
     writer = csv.writer(csvfile)
-    writer.writerow(fieldnames)
     rename(inputfile, outputfile, writer)
 
 if __name__ == "__main__":
--- a/rename_scaffold.xml Mon Jun 18 18:36:00 2018 -0400 +++ b/rename_scaffold.xml Mon Jun 25 15:29:18 2018 -0400 @@ -1,5 +1,5 @@ -<tool id="rename_scaffold" name="rename the scaffolds" version="1.1"> - <description>Rename the scaffolds so that they won't exceed 31 characters</description> +<tool id="rename_scaffold" name="rename the scaffolds" version="1.2"> + <description>a Galaxy tool to rename the scaffolds in the reference genome so that they won't exceed 31 characters</description> <stdio> <exit_code range="1:" /> </stdio> @@ -22,7 +22,7 @@ </test> </tests> <help><![CDATA[ -This tool is to rename scaffolds in reference genome so that the sequence names are less than 31 characters. Rename all scaffolds to scaffold_1, scaffold_2, ..., scaffold_N and provide a name mapping file +This tool is to rename scaffolds in the reference genome so that the sequence names are less than 31 characters. Rename all scaffolds to scaffold_1, scaffold_2, ..., scaffold_N and also output a name mapping file ]]></help> <citations>