changeset 1:0d13e4410c3d draft

planemo upload commit 68b7e251486853b77396932b588c20f389c366d2
author yating-l
date Mon, 18 Jun 2018 18:36:00 -0400
parents 65c9ce351343
children 8fdd3e06e1ec
files rename.py rename_scaffold.iml rename_scaffold.xml test-data/Dbia3_index.csv test-data/Dbia3_index.fa
diffstat 5 files changed, 91 insertions(+), 82 deletions(-) [+]
line wrap: on
line diff
--- a/rename.py	Fri Jan 20 17:06:52 2017 -0500
+++ b/rename.py	Mon Jun 18 18:36:00 2018 -0400
@@ -2,9 +2,10 @@
 Call rename to rename scaffolds in reference genome so that the sequence names are less than 31 characters. Rename all scaffolds to scaffold_1, scaffold_2, ..., scaffold_N and provide a name mapping file
 """
 import sys
+import csv
 from collections import OrderedDict
 
-def rename(inputfile, outputfile, indexfile):
+def rename(inputfile, outputfile, writer):
     namemap = OrderedDict()
     with open(outputfile, 'w') as out:
         with open(inputfile, 'r') as rf:
@@ -16,21 +17,19 @@
                     newname = "scaffold_" + str(i)
                     line = ">" + newname
                     i = i+1
-                    namemap[oldname] = newname
+                    writer.writerow([oldname, newname])
                 #TODO: Add line breaks to chromosome sequences that are in a single line
-                #else:
-                    #if (len(line) > 50):
-                        #for 
                 out.write(line.rstrip() + "\n")
-    with open(indexfile, 'w') as index:
-        for k in namemap:
-            index.write(k + "\t" + namemap[k] + "\n")
 
 def main():
     inputfile = str(sys.argv[1])
     outputfile = str(sys.argv[2])
     indexfile = str(sys.argv[3])
-    rename(inputfile, outputfile, indexfile)
+    csvfile = open(indexfile, 'w')
+    fieldnames = ['Original sequence name', 'Renamed sequence name']
+    writer = csv.writer(csvfile)
+    writer.writerow(fieldnames)
+    rename(inputfile, outputfile, writer)
 
 if __name__ == "__main__":
     main()
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/rename_scaffold.iml	Mon Jun 18 18:36:00 2018 -0400
@@ -0,0 +1,9 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<module type="PYTHON_MODULE" version="4">
+  <component name="NewModuleRootManager" inherit-compiler-output="true">
+    <exclude-output />
+    <content url="file://$MODULE_DIR$" />
+    <orderEntry type="inheritedJdk" />
+    <orderEntry type="sourceFolder" forTests="false" />
+  </component>
+</module>
\ No newline at end of file
--- a/rename_scaffold.xml	Fri Jan 20 17:06:52 2017 -0500
+++ b/rename_scaffold.xml	Mon Jun 18 18:36:00 2018 -0400
@@ -1,4 +1,4 @@
-<tool id="rename_scaffold" name="rename the scaffolds" version="0.1.0">
+<tool id="rename_scaffold" name="rename the scaffolds" version="1.1">
     <description>Rename the scaffolds so that they won't exceed 31 characters</description>
 <stdio>
     <exit_code range="1:" />
@@ -11,14 +11,14 @@
 </inputs>
 <outputs>
     <data name="output" format="fasta" label="${tool.name} on ${on_string}: renamed_reference" />
-    <data name="index" format="fasta" label="${tool.name} on ${on_string}: name mapping"/>
+    <data name="index" format="csv" label="${tool.name} on ${on_string}: name mapping"/>
 </outputs>
 <tests>
     <test>
         <!-- Test with Dbia3.fa -->
         <param name="input" value="Dbia3.fa" />
         <output name="output" file="Dbia3_renamed.fa"/>
-        <output name="index" file="Dbia3_index.fa"/>
+        <output name="index" file="Dbia3_index.csv"/>
     </test>
 </tests>
 <help><![CDATA[
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/Dbia3_index.csv	Mon Jun 18 18:36:00 2018 -0400
@@ -0,0 +1,71 @@
+Original sequence name,Renamed sequence name
+contig1,scaffold_1
+contig2,scaffold_2
+contig3,scaffold_3
+contig4,scaffold_4
+contig5,scaffold_5
+contig6,scaffold_6
+contig7,scaffold_7
+contig8,scaffold_8
+contig9,scaffold_9
+contig10,scaffold_10
+contig11,scaffold_11
+contig12,scaffold_12
+contig13,scaffold_13
+contig14,scaffold_14
+contig15,scaffold_15
+contig16,scaffold_16
+contig17,scaffold_17
+contig18,scaffold_18
+contig19,scaffold_19
+contig20,scaffold_20
+contig21,scaffold_21
+contig22,scaffold_22
+contig23,scaffold_23
+contig24,scaffold_24
+contig25,scaffold_25
+contig26,scaffold_26
+contig27,scaffold_27
+contig28,scaffold_28
+contig29,scaffold_29
+contig30,scaffold_30
+contig31,scaffold_31
+contig32,scaffold_32
+contig33,scaffold_33
+contig34,scaffold_34
+contig35,scaffold_35
+contig36,scaffold_36
+contig37,scaffold_37
+contig38,scaffold_38
+contig39,scaffold_39
+contig40,scaffold_40
+contig41,scaffold_41
+contig42,scaffold_42
+contig43,scaffold_43
+contig44,scaffold_44
+contig45,scaffold_45
+contig46,scaffold_46
+contig47,scaffold_47
+contig48,scaffold_48
+contig49,scaffold_49
+contig50,scaffold_50
+contig51,scaffold_51
+contig52,scaffold_52
+contig53,scaffold_53
+contig54,scaffold_54
+contig55,scaffold_55
+contig56,scaffold_56
+contig57,scaffold_57
+contig58,scaffold_58
+contig59,scaffold_59
+contig60,scaffold_60
+contig61,scaffold_61
+contig62,scaffold_62
+contig63,scaffold_63
+contig64,scaffold_64
+contig65,scaffold_65
+contig66,scaffold_66
+contig67,scaffold_67
+contig68,scaffold_68
+contig69,scaffold_69
+contig70,scaffold_70
--- a/test-data/Dbia3_index.fa	Fri Jan 20 17:06:52 2017 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,70 +0,0 @@
-contig1	scaffold_1
-contig2	scaffold_2
-contig3	scaffold_3
-contig4	scaffold_4
-contig5	scaffold_5
-contig6	scaffold_6
-contig7	scaffold_7
-contig8	scaffold_8
-contig9	scaffold_9
-contig10	scaffold_10
-contig11	scaffold_11
-contig12	scaffold_12
-contig13	scaffold_13
-contig14	scaffold_14
-contig15	scaffold_15
-contig16	scaffold_16
-contig17	scaffold_17
-contig18	scaffold_18
-contig19	scaffold_19
-contig20	scaffold_20
-contig21	scaffold_21
-contig22	scaffold_22
-contig23	scaffold_23
-contig24	scaffold_24
-contig25	scaffold_25
-contig26	scaffold_26
-contig27	scaffold_27
-contig28	scaffold_28
-contig29	scaffold_29
-contig30	scaffold_30
-contig31	scaffold_31
-contig32	scaffold_32
-contig33	scaffold_33
-contig34	scaffold_34
-contig35	scaffold_35
-contig36	scaffold_36
-contig37	scaffold_37
-contig38	scaffold_38
-contig39	scaffold_39
-contig40	scaffold_40
-contig41	scaffold_41
-contig42	scaffold_42
-contig43	scaffold_43
-contig44	scaffold_44
-contig45	scaffold_45
-contig46	scaffold_46
-contig47	scaffold_47
-contig48	scaffold_48
-contig49	scaffold_49
-contig50	scaffold_50
-contig51	scaffold_51
-contig52	scaffold_52
-contig53	scaffold_53
-contig54	scaffold_54
-contig55	scaffold_55
-contig56	scaffold_56
-contig57	scaffold_57
-contig58	scaffold_58
-contig59	scaffold_59
-contig60	scaffold_60
-contig61	scaffold_61
-contig62	scaffold_62
-contig63	scaffold_63
-contig64	scaffold_64
-contig65	scaffold_65
-contig66	scaffold_66
-contig67	scaffold_67
-contig68	scaffold_68
-contig69	scaffold_69
-contig70	scaffold_70