Repository 'genephys'
hg clone https://toolshed.g2.bx.psu.edu/repos/mcharles/genephys

Changeset 3:8dfa09868059 (2014-10-24)
Previous changeset 2:c52e74b98773 (2014-08-20) Next changeset 4:3d79224aa2dc (2014-10-30)
Commit message:
Uploaded
modified:
genephys/fastaGroomerForMakeBlastdb.pl
genephys/fastaGroomerForMakeBlastdb.xml
added:
genephys/Galaxy-Workflow-GenePhys_-_blastn.ga
genephys/Galaxy-Workflow-GenePhys_-_blastp.ga
genephys/Galaxy-Workflow-GenePhys_-_blastx.ga
genephys/Galaxy-Workflow-GenePhys_-_tblastx.ga
genephys/GenePhys.pl
genephys/GenePhys.xml
genephys/MergeBlastResults.pl
genephys/MergeBlastResults.xml
genephys/repository_dependencies.xml
removed:
genephys/Galaxy-Workflow-GenePhys.ga
genephys/extractgenesfromsegment.pl
genephys/extractgenesfromsegment.xml
genephys/extractgenomicsegment.pl
genephys/extractgenomicsegment.xml
genephys/extractgenomicsequencefromsegment.pl
genephys/extractgenomicsequencefromsegment.xml
genephys/mergeAllBestBlast.pl
genephys/mergeAllBestBlast.xml
genephys/parseblasttab.pl
genephys/parseblasttab.xml
diff -r c52e74b98773 -r 8dfa09868059 genephys/Galaxy-Workflow-GenePhys.ga
--- a/genephys/Galaxy-Workflow-GenePhys.ga Wed Aug 20 12:42:40 2014 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,674 +0,0 @@
[674-line Galaxy workflow JSON "GenePhys" removed. Its visible steps include the input datasets ASSEMBLY, GENE XML, MARKERS, GENETIC MAP, REFERENCE(NUC) and REFERENCE(PROT), plus final parseblasttab and mergeAllBestBlast steps combining blastn, blastp, blastx and tblastx results; the middle of the diff body is truncated in this view.]
diff -r c52e74b98773 -r 8dfa09868059 genephys/Galaxy-Workflow-GenePhys_-_blastn.ga
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/genephys/Galaxy-Workflow-GenePhys_-_blastn.ga Fri Oct 24 05:54:20 2014 -0400
@@ -0,0 +1,204 @@
[204-line Galaxy workflow JSON "GenePhys - blastn" added: NUCLEIC GENE SEQUENCE and NUCLEIC DB FILE inputs feed fastaGroomerForMakeBlastdb, NCBI BLAST+ makeblastdb (nucl), NCBI BLAST+ blastn and MergeBlastResults (output renamed BLASTN). The diff body is truncated in this view; the workflow follows the same structure as the blastp workflow shown in full below.]
diff -r c52e74b98773 -r 8dfa09868059 genephys/Galaxy-Workflow-GenePhys_-_blastp.ga
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/genephys/Galaxy-Workflow-GenePhys_-_blastp.ga Fri Oct 24 05:54:20 2014 -0400
@@ -0,0 +1,204 @@
+{
+    "a_galaxy_workflow": "true", 
+    "annotation": "", 
+    "format-version": "0.1", 
+    "name": "GenePhys - blastp", 
+    "steps": {
+        "0": {
+            "annotation": "", 
+            "id": 0, 
+            "input_connections": {}, 
+            "inputs": [
+                {
+                    "description": "", 
+                    "name": "PROTEIC GENE SEQUENCE"
+                }
+            ], 
+            "name": "Input dataset", 
+            "outputs": [], 
+            "position": {
+                "left": 294, 
+                "top": 254
+            }, 
+            "tool_errors": null, 
+            "tool_id": null, 
+            "tool_state": "{\"name\": \"PROTEIC GENE SEQUENCE\"}", 
+            "tool_version": null, 
+            "type": "data_input", 
+            "user_outputs": []
+        }, 
+        "1": {
+            "annotation": "", 
+            "id": 1, 
+            "input_connections": {}, 
+            "inputs": [
+                {
+                    "description": "", 
+                    "name": "PROTEIN DB FILE"
+                }
+            ], 
+            "name": "Input dataset", 
+            "outputs": [], 
+            "position": {
+                "left": 294, 
+                "top": 411
+            }, 
+            "tool_errors": null, 
+            "tool_id": null, 
+            "tool_state": "{\"name\": \"PROTEIN DB FILE\"}", 
+            "tool_version": null, 
+            "type": "data_input", 
+            "user_outputs": []
+        }, 
+        "2": {
+            "annotation": "", 
+            "id": 2, 
+            "input_connections": {
+                "input_fasta": {
+                    "id": 1, 
+                    "output_name": "output"
+                }
+            }, 
+            "inputs": [], 
+            "name": "fastaGroomerForMakeBlastdb", 
+            "outputs": [
+                {
+                    "name": "output_fasta", 
+                    "type": "fasta"
+                }
+            ], 
+            "position": {
+                "left": 550, 
+                "top": 380
+            }, 
+            "post_job_actions": {
+                "HideDatasetActionoutput_fasta": {
+                    "action_arguments": {}, 
+                    "action_type": "HideDatasetAction", 
+                    "output_name": "output_fasta"
+                }
+            }, 
+            "tool_errors": null, 
+            "tool_id": "fastaGroomerForMakeBlastdb", 
+            "tool_state": "{\"input_fasta\": \"null\", \"__rerun_remap_job_id__\": null, \"__page__\": 0}", 
+            "tool_version": "1.00", 
+            "type": "tool", 
+            "user_outputs": []
+        }, 
+        "3": {
+            "annotation": "", 
+            "id": 3, 
+            "input_connections": {
+                "input_file": {
+                    "id": 2, 
+                    "output_name": "output_fasta"
+                }
+            }, 
+            "inputs": [], 
+            "name": "NCBI BLAST+ makeblastdb", 
+            "outputs": [
+                {
+                    "name": "outfile", 
+                    "type": "data"
+                }
+            ], 
+            "position": {
+                "left": 923.5, 
+                "top": 362
+            }, 
+            "post_job_actions": {
+                "HideDatasetActionoutfile": {
+                    "action_arguments": {}, 
+                    "action_type": "HideDatasetAction", 
+                    "output_name": "outfile"
+                }
+            }, 
+            "tool_errors": null, 
+            "tool_id": "toolshed.g2.bx.psu.edu/repos/devteam/ncbi_blast_plus/ncbi_makeblastdb/0.1.00", 
+            "tool_state": "{\"__page__\": 0, \"mask_data_file\": \"null\", \"input_file\": \"null\", \"dbtype\": \"\\\"prot\\\"\", \"__rerun_remap_job_id__\": null, \"hash_index\": \"\\\"True\\\"\", \"tax\": \"{\\\"taxselect\\\": \\\"\\\", \\\"__current_case__\\\": 0}\", \"title\": \"\\\"\\\"\", \"parse_seqids\": \"\\\"False\\\"\"}", 
+            "tool_version": "0.1.00", 
+            "type": "tool", 
+            "user_outputs": []
+        }, 
+        "4": {
+            "annotation": "", 
+            "id": 4, 
+            "input_connections": {
+                "db_opts|histdb": {
+                    "id": 3, 
+                    "output_name": "outfile"
+                }, 
+                "query": {
+                    "id": 0, 
+                    "output_name": "output"
+                }
+            }, 
+            "inputs": [], 
+            "name": "NCBI BLAST+ blastp", 
+            "outputs": [
+                {
+                    "name": "output1", 
+                    "type": "tabular"
+                }
+            ], 
+            "position": {
+                "left": 1222, 
+                "top": 214
+            }, 
+            "post_job_actions": {
+                "HideDatasetActionoutput1": {
+                    "action_arguments": {}, 
+                    "action_type": "HideDatasetAction", 
+                    "output_name": "output1"
+                }
+            }, 
+            "tool_errors": null, 
+            "tool_id": "toolshed.g2.bx.psu.edu/repos/devteam/ncbi_blast_plus/ncbi_blastp_wrapper/0.1.00", 
+            "tool_state": "{\"evalue_cutoff\": \"\\\"0.001\\\"\", \"__page__\": 0, \"adv_opts\": \"{\\\"matrix\\\": \\\"BLOSUM62\\\", \\\"adv_opts_selector\\\": \\\"advanced\\\", \\\"filter_query\\\": \\\"False\\\", \\\"word_size\\\": \\\"0\\\", \\\"__current_case__\\\": 1, \\\"parse_deflines\\\": \\\"False\\\", \\\"max_hits\\\": \\\"10\\\"}\", \"__rerun_remap_job_id__\": null, \"blast_type\": \"\\\"blastp\\\"\", \"db_opts\": \"{\\\"db_opts_selector\\\": \\\"histdb\\\", \\\"subject\\\": \\\"\\\", \\\"histdb\\\": null, \\\"__current_case__\\\": 1, \\\"database\\\": \\\"\\\"}\", \"output\": \"{\\\"out_format\\\": \\\"cols\\\", \\\"std_cols\\\": [\\\"qseqid\\\", \\\"sseqid\\\", \\\"qstart\\\", \\\"qend\\\", \\\"sstart\\\", \\\"send\\\"], \\\"ids_cols\\\": null, \\\"tax_cols\\\": null, \\\"__current_case__\\\": 2, \\\"misc_cols\\\": null, \\\"ext_cols\\\": [\\\"positive\\\", \\\"qlen\\\", \\\"slen\\\", \\\"salltitles\\\"]}\", \"query\": \"null\"}", 
+            "tool_version": "0.1.00", 
+            "type": "tool", 
+            "user_outputs": []
+        }, 
+        "5": {
+            "annotation": "", 
+            "id": 5, 
+            "input_connections": {
+                "input_blast": {
+                    "id": 4, 
+                    "output_name": "output1"
+                }
+            }, 
+            "inputs": [], 
+            "name": "MergeBlastResults", 
+            "outputs": [
+                {
+                    "name": "output_merge", 
+                    "type": "txt"
+                }, 
+                {
+                    "name": "log_file", 
+                    "type": "txt"
+                }
+            ], 
+            "position": {
+                "left": 1544, 
+                "top": 215
+            }, 
+            "post_job_actions": {
+                "RenameDatasetActionoutput_merge": {
+                    "action_arguments": {
+                        "newname": "BLASTP"
+                    }, 
+                    "action_type": "RenameDatasetAction", 
+                    "output_name": "output_merge"
+                }
+            }, 
+            "tool_errors": null, 
+            "tool_id": "MergeBlastResults", 
+            "tool_state": "{\"__page__\": 0, \"max_overlap_fraction\": \"\\\"0.5\\\"\", \"__rerun_remap_job_id__\": null, \"header\": \"\\\"BLASTP\\\"\", \"input_blast\": \"null\", \"max_overlap_length_ignored\": \"\\\"3\\\"\"}", 
+            "tool_version": "1.03", 
+            "type": "tool", 
+            "user_outputs": []
+        }
+    }
+}
\ No newline at end of file
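For orientation, the .ga files added in this changeset are plain JSON of the kind shown in full above. Below is a minimal sketch (not part of the repository) that prints the step graph of such a workflow file using the core JSON::PP module; the default file name is only an example.

#!/usr/bin/perl
# Minimal sketch (not from this repository): print the step graph of a
# Galaxy .ga workflow file. Uses the core JSON::PP module; the default
# file name below is only an example.
use strict;
use warnings;
use JSON::PP qw(decode_json);

my $ga_file = shift @ARGV || 'Galaxy-Workflow-GenePhys_-_blastp.ga';
open my $fh, '<', $ga_file or die "Can't open $ga_file: $!\n";
my $json = do { local $/; <$fh> };    # slurp the whole file
close $fh;

my $workflow = decode_json($json);
print "Workflow: $workflow->{name}\n";
for my $id (sort { $a <=> $b } keys %{ $workflow->{steps} }) {
    my $step = $workflow->{steps}{$id};
    my $tool = defined $step->{tool_id} ? $step->{tool_id} : '(input dataset)';
    print "  step $id: $step->{name} [$tool]\n";
    for my $input (sort keys %{ $step->{input_connections} }) {
        my $src = $step->{input_connections}{$input};
        print "    $input <- step $src->{id} ($src->{output_name})\n";
    }
}

Run against the blastp workflow above, this prints the two input datasets, fastaGroomerForMakeBlastdb, NCBI BLAST+ makeblastdb, NCBI BLAST+ blastp and MergeBlastResults together with their input connections.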
diff -r c52e74b98773 -r 8dfa09868059 genephys/Galaxy-Workflow-GenePhys_-_blastx.ga
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/genephys/Galaxy-Workflow-GenePhys_-_blastx.ga Fri Oct 24 05:54:20 2014 -0400
@@ -0,0 +1,204 @@
[204-line Galaxy workflow JSON "GenePhys - blastx" added: NUCLEIC GENE SEQUENCE and PROTEIN DB FILE inputs feed fastaGroomerForMakeBlastdb, NCBI BLAST+ makeblastdb (prot), NCBI BLAST+ blastx and MergeBlastResults (output renamed BLASTX); the diff body is truncated in this view.]
diff -r c52e74b98773 -r 8dfa09868059 genephys/Galaxy-Workflow-GenePhys_-_tblastx.ga
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/genephys/Galaxy-Workflow-GenePhys_-_tblastx.ga Fri Oct 24 05:54:20 2014 -0400
@@ -0,0 +1,204 @@
[204-line Galaxy workflow JSON "GenePhys - tblastx" added: NUCLEIC GENE SEQUENCE and NUCLEIC DB FILE inputs feed fastaGroomerForMakeBlastdb, NCBI BLAST+ makeblastdb (nucl), NCBI BLAST+ tblastx and MergeBlastResults (output renamed TBLASTX); the diff body is truncated in this view.]
diff -r c52e74b98773 -r 8dfa09868059 genephys/GenePhys.pl
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/genephys/GenePhys.pl Fri Oct 24 05:54:20 2014 -0400
@@ -0,0 +1,335 @@
[335-line Perl script GenePhys.pl (v1.1.0) added; the diff body is truncated in this view. The readable portions show that the script reads a marker position file (name, locus, chromosome, position) and a list of markers, clusters each chromosome's marker positions into segments split wherever the gap exceeds -window, pads every segment by -offset, optionally extracts the corresponding FASTA segments from the assembly, and lists the genes from the gene position file that fall inside each segment together with up to -max_blast_lines BLAST hits per gene taken from the supplied BLAST files.]
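The readable part of GenePhys.pl shows how markers become segments: the marker positions of each chromosome are sorted, a new segment is started whenever the gap to the previous position exceeds the window, and every segment is then padded by the offset (its start never dropping below 1). Below is a minimal, self-contained sketch of that clustering step, using the tool's default window and offset values and made-up marker coordinates.

#!/usr/bin/perl
# Minimal sketch of the segment-building logic visible in GenePhys.pl:
# cluster sorted marker positions into segments split at gaps > $WINDOW,
# then pad each segment by $OFFSET on both sides (floor at 1).
# The marker coordinates below are made-up example data.
use strict;
use warnings;

my $WINDOW = 200_000;    # maximum gap between markers within one segment
my $OFFSET = 100_000;    # padding added in 5' and 3'

my %coord_by_chr = (
    chr1 => [ 120_000, 180_000, 950_000 ],
    chr2 => [ 40_000 ],
);

for my $chr (sort keys %coord_by_chr) {
    my @coords = sort { $a <=> $b } @{ $coord_by_chr{$chr} };
    my ($start, $stop) = ($coords[0], $coords[0]);
    for my $pos (@coords[1 .. $#coords], undef) {
        if (!defined $pos || $pos > $stop + $WINDOW) {
            my $padded_start = $start > $OFFSET ? $start - $OFFSET : 1;
            print "$chr:$padded_start..", $stop + $OFFSET, "\n";
            ($start, $stop) = ($pos, $pos) if defined $pos;
        }
        else {
            $stop = $pos;
        }
    }
}

With this example data the sketch prints chr1:20000..280000, chr1:850000..1050000 and chr2:1..140000, in the chr:start..end format GenePhys.pl writes to its segment output file.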
diff -r c52e74b98773 -r 8dfa09868059 genephys/GenePhys.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/genephys/GenePhys.xml Fri Oct 24 05:54:20 2014 -0400
@@ -0,0 +1,48 @@
+<tool id="GenePhys" name="GenePhys" version="1.1">
+<description>Extract the genes underlying a genetic segment defined by genetic markers</description>
+<command interpreter="perl">
+
+  #if $extractseq.do_extractseq=="YES"
+      GenePhys.pl -extractseq $extractseq.do_extractseq -input_assembly_file $input_assembly_file -input_markers_position_file $input_markers_position_file -input_markers_file $input_markers_file -window $window -offset $offset -output_segment_file $output_segment_file -output_fasta_file $output_fasta_file -log_file $log_file -input_blast_files $input_blast_files -max_blast_lines $max_blast_lines -input_genes_position_file $input_genes_position_file -output_genes_list_file $output_genes_list_file
+  #else
+      GenePhys.pl -input_markers_position_file $input_markers_position_file -input_markers_file $input_markers_file -window $window -offset $offset -output_segment_file $output_segment_file -output_fasta_file $output_fasta_file -log_file $log_file -input_blast_files $input_blast_files -max_blast_lines $max_blast_lines -input_genes_position_file $input_genes_position_file -output_genes_list_file $output_genes_list_file
+  #end if
+
+
+
+    
+</command>
+<inputs>
+ <param name="input_markers_file"  type="data" format="txt" label="Select a suitable input MARKERS file from your history"/>
+ <param name="input_markers_position_file"  type="data" format="txt" label="Select a suitable input MARKERS POSITION file from your history"/>
+ <param name="input_genes_position_file"  type="data" format="txt" label="Select a suitable input GENE POSITION file from your history"/>
+ <param name="input_blast_files"  type="data" format="txt" multiple="true" label="Select a BLAST files from your history"/>
+ <param name="window" type="integer" value="200000" label="Maximum distance between markers of a segment (Threshold for splitting a segment)"/>
+ <param name="offset" type="integer" value="100000" label="Additionnal segment size in 5' and 3' (security marging)"/>
+ <param name="max_blast_lines"  type="integer" value="3" label="Select the maximum number of BLAST best match to show"/>
+
+ <conditional name="extractseq">
+   <param name="do_extractseq" type="select" label="Extract genomic segment">
+     <option value="NO">NO</option>
+     <option value="YES">YES</option>
+   </param>
+   <when value="YES">
+     <param name="input_assembly_file"  type="data" format="fasta" label="Select a suitable input ASSEMBLY file from your history"/>
+   </when>
+   <when value="NO"></when>
+ </conditional> 
+
+</inputs>
+<outputs>
+ <data name="output_segment_file" format="txt" label="SEGMENT ${tool.name} on ${on_string}"/>
+ <data name="output_fasta_file" format="fasta" label="FASTA ${tool.name} on ${on_string}"/>
+ <data name="output_genes_list_file" format="txt" label="GENES ${tool.name} on ${on_string}"/>
+ <data name="log_file" format="txt" label="LOG ${tool.name} on ${on_string}"/>
+</outputs>
+
+<help>
+
+
+
+</help>
+</tool>
diff -r c52e74b98773 -r 8dfa09868059 genephys/MergeBlastResults.pl
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/genephys/MergeBlastResults.pl Fri Oct 24 05:54:20 2014 -0400
@@ -0,0 +1,545 @@
[545-line Perl script MergeBlastResults.pl (v1.0.3) added; the diff body is truncated in this view. The readable portions show that the script reads a 10-column tabular BLAST file, groups HSPs by query, subject and orientation, joins them while discarding HSPs that are contained in, or overlap too heavily with, already accepted ones (-max_overlap_fraction, -max_overlap_length_ignored), and writes one merged line per query/subject pair under a user-supplied -header with query coverage, subject coverage, identity and coordinates, plus coverage statistics to a log file.]
b
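For reference, MergeBlastResults.pl (added above) reads a 10-column tabular BLAST file, swaps reversed coordinates, and groups matches by query, subject and orientation before merging. The following is a minimal, illustrative Perl sketch of that per-line normalization step, assuming the same column order as the script (query, subject id, query start/end, subject start/end, similarity, query length, subject length, subject description); it is not code from this repository.

    # Sketch (illustrative, not repository code): normalize one 10-column
    # blasttab line the way MergeBlastResults.pl does before grouping matches.
    use strict;
    use warnings;

    sub normalize_match {
        my ($line) = @_;
        chomp $line;
        my @f = split /\t/, $line;
        die "expected 10 columns, got " . scalar(@f) . "\n" unless @f == 10;
        my %m = (
            Query          => $f[0],
            Subject_id     => $f[1],
            Orientation    => '+',
            Query_start    => $f[2],
            Query_end      => $f[3],
            Subject_start  => $f[4],
            Subject_end    => $f[5],
            Similarity     => $f[6],
            Query_length   => $f[7],
            Subject_length => $f[8],
            Subject        => $f[9],
        );
        # Reversed coordinates on either axis mark a minus-strand match.
        if ($m{Query_start} > $m{Query_end}) {
            @m{qw(Query_start Query_end)} = @m{qw(Query_end Query_start)};
            $m{Orientation} = '-';
        }
        if ($m{Subject_start} > $m{Subject_end}) {
            @m{qw(Subject_start Subject_end)} = @m{qw(Subject_end Subject_start)};
            $m{Orientation} = '-';
        }
        # Matches are grouped per query under a Query##Subject##Orientation key.
        my $key = join '##', @m{qw(Query Subject Orientation)};
        return (\%m, $key);
    }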
diff -r c52e74b98773 -r 8dfa09868059 genephys/MergeBlastResults.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/genephys/MergeBlastResults.xml Fri Oct 24 05:54:20 2014 -0400
b
@@ -0,0 +1,21 @@
+<tool id="MergeBlastResults" name="MergeBlastResults" version="1.03">
+<description>Parse BLAST results (tabular) and merge matches into joined features</description>
+<command interpreter="perl">
+    MergeBlastResults.pl -input_blasttab_file $input_blast -output_joinmatch_file $output_merge -log_file $log_file -max_overlap_fraction $max_overlap_fraction -max_overlap_length_ignored $max_overlap_length_ignored -header $header
+</command>
+<inputs>
+ <param name="input_blast"  type="data" format="txt" label="Select a suitable input BLASTTAB (10 columns) file from your history"/>
+ <param name="max_overlap_fraction" type="float" value="0.5" label="Maximum overlap fraction between two match (other wise considered as duplicated)"/>
+ <param name="max_overlap_length_ignored" type="integer" value="3" label="Maximum overlap length ignored"/>
+ <param name="header" type="text" value="" label="Header for the blast file"/>
+</inputs>
+<outputs>
+ <data name="output_merge" format="txt" label="${tool.name} MERGE on ${on_string}"/>
+ <data name="log_file" format="txt" label="${tool.name} LOG on ${on_string}"/>
+</outputs>
+
+<help>
+
+</help>
+</tool>
+
b
diff -r c52e74b98773 -r 8dfa09868059 genephys/extractgenesfromsegment.pl
--- a/genephys/extractgenesfromsegment.pl Wed Aug 20 12:42:40 2014 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
@@ -1,223 +0,0 @@
-#!/usr/bin/perl
-use strict;
-
-my $input_gene_file = $ARGV[0];
-my $input_segment_file = $ARGV[1];
-my $output_seq_nuc = $ARGV[2];
-my $output_seq_prot = $ARGV[3];
-
-open(IG, $input_gene_file)  or die("Can't open $input_gene_file\n");
-open(IS, $input_segment_file)  or die("Can't open $input_segment_file\n");
-open (ON,">$output_seq_nuc") or die ("Can't open $output_seq_nuc\n");
-open (OP,">$output_seq_prot") or die ("Can't open $output_seq_prot\n");
-
-my $current_annotation="";
-my @gene_annotation;
-my @list_gene;
-my @current_gene;
-my %current_gene_annotation;
-
-# while ((my $line=<IG>)&&($#list_gene<5)){
-while (my $line=<IG>){
- if ($line =~/\<Gene\>/){
- if (@current_gene){
- my %current_gene_annotation = %{&extract_annotation(\@current_gene)};
-
-
- push(@list_gene,\%current_gene_annotation);
- undef @current_gene;
- }
- push (@current_gene,$line);
-
- }
- else {
- push (@current_gene,$line);
- }
-}
-close(IG);
-
-# for (my $i=0;$i<=$#list_gene;$i++){
- # my %current_gene_annotation = %{$list_gene[$i]};
- # foreach my $key (keys %current_gene_annotation){
- # print "TEST ",$key,"\t",$current_gene_annotation{$key},"\n";
- # }
-# }
-
-# my @segment_chr;
-# my @segment_start;
-# my @segment_stop;
-
-while (my $line=<IS>){
- print "\n$line";
- if ($line =~/(.*?)\:(\d+)\.\.(\d+)/){
- my $chr = $1;
- my $start = $2;
- my $stop = $3;
-
- my @list_gene_selected = @{&extract_gene_from_position($chr,$start,$stop,\@list_gene)};
-
- if ($#list_gene_selected>=0){
- for (my $i=0;$i<=$#list_gene_selected;$i++){
- my %current = %{$list_gene_selected[$i]};
- print $current{"00 BN_Id"},"\t",$current{"01 BN_Position"},"\t",$current{"02 ATH_Function"},"\t",$current{"03 ATH_Id"},"\n";
-
- my $seq = $current{"04 Sequence"};
- my $formated_seq;
- my @SEQ = split(//,$seq);
- my $compt_seq=0;
- for (my $i=0;$i<=$#SEQ;$i++){
- if ($SEQ[$i] =~ /[ATGNCXatgcnx]/){
- if ($compt_seq == 60){
- $formated_seq .="\n";
- $compt_seq=0;
- }
- $formated_seq.= $SEQ[$i];
- $compt_seq ++;
- }
- } 
- print ON ">",$current{"01 BN_Position"}," (",$current{"00 BN_Id"},")","\n",$formated_seq,"\n";
-
- my $prot = $current{"05 Protein"};
- my $formated_prot;
- my @PROT = split(//,$prot);
- my $compt_prot=0;
- for (my $i=0;$i<=$#PROT;$i++){
- if ($PROT[$i] =~ /[A-Za-z\*\+]/){
- if ($compt_prot == 60){
- $formated_prot .="\n";
- $compt_prot=0;
- }
- $formated_prot.= $PROT[$i];
- $compt_prot ++;
- }
- } 
- print OP ">",$current{"01 BN_Position"}," (",$current{"00 BN_Id"},")","\n",$formated_prot,"\n";
-
- # foreach my $key (sort keys %current){
- # print "   ",$key,"\t",$current{$key},"\n";
- # }
- # print "\n";
- }
- }
- else {
- print "   NO GENE FOUND\n";
- }
- }
- else {
- print "Error Parsing n°2 : $line\n";
- }
-}
-
-close (IS);
-
-close (ON);
-close (OP);
-
-
-# my @list_gene_selected = @{&extract_gene_from_position("chrA01",1437,3000,\@list_gene)};
-
-
-# for (my $i=0;$i<=$#list_gene_selected;$i++){
- # my %current = %{$list_gene_selected[$i]},"\n";
- # foreach my $key (keys %current){
- # print $key,"\t",$current{$key},"\n";
- # }
-# }
-
-
-
-
-
-
-
-sub extract_annotation{
- my $ref = shift;
- my @gene = @$ref;
- my %gene_annotation;
- for (my $i=0;$i<=$#gene;$i++){
- #print "TEST : $gene[$i]\n";
- if ($gene[$i]=~/\<Id\>(.*?)\<\/Id\>/){
- $gene_annotation{"00 BN_Id"} = $1;
- }
- elsif ($gene[$i]=~/\<Position\>(.*?)\<\/Position\>/){
- $gene_annotation{"01 BN_Position"} = $1;
- }
- elsif ($gene[$i]=~/\<ATH_Function\>(.*?)\<\/ATH_Function\>/){
- $gene_annotation{"02 ATH_Function"} = $1;
- }
- elsif ($gene[$i]=~/\<SId\>(.*?)\<\/SId\>/){
- $gene_annotation{"03 ATH_Id"} = $1;
- }
- elsif ($gene[$i]=~/\<CDS_Sequence\>(.*?)\<\/CDS_Sequence\>/){ #modif 1.11
- $gene_annotation{"04 Sequence"} = $1;
- }
- elsif ($gene[$i]=~/\<Protein\>(.*?)\<\/Protein\>/){
- $gene_annotation{"05 Protein"} = $1;
- # print "TEST : $1\n";
- # exit (0);
- }
- }
- if ((!$gene_annotation{"00 BN_Id"})||(!$gene_annotation{"01 BN_Position"})||(!$gene_annotation{"04 Sequence"})||(!$gene_annotation{"05 Protein"})){
-
- print "Erreur Parsing n°3\n";
- print "Id :",$gene_annotation{"00 BN_Id"},"\n";
- print "Position : ",$gene_annotation{"01 BN_Position"},"\n";
- print "ATH Function : ",$gene_annotation{"02 ATH_Function"},"\n";
- print "ATH Id : ",$gene_annotation{"03 ATH_Id"},"\n";
- print "CDS seq : ",$gene_annotation{"04 Sequence"},"\n";
- print "CDS prot : ",$gene_annotation{"05 Protein"},"\n";
- for (my $i=0;$i<=$#gene;$i++){
- print $gene[$i],"\n";
- }
-
- exit(0);
-
- }
-
- return \%gene_annotation;
-}
-
-
-sub extract_gene_from_position{
- my $chr = shift;
- my $start = shift;
- my $end = shift;
-
- my $ref = shift;
- my @list_gene = @$ref;
- my @list_gene_selected;
-
- for (my $i=0;$i<=$#list_gene;$i++){
- my %current_gene_annotation = %{$list_gene[$i]};
- my $current_position = $current_gene_annotation{"01 BN_Position"};
- my $current_chr;
- my $current_start;
- my $current_end;
-
- #Extract the position
- if ($current_position =~ /^(.*?)\:(\d+)[\.]+(\d+)/){ # modif 1.11
- $current_chr = $1;
- $current_start = $2;
- $current_end = $3;
- if ($current_start > $current_end){
- ($current_start,$current_end) = ($current_end,$current_start);
- }
- }
- else {
- print "Erreur Parsing n°1\npos : $current_position\n";
- exit(0);
- }
- #Selection test
- if ($chr eq $current_chr){
- if (
- ($current_end>=$start)&&($current_end<=$end) ||
- ($current_start>=$start)&&($current_start<=$end)
- )
- {
- push(@list_gene_selected,$list_gene[$i]);
- }
- }
-
- }
- return \@list_gene_selected;
-}
\ No newline at end of file
b
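extractgenesfromsegment.pl (removed above) keeps a gene when either end of its chr:start..end position falls inside the requested segment on the same chromosome. A small illustrative sketch of that selection rule, with a hypothetical helper name (gene_in_segment) and coordinates normalized so start is not greater than end:

    # Sketch (illustrative, not repository code): the selection rule used by
    # extract_gene_from_position - keep a gene if either of its ends lies
    # inside the requested segment on the same chromosome.
    use strict;
    use warnings;

    sub gene_in_segment {
        my ($seg_chr, $seg_start, $seg_end, $gene_chr, $gene_start, $gene_end) = @_;
        return 0 unless $gene_chr eq $seg_chr;
        ($gene_start, $gene_end) = ($gene_end, $gene_start) if $gene_start > $gene_end;
        return ( ($gene_end   >= $seg_start && $gene_end   <= $seg_end)
              || ($gene_start >= $seg_start && $gene_start <= $seg_end) ) ? 1 : 0;
    }

    # Example: a gene at chrA01:1500..2200 overlaps the segment chrA01:1437..3000.
    print gene_in_segment('chrA01', 1437, 3000, 'chrA01', 1500, 2200), "\n";  # prints 1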
diff -r c52e74b98773 -r 8dfa09868059 genephys/extractgenesfromsegment.xml
--- a/genephys/extractgenesfromsegment.xml Wed Aug 20 12:42:40 2014 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
b
@@ -1,19 +0,0 @@
-<tool id="extractgenesfromsegment" name="extractgenesfromsegment" version="0.01">
-<description>Extract gene sequences (nucleotide, protein) and functions</description>
-<command interpreter="perl">
-    extractgenesfromsegment.pl $input_genexml $input_segment $output_gene_nuc $output_gene_prot > $output_gene_function
-</command>
-<inputs>
- <param name="input_genexml"  type="data" format="xml" label="Select a suitable input GENEXML file from your history"/>
- <param name="input_segment"  type="data" format="txt" label="Select a suitable input SEGMENT file from your history"/>
-</inputs>
-<outputs>
- <data name="output_gene_nuc" format="fasta" label="${tool.name} NUC on ${on_string}"/>
- <data name="output_gene_prot" format="fasta" label="${tool.name} PROT on ${on_string}"/>
- <data name="output_gene_function" format="txt" label="${tool.name} FUNCTION on ${on_string}"/>
-</outputs>
-
-<help>
-
-</help>
-</tool>
b
diff -r c52e74b98773 -r 8dfa09868059 genephys/extractgenomicsegment.pl
--- a/genephys/extractgenomicsegment.pl Wed Aug 20 12:42:40 2014 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
@@ -1,176 +0,0 @@
-#!/usr/bin/perl
-#V1.10
-use strict;
-
-
-my $inputfile1 = $ARGV[0];
-my $inputfile2 = $ARGV[1];
-my $WINDOW = $ARGV[2];
-my $OFFSET = $ARGV[3];
-
-if (!$WINDOW){$WINDOW = 200000;}
-if (!$OFFSET){$OFFSET = 100000;}
-
-open(IF1, $inputfile1)  or die("Can't open $inputfile1\n");
-open(IF2, $inputfile2)  or die("Can't open $inputfile2\n");
-my $current_annotation="";
-my @list_marquer;
-my %chr;
-my %position;
-
-# print "$inputfile2\n";
-
-while (my $line=<IF1>){
- my @cols = split(/\t/,$line);
- my %current;
- # Number#Map#Name#Chr#Position#GeneAT#FunctionAT
-
- my $Number = $cols[0];
- my $Map = $cols[2];
- my $Name = $cols[7];
- my $Locus = $cols[8];
- my $Chr = $cols[19];
- my $Position = $cols[20];
- $Position =~ s/\s+//g;
- my $GeneAT=$cols[32];
- my $FunctionAT=$cols[37];
- $chr{$Name} = $Chr;
- $position{$Name} = $Position;
-
- ### Modification 1.10
- if ($Locus ne $Name){ 
- $chr{$Locus} = $Chr;
- $position{$Locus} = $Position;
- }
- ###
-
- #print "$Number#$Map#$Name#$Chr#$Position#$GeneAT#$FunctionAT\n";
-}
-close (IF1);
-
-# my @key = keys(%chr);
-# for (my $i=0;$i<=$#key;$i++){
- # print $key[$i],"\n";
-# }
-
-while (my $line=<IF2>){
- my @cols = split (/\s+/,$line);
- for (my $i=0;$i<=$#cols;$i++){
- my $current = $cols[$i];
- chomp($current);
- if ($current !~ /^\s+$/){
- push(@list_marquer,$current);
- }
- }
-}
-close (IF2);
-
-my %coord_by_chr;
-for (my $i=0;$i<=$#list_marquer;$i++){
- my $current_name = $list_marquer[$i];
- my $current_chr = $chr{$current_name};
- my $current_position = $position{$current_name};
-
- if ($current_position =~ /^\d+$/){
- my @tbl_coord_for_current_chr;
- if ($coord_by_chr{$current_chr}){
- @tbl_coord_for_current_chr = @{$coord_by_chr{$current_chr}};
- }
- push(@tbl_coord_for_current_chr,$current_position);
- $coord_by_chr{$current_chr}=\@tbl_coord_for_current_chr;
- }
- elsif (($current_position eq "-")||($current_position =~/none/i)){
-
- }
- else {
- chomp($current_position);
- #$current_position =~ s/\s+//g;
- print STDERR "Error Parsing $current_name\tposition not recognized : $current_position \n";
- print $list_marquer[$i],"\n";
- #exit(0);
- }
-}
-
-# foreach my $key (keys %coord_by_chr){
- # my @tbl_coord = @{$coord_by_chr{$key}};
- # print "\n$key\n";
- # @tbl_coord = sort { $a <=> $b } @tbl_coord;
- # for (my $i=0;$i<=$#tbl_coord;$i++){
- # print $tbl_coord[$i],"\n";
- # }
-# }
-
-foreach my $key (sort keys %coord_by_chr){
- my @tbl_coord = @{$coord_by_chr{$key}};
- # print "TEST : $key\n";
- @tbl_coord = sort { $a <=> $b } @tbl_coord;
- my $current_start;
- my $current_stop;
- my $current_start_offset;
- my $current_stop_offset;
-
-
- for (my $i=0;$i<=$#tbl_coord;$i++){
- if (!$current_start){$current_start=$tbl_coord[$i];$current_stop=$tbl_coord[$i]}
-
- # print "$i : $current_start / $current_stop\n";
- if ($tbl_coord[$i]>$current_stop+$WINDOW){
- #OFFSET
- if ($current_start>$OFFSET){$current_start_offset=$current_start-$OFFSET;}else{$current_start_offset=1;}
- $current_stop_offset = $current_stop + $OFFSET;
- #######
- print $key,":",$current_start_offset,"..",$current_stop_offset,"\n";
-
- $current_start = $tbl_coord[$i];
- $current_stop = $tbl_coord[$i];
-
- if ($i==$#tbl_coord){
- #OFFSET
- if ($current_start>$OFFSET){$current_start_offset=$current_start-$OFFSET;}else{$current_start_offset=1;}
- $current_stop_offset = $current_stop + $OFFSET;
- #######
- print $key,":",$current_start_offset,"..",$current_stop_offset,"\n";
- }
- }
- else {
- $current_stop=$tbl_coord[$i];
- if ($i==$#tbl_coord){
- #OFFSET
- if ($current_start>$OFFSET){$current_start_offset=$current_start-$OFFSET;}else{$current_start_offset=1;}
- $current_stop_offset = $current_stop + $OFFSET;
- #######
- print $key,":",$current_start_offset,"..",$current_stop_offset,"\n";
- }
- }
- }
-}
-#Handle the last segment
-
-# if ($#tbl_coord == 0){
- # print $key,":",$tbl_coord[$i],"\n";
-# }
-# else {
- # if ($i==0){
- # push (@current_table,$tbl_coord[$i]);
- # }
- # else {
- # if ($tbl_coord[$i]>$current_table[$#current_table]+$WINDOW){
- # print $key,":",$current_table[0],":",$current_table[$#current_table],"\n";
- # undef @current_table;
- # push (@current_table,$tbl_coord[$i]);
- # }
- # else {
- # push (@current_table,$tbl_coord[$i]);
- # }
- # }
-# }
-
-
-# print "\n";
-# foreach my $key (keys %coord_by_chr){
- # print "\n$key\n";
- # @tbl_coord = sort { $a <=> $b } @tbl_coord;
- # for (my $i=0;$i<=$#tbl_coord;$i++){
- # print $tbl_coord[$i],"\n";
- # }
-# }
b
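extractgenomicsegment.pl (removed above) sorts marker positions per chromosome, opens a new segment whenever the gap to the previous marker exceeds the window, and pads each segment by the offset on both sides, clamping the start at 1. An illustrative sketch of that clustering, assuming positions are already grouped by chromosome (positions_to_segments is a hypothetical helper, not repository code):

    # Sketch (illustrative): cluster sorted marker positions into segments,
    # splitting when the gap exceeds $window and padding each side by $offset.
    use strict;
    use warnings;

    sub positions_to_segments {
        my ($chr, $window, $offset, @positions) = @_;
        @positions = sort { $a <=> $b } @positions;
        my (@segments, $start, $stop);
        my $emit = sub {
            my $padded_start = $start > $offset ? $start - $offset : 1;
            push @segments, sprintf("%s:%d..%d", $chr, $padded_start, $stop + $offset);
        };
        for my $pos (@positions) {
            if    (!defined $start)         { ($start, $stop) = ($pos, $pos); }
            elsif ($pos > $stop + $window)  { $emit->(); ($start, $stop) = ($pos, $pos); }
            else                            { $stop = $pos; }
        }
        $emit->() if defined $start;
        return @segments;
    }

    # Example with the tool defaults (window 200000, offset 100000):
    print "$_\n" for positions_to_segments('chrA01', 200_000, 100_000,
                                            150_000, 180_000, 900_000);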
diff -r c52e74b98773 -r 8dfa09868059 genephys/extractgenomicsegment.xml
--- a/genephys/extractgenomicsegment.xml Wed Aug 20 12:42:40 2014 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
b
@@ -1,21 +0,0 @@
-<tool id="extractgenomicsegment" name="extractgenomicsegment" version="0.01">
-<description>Extract the coordinates of the genomic segments containing the genetic markers</description>
-<command interpreter="perl">
-    extractgenomicsegment.pl $input_geneticmap $input_markers $window $offset > $output_file 
-</command>
-<inputs>
-<param name="input_markers"  type="data" format="txt" label="Select a suitable input MARKERS file from your history"/>
-<param name="input_geneticmap"  type="data" format="txt" label="Select a suitable input GENETIC MAP file from your history"/>
-<param name="window" type="integer" value="200000" label="Maximum distance between markers of a segment (Threshold for splitting a segment)"/>
-<param name="offset" type="integer" value="100000" label="Additionnal segment size in 5' and 3' (security marging)"/>
-</inputs>
-<outputs>
- <data name="output_file" format="fasta" label="${tool.name} on ${on_string}"/>
-</outputs>
-
-<help>
-
-
-
-</help>
-</tool>
b
diff -r c52e74b98773 -r 8dfa09868059 genephys/extractgenomicsequencefromsegment.pl
--- a/genephys/extractgenomicsequencefromsegment.pl Wed Aug 20 12:42:40 2014 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
@@ -1,90 +0,0 @@
-#!/usr/bin/perl
-#V1.10
-my $inputsegment = $ARGV[0];
-my $inputfasta = $ARGV[1];
-
-open(IS, $inputsegment) or die ("Can't open $inputsegment\n");
-open(IF, $inputfasta) or die ("Can't open $inputfasta\n");
-
-
-my @header;
-my @start;
-my @end;
-my @segment_header;
-
-while (my $ligne = <IS>){
- if ($ligne=~/(.*?):(\d+)\.+(\d+)/){
- push (@header,$1);
- push (@start,$2);
- push (@end,$3);
- push (@segment_header,$1.":".$2."..".$3);
- }
-}
-
-close (IS);
-
-#print "TEST : $#header\n";
-
-my %genome;
-
-my $current_header;
-my $current_seq="";
-while (my $ligne = <IF>){
- if ($ligne =~ /^\>(.*?)\s*$/){
- if ($current_header){
- $genome{$current_header} = $current_seq;
- }
-
- # my $length = length($current_seq);
- # print "TEST : $current_header\t$length\n";
- # print "TEST : $current_header\n";
- $current_header=$1;
- $current_seq = "";
- $current_position=0;
- }
- else {
- if ($ligne=~/^([ATGCNXatgcnx]+)\s*$/){
- $current_seq .= $1;
- }
- else {
- print STDERR "Erreur Parsing n°1\n$ligne\n";
- }
- }
-}
-
-#HANDLE THE LAST SEQUENCE
-if ($current_header){
- $genome{$current_header} = $current_seq;
- undef($current_seq);
-}
-
-# foreach my $key (keys %genome){
- # print $key,"\t",length($genome{$key}),"\n";
-# }
-
-for (my $i=0;$i<=$#header;$i++){
- my $compt=0;
- my $current_seq="";
- print ">",$header[$i],":",$start[$i],"..",$end[$i],"\n";
- ### Modification 1.10
- if ($end[$i]>length($genome{$header[$i]})){
- $end[$i] = length($genome{$header[$i]});
- }
- ###
-
- my @SEQ = split(//,$genome{$header[$i]});
- for (my $coord = $start[$i]-1; $coord<=$end[$i]-1;$coord++){
- $compt++;
- # print "TEST : $compt\n";
- if ($compt > 60 ){
- $current_seq .= "\n";
- $compt=1;
- }
- $current_seq .= $SEQ[$coord];
-
- }
- print "$current_seq\n";
-}
-
-close (IF);
-
b
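extractgenomicsequencefromsegment.pl (removed above) walks the target sequence character by character; the same result can be sketched more compactly with substr, assuming 1-based inclusive chr:start..stop coordinates and the 60-column FASTA wrapping the script produces (illustrative only, not repository code):

    # Sketch (illustrative): extract a 1-based inclusive chr:start..stop slice
    # from an assembled sequence and wrap it at 60 characters per line.
    use strict;
    use warnings;

    sub segment_fasta {
        my ($header, $start, $stop, $sequence) = @_;
        my $len = length $sequence;
        $stop = $len if $stop > $len;              # clamp the end, as in the 1.10 fix
        my $slice = substr $sequence, $start - 1, $stop - $start + 1;
        $slice =~ s/(.{60})(?=.)/$1\n/g;           # 60 columns per line
        return ">$header:$start..$stop\n$slice\n";
    }

    print segment_fasta('chrA01', 5, 16, 'ACGTACGTACGTACGTACGT');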
diff -r c52e74b98773 -r 8dfa09868059 genephys/extractgenomicsequencefromsegment.xml
--- a/genephys/extractgenomicsequencefromsegment.xml Wed Aug 20 12:42:40 2014 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
b
@@ -1,19 +0,0 @@
-<tool id="extractgenomicsequencefromsegment" name="extractgenomicsequencefromsegment" version="0.01">
-<description>Extract the genomic sequence corresponding to a genomic segment (format : chr:start..stop)</description>
-<command interpreter="perl">
-    extractgenomicsequencefromsegment.pl $input_segment $input_assembly > $output_file 
-</command>
-<inputs>
-<param name="input_segment"  type="data" format="txt" label="Select a suitable input SEGMENT file from your history"/>
-<param name="input_assembly"  type="data" format="fasta" label="Select a suitable input ASSEMBLY file from your history"/>
-</inputs>
-<outputs>
- <data name="output_file" format="fasta" label="${tool.name} on ${on_string}"/>
-</outputs>
-
-<help>
-
-
-
-</help>
-</tool>
b
diff -r c52e74b98773 -r 8dfa09868059 genephys/fastaGroomerForMakeBlastdb.pl
--- a/genephys/fastaGroomerForMakeBlastdb.pl Wed Aug 20 12:42:40 2014 -0400
+++ b/genephys/fastaGroomerForMakeBlastdb.pl Fri Oct 24 05:54:20 2014 -0400
[
@@ -1,4 +1,5 @@
 #!/usr/bin/perl
+#V1.0.0
 my $inputfasta = $ARGV[0];
 
 open(IB, $inputfasta) or die ("Can't open $inputfasta \n");
b
diff -r c52e74b98773 -r 8dfa09868059 genephys/fastaGroomerForMakeBlastdb.xml
--- a/genephys/fastaGroomerForMakeBlastdb.xml Wed Aug 20 12:42:40 2014 -0400
+++ b/genephys/fastaGroomerForMakeBlastdb.xml Fri Oct 24 05:54:20 2014 -0400
b
@@ -1,4 +1,4 @@
-<tool id="fastaGroomerForMakeBlastdb" name="fastaGroomerForMakeBlastdb" version="0.01">
+<tool id="fastaGroomerForMakeBlastdb" name="fastaGroomerForMakeBlastdb" version="1.00">
 <description>fasta Groomer For MakeBlastdb</description>
 <command interpreter="perl">
     fastaGroomerForMakeBlastdb.pl $input_fasta > $output_fasta
b
diff -r c52e74b98773 -r 8dfa09868059 genephys/mergeAllBestBlast.pl
--- a/genephys/mergeAllBestBlast.pl Wed Aug 20 12:42:40 2014 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
@@ -1,47 +0,0 @@
-#!/usr/bin/perl
-my $inputblastn = $ARGV[0];
-my $inputtblastx = $ARGV[1];
-my $inputblastx = $ARGV[2];
-my $inputblastp = $ARGV[3];
-
-open(IN, $inputblastn) or die ("Can't open $inputblastn \n");
-open(ITX, $inputtblastx) or die ("Can't open $inputtblastx \n");
-open(IX, $inputblastx) or die ("Can't open $inputblastx \n");
-open(IP, $inputblastp) or die ("Can't open $inputblastp \n");
-
-my %blastx;
-my %tblastx;
-my %blastp;
-
-while (my $ligne = <ITX>){
- my @fields = split (/\t/,$ligne);
- chomp($ligne);
- $tblastx{$fields[0]} = $ligne;
-}
-close (ITX);
-
-while (my $ligne = <IX>){
- my @fields = split (/\t/,$ligne);
- chomp($ligne);
- $blastx{$fields[0]} = $ligne;
-}
-close (IX);
-
-while (my $ligne = <IP>){
- my @fields = split (/\t/,$ligne);
- chomp($ligne);
- $blastp{$fields[0]} = $ligne;
-}
-close (IP);
-
-
-while (my $ligne = <IN>){
- my @fields = split (/\t/,$ligne);
- my $query = $fields[0];
- print "BLASTN\t$ligne";
- print "TBLASTX\t",$tblastx{$query},"\n";
- print "BLASTX\t",$blastx{$query},"\n";
- print "BLASTP\t",$blastp{$query},"\n\n";
-
-}
-close (IN);
b
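mergeAllBestBlast.pl (removed above) indexes the TBLASTX, BLASTX and BLASTP best-hit tables by the query identifier in column 1 and then prints them alongside each BLASTN line. An illustrative sketch of that first-column indexing (index_by_query is a hypothetical helper, not repository code):

    # Sketch (illustrative): index a tabular best-hit file by its first
    # column (the query id), the lookup structure built for each input.
    use strict;
    use warnings;

    sub index_by_query {
        my ($path) = @_;
        open my $fh, '<', $path or die "Can't open $path: $!\n";
        my %by_query;
        while (my $line = <$fh>) {
            chomp $line;
            my ($query) = split /\t/, $line;
            $by_query{$query} = $line;    # the last line seen per query wins
        }
        close $fh;
        return \%by_query;
    }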
diff -r c52e74b98773 -r 8dfa09868059 genephys/mergeAllBestBlast.xml
--- a/genephys/mergeAllBestBlast.xml Wed Aug 20 12:42:40 2014 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
b
@@ -1,19 +0,0 @@
-<tool id="mergeAllBestBlast" name="mergeAllBestBlast" version="0.01">
-<description>Merge best results from Blast</description>
-<command interpreter="perl">
-    mergeAllBestBlast.pl $input_blastn $input_tblastx $input_blastx $input_blastp > $output_results
-</command>
-<inputs>
- <param name="input_blastn"  type="data" format="txt" label="Select a suitable input BEST BLASTN file from your history"/>
- <param name="input_tblastx"  type="data" format="txt" label="Select a suitable input BEST TBLASTX file from your history"/>
- <param name="input_blastx"  type="data" format="txt" label="Select a suitable input BEST BLASTX file from your history"/>
- <param name="input_blastp"  type="data" format="txt" label="Select a suitable input BEST BLASTP file from your history"/>
-</inputs>
-<outputs>
- <data name="output_results" format="txt" label="${tool.name} on ${on_string}"/>
-</outputs>
-
-<help>
-
-</help>
-</tool>
b
diff -r c52e74b98773 -r 8dfa09868059 genephys/parseblasttab.pl
--- a/genephys/parseblasttab.pl Wed Aug 20 12:42:40 2014 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
b'@@ -1,511 +0,0 @@\n-#!/usr/bin/perl\n-my $inputblast = $ARGV[0];\n-my $outputjoin = $ARGV[1];\n-my $outputbest = $ARGV[2];\n-open(IB, $inputblast) or die ("Can\'t open $inputblast \\n");\n-open (OJ, ">$outputjoin") or die ("Can\'t open $outputjoin \\n");\n-open (OB, ">$outputbest") or die ("Can\'t open $outputbest \\n");\n-\n-my %all_match;\n-my @all_match_joined;\n-\n-\n-my $MAX_OVERLAP_FRACTION = 0.5;\n-my $MAX_OVERLAP_LENGTH_IGNORED = 3;\n-\n-\n-while (my $ligne = <IB>){\n-\tmy @fields = split (/\\t/,$ligne);\n-\tmy %match;\n-\t$match{"Query"}=$fields[0];\n-\t$match{"Subject_id"}=$fields[1];\n-\t$match{"Subject_start"}=$fields[8];\n-\t$match{"Subject_end"}=$fields[9];\n-\t$match{"Similarity"}=$fields[13];\n-\t$match{"Query_length"}=$fields[14];\n-\t$match{"Subject_length"}=$fields[15];\n-\t$match{"Subject"}=$fields[16];\n-\t\n-\tif ($fields[6]<=$fields[7]){\n-\t\t$match{"Query_start"}=$fields[6];\n-\t\t$match{"Query_end"}=$fields[7];\n-\t\t$match{"Orientation"}="+";\n-\t\t#print "+ $ligne";\n-\t}\n-\telse {\n-\t\t$match{"Query_start"}=$fields[7];\n-\t\t$match{"Query_end"}=$fields[6];\n-\t\t$match{"Orientation"}="-";\n-\t\t#print "- $ligne";\n-\t}\n-\t\n-\tif ($fields[9]<=$fields[8]){\n-\t\t$match{"Subject_start"}=$fields[9];\n-\t\t$match{"Subject_end"}=$fields[8];\n-\t\t$match{"Orientation"}="+";\n-\t\t#print "+ $ligne";\n-\t}\n-\telse {\n-\t\t$match{"Subject_start"}=$fields[8];\n-\t\t$match{"Subject_end"}=$fields[9];\n-\t\t$match{"Orientation"}="-";\n-\t\t#print "- $ligne";\n-\t}\n-\t\n-\t$match{"Ligne"}=$ligne;\n-\tmy $key = $match{"Query"}."##".$match{"Subject"}."##".$match{"Orientation"};\n-\tif ($match{"Subject_length"}==0){\n-\t\tprint $ligne,"\\n",$match{"Subject_length"},"\\n";\n-\t}\t\n-\tmy @match_table;\n-\n-\tif ($all_match{$key}){\n-\t\t@match_table = @{$all_match{$key}};\n-\t}\n-\tpush (@match_table,\\%match);\n-\t$all_match{$key} = \\@match_table;\n-}\n-\n-foreach my $key (keys %all_match){\n-\tmy @match_table = @{$all_match{$key}};\n-\t#### Sort\n-\t@match_table = sort mysort @match_table;\n-\t\n-\t\n-\tmy @duplicate;\n-\tmy @overlap;\n-\tfor (my $i=0;$i<=$#match_table;$i++){\n-\t\tpush (@duplicate,0);\n-\t}\n-\tprint "\\nTable Match ($#match_table)\\n";\n-\tfor (my $i=0;$i<=$#match_table;$i++){\n-\t\tmy %match=%{$match_table[$i]};\n-\t\tprint $match{"Query"},"\\t",$match{"Subject_id"},"\\t",$match{"Orientation"},"\\t",$match{"Query_start"},"\\t",$match{"Query_end"},"\\t";\n-\t\tprint $match{"Subject_start"},"\\t",$match{"Subject_end"},"\\t",$match{"Subject_length"},"\\t",$match{"Similarity"},"\\n";\n-\t}\n-\t\n-\t#Scan d\'inclusion strict\n-\tfor (my $i=0;$i<=$#match_table;$i++){\n-\t\tmy %match1=%{$match_table[$i]};\n-\t\tfor (my $j=0;$j<=$#match_table;$j++){\n-\t\t\tif (($j != $i)&&($duplicate[$j]==0)){ # On scan dans les deux sens, pas seuelment $j = $i+1 a cause du last;\n-\t\t\t\tmy %match2=%{$match_table[$j]};\n-\t\t\t\t# Inclus Subject\n-\t\t\t\tif (($match1{"Subject_start"}>=$match2{"Subject_start"})&&($match1{"Subject_end"}<=$match2{"Subject_end"}))\n-\t\t\t\t{\n-\t\t\t\t\t$duplicate[$i]=1;\n-\t\t\t\t\t# print $i," : 1 : ",$match1{"Query"},"\\t",$match1{"Subject_id"},"\\t",$match1{"Query_start"},"\\t",$match1{"Query_end"},"\\t",$match1{"Subject_start"},"\\t",$match1{"Subject_end"},"\\n";\n-\t\t\t\t\t# print $j," : 1 : ",$match2{"Query"},"\\t",$match2{"Subject_id"},"\\t",$match2{"Query_start"},"\\t",$match2{"Query_end"},"\\t",$match2{"Subject_start"},"\\t",$match2{"Subject_end"},"\\n";\n-\t\n-\t\t\t\t\tlast;\n-\t\t\t\t}\n-\t\t\t\t# Inclus Query\n-\t\t\t\telsif 
(($match1{"Query_start"}>=$match2{"Query_start"})&&($match1{"Query_end"}<=$match2{"Query_end"}))\n-\t\t\t\t{\n-\t\t\t\t\t$duplicate[$i]=2;\n-\t\t\t\t\t# print $i," : 2 : ",$match1{"Query"},"\\t",$match1{"Subject_id"},"\\t",$match1{"Query_start"},"\\t",$match1{"Query_end"},"\\t",$match1{"Subject_start"},"\\t",$match1{"Subject_end"},"\\n";\n-\t\t\t\t\t# print $j," : 2 : ",$match2{"Query"},"\\t",$match2{"Subject_id"},"\\t",$match2{"Query_start"},"\\t",$match2{"Query_end"},"\\t",$match2{"Subject_start"},"\\t",$match2{"Subject_end"},"\\n";\n-\t\t\t\t\tlast;\n-\t\t\t\t}\n-\n-\t\t\t}\n-\t\t}\n-\t}\n-\t\n-\tmy @match_table_filtered;\n-\tfor (my $i=0;$i<=$#match_table;$i++){\n-\t\tif ($duplicate[$i] == 0){\n-\t\t\tpush (@match_table_filtered,$match_table[$i]);\n-\t\t}\n-\t}\n-\t\n-\tif ($#match_table > $#m'..b'ntf("%.2f",$nb_covered_subject*100/$sub_length);\n-\t$Query_coverage = sprintf("%.2f",$nb_covered_query*100/$q_length);\n-\t\n-\tprint "Final\\n";\n-\tprint $Query,"\\t",$Subject_Id,"\\t",$orientation,"\\t",$min_query,"\\t",$max_query,"\\t",$min_subject,"\\t",$max_subject,"\\t",$sub_length,"\\t";\n-\tprint "NB:",$nb_match,"\\t","O:",$overlap_length,"\\t","CQ:",$nb_covered_query,"\\t","CS:",$nb_covered_subject,"\\t",$Query_coverage,"\\t",$Subject_coverage,"\\t",$Identity,"\\n";\n-\n-\tif ($subject=~/^(.*?)\\s*$/){\n-\t\t$subject = $1;\n-\t}\n-\t\n-\tmy %match_joined;\n-\t$match_joined{"Query"}=$Query;\n-\t$match_joined{"Query_start"}=$min_query;\n-\t$match_joined{"Query_end"}=$max_query;\n-\t$match_joined{"Query_length"}=$q_length;\n-\t$match_joined{"QCoverage"} = $Query_coverage;\n-\t$match_joined{"Subject_id"}=$Subject_Id;\n-\t$match_joined{"Subject"}=$subject;\n-\t$match_joined{"Subject_start"}=$min_subject;\n-\t$match_joined{"Subject_end"}=$max_subject;\n-\t$match_joined{"Subject_length"}=$sub_length;\n-\t$match_joined{"SCoverage"} = $Subject_coverage;\n-\t$match_joined{"Similarity"}=$Identity;\n-\t$match_joined{"Nbmatch"}=$nb_match-$overlap_length;\n-\t$match_joined{"Display"}="$Query\\t$Subject_Id\\t$orientation\\t$Query_coverage%\\t$Subject_coverage%\\t$Identity%\\t$min_query\\t$max_query\\t$min_subject\\t$max_subject\\t$q_length\\t$sub_length\\t$subject";\n-\t\n-\tmy $chr;\n-\tmy $start;\n-\tmy $end;\n-\t\n-\tif ($match_joined{"Query"}=~/(.*?)\\:(\\d+)[\\.]+(\\d+)/){\n-\t\t$chr =$1;\n-\t\t$start = $2;\n-\t\t$end = $3;\n-\t\t\n-\t}\n-\telse {\n-\t\tprint "Error Parsing Query : ",$match_joined{"Query"},"\\n";\n-\t\texit(0);\n-\t}\n-\t\n-\tmy $subid = $match_joined{"Subject_id"};\n-\tmy $nb = $nb_match-$overlap_length;\n-\t\n-\tmy $key = "$chr#$start#$end#$nb#$subid";\n-\t$all_match_joined{$key} = \\%match_joined;\n-\t\n-\t\n-\t# my %match_joined;\n-\t# my $nb_covered=0;\n-\t# my $length=0;\n-\t# for (my $i=0;$i<=$#match_table;$i++){\n-\t\t# my %match=%{$match_table[$i]};\n-\t\t# $nb_covered+=$match{"Similarity"};\n-\t\t# $length = $match{"Subject_length"}\n-\t# }\n-\t# # if ($match{"Subject_length"} == 0){\n-\t\t# # print $key,"\\n",$match{"Ligne"},"\\n",$match{"Subject"},"\\n";\n-\t\t# # exit(0);\n-\t# # }\n-\t# my $similarity = sprintf("%.2f",$nb_covered / $length);\n-\t\n-\t# print "TEST : ",$key,"\\t",$similarity,"\\t",$nb_covered,"\\t",$length,"\\n";\n-\t\n-\t\n-\t# if ($similarity > 1){\n-\t\t# for (my $i=0;$i<=$#match_table;$i++){\n-\t\t\t# my %match=%{$match_table[$i]};\n-\t\t\t# print "----- : ",$match{"Ligne"},"\\n";\n-\t\t\t# exit(0);\n-\t\t# }\t\n-\t# }\n-\t# $match_joined{"Query"}=$match{"Query"};\n-\t# $match_joined{"Subject"}=$match{"Subject"};\n-\t# 
$match_joined{"Subject_id"}=$match{"Subject_id"};\n-\t# $match_joined{"Similarity"}=$similarity;\n-\t# $match_joined{"Query_length"}=$match{"Query_length"};\n-\t# $match_joined{"Subject_length"}=$match{"Subject_length"};\n-\t# push(@all_match_joined,\\%match_joined);\n-\n-}\n-\n-close (IB);\n-\n-my %all_match_joined_best;\n-foreach my $key (sort sortkey keys %all_match_joined){\n-\tmy %match = %{$all_match_joined{$key}};\n-\tprint OJ $match{"Display"},"\\n";\n-\tmy $shortkey = $match{"Query"};\n-\tif ($all_match_joined_best{$shortkey}){\n-\t}\n-\telse {\n-\t\t$all_match_joined_best{$shortkey} = \\%match;\n-\t\tprint OB $match{"Display"},"\\n";\n-\t}\n-\t\n-}\n-\n-# for (my $i=0;$i<=$#all_match_joined;$i++){\n-\t# my $match_joined = %{$all_match_joined[$i]};\n-\t# print $match_joined{"Query"},"\\t",$match_joined{"Subject"},"\\t",$match_joined{"Subject_id"},"\\t",$match_joined{"Similarity"},"\\t",$match_joined{"Query_length"},"\\t",$match_joined{"Subject_length"},"\\n";\n-# }\n-\n-\n-sub mysort{\n-\tmy %matcha=%{$a};\n-\tmy %matchb=%{$b};\n-\t\n-\t#print "TEST : ",$matcha{"Query_start"}, " / ", $matchb{"Query_start"},"\\n";\n-\t\n-\t$matcha{"Query_start"} <=> $matchb{"Query_start"}\n-\t||\n-\t$matcha{"Query_end"} <=> $matchb{"Query_end"}\n-\t\n-}\n-\n-sub sortkey {\n-\tmy @fieldsa = split (/\\#/,$a);\n-\tmy @fieldsb = split (/\\#/,$b);\n-\t\n-\t#print "$a\\n$b\\n";\n-\t#print $fieldsa[0]," cmp ",$fieldsb[0],"\\n";\n-\t#exit(0);\n-\n-\t$fieldsa[0] cmp $fieldsb[0]\n-\t||\n-\t$fieldsa[1] <=> $fieldsb[1]\n-\t||\n-\t$fieldsb[2] <=> $fieldsa[2]\n-\t||\n-\t$fieldsb[3] <=> $fieldsa[3]\n-}\n'
b
diff -r c52e74b98773 -r 8dfa09868059 genephys/parseblasttab.xml
--- a/genephys/parseblasttab.xml Wed Aug 20 12:42:40 2014 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
b
@@ -1,17 +0,0 @@
-<tool id="parseblasttab" name="parseblasttab" version="0.01">
-<description>Parse Blast result (Tabular) to merge feature</description>
-<command interpreter="perl">
-    parseblasttab.pl $input_blast $output_merge $output_best
-</command>
-<inputs>
- <param name="input_blast"  type="data" format="txt" label="Select a suitable input BLASTTAB () file from your history"/>
-</inputs>
-<outputs>
- <data name="output_merge" format="txt" label="${tool.name} MERGE on ${on_string}"/>
- <data name="output_best" format="txt" label="${tool.name} BEST on ${on_string}"/>
-</outputs>
-
-<help>
-
-</help>
-</tool>
b
diff -r c52e74b98773 -r 8dfa09868059 genephys/repository_dependencies.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/genephys/repository_dependencies.xml Fri Oct 24 05:54:20 2014 -0400
b
@@ -0,0 +1,4 @@
+<?xml version="1.0"?>
+<repositories description="GenePhys repository">
+    <repository changeset_revision="623f727cdff1" name="ncbi_blast_plus" owner="devteam" toolshed="http://toolshed.g2.bx.psu.edu" />
+</repositories>