Previous changeset 2:c52e74b98773 (2014-08-20) Next changeset 4:3d79224aa2dc (2014-10-30) |
Commit message:
Uploaded |
modified:
genephys/fastaGroomerForMakeBlastdb.pl genephys/fastaGroomerForMakeBlastdb.xml |
added:
genephys/Galaxy-Workflow-GenePhys_-_blastn.ga genephys/Galaxy-Workflow-GenePhys_-_blastp.ga genephys/Galaxy-Workflow-GenePhys_-_blastx.ga genephys/Galaxy-Workflow-GenePhys_-_tblastx.ga genephys/GenePhys.pl genephys/GenePhys.xml genephys/MergeBlastResults.pl genephys/MergeBlastResults.xml genephys/repository_dependencies.xml |
removed:
genephys/Galaxy-Workflow-GenePhys.ga genephys/extractgenesfromsegment.pl genephys/extractgenesfromsegment.xml genephys/extractgenomicsegment.pl genephys/extractgenomicsegment.xml genephys/extractgenomicsequencefromsegment.pl genephys/extractgenomicsequencefromsegment.xml genephys/mergeAllBestBlast.pl genephys/mergeAllBestBlast.xml genephys/parseblasttab.pl genephys/parseblasttab.xml |
b |
diff -r c52e74b98773 -r 8dfa09868059 genephys/Galaxy-Workflow-GenePhys.ga --- a/genephys/Galaxy-Workflow-GenePhys.ga Wed Aug 20 12:42:40 2014 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
b'@@ -1,674 +0,0 @@\n-{\n- "a_galaxy_workflow": "true", \n- "annotation": "", \n- "format-version": "0.1", \n- "name": "GenePhys", \n- "steps": {\n- "0": {\n- "annotation": "", \n- "id": 0, \n- "input_connections": {}, \n- "inputs": [\n- {\n- "description": "", \n- "name": "ASSEMBLY"\n- }\n- ], \n- "name": "Input dataset", \n- "outputs": [], \n- "position": {\n- "left": 201, \n- "top": 392\n- }, \n- "tool_errors": null, \n- "tool_id": null, \n- "tool_state": "{\\"name\\": \\"ASSEMBLY\\"}", \n- "tool_version": null, \n- "type": "data_input", \n- "user_outputs": []\n- }, \n- "1": {\n- "annotation": "", \n- "id": 1, \n- "input_connections": {}, \n- "inputs": [\n- {\n- "description": "", \n- "name": "GENE XML"\n- }\n- ], \n- "name": "Input dataset", \n- "outputs": [], \n- "position": {\n- "left": 200, \n- "top": 497\n- }, \n- "tool_errors": null, \n- "tool_id": null, \n- "tool_state": "{\\"name\\": \\"GENE XML\\"}", \n- "tool_version": null, \n- "type": "data_input", \n- "user_outputs": []\n- }, \n- "2": {\n- "annotation": "", \n- "id": 2, \n- "input_connections": {}, \n- "inputs": [\n- {\n- "description": "", \n- "name": "MARKERS"\n- }\n- ], \n- "name": "Input dataset", \n- "outputs": [], \n- "position": {\n- "left": 186, \n- "top": 618\n- }, \n- "tool_errors": null, \n- "tool_id": null, \n- "tool_state": "{\\"name\\": \\"MARKERS\\"}", \n- "tool_version": null, \n- "type": "data_input", \n- "user_outputs": []\n- }, \n- "3": {\n- "annotation": "", \n- "id": 3, \n- "input_connections": {}, \n- "inputs": [\n- {\n- "description": "", \n- "name": "GENETIC MAP"\n- }\n- ], \n- "name": "Input dataset", \n- "outputs": [], \n- "position": {\n- "left": 200, \n- "top": 737\n- }, \n- "tool_errors": null, \n- "tool_id": null, \n- "tool_state": "{\\"name\\": \\"GENETIC MAP\\"}", \n- "tool_version": null, \n- "type": "data_input", \n- "user_outputs": []\n- }, \n- "4": {\n- "annotation": "", \n- "id": 4, \n- "input_connections": {}, \n- "inputs": [\n- {\n- "description": "", \n- 
"name": "REFERENCE(NUC)"\n- }\n- ], \n- "name": "Input dataset", \n- "outputs": [], \n- "position": {\n- "left": 200, \n- "top": 857\n- }, \n- "tool_errors": null, \n- "tool_id": null, \n- "tool_state": "{\\"name\\": \\"REFERENCE(NUC)\\"}", \n- "tool_version": null, \n- "type": "data_input", \n- "user_outputs": []\n- }, \n- "5": {\n- "annotation": "", \n- "id": 5, \n- "input_connections": {}, \n- "inputs": [\n- {\n- "description": "", \n- "name": "REFERENCE(PROT)"\n- }\n- ], \n- "name": "Input dataset", \n- "outputs": [], \n- "position": {\n- "left": 206, \n- "top'..b' "left": 1368, \n- "top": 776.5\n- }, \n- "post_job_actions": {}, \n- "tool_errors": null, \n- "tool_id": "parseblasttab", \n- "tool_state": "{\\"__page__\\": 0, \\"__rerun_remap_job_id__\\": null, \\"input_blast\\": \\"null\\"}", \n- "tool_version": "0.01", \n- "type": "tool", \n- "user_outputs": []\n- }, \n- "19": {\n- "annotation": "", \n- "id": 19, \n- "input_connections": {\n- "input_blast": {\n- "id": 15, \n- "output_name": "output1"\n- }\n- }, \n- "inputs": [], \n- "name": "parseblasttab", \n- "outputs": [\n- {\n- "name": "output_merge", \n- "type": "txt"\n- }, \n- {\n- "name": "output_best", \n- "type": "txt"\n- }\n- ], \n- "position": {\n- "left": 1366, \n- "top": 477.5\n- }, \n- "post_job_actions": {}, \n- "tool_errors": null, \n- "tool_id": "parseblasttab", \n- "tool_state": "{\\"__page__\\": 0, \\"__rerun_remap_job_id__\\": null, \\"input_blast\\": \\"null\\"}", \n- "tool_version": "0.01", \n- "type": "tool", \n- "user_outputs": []\n- }, \n- "20": {\n- "annotation": "", \n- "id": 20, \n- "input_connections": {\n- "input_blast": {\n- "id": 16, \n- "output_name": "output1"\n- }\n- }, \n- "inputs": [], \n- "name": "parseblasttab", \n- "outputs": [\n- {\n- "name": "output_merge", \n- "type": "txt"\n- }, \n- {\n- "name": "output_best", \n- "type": "txt"\n- }\n- ], \n- "position": {\n- "left": 1372, \n- "top": 623.5\n- }, \n- "post_job_actions": {}, \n- "tool_errors": null, \n- "tool_id": 
"parseblasttab", \n- "tool_state": "{\\"__page__\\": 0, \\"__rerun_remap_job_id__\\": null, \\"input_blast\\": \\"null\\"}", \n- "tool_version": "0.01", \n- "type": "tool", \n- "user_outputs": []\n- }, \n- "21": {\n- "annotation": "", \n- "id": 21, \n- "input_connections": {\n- "input_blastn": {\n- "id": 17, \n- "output_name": "output_best"\n- }, \n- "input_blastp": {\n- "id": 20, \n- "output_name": "output_best"\n- }, \n- "input_blastx": {\n- "id": 19, \n- "output_name": "output_best"\n- }, \n- "input_tblastx": {\n- "id": 18, \n- "output_name": "output_best"\n- }\n- }, \n- "inputs": [], \n- "name": "mergeAllBestBlast", \n- "outputs": [\n- {\n- "name": "output_results", \n- "type": "txt"\n- }\n- ], \n- "position": {\n- "left": 1739.5, \n- "top": 384.5\n- }, \n- "post_job_actions": {}, \n- "tool_errors": null, \n- "tool_id": "mergeAllBestBlast", \n- "tool_state": "{\\"__page__\\": 0, \\"input_tblastx\\": \\"null\\", \\"input_blastp\\": \\"null\\", \\"input_blastx\\": \\"null\\", \\"__rerun_remap_job_id__\\": null, \\"input_blastn\\": \\"null\\"}", \n- "tool_version": "0.01", \n- "type": "tool", \n- "user_outputs": []\n- }\n- }\n-}\n\\ No newline at end of file\n' |
b |
diff -r c52e74b98773 -r 8dfa09868059 genephys/Galaxy-Workflow-GenePhys_-_blastn.ga --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/genephys/Galaxy-Workflow-GenePhys_-_blastn.ga Fri Oct 24 05:54:20 2014 -0400 |
[ |
b'@@ -0,0 +1,204 @@\n+{\n+ "a_galaxy_workflow": "true", \n+ "annotation": "", \n+ "format-version": "0.1", \n+ "name": "GenePhys - blastn", \n+ "steps": {\n+ "0": {\n+ "annotation": "", \n+ "id": 0, \n+ "input_connections": {}, \n+ "inputs": [\n+ {\n+ "description": "", \n+ "name": "NUCLEIC GENE SEQUENCE"\n+ }\n+ ], \n+ "name": "Input dataset", \n+ "outputs": [], \n+ "position": {\n+ "left": 294, \n+ "top": 225\n+ }, \n+ "tool_errors": null, \n+ "tool_id": null, \n+ "tool_state": "{\\"name\\": \\"NUCLEIC GENE SEQUENCE\\"}", \n+ "tool_version": null, \n+ "type": "data_input", \n+ "user_outputs": []\n+ }, \n+ "1": {\n+ "annotation": "", \n+ "id": 1, \n+ "input_connections": {}, \n+ "inputs": [\n+ {\n+ "description": "", \n+ "name": "NUCLEIC DB FILE"\n+ }\n+ ], \n+ "name": "Input dataset", \n+ "outputs": [], \n+ "position": {\n+ "left": 294, \n+ "top": 382\n+ }, \n+ "tool_errors": null, \n+ "tool_id": null, \n+ "tool_state": "{\\"name\\": \\"NUCLEIC DB FILE\\"}", \n+ "tool_version": null, \n+ "type": "data_input", \n+ "user_outputs": []\n+ }, \n+ "2": {\n+ "annotation": "", \n+ "id": 2, \n+ "input_connections": {\n+ "input_fasta": {\n+ "id": 1, \n+ "output_name": "output"\n+ }\n+ }, \n+ "inputs": [], \n+ "name": "fastaGroomerForMakeBlastdb", \n+ "outputs": [\n+ {\n+ "name": "output_fasta", \n+ "type": "fasta"\n+ }\n+ ], \n+ "position": {\n+ "left": 550, \n+ "top": 351\n+ }, \n+ "post_job_actions": {\n+ "HideDatasetActionoutput_fasta": {\n+ "action_arguments": {}, \n+ "action_type": "HideDatasetAction", \n+ "output_name": "output_fasta"\n+ }\n+ }, \n+ "tool_errors": null, \n+ "tool_id": "fastaGroomerForMakeBlastdb", \n+ "tool_state": "{\\"input_fasta\\": \\"null\\", \\"__rerun_remap_job_id__\\": null, \\"__page__\\": 0}", \n+ "tool_version": "1.00", \n+ "type": "tool", \n+ "user_outputs": []\n+ }, \n+ "3": {\n+ "annotation": "", \n+ "id": 3, \n+ "input_connections": {\n+ "input_file": {\n+ "id": 2, \n+ "output_name": "output_fasta"\n+ }\n+ }, \n+ "inputs": [], \n+ 
"name": "NCBI BLAST+ makeblastdb", \n+ "outputs": [\n+ {\n+ "name": "outfile", \n+ "type": "data"\n+ }\n+ ], \n+ "position": {\n+ "left": 925.5, \n+ "top": 332\n+ }, \n+ "post_job_actions": {\n+ "HideDatasetActionoutfile": {\n+ "action_arguments": {}, \n+ "action_type": "HideDatasetAction", \n+ "output_name": "outfile"\n+ }\n+ }, \n+ "tool_errors": null, \n+ "tool_id": "toolshed.g2.bx.psu.edu/repos/devteam/ncbi_blast_plus/ncbi_makeblastdb/0.1.00", \n+ "tool_state": "{\\"__page__\\": 0, \\"mask_data_file\\": \\"null\\", \\"input_file\\": \\"null\\", \\"dbtype\\": \\"\\\\\\"nucl\\\\\\"\\", \\"__rerun_remap_job_id__\\": null, \\"hash_index\\": \\"\\\\\\"True\\\\\\"\\", \\"tax\\": \\"{\\\\\\"taxselect\\\\\\": \\\\\\"\\\\\\'..b'se__\\\\\\": 0}\\", \\"title\\": \\"\\\\\\"\\\\\\"\\", \\"parse_seqids\\": \\"\\\\\\"False\\\\\\"\\"}", \n+ "tool_version": "0.1.00", \n+ "type": "tool", \n+ "user_outputs": []\n+ }, \n+ "4": {\n+ "annotation": "", \n+ "id": 4, \n+ "input_connections": {\n+ "db_opts|histdb": {\n+ "id": 3, \n+ "output_name": "outfile"\n+ }, \n+ "query": {\n+ "id": 0, \n+ "output_name": "output"\n+ }\n+ }, \n+ "inputs": [], \n+ "name": "NCBI BLAST+ blastn", \n+ "outputs": [\n+ {\n+ "name": "output1", \n+ "type": "tabular"\n+ }\n+ ], \n+ "position": {\n+ "left": 1295, \n+ "top": 212\n+ }, \n+ "post_job_actions": {\n+ "HideDatasetActionoutput1": {\n+ "action_arguments": {}, \n+ "action_type": "HideDatasetAction", \n+ "output_name": "output1"\n+ }\n+ }, \n+ "tool_errors": null, \n+ "tool_id": "toolshed.g2.bx.psu.edu/repos/devteam/ncbi_blast_plus/ncbi_blastn_wrapper/0.1.00", \n+ "tool_state": "{\\"evalue_cutoff\\": \\"\\\\\\"0.001\\\\\\"\\", \\"__page__\\": 0, \\"adv_opts\\": \\"{\\\\\\"identity_cutoff\\\\\\": \\\\\\"0.0\\\\\\", \\\\\\"adv_opts_selector\\\\\\": \\\\\\"advanced\\\\\\", \\\\\\"ungapped\\\\\\": \\\\\\"False\\\\\\", \\\\\\"filter_query\\\\\\": \\\\\\"False\\\\\\", \\\\\\"word_size\\\\\\": \\\\\\"0\\\\\\", \\\\\\"__current_case__\\\\\\": 1, 
\\\\\\"parse_deflines\\\\\\": \\\\\\"False\\\\\\", \\\\\\"strand\\\\\\": \\\\\\"-strand both\\\\\\", \\\\\\"max_hits\\\\\\": \\\\\\"10\\\\\\"}\\", \\"__rerun_remap_job_id__\\": null, \\"blast_type\\": \\"\\\\\\"megablast\\\\\\"\\", \\"db_opts\\": \\"{\\\\\\"db_opts_selector\\\\\\": \\\\\\"histdb\\\\\\", \\\\\\"subject\\\\\\": \\\\\\"\\\\\\", \\\\\\"histdb\\\\\\": null, \\\\\\"__current_case__\\\\\\": 1, \\\\\\"database\\\\\\": \\\\\\"\\\\\\"}\\", \\"output\\": \\"{\\\\\\"out_format\\\\\\": \\\\\\"cols\\\\\\", \\\\\\"std_cols\\\\\\": [\\\\\\"qseqid\\\\\\", \\\\\\"sseqid\\\\\\", \\\\\\"qstart\\\\\\", \\\\\\"qend\\\\\\", \\\\\\"sstart\\\\\\", \\\\\\"send\\\\\\"], \\\\\\"ids_cols\\\\\\": null, \\\\\\"tax_cols\\\\\\": null, \\\\\\"__current_case__\\\\\\": 2, \\\\\\"misc_cols\\\\\\": null, \\\\\\"ext_cols\\\\\\": [\\\\\\"positive\\\\\\", \\\\\\"qlen\\\\\\", \\\\\\"slen\\\\\\", \\\\\\"salltitles\\\\\\"]}\\", \\"query\\": \\"null\\"}", \n+ "tool_version": "0.1.00", \n+ "type": "tool", \n+ "user_outputs": []\n+ }, \n+ "5": {\n+ "annotation": "", \n+ "id": 5, \n+ "input_connections": {\n+ "input_blast": {\n+ "id": 4, \n+ "output_name": "output1"\n+ }\n+ }, \n+ "inputs": [], \n+ "name": "MergeBlastResults", \n+ "outputs": [\n+ {\n+ "name": "output_merge", \n+ "type": "txt"\n+ }, \n+ {\n+ "name": "log_file", \n+ "type": "txt"\n+ }\n+ ], \n+ "position": {\n+ "left": 1604, \n+ "top": 216\n+ }, \n+ "post_job_actions": {\n+ "RenameDatasetActionoutput_merge": {\n+ "action_arguments": {\n+ "newname": "BLASTN"\n+ }, \n+ "action_type": "RenameDatasetAction", \n+ "output_name": "output_merge"\n+ }\n+ }, \n+ "tool_errors": null, \n+ "tool_id": "MergeBlastResults", \n+ "tool_state": "{\\"__page__\\": 0, \\"max_overlap_fraction\\": \\"\\\\\\"0.5\\\\\\"\\", \\"__rerun_remap_job_id__\\": null, \\"header\\": \\"\\\\\\"BLASTN\\\\\\"\\", \\"input_blast\\": \\"null\\", \\"max_overlap_length_ignored\\": \\"\\\\\\"3\\\\\\"\\"}", \n+ "tool_version": "1.03", \n+ "type": "tool", \n+ "user_outputs": 
[]\n+ }\n+ }\n+}\n\\ No newline at end of file\n' |
b |
diff -r c52e74b98773 -r 8dfa09868059 genephys/Galaxy-Workflow-GenePhys_-_blastp.ga --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/genephys/Galaxy-Workflow-GenePhys_-_blastp.ga Fri Oct 24 05:54:20 2014 -0400 |
[ |
@@ -0,0 +1,204 @@ +{ + "a_galaxy_workflow": "true", + "annotation": "", + "format-version": "0.1", + "name": "GenePhys - blastp", + "steps": { + "0": { + "annotation": "", + "id": 0, + "input_connections": {}, + "inputs": [ + { + "description": "", + "name": "PROTEIC GENE SEQUENCE" + } + ], + "name": "Input dataset", + "outputs": [], + "position": { + "left": 294, + "top": 254 + }, + "tool_errors": null, + "tool_id": null, + "tool_state": "{\"name\": \"PROTEIC GENE SEQUENCE\"}", + "tool_version": null, + "type": "data_input", + "user_outputs": [] + }, + "1": { + "annotation": "", + "id": 1, + "input_connections": {}, + "inputs": [ + { + "description": "", + "name": "PROTEIN DB FILE" + } + ], + "name": "Input dataset", + "outputs": [], + "position": { + "left": 294, + "top": 411 + }, + "tool_errors": null, + "tool_id": null, + "tool_state": "{\"name\": \"PROTEIN DB FILE\"}", + "tool_version": null, + "type": "data_input", + "user_outputs": [] + }, + "2": { + "annotation": "", + "id": 2, + "input_connections": { + "input_fasta": { + "id": 1, + "output_name": "output" + } + }, + "inputs": [], + "name": "fastaGroomerForMakeBlastdb", + "outputs": [ + { + "name": "output_fasta", + "type": "fasta" + } + ], + "position": { + "left": 550, + "top": 380 + }, + "post_job_actions": { + "HideDatasetActionoutput_fasta": { + "action_arguments": {}, + "action_type": "HideDatasetAction", + "output_name": "output_fasta" + } + }, + "tool_errors": null, + "tool_id": "fastaGroomerForMakeBlastdb", + "tool_state": "{\"input_fasta\": \"null\", \"__rerun_remap_job_id__\": null, \"__page__\": 0}", + "tool_version": "1.00", + "type": "tool", + "user_outputs": [] + }, + "3": { + "annotation": "", + "id": 3, + "input_connections": { + "input_file": { + "id": 2, + "output_name": "output_fasta" + } + }, + "inputs": [], + "name": "NCBI BLAST+ makeblastdb", + "outputs": [ + { + "name": "outfile", + "type": "data" + } + ], + "position": { + "left": 923.5, + "top": 362 + }, + "post_job_actions": { + 
"HideDatasetActionoutfile": { + "action_arguments": {}, + "action_type": "HideDatasetAction", + "output_name": "outfile" + } + }, + "tool_errors": null, + "tool_id": "toolshed.g2.bx.psu.edu/repos/devteam/ncbi_blast_plus/ncbi_makeblastdb/0.1.00", + "tool_state": "{\"__page__\": 0, \"mask_data_file\": \"null\", \"input_file\": \"null\", \"dbtype\": \"\\\"prot\\\"\", \"__rerun_remap_job_id__\": null, \"hash_index\": \"\\\"True\\\"\", \"tax\": \"{\\\"taxselect\\\": \\\"\\\", \\\"__current_case__\\\": 0}\", \"title\": \"\\\"\\\"\", \"parse_seqids\": \"\\\"False\\\"\"}", + "tool_version": "0.1.00", + "type": "tool", + "user_outputs": [] + }, + "4": { + "annotation": "", + "id": 4, + "input_connections": { + "db_opts|histdb": { + "id": 3, + "output_name": "outfile" + }, + "query": { + "id": 0, + "output_name": "output" + } + }, + "inputs": [], + "name": "NCBI BLAST+ blastp", + "outputs": [ + { + "name": "output1", + "type": "tabular" + } + ], + "position": { + "left": 1222, + "top": 214 + }, + "post_job_actions": { + "HideDatasetActionoutput1": { + "action_arguments": {}, + "action_type": "HideDatasetAction", + "output_name": "output1" + } + }, + "tool_errors": null, + "tool_id": "toolshed.g2.bx.psu.edu/repos/devteam/ncbi_blast_plus/ncbi_blastp_wrapper/0.1.00", + "tool_state": "{\"evalue_cutoff\": \"\\\"0.001\\\"\", \"__page__\": 0, \"adv_opts\": \"{\\\"matrix\\\": \\\"BLOSUM62\\\", \\\"adv_opts_selector\\\": \\\"advanced\\\", \\\"filter_query\\\": \\\"False\\\", \\\"word_size\\\": \\\"0\\\", \\\"__current_case__\\\": 1, \\\"parse_deflines\\\": \\\"False\\\", \\\"max_hits\\\": \\\"10\\\"}\", \"__rerun_remap_job_id__\": null, \"blast_type\": \"\\\"blastp\\\"\", \"db_opts\": \"{\\\"db_opts_selector\\\": \\\"histdb\\\", \\\"subject\\\": \\\"\\\", \\\"histdb\\\": null, \\\"__current_case__\\\": 1, \\\"database\\\": \\\"\\\"}\", \"output\": \"{\\\"out_format\\\": \\\"cols\\\", \\\"std_cols\\\": [\\\"qseqid\\\", \\\"sseqid\\\", \\\"qstart\\\", \\\"qend\\\", \\\"sstart\\\", 
\\\"send\\\"], \\\"ids_cols\\\": null, \\\"tax_cols\\\": null, \\\"__current_case__\\\": 2, \\\"misc_cols\\\": null, \\\"ext_cols\\\": [\\\"positive\\\", \\\"qlen\\\", \\\"slen\\\", \\\"salltitles\\\"]}\", \"query\": \"null\"}", + "tool_version": "0.1.00", + "type": "tool", + "user_outputs": [] + }, + "5": { + "annotation": "", + "id": 5, + "input_connections": { + "input_blast": { + "id": 4, + "output_name": "output1" + } + }, + "inputs": [], + "name": "MergeBlastResults", + "outputs": [ + { + "name": "output_merge", + "type": "txt" + }, + { + "name": "log_file", + "type": "txt" + } + ], + "position": { + "left": 1544, + "top": 215 + }, + "post_job_actions": { + "RenameDatasetActionoutput_merge": { + "action_arguments": { + "newname": "BLASTP" + }, + "action_type": "RenameDatasetAction", + "output_name": "output_merge" + } + }, + "tool_errors": null, + "tool_id": "MergeBlastResults", + "tool_state": "{\"__page__\": 0, \"max_overlap_fraction\": \"\\\"0.5\\\"\", \"__rerun_remap_job_id__\": null, \"header\": \"\\\"BLASTP\\\"\", \"input_blast\": \"null\", \"max_overlap_length_ignored\": \"\\\"3\\\"\"}", + "tool_version": "1.03", + "type": "tool", + "user_outputs": [] + } + } +} \ No newline at end of file |
b |
diff -r c52e74b98773 -r 8dfa09868059 genephys/Galaxy-Workflow-GenePhys_-_blastx.ga --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/genephys/Galaxy-Workflow-GenePhys_-_blastx.ga Fri Oct 24 05:54:20 2014 -0400 |
[ |
b'@@ -0,0 +1,204 @@\n+{\n+ "a_galaxy_workflow": "true", \n+ "annotation": "", \n+ "format-version": "0.1", \n+ "name": "GenePhys - blastx", \n+ "steps": {\n+ "0": {\n+ "annotation": "", \n+ "id": 0, \n+ "input_connections": {}, \n+ "inputs": [\n+ {\n+ "description": "", \n+ "name": "NUCLEIC GENE SEQUENCE"\n+ }\n+ ], \n+ "name": "Input dataset", \n+ "outputs": [], \n+ "position": {\n+ "left": 294, \n+ "top": 313\n+ }, \n+ "tool_errors": null, \n+ "tool_id": null, \n+ "tool_state": "{\\"name\\": \\"NUCLEIC GENE SEQUENCE\\"}", \n+ "tool_version": null, \n+ "type": "data_input", \n+ "user_outputs": []\n+ }, \n+ "1": {\n+ "annotation": "", \n+ "id": 1, \n+ "input_connections": {}, \n+ "inputs": [\n+ {\n+ "description": "", \n+ "name": "PROTEIN DB FILE"\n+ }\n+ ], \n+ "name": "Input dataset", \n+ "outputs": [], \n+ "position": {\n+ "left": 298, \n+ "top": 476\n+ }, \n+ "tool_errors": null, \n+ "tool_id": null, \n+ "tool_state": "{\\"name\\": \\"PROTEIN DB FILE\\"}", \n+ "tool_version": null, \n+ "type": "data_input", \n+ "user_outputs": []\n+ }, \n+ "2": {\n+ "annotation": "", \n+ "id": 2, \n+ "input_connections": {\n+ "input_fasta": {\n+ "id": 1, \n+ "output_name": "output"\n+ }\n+ }, \n+ "inputs": [], \n+ "name": "fastaGroomerForMakeBlastdb", \n+ "outputs": [\n+ {\n+ "name": "output_fasta", \n+ "type": "fasta"\n+ }\n+ ], \n+ "position": {\n+ "left": 550, \n+ "top": 439\n+ }, \n+ "post_job_actions": {\n+ "HideDatasetActionoutput_fasta": {\n+ "action_arguments": {}, \n+ "action_type": "HideDatasetAction", \n+ "output_name": "output_fasta"\n+ }\n+ }, \n+ "tool_errors": null, \n+ "tool_id": "fastaGroomerForMakeBlastdb", \n+ "tool_state": "{\\"input_fasta\\": \\"null\\", \\"__rerun_remap_job_id__\\": null, \\"__page__\\": 0}", \n+ "tool_version": "1.00", \n+ "type": "tool", \n+ "user_outputs": []\n+ }, \n+ "3": {\n+ "annotation": "", \n+ "id": 3, \n+ "input_connections": {\n+ "input_file": {\n+ "id": 2, \n+ "output_name": "output_fasta"\n+ }\n+ }, \n+ "inputs": [], \n+ 
"name": "NCBI BLAST+ makeblastdb", \n+ "outputs": [\n+ {\n+ "name": "outfile", \n+ "type": "data"\n+ }\n+ ], \n+ "position": {\n+ "left": 918.5, \n+ "top": 414\n+ }, \n+ "post_job_actions": {\n+ "HideDatasetActionoutfile": {\n+ "action_arguments": {}, \n+ "action_type": "HideDatasetAction", \n+ "output_name": "outfile"\n+ }\n+ }, \n+ "tool_errors": null, \n+ "tool_id": "toolshed.g2.bx.psu.edu/repos/devteam/ncbi_blast_plus/ncbi_makeblastdb/0.1.00", \n+ "tool_state": "{\\"__page__\\": 0, \\"mask_data_file\\": \\"null\\", \\"input_file\\": \\"null\\", \\"dbtype\\": \\"\\\\\\"prot\\\\\\"\\", \\"__rerun_remap_job_id__\\": null, \\"hash_index\\": \\"\\\\\\"True\\\\\\"\\", \\"tax\\": \\"{\\\\\\"taxselect\\\\\\": \\\\\\"\\\\\\'..b'rent_case__\\\\\\": 0}\\", \\"title\\": \\"\\\\\\"\\\\\\"\\", \\"parse_seqids\\": \\"\\\\\\"False\\\\\\"\\"}", \n+ "tool_version": "0.1.00", \n+ "type": "tool", \n+ "user_outputs": []\n+ }, \n+ "4": {\n+ "annotation": "", \n+ "id": 4, \n+ "input_connections": {\n+ "db_opts|histdb": {\n+ "id": 3, \n+ "output_name": "outfile"\n+ }, \n+ "query": {\n+ "id": 0, \n+ "output_name": "output"\n+ }\n+ }, \n+ "inputs": [], \n+ "name": "NCBI BLAST+ blastx", \n+ "outputs": [\n+ {\n+ "name": "output1", \n+ "type": "tabular"\n+ }\n+ ], \n+ "position": {\n+ "left": 1276.5, \n+ "top": 291\n+ }, \n+ "post_job_actions": {\n+ "HideDatasetActionoutput1": {\n+ "action_arguments": {}, \n+ "action_type": "HideDatasetAction", \n+ "output_name": "output1"\n+ }\n+ }, \n+ "tool_errors": null, \n+ "tool_id": "toolshed.g2.bx.psu.edu/repos/devteam/ncbi_blast_plus/ncbi_blastx_wrapper/0.1.00", \n+ "tool_state": "{\\"evalue_cutoff\\": \\"\\\\\\"0.001\\\\\\"\\", \\"__page__\\": 0, \\"adv_opts\\": \\"{\\\\\\"matrix\\\\\\": \\\\\\"BLOSUM62\\\\\\", \\\\\\"adv_opts_selector\\\\\\": \\\\\\"advanced\\\\\\", \\\\\\"ungapped\\\\\\": \\\\\\"False\\\\\\", \\\\\\"filter_query\\\\\\": \\\\\\"False\\\\\\", \\\\\\"word_size\\\\\\": \\\\\\"0\\\\\\", \\\\\\"__current_case__\\\\\\": 1, 
\\\\\\"parse_deflines\\\\\\": \\\\\\"False\\\\\\", \\\\\\"strand\\\\\\": \\\\\\"-strand both\\\\\\", \\\\\\"max_hits\\\\\\": \\\\\\"10\\\\\\"}\\", \\"__rerun_remap_job_id__\\": null, \\"db_opts\\": \\"{\\\\\\"db_opts_selector\\\\\\": \\\\\\"histdb\\\\\\", \\\\\\"subject\\\\\\": \\\\\\"\\\\\\", \\\\\\"histdb\\\\\\": null, \\\\\\"__current_case__\\\\\\": 1, \\\\\\"database\\\\\\": \\\\\\"\\\\\\"}\\", \\"query_gencode\\": \\"\\\\\\"1\\\\\\"\\", \\"output\\": \\"{\\\\\\"out_format\\\\\\": \\\\\\"cols\\\\\\", \\\\\\"std_cols\\\\\\": [\\\\\\"qseqid\\\\\\", \\\\\\"sseqid\\\\\\", \\\\\\"qstart\\\\\\", \\\\\\"qend\\\\\\", \\\\\\"sstart\\\\\\", \\\\\\"send\\\\\\"], \\\\\\"ids_cols\\\\\\": null, \\\\\\"tax_cols\\\\\\": null, \\\\\\"__current_case__\\\\\\": 2, \\\\\\"misc_cols\\\\\\": null, \\\\\\"ext_cols\\\\\\": [\\\\\\"positive\\\\\\", \\\\\\"qlen\\\\\\", \\\\\\"slen\\\\\\", \\\\\\"salltitles\\\\\\"]}\\", \\"query\\": \\"null\\"}", \n+ "tool_version": "0.1.00", \n+ "type": "tool", \n+ "user_outputs": []\n+ }, \n+ "5": {\n+ "annotation": "", \n+ "id": 5, \n+ "input_connections": {\n+ "input_blast": {\n+ "id": 4, \n+ "output_name": "output1"\n+ }\n+ }, \n+ "inputs": [], \n+ "name": "MergeBlastResults", \n+ "outputs": [\n+ {\n+ "name": "output_merge", \n+ "type": "txt"\n+ }, \n+ {\n+ "name": "log_file", \n+ "type": "txt"\n+ }\n+ ], \n+ "position": {\n+ "left": 1599, \n+ "top": 291\n+ }, \n+ "post_job_actions": {\n+ "RenameDatasetActionoutput_merge": {\n+ "action_arguments": {\n+ "newname": "BLASTX"\n+ }, \n+ "action_type": "RenameDatasetAction", \n+ "output_name": "output_merge"\n+ }\n+ }, \n+ "tool_errors": null, \n+ "tool_id": "MergeBlastResults", \n+ "tool_state": "{\\"__page__\\": 0, \\"max_overlap_fraction\\": \\"\\\\\\"0.5\\\\\\"\\", \\"__rerun_remap_job_id__\\": null, \\"header\\": \\"\\\\\\"BLASTX\\\\\\"\\", \\"input_blast\\": \\"null\\", \\"max_overlap_length_ignored\\": \\"\\\\\\"3\\\\\\"\\"}", \n+ "tool_version": "1.03", \n+ "type": "tool", \n+ "user_outputs": []\n+ 
}\n+ }\n+}\n\\ No newline at end of file\n' |
b |
diff -r c52e74b98773 -r 8dfa09868059 genephys/Galaxy-Workflow-GenePhys_-_tblastx.ga --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/genephys/Galaxy-Workflow-GenePhys_-_tblastx.ga Fri Oct 24 05:54:20 2014 -0400 |
[ |
b'@@ -0,0 +1,204 @@\n+{\n+ "a_galaxy_workflow": "true", \n+ "annotation": "", \n+ "format-version": "0.1", \n+ "name": "GenePhys - tblastx", \n+ "steps": {\n+ "0": {\n+ "annotation": "", \n+ "id": 0, \n+ "input_connections": {}, \n+ "inputs": [\n+ {\n+ "description": "", \n+ "name": "NUCLEIC GENE SEQUENCE"\n+ }\n+ ], \n+ "name": "Input dataset", \n+ "outputs": [], \n+ "position": {\n+ "left": 294, \n+ "top": 222\n+ }, \n+ "tool_errors": null, \n+ "tool_id": null, \n+ "tool_state": "{\\"name\\": \\"NUCLEIC GENE SEQUENCE\\"}", \n+ "tool_version": null, \n+ "type": "data_input", \n+ "user_outputs": []\n+ }, \n+ "1": {\n+ "annotation": "", \n+ "id": 1, \n+ "input_connections": {}, \n+ "inputs": [\n+ {\n+ "description": "", \n+ "name": "NUCLEIC DB FILE"\n+ }\n+ ], \n+ "name": "Input dataset", \n+ "outputs": [], \n+ "position": {\n+ "left": 294, \n+ "top": 379\n+ }, \n+ "tool_errors": null, \n+ "tool_id": null, \n+ "tool_state": "{\\"name\\": \\"NUCLEIC DB FILE\\"}", \n+ "tool_version": null, \n+ "type": "data_input", \n+ "user_outputs": []\n+ }, \n+ "2": {\n+ "annotation": "", \n+ "id": 2, \n+ "input_connections": {\n+ "input_fasta": {\n+ "id": 1, \n+ "output_name": "output"\n+ }\n+ }, \n+ "inputs": [], \n+ "name": "fastaGroomerForMakeBlastdb", \n+ "outputs": [\n+ {\n+ "name": "output_fasta", \n+ "type": "fasta"\n+ }\n+ ], \n+ "position": {\n+ "left": 557, \n+ "top": 353\n+ }, \n+ "post_job_actions": {\n+ "HideDatasetActionoutput_fasta": {\n+ "action_arguments": {}, \n+ "action_type": "HideDatasetAction", \n+ "output_name": "output_fasta"\n+ }\n+ }, \n+ "tool_errors": null, \n+ "tool_id": "fastaGroomerForMakeBlastdb", \n+ "tool_state": "{\\"input_fasta\\": \\"null\\", \\"__rerun_remap_job_id__\\": null, \\"__page__\\": 0}", \n+ "tool_version": "1.00", \n+ "type": "tool", \n+ "user_outputs": []\n+ }, \n+ "3": {\n+ "annotation": "", \n+ "id": 3, \n+ "input_connections": {\n+ "input_file": {\n+ "id": 2, \n+ "output_name": "output_fasta"\n+ }\n+ }, \n+ "inputs": [], \n+ 
"name": "NCBI BLAST+ makeblastdb", \n+ "outputs": [\n+ {\n+ "name": "outfile", \n+ "type": "data"\n+ }\n+ ], \n+ "position": {\n+ "left": 925.5, \n+ "top": 329\n+ }, \n+ "post_job_actions": {\n+ "HideDatasetActionoutfile": {\n+ "action_arguments": {}, \n+ "action_type": "HideDatasetAction", \n+ "output_name": "outfile"\n+ }\n+ }, \n+ "tool_errors": null, \n+ "tool_id": "toolshed.g2.bx.psu.edu/repos/devteam/ncbi_blast_plus/ncbi_makeblastdb/0.1.00", \n+ "tool_state": "{\\"__page__\\": 0, \\"mask_data_file\\": \\"null\\", \\"input_file\\": \\"null\\", \\"dbtype\\": \\"\\\\\\"nucl\\\\\\"\\", \\"__rerun_remap_job_id__\\": null, \\"hash_index\\": \\"\\\\\\"True\\\\\\"\\", \\"tax\\": \\"{\\\\\\"taxselect\\\\\\": \\\\\\"\\\\'..b'rent_case__\\\\\\": 0}\\", \\"title\\": \\"\\\\\\"\\\\\\"\\", \\"parse_seqids\\": \\"\\\\\\"False\\\\\\"\\"}", \n+ "tool_version": "0.1.00", \n+ "type": "tool", \n+ "user_outputs": []\n+ }, \n+ "4": {\n+ "annotation": "", \n+ "id": 4, \n+ "input_connections": {\n+ "db_opts|histdb": {\n+ "id": 3, \n+ "output_name": "outfile"\n+ }, \n+ "query": {\n+ "id": 0, \n+ "output_name": "output"\n+ }\n+ }, \n+ "inputs": [], \n+ "name": "NCBI BLAST+ tblastx", \n+ "outputs": [\n+ {\n+ "name": "output1", \n+ "type": "tabular"\n+ }\n+ ], \n+ "position": {\n+ "left": 1239, \n+ "top": 189\n+ }, \n+ "post_job_actions": {\n+ "HideDatasetActionoutput1": {\n+ "action_arguments": {}, \n+ "action_type": "HideDatasetAction", \n+ "output_name": "output1"\n+ }\n+ }, \n+ "tool_errors": null, \n+ "tool_id": "toolshed.g2.bx.psu.edu/repos/devteam/ncbi_blast_plus/ncbi_tblastx_wrapper/0.1.00", \n+ "tool_state": "{\\"evalue_cutoff\\": \\"\\\\\\"0.001\\\\\\"\\", \\"__page__\\": 0, \\"adv_opts\\": \\"{\\\\\\"matrix\\\\\\": \\\\\\"BLOSUM62\\\\\\", \\\\\\"adv_opts_selector\\\\\\": \\\\\\"advanced\\\\\\", \\\\\\"filter_query\\\\\\": \\\\\\"False\\\\\\", \\\\\\"word_size\\\\\\": \\\\\\"0\\\\\\", \\\\\\"__current_case__\\\\\\": 1, \\\\\\"parse_deflines\\\\\\": \\\\\\"False\\\\\\", 
\\\\\\"db_gencode\\\\\\": \\\\\\"1\\\\\\", \\\\\\"strand\\\\\\": \\\\\\"-strand both\\\\\\", \\\\\\"max_hits\\\\\\": \\\\\\"10\\\\\\"}\\", \\"__rerun_remap_job_id__\\": null, \\"db_opts\\": \\"{\\\\\\"db_opts_selector\\\\\\": \\\\\\"histdb\\\\\\", \\\\\\"subject\\\\\\": \\\\\\"\\\\\\", \\\\\\"histdb\\\\\\": null, \\\\\\"__current_case__\\\\\\": 1, \\\\\\"database\\\\\\": \\\\\\"\\\\\\"}\\", \\"query_gencode\\": \\"\\\\\\"1\\\\\\"\\", \\"output\\": \\"{\\\\\\"out_format\\\\\\": \\\\\\"cols\\\\\\", \\\\\\"std_cols\\\\\\": [\\\\\\"qseqid\\\\\\", \\\\\\"sseqid\\\\\\", \\\\\\"qstart\\\\\\", \\\\\\"qend\\\\\\", \\\\\\"sstart\\\\\\", \\\\\\"send\\\\\\"], \\\\\\"ids_cols\\\\\\": null, \\\\\\"tax_cols\\\\\\": null, \\\\\\"__current_case__\\\\\\": 2, \\\\\\"misc_cols\\\\\\": null, \\\\\\"ext_cols\\\\\\": [\\\\\\"positive\\\\\\", \\\\\\"qlen\\\\\\", \\\\\\"slen\\\\\\", \\\\\\"salltitles\\\\\\"]}\\", \\"query\\": \\"null\\"}", \n+ "tool_version": "0.1.00", \n+ "type": "tool", \n+ "user_outputs": []\n+ }, \n+ "5": {\n+ "annotation": "", \n+ "id": 5, \n+ "input_connections": {\n+ "input_blast": {\n+ "id": 4, \n+ "output_name": "output1"\n+ }\n+ }, \n+ "inputs": [], \n+ "name": "MergeBlastResults", \n+ "outputs": [\n+ {\n+ "name": "output_merge", \n+ "type": "txt"\n+ }, \n+ {\n+ "name": "log_file", \n+ "type": "txt"\n+ }\n+ ], \n+ "position": {\n+ "left": 1529, \n+ "top": 192\n+ }, \n+ "post_job_actions": {\n+ "RenameDatasetActionoutput_merge": {\n+ "action_arguments": {\n+ "newname": "TBLASTX"\n+ }, \n+ "action_type": "RenameDatasetAction", \n+ "output_name": "output_merge"\n+ }\n+ }, \n+ "tool_errors": null, \n+ "tool_id": "MergeBlastResults", \n+ "tool_state": "{\\"__page__\\": 0, \\"max_overlap_fraction\\": \\"\\\\\\"0.5\\\\\\"\\", \\"__rerun_remap_job_id__\\": null, \\"header\\": \\"\\\\\\"TBLASTX\\\\\\"\\", \\"input_blast\\": \\"null\\", \\"max_overlap_length_ignored\\": \\"\\\\\\"3\\\\\\"\\"}", \n+ "tool_version": "1.03", \n+ "type": "tool", \n+ "user_outputs": []\n+ }\n+ 
}\n+}\n\\ No newline at end of file\n' |
b |
diff -r c52e74b98773 -r 8dfa09868059 genephys/GenePhys.pl --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/genephys/GenePhys.pl Fri Oct 24 05:54:20 2014 -0400 |
[ |
b'@@ -0,0 +1,335 @@\n+#!/usr/bin/perl\n+#V1.1.0 integrated gene extraction \n+#V1.0.2 integrated segment fasta extraction\n+#V1.0.1 added log and option\n+#V1.0.0\n+use strict;\n+use warnings;\n+use Getopt::Long;\n+\n+my $input_blast_files;\n+my $input_genes_position_file;\n+my $input_assembly_file;\n+my $input_markers_position_file;\n+my $input_markers_file;\n+my $log_file;\n+my $output_fasta_file;\n+my $output_segment_file;\n+my $output_genes_list_file;\n+my $EXTRACT_SEQ = "NO";\n+my $WINDOW = 200000;\n+my $OFFSET = 100000;\n+my $MAX_BLAST_LINES = 1;\n+\n+GetOptions (\n+"input_assembly_file=s" => \\$input_assembly_file,\n+"input_markers_position_file=s" => \\$input_markers_position_file,\n+"input_markers_file=s" => \\$input_markers_file,\n+"log_file=s" => \\$log_file,\n+"output_fasta_file=s" => \\$output_fasta_file,\n+"output_segment_file=s" => \\$output_segment_file,\n+"extractseq=s" => \\$EXTRACT_SEQ,\n+"window=i" => \\$WINDOW,\n+"offset=i" =>\\$OFFSET,\n+"input_blast_files=s" => \\$input_blast_files,\n+"input_genes_position_file=s"=> \\$input_genes_position_file,\n+"output_genes_list_file=s"=>\\$output_genes_list_file,\n+"max_blast_lines=i" => \\$MAX_BLAST_LINES\n+) or die("Error in command line arguments\\n");\n+\n+open(LF, ">$log_file") or die("Can\'t open $log_file\\n");\n+#print LF $EXTRACT_SEQ."\\n";\n+\n+my $current_annotation="";\n+my @list_marquer;\n+my %chr;\n+my %position;\n+\n+open(MP, $input_markers_position_file) or die("Can\'t open $input_markers_position_file\\n");\n+\n+my $compt=0;\n+while (my $line=<MP>){\n+\t$compt++;\n+\tmy @cols = split(/\\t/,$line);\n+\tif ($#cols != 3){\n+\t\tprint STDERR "Error in marker position file format\\n$compt : $line\\n";\n+\t\texit(0);\n+\t}\n+\tmy %current;\n+\t# Number#Map#Name#Chr#Position#GeneAT#FunctionAT\n+\tmy $Name = $cols[0];\n+\tmy $Locus = $cols[1];\n+\tmy $Chr = $cols[2];\n+\tmy $Position = $cols[3];\n+\n+\n+\t$chr{$Name} = $Chr;\n+\t$position{$Name} = $Position;\n+\t\n+\t### Modification 
0.9.9\n+\tif ($Locus ne $Name){ \n+\t\t$chr{$Locus} = $Chr;\n+\t\t$position{$Locus} = $Position;\t\n+\t}\n+\t###\n+\t\n+}\n+close (MP);\n+\n+open(MA, $input_markers_file) or die("Can\'t open $input_markers_file\\n");\n+while (my $line=<MA>){\n+\tmy @cols = split (/\\s+/,$line);\n+\tfor (my $i=0;$i<=$#cols;$i++){\n+\t\tmy $current = $cols[$i];\n+\t\tchomp($current);\n+\t\tif ($current !~ /^\\s+$/){\n+\t\t\tpush(@list_marquer,$current);\n+\t\t}\n+\t}\n+}\n+close (MA);\n+\n+my %coord_by_chr;\n+for (my $i=0;$i<=$#list_marquer;$i++){\n+\tmy $current_name = $list_marquer[$i];\n+\tmy $current_chr = $chr{$current_name};\n+\tmy $current_position = $position{$current_name};\n+\t\n+\tif ($current_position =~ /^\\d+$/){\n+\t\tmy @tbl_coord_for_current_chr;\n+\t\tif ($coord_by_chr{$current_chr}){\n+\t\t\t@tbl_coord_for_current_chr = @{$coord_by_chr{$current_chr}};\n+\t\t}\n+\t\tpush(@tbl_coord_for_current_chr,$current_position);\n+\t\t$coord_by_chr{$current_chr}=\\@tbl_coord_for_current_chr;\n+\t}\n+\telsif (($current_position =~/\\s*-\\s*/)||($current_position =~/none/i)){\n+\t\t\n+\t}\n+\telse {\n+\t\tchomp($current_position);\n+\t\tprint STDERR "Error Parsing $current_name\\tposition not recognized : $current_position \\n";\n+\t\tprint $list_marquer[$i],"\\n";\n+\t}\n+}\n+\n+open(OS, ">$output_segment_file") or die ("Can\'t open $output_segment_file\\n");\n+\n+my @segment_chr;\n+my @segment_start;\n+my @segment_end;\n+\n+foreach my $key (sort keys %coord_by_chr){\n+\tmy @tbl_coord = @{$coord_by_chr{$key}};\n+\t@tbl_coord = sort { $a <=> $b } @tbl_coord;\n+\tmy $current_start;\n+\tmy $current_stop;\n+\tmy $current_start_with_offset;\n+\tmy $current_stop_with_offset;\n+\t\n+\tfor (my $i=0;$i<=$#tbl_coord;$i++){\n+\t\tif (!$current_start){$current_start=$tbl_coord[$i];$current_stop=$tbl_coord[$i]}\n+\t\t\n+\t\t# print "$i : $current_start / $current_stop\\n";\n+\t\tif ($tbl_coord[$i]>$current_stop+$WINDOW){\n+\t\t\t#OFFSET\n+\t\t\tif 
($current_start>$OFFSET){$current_start_with_offset=$current_start-$OFFSET;}else{$current_start_with_offset=1;}\n+\t\t\t$current_stop_with_offset = $current_stop + $OFFSET;\n+\t\t\t#######\n+\t\t\tprint OS $key,":",$current_start_with_offset,"..",$current_stop_with_offset,"\\n";\n+\t\t\tpush(@segment_chr,$key);\n+\t\t\tpush(@segment_start,$'..b'ligne=~/^([ATGCNXatgcnx]+)\\s*$/){\n+\t\t\t\t$current_seq .= $1;\n+\t\t\t}\n+\t\t\telse {\n+\t\t\t\tprint STDERR "Erreur Parsing n\xc2\xb01\\n$ligne\\n";\n+\t\t\t}\n+\t\t}\n+\t}\n+\n+\t#TRAITEMENT DU DERNIER\n+\tif ($current_header){\n+\t\t$genome{$current_header} = $current_seq;\n+\t\tundef($current_seq);\n+\t}\n+\tclose (AF);\n+\n+\topen(OF, ">$output_fasta_file") or die ("Can\'t open $output_fasta_file\\n");\n+\tfor (my $i=0;$i<=$#segment_chr;$i++){\n+\t\tmy $compt=0;\n+\t\tmy $current_seq="";\n+\t\tprint OF ">",$segment_chr[$i],":",$segment_start[$i],"..",$segment_end[$i]."\\n";\n+\t\t### Modification 0.9.9\n+\t\tif ($segment_end[$i]>length($genome{$segment_chr[$i]})){\n+\t\t\t$segment_end[$i] = length($genome{$segment_chr[$i]});\n+\t\t}\n+\t\t###\n+\n+\t\tmy @SEQ = split(//,$genome{$segment_chr[$i]});\n+\t\tfor (my $coord = $segment_start[$i]-1; $coord<=$segment_end[$i]-1;$coord++){\n+\t\t\t$compt++;\n+\t\t\tif ($compt > 60 ){\n+\t\t\t\t$current_seq .= "\\n";\n+\t\t\t\t$compt=1;\n+\t\t\t}\n+\t\t\t$current_seq .= $SEQ[$coord];\n+\t\t\n+\t\t}\n+\t\tprint OF "$current_seq\\n";\n+\t}\n+\tclose (OF);\n+}\n+\n+### GENE and BLAST Extraction\n+my @blast_by_base;\n+my @header;\n+\n+\n+my @blastfiles = split(/\\,/,$input_blast_files);\n+for (my $i=0;$i<=$#blastfiles;$i++){\n+\tmy $current_blast_file = $blastfiles[$i];\n+\tmy $current_blast_header = "DEFAULT";\n+\tmy %current_blast; \n+\topen (B,"$current_blast_file") or die ("Can\'t open $current_blast_file\\n");\n+\twhile (my $line =<B>){\n+\t\tif ($line =~ /^\\#\\#(.*?)$/){\n+\t\t\t$current_blast_header = $1;\n+\t\t\tprint LF $current_blast_header."\\n";\n+\t\t}\n+\t\telsif 
($line =~ /^\\#/){\n+\t\t\t# blast file column legend\n+\t\t}\n+\t\telse {\n+\t\t\tmy @fields = split(/\\s+/,$line);\n+\t\t\tmy $gene_id = $fields[0];\n+\t\t\tmy @blast_for_this_gene;\n+\t\t\tif ($current_blast{$gene_id}){\n+\t\t\t\t@blast_for_this_gene = @{$current_blast{$gene_id}};\n+\t\t\t}\t\t\t\n+\t\t\t\n+\t\t\tif ($#blast_for_this_gene<$MAX_BLAST_LINES-1){\n+\t\t\t\tpush(@blast_for_this_gene,$line);\n+\t\t\t\tprint LF $gene_id,"\\n";\n+\t\t\t}\n+\t\t\t$current_blast{$gene_id}=\\@blast_for_this_gene;\n+\t\t}\n+\t}\n+\tclose(B);\n+\tpush (@blast_by_base,\\%current_blast);\n+\tpush (@header,$current_blast_header);\n+}\n+\n+\n+open (OGL,">$output_genes_list_file") or die ("Can\'t open $output_genes_list_file\\n");\n+\n+for (my $i=0;$i<=$#segment_chr;$i++){\n+\tmy $segment_chr = $segment_chr[$i];\n+\tmy $segment_start = $segment_start[$i];\n+\tmy $segment_end = $segment_end[$i];\n+\n+\tprint OGL "#",$segment_chr[$i],":",$segment_start[$i],"..",$segment_end[$i],"\\n";\n+\t\n+\topen(IG, $input_genes_position_file) or die("Can\'t open $input_genes_position_file\\n");\n+\twhile (my $gene_desc=<IG>){\n+\t\tmy @gene_desc = split(/\\s+/,$gene_desc);\n+\t\tif ($#gene_desc != 4){\n+\t\t\tprint STDERR "Error in gene position file format\\n$gene_desc\\n";\n+\t\t\texit(0);\n+\t\t}\n+\t\tmy $gene_id = $gene_desc[0];\n+\t\tmy $cds_id = $gene_desc[1];\n+\t\tmy $gene_chr = $gene_desc[2];\n+\t\tmy $gene_start = $gene_desc[3];\n+\t\tmy $gene_end = $gene_desc[4];\n+\t\tif ($segment_chr eq $gene_chr){\n+\t\t\tif ((($gene_start>=$segment_start)&&($gene_start<=$segment_end))||(($gene_end>=$segment_start)&&($gene_end<=$segment_end))){\n+\t\t\t\tprint OGL $gene_id," / ",$cds_id,"\\n";\n+\t\t\t\t\n+\t\t\t\tfor (my $i=0;$i<=$#blast_by_base;$i++){\n+\t\t\t\t\t#print LF $header[$i]."\\n";\n+\t\t\t\t\tmy %current_blast = %{$blast_by_base[$i]};\n+\t\t\t\t\tif ($current_blast{$cds_id}){\n+\t\t\t\t\t\tmy @blast_by_gene = @{$current_blast{$cds_id}};\n+\t\t\t\t\t\t#print LF 
$#blast_by_gene."\\n";\n+\t\t\t\t\t\tfor (my $j=0;$j<=$#blast_by_gene;$j++){\n+\t\t\t\t\t\t\tmy @fields = split(/\\t/,$blast_by_gene[$j]);\n+\t\t\t\t\t\t\tprint OGL $header[$i],"\\t";\n+\t\t\t\t\t\t\tprint OGL $fields[1],"\\t";\n+\t\t\t\t\t\t\tprint OGL $fields[3],"\\t";\n+\t\t\t\t\t\t\tprint OGL $fields[4],"\\t";\n+\t\t\t\t\t\t\tprint OGL $fields[5],"\\t";\n+\t\t\t\t\t\t\tprint OGL $fields[10],"\\t";\n+\t\t\t\t\t\t\tprint OGL $fields[6],"..",$fields[7],"(",$fields[11],")","\\t";\n+\t\t\t\t\t\t\tprint OGL $fields[8],"..",$fields[9],"(",$fields[12],")","\\t";\n+\t\t\t\t\t\t\tprint OGL $fields[13];\n+\t\t\t\t\t\t}\n+\t\t\t\t\t\tprint OGL "\\n";\n+\t\t\t\t\t}\n+\t\t\t\t\telse {\n+\t\t\t\t\t\tprint OGL $header[$i],"\\t","No BLAST results\\n";\n+\t\t\t\t\t\tprint LF $gene_id," / ",$cds_id,"\\n";\n+\t\t\t\t\t}\n+\t\t\t\t}\n+\t\t\t}\n+\t\t}\n+\n+\t}\n+\tclose(IG);\n+}\n+\n+\n+close (OGL);\n+\n+close (LF);\n+\n' |
b |
diff -r c52e74b98773 -r 8dfa09868059 genephys/GenePhys.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/genephys/GenePhys.xml Fri Oct 24 05:54:20 2014 -0400 |
b |
<tool id="GenePhys" name="GenePhys" version="1.1">
    <description>Extract the genes underlying a genetic segment defined by genetic markers</description>
    <!--
        Both branches pass the marker, gene-position and BLAST inputs plus all four
        output paths. The assembly FASTA and the -extractseq switch are only passed
        when the user asks for the genomic sequence of each segment; without the
        switch GenePhys.pl keeps its built-in default ("NO").
        input_assembly_file is declared inside the "extractseq" conditional, so it
        is referenced through that conditional.
    -->
    <command interpreter="perl">
        #if $extractseq.do_extractseq == "YES"
            GenePhys.pl -extractseq $extractseq.do_extractseq -input_assembly_file $extractseq.input_assembly_file -input_markers_position_file $input_markers_position_file -input_markers_file $input_markers_file -window $window -offset $offset -output_segment_file $output_segment_file -output_fasta_file $output_fasta_file -log_file $log_file -input_blast_files $input_blast_files -max_blast_lines $max_blast_lines -input_genes_position_file $input_genes_position_file -output_genes_list_file $output_genes_list_file
        #else
            GenePhys.pl -input_markers_position_file $input_markers_position_file -input_markers_file $input_markers_file -window $window -offset $offset -output_segment_file $output_segment_file -output_fasta_file $output_fasta_file -log_file $log_file -input_blast_files $input_blast_files -max_blast_lines $max_blast_lines -input_genes_position_file $input_genes_position_file -output_genes_list_file $output_genes_list_file
        #end if
    </command>
    <inputs>
        <param name="input_markers_file" type="data" format="txt" label="Select a suitable input MARKERS file from your history"/>
        <param name="input_markers_position_file" type="data" format="txt" label="Select a suitable input MARKERS POSITION file from your history"/>
        <param name="input_genes_position_file" type="data" format="txt" label="Select a suitable input GENE POSITION file from your history"/>
        <!-- Several datasets may be selected; the script receives them as one comma-separated list. -->
        <param name="input_blast_files" type="data" format="txt" multiple="true" label="Select BLAST files from your history"/>
        <param name="window" type="integer" value="200000" label="Maximum distance between markers of a segment (Threshold for splitting a segment)"/>
        <param name="offset" type="integer" value="100000" label="Additional segment size in 5' and 3' (safety margin)"/>
        <param name="max_blast_lines" type="integer" value="3" label="Select the maximum number of best BLAST matches to show"/>

        <conditional name="extractseq">
            <param name="do_extractseq" type="select" label="Extract genomic segment">
                <option value="NO">NO</option>
                <option value="YES">YES</option>
            </param>
            <when value="YES">
                <param name="input_assembly_file" type="data" format="fasta" label="Select a suitable input ASSEMBLY file from your history"/>
            </when>
            <when value="NO"></when>
        </conditional>
    </inputs>
    <outputs>
        <data name="output_segment_file" format="txt" label="SEGMENT ${tool.name} on ${on_string}"/>
        <data name="output_fasta_file" format="fasta" label="FASTA ${tool.name} on ${on_string}"/>
        <data name="output_genes_list_file" format="txt" label="GENES ${tool.name} on ${on_string}"/>
        <data name="log_file" format="txt" label="LOG ${tool.name} on ${on_string}"/>
    </outputs>

    <help>
**What it does**

Groups the listed markers into genomic segments and reports, for every segment,
the genes it contains together with their best BLAST matches.

Markers on the same chromosome are put in the same segment as long as the
distance to the previous marker does not exceed the *window*. Each segment is
then extended by the *offset* on both sides.

**Inputs**

- MARKERS: marker names separated by whitespace.
- MARKERS POSITION: tab-separated, four columns: name, locus, chromosome, position.
- GENE POSITION: five whitespace-separated columns: gene id, CDS id, chromosome, start, end.
- BLAST files: tabular BLAST summaries keyed by CDS id. A line starting with "##"
  sets the label shown for that file in the gene list.
- ASSEMBLY (optional): FASTA file of the assembly, only needed to extract the
  sequence of each segment.
    </help>
</tool>
b |
diff -r c52e74b98773 -r 8dfa09868059 genephys/MergeBlastResults.pl --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/genephys/MergeBlastResults.pl Fri Oct 24 05:54:20 2014 -0400 |
[ |
b'@@ -0,0 +1,545 @@\n+#!/usr/bin/perl\n+#V1.0.3 header added\n+#V1.0.2 suppressed the final sort (very heavyload) and replaced it by another level of hash\n+#V1.0.1 added log, option parameters\n+use strict;\n+use warnings;\n+use Getopt::Long;\n+\n+my $inputblast;\n+my $outputjoin;\n+my $log_file;\n+my $MAX_OVERLAP_FRACTION = 0.5;\n+my $MAX_OVERLAP_LENGTH_IGNORED = 3;\n+my $VERBOSE = "OFF";\n+my $ALLOWED_GAP_FRACTION_FOR_MERGING = 0.3;\n+my $HEADER ="";\n+\n+GetOptions (\n+"input_blasttab_file=s" => \\$inputblast,\n+"output_joinmatch_file=s" => \\$outputjoin,\n+"log_file=s" => \\$log_file,\n+"header=s" => \\$HEADER,\n+"max_overlap_fraction=f" => \\$MAX_OVERLAP_FRACTION,\n+"max_overlap_length_ignored=i" =>\\$MAX_OVERLAP_LENGTH_IGNORED\n+) or die("Error in command line arguments\\n");\n+\n+open(IB, $inputblast) or die ("Can\'t open $inputblast \\n");\n+open (LF,">$log_file") or die("Can\'t open $log_file\\n");\n+\n+\n+my %match_by_query;\n+\n+my @query_keys;\n+my %querys;\n+\n+\n+my $stats_nb_match=0;\n+my $stats_included=0;\n+my $stats_large_overlapping=0;\n+my %stats_query_coverage;\n+$stats_query_coverage{"0-10%"}=0;\n+$stats_query_coverage{"10-20%"}=0;\n+$stats_query_coverage{"20-30%"}=0;\n+$stats_query_coverage{"30-40%"}=0;\n+$stats_query_coverage{"40-50%"}=0;\n+$stats_query_coverage{"50-60%"}=0;\n+$stats_query_coverage{"60-70%"}=0;\n+$stats_query_coverage{"70-80%"}=0;\n+$stats_query_coverage{"80-90%"}=0;\n+$stats_query_coverage{"90-100%"}=0;\n+\n+my $current_query="";\n+while (my $ligne = <IB>){\n+\tmy @fields = split (/\\t/,$ligne);\n+\tif ($#fields != 9){\n+\t\tprint STDERR "Invalid blasttab format, must have 10 columns\\n";\n+\t\texit(0);\n+\t}\n+\t$stats_nb_match++;\n+\tmy %match;\n+\t$match{"Query"}=$fields[0];\n+\tif (!$querys{$match{"Query"}}){\n+\t\tpush(@query_keys,$match{"Query"});\n+\t\t$querys{$match{"Query"}} = 
1;\n+\t}\n+\t$match{"Subject_id"}=$fields[1];\n+\t$match{"Orientation"}="+";\n+\t$match{"Query_start"}=$fields[2];\n+\t$match{"Query_end"}=$fields[3];\n+\t$match{"Subject_start"}=$fields[4];\n+\t$match{"Subject_end"}=$fields[5];\n+\t\t\t\n+\n+\tif ($fields[2]>$fields[3]){\n+\t\t$match{"Query_start"}=$fields[3];\n+\t\t$match{"Query_end"}=$fields[2];\n+\t\t$match{"Orientation"}="-";\n+\t\t#print "- $ligne";\n+\t}\n+\t\n+\tif ($fields[4]>$fields[5]){\n+\t\t$match{"Subject_start"}=$fields[5];\n+\t\t$match{"Subject_end"}=$fields[4];\n+\t\t$match{"Orientation"}="-";\n+\t\t#print "- $ligne";\n+\t}\n+\t$match{"Similarity"}=$fields[6];\n+\t$match{"Query_length"}=$fields[7];\n+\t$match{"Subject_length"}=$fields[8];\n+\tchomp($fields[9]);\n+\t$match{"Subject"}=$fields[9];\n+\t\n+\t$match{"Ligne"}=$ligne;\n+\t\n+\tmy $querykey = $match{"Query"};\n+\tmy $key = $match{"Query"}."##".$match{"Subject"}."##".$match{"Orientation"};\n+\tif ($match{"Subject_length"}==0){\n+\t\tprint LF "Match 0",$ligne,"\\n",$match{"Subject_length"},"\\n";\n+\t}\t\n+\tmy %match_by_query_and_subject;\n+\tmy @match_table;\n+\n+\tif ($match_by_query{$querykey}){\n+\t\t%match_by_query_and_subject = %{$match_by_query{$querykey}};\n+\t}\n+\tif ($match_by_query_and_subject{$key}){\n+\t\t@match_table=@{$match_by_query_and_subject{$key}};\n+\t}\n+\n+\tpush (@match_table,\\%match);\n+\t$match_by_query_and_subject{$key} = \\@match_table;\n+\t$match_by_query{$querykey}= \\%match_by_query_and_subject;\n+}\n+\n+close (IB);\n+\n+#print LF "NB query : $#query_keys\\n";\n+#foreach my $querykey (sort @query_keys){\n+#\tmy %current_match_by_query_and_subject = %{$match_by_query{$querykey}};\n+#\tforeach my $key (sort {$a cmp $b} keys %current_match_by_query_and_subject){\n+#\t\tmy @current_match_table = sort sortbyquerycoord @{$current_match_by_query_and_subject{$key}};\n+#\t\tfor (my $i=0;$i<=$#current_match_table;$i++){\n+#\t\t\tmy %current_match = %{$current_match_table[$i]};\n+#\t\t\tprint LF 
$current_match{"Ligne"}."\\n";\n+#\t\t}\n+#\t}\n+#}\n+#exit(0);\n+\n+open (OJ, ">$outputjoin") or die ("Can\'t open $outputjoin \\n");\n+print OJ "##",$HEADER,"\\n";\n+print OJ "#Query\\tSubject_Id\\torientation\\tQuery_coverage\\tSubject_coverage\\tIdentity\\tmin_query\\tmax_query\\tmin_subject\\tmax_subject\\tNBmatch\\tq_length\\tsub_length\\tsubject\\n";\n+\t\n+foreach my $querykey (sort @query_keys){\n+\tmy %current_match'..b'elsif($Subject_coverage<0.7){$stats_query_coverage{"60-70%"}++;}\n+\t\telsif($Subject_coverage<0.8){$stats_query_coverage{"70-80%"}++;}\n+\t\telsif($Subject_coverage<0.9){$stats_query_coverage{"80-90%"}++;}\n+\t\telse{$stats_query_coverage{"90-100%"}++;}\n+\n+\t\tif ($VERBOSE eq "ON"){\n+\t\t\tprint LF "Final\\n";\n+\t\t\tprint LF $Query,"\\t",$Subject_Id,"\\t",$orientation,"\\t",$min_query,"\\t",$max_query,"\\t",$min_subject,"\\t",$max_subject,"\\t",$sub_length,"\\t";\n+\t\t\tprint LF "NB:",$nb_match,"\\t","O:",$overlap_length,"\\t","CQ:",$nb_covered_query,"\\t","CS:",$nb_covered_subject,"\\t",$Query_coverage,"\\t",$Subject_coverage,"\\t",$Identity,"\\n";\n+\n+\t\t}\n+\n+\t\tif ($subject=~/^(.*?)\\s*$/){\n+\t\t\t$subject = $1;\n+\t\t}\n+\t\tmy %current_match_joined;\n+\t\t$current_match_joined{"Query"}=$Query;\n+\t\t$current_match_joined{"Query_start"}=$min_query;\n+\t\t$current_match_joined{"Query_end"}=$max_query;\n+\t\t$current_match_joined{"Query_length"}=$q_length;\n+\t\t$current_match_joined{"QCoverage"} = $Query_coverage;\n+\t\t$current_match_joined{"Subject_id"}=$Subject_Id;\n+\t\t$current_match_joined{"Subject"}=$subject;\n+\t\t$current_match_joined{"Subject_start"}=$min_subject;\n+\t\t$current_match_joined{"Subject_end"}=$max_subject;\n+\t\t$current_match_joined{"Subject_length"}=$sub_length;\n+\t\t$current_match_joined{"SCoverage"} = $Subject_coverage;\n+\t\t$current_match_joined{"Similarity"}=$Identity;\n+\t\tmy $NBmatch = 
$nb_match-$overlap_length;\n+\t\t$current_match_joined{"Nbmatch"}=$NBmatch;\n+\t\t$current_match_joined{"Display"}="$Query\\t$Subject_Id\\t$orientation\\t$Query_coverage%\\t$Subject_coverage%\\t$Identity%\\t$min_query\\t$max_query\\t$min_subject\\t$max_subject\\t$NBmatch\\t$q_length\\t$sub_length\\t$subject";\n+\n+\t\tpush(@match_joined,\\%current_match_joined);\n+\t\t#print OJ $match_joined{"Display"},"\\n";\n+\t}\n+\tmy @match_joined_sorted = sort sortbyrelevanceandsubject @match_joined;\n+\tfor (my $i=0;$i<=$#match_joined_sorted;$i++){\n+\t\tmy %match = %{$match_joined_sorted[$i]};\n+\t\tprint OJ $match{"Display"},"\\n";\n+\t}\n+}\n+\n+\n+#my %all_match_joined_best;\n+\n+#foreach my $key (sort sortkey keys %all_match_joined){\n+#\tmy %match = %{$all_match_joined{$key}};\n+#\tprint OJ $match{"Display"},"\\n";\n+#}\n+\n+#close (OB);\n+close (OJ);\n+\n+\n+print LF "Nb query : $#query_keys\\n";\n+print LF "Nb match : $stats_nb_match\\n";\n+print LF "Nb match filtered included / too large overlap : $stats_included / $stats_large_overlapping \\n";\n+print LF "Query coverage\\n";\n+print LF "percent:\\t";\n+foreach my $key (sort {$a cmp $b} keys %stats_query_coverage) {\n+\tprint LF $key,"\\t";\n+}\n+print LF "\\n number :\\t";\n+foreach my $key (sort {$a cmp $b} keys %stats_query_coverage) {\n+\tprint LF $stats_query_coverage{$key},"\\t";\n+}\n+print LF "\\n";\n+\n+\n+close (LF);\n+\n+\n+# for (my $i=0;$i<=$#all_match_joined;$i++){\n+\t# my $match_joined = %{$all_match_joined[$i]};\n+\t# print $match_joined{"Query"},"\\t",$match_joined{"Subject"},"\\t",$match_joined{"Subject_id"},"\\t",$match_joined{"Similarity"},"\\t",$match_joined{"Query_length"},"\\t",$match_joined{"Subject_length"},"\\n";\n+# }\n+\n+\n+sub mysort{\n+\tmy %matcha=%{$a};\n+\tmy %matchb=%{$b};\n+\t\n+\t#print "TEST : ",$matcha{"Query_start"}, " / ", $matchb{"Query_start"},"\\n";\n+\t\n+\t$matcha{"Query_start"} <=> $matchb{"Query_start"}\n+\t||\n+\t$matcha{"Query_end"} <=> 
$matchb{"Query_end"}\n+\t\n+}\n+\n+sub sortbyquerycoord{\n+\tmy %matcha=%{$a};\n+\tmy %matchb=%{$b};\n+\t\n+\t#print "TEST : ",$matcha{"Query_start"}, " / ", $matchb{"Query_start"},"\\n";\n+\t\n+\t$matcha{"Query_start"} <=> $matchb{"Query_start"}\n+\t||\n+\t$matcha{"Query_end"} <=> $matchb{"Query_end"}\n+\t\n+}\n+\n+sub sortbyrelevanceandsubject{\n+\tmy %matcha=%{$a};\n+\tmy %matchb=%{$b};\n+\t\n+\t$matchb{"Nbmatch"} <=> $matcha{"Nbmatch"}\n+\t||\n+\t$matchb{"QCoverage"} <=> $matcha{"QCoverage"}\n+\t||\n+\t$matcha{"Subject"} cmp $matchb{"Subject"}\n+}\n+\n+\n+sub sortkey {\n+\tmy @fieldsa = split (/\\#/,$a);\n+\tmy @fieldsb = split (/\\#/,$b);\n+\t\n+\t#print "$a\\n$b\\n";\n+\t#print $fieldsa[0]," cmp ",$fieldsb[0],"\\n";\n+\t#exit(0);\n+\n+\t$fieldsa[0] cmp $fieldsb[0]\n+\t||\n+\t$fieldsa[1] cmp $fieldsb[1]\n+\t||\n+\t$fieldsa[2] <=> $fieldsb[2]\n+\n+}\n' |
b |
diff -r c52e74b98773 -r 8dfa09868059 genephys/MergeBlastResults.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/genephys/MergeBlastResults.xml Fri Oct 24 05:54:20 2014 -0400 |
b |
<tool id="MergeBlastResults" name="MergeBlastResults" version="1.03">
    <description>Parse Blast result (Tabular) to merge feature</description>
    <!--
        The header is free text and may be empty or contain spaces, so it is quoted:
        unquoted, an empty value leaves the trailing -header option without an
        argument (the script then aborts on its option parsing) and a value with
        spaces is split into several arguments.
    -->
    <command interpreter="perl">
        MergeBlastResults.pl -input_blasttab_file $input_blast -output_joinmatch_file $output_merge -log_file $log_file -max_overlap_fraction $max_overlap_fraction -max_overlap_length_ignored $max_overlap_length_ignored -header "$header"
    </command>
    <inputs>
        <param name="input_blast" type="data" format="txt" label="Select a suitable input BLASTTAB (10 columns) file from your history"/>
        <param name="max_overlap_fraction" type="float" value="0.5" label="Maximum overlap fraction between two matches (otherwise considered as duplicated)"/>
        <param name="max_overlap_length_ignored" type="integer" value="3" label="Maximum overlap length ignored"/>
        <param name="header" type="text" value="" label="Header for the blast file"/>
    </inputs>
    <outputs>
        <data name="output_merge" format="txt" label="${tool.name} MERGE on ${on_string}"/>
        <data name="log_file" format="txt" label="${tool.name} LOG on ${on_string}"/>
    </outputs>

    <help>
**Input**

A tab-separated BLAST table with exactly ten columns: query, subject id,
query start, query end, subject start, subject end, similarity, query length,
subject length, subject description.

**Output**

The first line is "##" followed by the header given above, the second line names
the columns: Query, Subject_Id, orientation, Query_coverage, Subject_coverage,
Identity, min_query, max_query, min_subject, max_subject, NBmatch, q_length,
sub_length, subject.
    </help>
</tool>
b |
diff -r c52e74b98773 -r 8dfa09868059 genephys/extractgenesfromsegment.pl --- a/genephys/extractgenesfromsegment.pl Wed Aug 20 12:42:40 2014 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
#!/usr/bin/perl
# Report the genes located in a list of genomic segments.
#
# Usage:
#   extractgenesfromsegment.pl GENE_XML SEGMENTS OUT_NUC_FASTA OUT_PROT_FASTA > REPORT
#
#   GENE_XML        gene records; a record starts on a line containing <Gene>.
#                   <Id>, <Position>, <CDS_Sequence> and <Protein> are required,
#                   <ATH_Function> and <SId> are optional. Each element must
#                   open and close on the same line.
#   SEGMENTS        one "chr:start..end" segment per line.
#   OUT_NUC_FASTA   CDS sequences of the selected genes (60 residues per line).
#   OUT_PROT_FASTA  protein sequences of the selected genes (60 residues per line).
#   STDOUT          per segment: the segment line, then one tab-separated line
#                   per gene (id, position, function, ATH id).
#
# Fatal parsing errors are written to STDERR and end the script with a
# non-zero status so that the caller can detect the failure.
use strict;
use warnings;

# Annotation key => pattern capturing its value. For each input line only the
# first matching pattern is used.
my @FIELD_PATTERNS = (
	["00 BN_Id",        qr/\<Id\>(.*?)\<\/Id\>/],
	["01 BN_Position",  qr/\<Position\>(.*?)\<\/Position\>/],
	["02 ATH_Function", qr/\<ATH_Function\>(.*?)\<\/ATH_Function\>/],
	["03 ATH_Id",       qr/\<SId\>(.*?)\<\/SId\>/],
	["04 Sequence",     qr/\<CDS_Sequence\>(.*?)\<\/CDS_Sequence\>/], #modif 1.11
	["05 Protein",      qr/\<Protein\>(.*?)\<\/Protein\>/],
);

die("Usage: $0 GENE_XML SEGMENTS OUT_NUC_FASTA OUT_PROT_FASTA\n") if (@ARGV < 4);
my ($input_gene_file, $input_segment_file, $output_seq_nuc, $output_seq_prot) = @ARGV;

open(my $IG, '<', $input_gene_file) or die("Can't open $input_gene_file\n");
open(my $IS, '<', $input_segment_file) or die("Can't open $input_segment_file\n");
open(my $ON, '>', $output_seq_nuc) or die ("Can't open $output_seq_nuc\n");
open(my $OP, '>', $output_seq_prot) or die ("Can't open $output_seq_prot\n");

# --- Load every gene record -------------------------------------------------
my @list_gene;
my @current_gene;
while (my $line = <$IG>){
	# A new <Gene> line closes the record collected so far.
	if (($line =~ /\<Gene\>/) && @current_gene){
		push(@list_gene, extract_annotation(\@current_gene));
		@current_gene = ();
	}
	push(@current_gene, $line);
}
close($IG);

# The last record is not followed by another <Gene> line: flush it here,
# otherwise the final gene of the file would be silently lost.
if (@current_gene && ($current_gene[0] =~ /\<Gene\>/)){
	push(@list_gene, extract_annotation(\@current_gene));
}

# --- Report the genes of each segment ---------------------------------------
while (my $line = <$IS>){
	print "\n$line";
	my ($chr, $start, $stop) = $line =~ /(.*?)\:(\d+)\.\.(\d+)/;
	if (!defined($stop)){
		# Not fatal: the line is reported and the next segment is processed.
		print "Error Parsing n°2 : $line\n";
		next;
	}

	my @list_gene_selected = @{extract_gene_from_position($chr, $start, $stop, \@list_gene)};
	if (!@list_gene_selected){
		print "  NO GENE FOUND\n";
		next;
	}

	foreach my $gene (@list_gene_selected){
		# Function and ATH id are optional, print them as empty when absent.
		print join("\t", map { or_empty($gene->{$_}) } ("00 BN_Id", "01 BN_Position", "02 ATH_Function", "03 ATH_Id")), "\n";

		my $fasta_header = ">" . $gene->{"01 BN_Position"} . " (" . $gene->{"00 BN_Id"} . ")";
		print $ON $fasta_header, "\n", wrap_residues($gene->{"04 Sequence"}, qr/[ATGNCXatgcnx]/), "\n";
		print $OP $fasta_header, "\n", wrap_residues($gene->{"05 Protein"}, qr/[A-Za-z\*\+]/), "\n";
	}
}

close($IS);
close($ON);
close($OP);


# Return the value, or an empty string when it is undefined.
sub or_empty{
	my $value = shift;
	return defined($value) ? $value : "";
}

# Keep only the characters of $raw matching $allowed (a one-character pattern)
# and return them folded into lines of 60, without a trailing newline.
sub wrap_residues{
	my ($raw, $allowed) = @_;
	my $clean = join("", $raw =~ /($allowed)/g);
	return join("\n", $clean =~ /(.{1,60})/g);
}

# Build the annotation hash of one gene record.
#   $ref : reference to the array of lines of the record
# Returns a hash reference keyed by "00 BN_Id" ... "05 Protein".
# Exits with status 1 when the id, position, CDS sequence or protein is missing.
sub extract_annotation{
	my $ref = shift;
	my @gene = @$ref;
	my %gene_annotation;

	foreach my $gene_line (@gene){
		foreach my $field (@FIELD_PATTERNS){
			my ($key, $pattern) = @$field;
			if ($gene_line =~ $pattern){
				$gene_annotation{$key} = $1;
				last;
			}
		}
	}

	if ((!$gene_annotation{"00 BN_Id"})||(!$gene_annotation{"01 BN_Position"})||(!$gene_annotation{"04 Sequence"})||(!$gene_annotation{"05 Protein"})){
		print STDERR "Erreur Parsing n°3\n";
		print STDERR "Id :",or_empty($gene_annotation{"00 BN_Id"}),"\n";
		print STDERR "Position : ",or_empty($gene_annotation{"01 BN_Position"}),"\n";
		print STDERR "ATH Function : ",or_empty($gene_annotation{"02 ATH_Function"}),"\n";
		print STDERR "ATH Id : ",or_empty($gene_annotation{"03 ATH_Id"}),"\n";
		print STDERR "CDS seq : ",or_empty($gene_annotation{"04 Sequence"}),"\n";
		print STDERR "CDS prot : ",or_empty($gene_annotation{"05 Protein"}),"\n";
		print STDERR $_,"\n" foreach (@gene);
		exit(1);
	}

	return \%gene_annotation;
}

# Select the genes overlapping a segment.
#   $chr, $start, $end : segment chromosome and 1-based inclusive bounds
#   $ref               : reference to the array of gene annotation hashes
# Returns a reference to the array of selected annotation hashes.
# Exits with status 1 when a gene position cannot be parsed.
sub extract_gene_from_position{
	my ($chr, $start, $end, $ref) = @_;
	my @list_gene_selected;

	foreach my $gene (@$ref){
		my $current_position = $gene->{"01 BN_Position"};

		# Position extraction
		my ($current_chr, $current_start, $current_end) = $current_position =~ /^(.*?)\:(\d+)[\.]+(\d+)/; # modif 1.11
		if (!defined($current_end)){
			print STDERR "Erreur Parsing n°1\npos : $current_position\n";
			exit(1);
		}
		# Genes on the reverse strand are given as end..start.
		if ($current_start > $current_end){
			($current_start, $current_end) = ($current_end, $current_start);
		}

		# Selection test: any overlap, including a gene spanning the whole segment.
		next if ($chr ne $current_chr);
		if (($current_start <= $end) && ($current_end >= $start)){
			push(@list_gene_selected, $gene);
		}
	}
	return \@list_gene_selected;
}
b |
diff -r c52e74b98773 -r 8dfa09868059 genephys/extractgenesfromsegment.xml --- a/genephys/extractgenesfromsegment.xml Wed Aug 20 12:42:40 2014 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
b |
<tool id="extractgenesfromsegment" name="extractgenesfromsegment" version="0.01">
    <description>Extract gene sequence (nucleic, proteic) and function</description>
    <!--
        Positional arguments: gene XML, segment list, nucleic FASTA output,
        proteic FASTA output. Whatever the script prints on standard output
        is stored as the function output.
    -->
    <command interpreter="perl">
        extractgenesfromsegment.pl $input_genexml $input_segment $output_gene_nuc $output_gene_prot > $output_gene_function
    </command>
    <inputs>
        <param name="input_genexml"
               type="data"
               format="xml"
               label="Select a suitable input GENEXML file from your history"/>
        <param name="input_segment"
               type="data"
               format="txt"
               label="Select a suitable input SEGMENT file from your history"/>
    </inputs>
    <outputs>
        <data name="output_gene_nuc"
              format="fasta"
              label="${tool.name} NUC on ${on_string}"/>
        <data name="output_gene_prot"
              format="fasta"
              label="${tool.name} PROT on ${on_string}"/>
        <data name="output_gene_function"
              format="txt"
              label="${tool.name} FUNCTION on ${on_string}"/>
    </outputs>

    <help>

    </help>
</tool>
b |
diff -r c52e74b98773 -r 8dfa09868059 genephys/extractgenomicsegment.pl --- a/genephys/extractgenomicsegment.pl Wed Aug 20 12:42:40 2014 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
#!/usr/bin/perl
#V1.10
# Compute the genomic segments containing a list of genetic markers.
#
# Usage:
#   extractgenomicsegment.pl GENETIC_MAP MARKERS [WINDOW [OFFSET]] > SEGMENTS
#
#   GENETIC_MAP  tab-separated map; the columns used (0-based) are
#                7 = marker name, 8 = locus, 19 = chromosome, 20 = position.
#   MARKERS      marker (or locus) names separated by whitespace.
#   WINDOW       maximum distance between two consecutive markers of the same
#                segment (default 200000).
#   OFFSET       extension added on both sides of each segment (default 100000).
#   STDOUT       one "chr:start..end" line per segment, nothing else.
#
# Markers that are unknown or have an unusable position are reported on STDERR.
use strict;
use warnings;

die("Usage: $0 GENETIC_MAP MARKERS [WINDOW [OFFSET]]\n") if (@ARGV < 2);
my ($inputfile1, $inputfile2) = @ARGV;

# Only fall back to the defaults when the argument is really absent, so that
# an explicit 0 (e.g. no offset) is honoured.
my $WINDOW = (defined($ARGV[2]) && $ARGV[2] ne "") ? $ARGV[2] : 200000;
my $OFFSET = (defined($ARGV[3]) && $ARGV[3] ne "") ? $ARGV[3] : 100000;

open(my $IF1, '<', $inputfile1) or die("Can't open $inputfile1\n");
open(my $IF2, '<', $inputfile2) or die("Can't open $inputfile2\n");

my @list_marquer;
my %chr;
my %position;

# --- Genetic map: chromosome and position of every marker / locus -----------
while (my $line = <$IF1>){
	my @cols = split(/\t/, $line);
	# Rows without a position column cannot describe a marker.
	next if (@cols < 21);

	my ($Name, $Locus, $Chr, $Position) = @cols[7, 8, 19, 20];
	$Position =~ s/\s+//g;

	$chr{$Name} = $Chr;
	$position{$Name} = $Position;

	### Modification 1.10 : a marker can also be requested by its locus name
	if ($Locus ne $Name){
		$chr{$Locus} = $Chr;
		$position{$Locus} = $Position;
	}
	###
}
close($IF1);

# --- Requested markers ------------------------------------------------------
while (my $line = <$IF2>){
	# split(' ') ignores leading whitespace, so no empty name is produced.
	push(@list_marquer, split(' ', $line));
}
close($IF2);

# --- Numeric positions grouped by chromosome --------------------------------
my %coord_by_chr;
foreach my $current_name (@list_marquer){
	my $current_position = $position{$current_name};

	if (!defined($current_position)){
		print STDERR "Error Parsing $current_name\tmarker not found in genetic map\n";
	}
	elsif ($current_position =~ /^\d+$/){
		push(@{$coord_by_chr{$chr{$current_name}}}, $current_position);
	}
	elsif (($current_position eq "-")||($current_position =~ /none/i)){
		# Marker without a physical position: nothing to add.
	}
	else {
		# Reported on STDERR only: STDOUT must contain segment lines only.
		print STDERR "Error Parsing $current_name\tposition not recognized : $current_position \n";
	}
}

# --- Segments ---------------------------------------------------------------
foreach my $key (sort keys %coord_by_chr){
	my @tbl_coord = sort { $a <=> $b } @{$coord_by_chr{$key}};
	my $current_start = $tbl_coord[0];
	my $current_stop  = $tbl_coord[0];

	foreach my $coord (@tbl_coord){
		# A gap larger than the window closes the current segment.
		if ($coord > $current_stop + $WINDOW){
			print_segment($key, $current_start, $current_stop);
			$current_start = $coord;
		}
		$current_stop = $coord;
	}
	# Last segment of the chromosome
	print_segment($key, $current_start, $current_stop);
}


# Print one segment extended by $OFFSET on both sides; the start never goes
# below 1. The end is not limited here because the chromosome length is not
# known to this script.
sub print_segment{
	my ($name, $start, $stop) = @_;
	my $start_offset = ($start > $OFFSET) ? $start - $OFFSET : 1;
	my $stop_offset  = $stop + $OFFSET;
	print $name,":",$start_offset,"..",$stop_offset,"\n";
}
b |
diff -r c52e74b98773 -r 8dfa09868059 genephys/extractgenomicsegment.xml --- a/genephys/extractgenomicsegment.xml Wed Aug 20 12:42:40 2014 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
b |
@@ -1,21 +0,0 @@ -<tool id="extractgenomicsegment" name="extractgenomicsegment" version="0.01"> -<description>Extract the coordinate of genomic segment containing the genetic markers</description> -<command interpreter="perl"> - extractgenomicsegment.pl $input_geneticmap $input_markers $window $offset > $output_file -</command> -<inputs> -<param name="input_markers" type="data" format="txt" label="Select a suitable input MARKERS file from your history"/> -<param name="input_geneticmap" type="data" format="txt" label="Select a suitable input GENETIC MAP file from your history"/> -<param name="window" type="integer" value="200000" label="Maximum distance between markers of a segment (Threshold for splitting a segment)"/> -<param name="offset" type="integer" value="100000" label="Additionnal segment size in 5' and 3' (security marging)"/> -</inputs> -<outputs> - <data name="output_file" format="fasta" label="${tool.name} on ${on_string}"/> -</outputs> - -<help> - - - -</help> -</tool> |
b |
diff -r c52e74b98773 -r 8dfa09868059 genephys/extractgenomicsequencefromsegment.pl --- a/genephys/extractgenomicsequencefromsegment.pl Wed Aug 20 12:42:40 2014 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
@@ -1,90 +0,0 @@ -#!/usr/bin/perl -#V1.10 -my $inputsegment = $ARGV[0]; -my $inputfasta = $ARGV[1]; - -open(IS, $inputsegment) or die ("Can't open $inputsegment\n"); -open(IF, $inputfasta) or die ("Can't open $inputfasta\n"); - - -my @header; -my @start; -my @end; -my @segment_header; - -while (my $ligne = <IS>){ - if ($ligne=~/(.*?):(\d+)\.+(\d+)/){ - push (@header,$1); - push (@start,$2); - push (@end,$3); - push (@segment_header,$1.":".$2."..".$3); - } -} - -close (IS); - -#print "TEST : $#header\n"; - -my %genome; - -my $current_header; -my $current_seq=""; -while (my $ligne = <IF>){ - if ($ligne =~ /^\>(.*?)\s*$/){ - if ($current_header){ - $genome{$current_header} = $current_seq; - } - - # my $length = length($current_seq); - # print "TEST : $current_header\t$length\n"; - # print "TEST : $current_header\n"; - $current_header=$1; - $current_seq = ""; - $current_position=0; - } - else { - if ($ligne=~/^([ATGCNXatgcnx]+)\s*$/){ - $current_seq .= $1; - } - else { - print STDERR "Erreur Parsing n°1\n$ligne\n"; - } - } -} - -#TRAITEMENT DU DERNIER -if ($current_header){ - $genome{$current_header} = $current_seq; - undef($current_seq); -} - -# foreach my $key (keys %genome){ - # print $key,"\t",length($genome{$key}),"\n"; -# } - -for (my $i=0;$i<=$#header;$i++){ - my $compt=0; - my $current_seq=""; - print ">",$header[$i],":",$start[$i],"..",$end[$i],"\n"; - ### Modification 1.10 - if ($end[$i]>length($genome{$header[$i]})){ - $end[$i] = length($genome{$header[$i]}); - } - ### - - my @SEQ = split(//,$genome{$header[$i]}); - for (my $coord = $start[$i]-1; $coord<=$end[$i]-1;$coord++){ - $compt++; - # print "TEST : $compt\n"; - if ($compt > 60 ){ - $current_seq .= "\n"; - $compt=1; - } - $current_seq .= $SEQ[$coord]; - - } - print "$current_seq\n"; -} - -close (IF); - |
b |
diff -r c52e74b98773 -r 8dfa09868059 genephys/extractgenomicsequencefromsegment.xml --- a/genephys/extractgenomicsequencefromsegment.xml Wed Aug 20 12:42:40 2014 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
b |
@@ -1,19 +0,0 @@ -<tool id="extractgenomicsequencefromsegment" name="extractgenomicsequencefromsegment" version="0.01"> -<description>Extract the genomic sequence corresponding to a genomic segment (format : chr:start..stop)</description> -<command interpreter="perl"> - extractgenomicsequencefromsegment.pl $input_segment $input_assembly > $output_file -</command> -<inputs> -<param name="input_segment" type="data" format="txt" label="Select a suitable input SEGMENT file from your history"/> -<param name="input_assembly" type="data" format="fasta" label="Select a suitable input ASSEMBLY file from your history"/> -</inputs> -<outputs> - <data name="output_file" format="fasta" label="${tool.name} on ${on_string}"/> -</outputs> - -<help> - - - -</help> -</tool> |
b |
diff -r c52e74b98773 -r 8dfa09868059 genephys/fastaGroomerForMakeBlastdb.pl --- a/genephys/fastaGroomerForMakeBlastdb.pl Wed Aug 20 12:42:40 2014 -0400 +++ b/genephys/fastaGroomerForMakeBlastdb.pl Fri Oct 24 05:54:20 2014 -0400 |
[ |
@@ -1,4 +1,5 @@ #!/usr/bin/perl +#V1.0.0 my $inputfasta = $ARGV[0]; open(IB, $inputfasta) or die ("Can't open $inputfasta \n"); |
b |
diff -r c52e74b98773 -r 8dfa09868059 genephys/fastaGroomerForMakeBlastdb.xml --- a/genephys/fastaGroomerForMakeBlastdb.xml Wed Aug 20 12:42:40 2014 -0400 +++ b/genephys/fastaGroomerForMakeBlastdb.xml Fri Oct 24 05:54:20 2014 -0400 |
b |
@@ -1,4 +1,4 @@ -<tool id="fastaGroomerForMakeBlastdb" name="fastaGroomerForMakeBlastdb" version="0.01"> +<tool id="fastaGroomerForMakeBlastdb" name="fastaGroomerForMakeBlastdb" version="1.00"> <description>fasta Groomer For MakeBlastdb</description> <command interpreter="perl"> fastaGroomerForMakeBlastdb.pl $input_fasta > $output_fasta |
b |
diff -r c52e74b98773 -r 8dfa09868059 genephys/mergeAllBestBlast.pl --- a/genephys/mergeAllBestBlast.pl Wed Aug 20 12:42:40 2014 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
@@ -1,47 +0,0 @@ -#!/usr/bin/perl -my $inputblastn = $ARGV[0]; -my $inputtblastx = $ARGV[1]; -my $inputblastx = $ARGV[2]; -my $inputblastp = $ARGV[3]; - -open(IN, $inputblastn) or die ("Can't open $inputblastn \n"); -open(ITX, $inputtblastx) or die ("Can't open $inputtblastx \n"); -open(IX, $inputblastx) or die ("Can't open $inputblastx \n"); -open(IP, $inputblastp) or die ("Can't open $inputblastp \n"); - -my %blastx; -my %tblastx; -my %blastp; - -while (my $ligne = <ITX>){ - my @fields = split (/\t/,$ligne); - chomp($ligne); - $tblastx{$fields[0]} = $ligne; -} -close (ITX); - -while (my $ligne = <IX>){ - my @fields = split (/\t/,$ligne); - chomp($ligne); - $blastx{$fields[0]} = $ligne; -} -close (IX); - -while (my $ligne = <IP>){ - my @fields = split (/\t/,$ligne); - chomp($ligne); - $blastp{$fields[0]} = $ligne; -} -close (IP); - - -while (my $ligne = <IN>){ - my @fields = split (/\t/,$ligne); - my $query = $fields[0]; - print "BLASTN\t$ligne"; - print "TBLASTX\t",$tblastx{$query},"\n"; - print "BLASTX\t",$blastx{$query},"\n"; - print "BLASTP\t",$blastp{$query},"\n\n"; - -} -close (IN); |
b |
diff -r c52e74b98773 -r 8dfa09868059 genephys/mergeAllBestBlast.xml --- a/genephys/mergeAllBestBlast.xml Wed Aug 20 12:42:40 2014 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
b |
@@ -1,19 +0,0 @@ -<tool id="mergeAllBestBlast" name="mergeAllBestBlast" version="0.01"> -<description>Merge best results from Blast</description> -<command interpreter="perl"> - mergeAllBestBlast.pl $input_blastn $input_tblastx $input_blastx $input_blastp > $output_results -</command> -<inputs> - <param name="input_blastn" type="data" format="txt" label="Select a suitable input BEST BLASTN file from your history"/> - <param name="input_tblastx" type="data" format="txt" label="Select a suitable input BEST TBLASTX file from your history"/> - <param name="input_blastx" type="data" format="txt" label="Select a suitable input BEST BLASTX file from your history"/> - <param name="input_blastp" type="data" format="txt" label="Select a suitable input BEST BLASTP file from your history"/> -</inputs> -<outputs> - <data name="output_results" format="txt" label="${tool.name} on ${on_string}"/> -</outputs> - -<help> - -</help> -</tool> |
b |
diff -r c52e74b98773 -r 8dfa09868059 genephys/parseblasttab.pl --- a/genephys/parseblasttab.pl Wed Aug 20 12:42:40 2014 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
b'@@ -1,511 +0,0 @@\n-#!/usr/bin/perl\n-my $inputblast = $ARGV[0];\n-my $outputjoin = $ARGV[1];\n-my $outputbest = $ARGV[2];\n-open(IB, $inputblast) or die ("Can\'t open $inputblast \\n");\n-open (OJ, ">$outputjoin") or die ("Can\'t open $outputjoin \\n");\n-open (OB, ">$outputbest") or die ("Can\'t open $outputbest \\n");\n-\n-my %all_match;\n-my @all_match_joined;\n-\n-\n-my $MAX_OVERLAP_FRACTION = 0.5;\n-my $MAX_OVERLAP_LENGTH_IGNORED = 3;\n-\n-\n-while (my $ligne = <IB>){\n-\tmy @fields = split (/\\t/,$ligne);\n-\tmy %match;\n-\t$match{"Query"}=$fields[0];\n-\t$match{"Subject_id"}=$fields[1];\n-\t$match{"Subject_start"}=$fields[8];\n-\t$match{"Subject_end"}=$fields[9];\n-\t$match{"Similarity"}=$fields[13];\n-\t$match{"Query_length"}=$fields[14];\n-\t$match{"Subject_length"}=$fields[15];\n-\t$match{"Subject"}=$fields[16];\n-\t\n-\tif ($fields[6]<=$fields[7]){\n-\t\t$match{"Query_start"}=$fields[6];\n-\t\t$match{"Query_end"}=$fields[7];\n-\t\t$match{"Orientation"}="+";\n-\t\t#print "+ $ligne";\n-\t}\n-\telse {\n-\t\t$match{"Query_start"}=$fields[7];\n-\t\t$match{"Query_end"}=$fields[6];\n-\t\t$match{"Orientation"}="-";\n-\t\t#print "- $ligne";\n-\t}\n-\t\n-\tif ($fields[9]<=$fields[8]){\n-\t\t$match{"Subject_start"}=$fields[9];\n-\t\t$match{"Subject_end"}=$fields[8];\n-\t\t$match{"Orientation"}="+";\n-\t\t#print "+ $ligne";\n-\t}\n-\telse {\n-\t\t$match{"Subject_start"}=$fields[8];\n-\t\t$match{"Subject_end"}=$fields[9];\n-\t\t$match{"Orientation"}="-";\n-\t\t#print "- $ligne";\n-\t}\n-\t\n-\t$match{"Ligne"}=$ligne;\n-\tmy $key = $match{"Query"}."##".$match{"Subject"}."##".$match{"Orientation"};\n-\tif ($match{"Subject_length"}==0){\n-\t\tprint $ligne,"\\n",$match{"Subject_length"},"\\n";\n-\t}\t\n-\tmy @match_table;\n-\n-\tif ($all_match{$key}){\n-\t\t@match_table = @{$all_match{$key}};\n-\t}\n-\tpush (@match_table,\\%match);\n-\t$all_match{$key} = \\@match_table;\n-}\n-\n-foreach my $key (keys %all_match){\n-\tmy @match_table = @{$all_match{$key}};\n-\t#### 
Sort\n-\t@match_table = sort mysort @match_table;\n-\t\n-\t\n-\tmy @duplicate;\n-\tmy @overlap;\n-\tfor (my $i=0;$i<=$#match_table;$i++){\n-\t\tpush (@duplicate,0);\n-\t}\n-\tprint "\\nTable Match ($#match_table)\\n";\n-\tfor (my $i=0;$i<=$#match_table;$i++){\n-\t\tmy %match=%{$match_table[$i]};\n-\t\tprint $match{"Query"},"\\t",$match{"Subject_id"},"\\t",$match{"Orientation"},"\\t",$match{"Query_start"},"\\t",$match{"Query_end"},"\\t";\n-\t\tprint $match{"Subject_start"},"\\t",$match{"Subject_end"},"\\t",$match{"Subject_length"},"\\t",$match{"Similarity"},"\\n";\n-\t}\n-\t\n-\t#Scan d\'inclusion strict\n-\tfor (my $i=0;$i<=$#match_table;$i++){\n-\t\tmy %match1=%{$match_table[$i]};\n-\t\tfor (my $j=0;$j<=$#match_table;$j++){\n-\t\t\tif (($j != $i)&&($duplicate[$j]==0)){ # On scan dans les deux sens, pas seuelment $j = $i+1 a cause du last;\n-\t\t\t\tmy %match2=%{$match_table[$j]};\n-\t\t\t\t# Inclus Subject\n-\t\t\t\tif (($match1{"Subject_start"}>=$match2{"Subject_start"})&&($match1{"Subject_end"}<=$match2{"Subject_end"}))\n-\t\t\t\t{\n-\t\t\t\t\t$duplicate[$i]=1;\n-\t\t\t\t\t# print $i," : 1 : ",$match1{"Query"},"\\t",$match1{"Subject_id"},"\\t",$match1{"Query_start"},"\\t",$match1{"Query_end"},"\\t",$match1{"Subject_start"},"\\t",$match1{"Subject_end"},"\\n";\n-\t\t\t\t\t# print $j," : 1 : ",$match2{"Query"},"\\t",$match2{"Subject_id"},"\\t",$match2{"Query_start"},"\\t",$match2{"Query_end"},"\\t",$match2{"Subject_start"},"\\t",$match2{"Subject_end"},"\\n";\n-\t\n-\t\t\t\t\tlast;\n-\t\t\t\t}\n-\t\t\t\t# Inclus Query\n-\t\t\t\telsif (($match1{"Query_start"}>=$match2{"Query_start"})&&($match1{"Query_end"}<=$match2{"Query_end"}))\n-\t\t\t\t{\n-\t\t\t\t\t$duplicate[$i]=2;\n-\t\t\t\t\t# print $i," : 2 : ",$match1{"Query"},"\\t",$match1{"Subject_id"},"\\t",$match1{"Query_start"},"\\t",$match1{"Query_end"},"\\t",$match1{"Subject_start"},"\\t",$match1{"Subject_end"},"\\n";\n-\t\t\t\t\t# print $j," : 2 : 
",$match2{"Query"},"\\t",$match2{"Subject_id"},"\\t",$match2{"Query_start"},"\\t",$match2{"Query_end"},"\\t",$match2{"Subject_start"},"\\t",$match2{"Subject_end"},"\\n";\n-\t\t\t\t\tlast;\n-\t\t\t\t}\n-\n-\t\t\t}\n-\t\t}\n-\t}\n-\t\n-\tmy @match_table_filtered;\n-\tfor (my $i=0;$i<=$#match_table;$i++){\n-\t\tif ($duplicate[$i] == 0){\n-\t\t\tpush (@match_table_filtered,$match_table[$i]);\n-\t\t}\n-\t}\n-\t\n-\tif ($#match_table > $#m'..b'ntf("%.2f",$nb_covered_subject*100/$sub_length);\n-\t$Query_coverage = sprintf("%.2f",$nb_covered_query*100/$q_length);\n-\t\n-\tprint "Final\\n";\n-\tprint $Query,"\\t",$Subject_Id,"\\t",$orientation,"\\t",$min_query,"\\t",$max_query,"\\t",$min_subject,"\\t",$max_subject,"\\t",$sub_length,"\\t";\n-\tprint "NB:",$nb_match,"\\t","O:",$overlap_length,"\\t","CQ:",$nb_covered_query,"\\t","CS:",$nb_covered_subject,"\\t",$Query_coverage,"\\t",$Subject_coverage,"\\t",$Identity,"\\n";\n-\n-\tif ($subject=~/^(.*?)\\s*$/){\n-\t\t$subject = $1;\n-\t}\n-\t\n-\tmy %match_joined;\n-\t$match_joined{"Query"}=$Query;\n-\t$match_joined{"Query_start"}=$min_query;\n-\t$match_joined{"Query_end"}=$max_query;\n-\t$match_joined{"Query_length"}=$q_length;\n-\t$match_joined{"QCoverage"} = $Query_coverage;\n-\t$match_joined{"Subject_id"}=$Subject_Id;\n-\t$match_joined{"Subject"}=$subject;\n-\t$match_joined{"Subject_start"}=$min_subject;\n-\t$match_joined{"Subject_end"}=$max_subject;\n-\t$match_joined{"Subject_length"}=$sub_length;\n-\t$match_joined{"SCoverage"} = $Subject_coverage;\n-\t$match_joined{"Similarity"}=$Identity;\n-\t$match_joined{"Nbmatch"}=$nb_match-$overlap_length;\n-\t$match_joined{"Display"}="$Query\\t$Subject_Id\\t$orientation\\t$Query_coverage%\\t$Subject_coverage%\\t$Identity%\\t$min_query\\t$max_query\\t$min_subject\\t$max_subject\\t$q_length\\t$sub_length\\t$subject";\n-\t\n-\tmy $chr;\n-\tmy $start;\n-\tmy $end;\n-\t\n-\tif ($match_joined{"Query"}=~/(.*?)\\:(\\d+)[\\.]+(\\d+)/){\n-\t\t$chr =$1;\n-\t\t$start = $2;\n-\t\t$end = 
$3;\n-\t\t\n-\t}\n-\telse {\n-\t\tprint "Error Parsing Query : ",$match_joined{"Query"},"\\n";\n-\t\texit(0);\n-\t}\n-\t\n-\tmy $subid = $match_joined{"Subject_id"};\n-\tmy $nb = $nb_match-$overlap_length;\n-\t\n-\tmy $key = "$chr#$start#$end#$nb#$subid";\n-\t$all_match_joined{$key} = \\%match_joined;\n-\t\n-\t\n-\t# my %match_joined;\n-\t# my $nb_covered=0;\n-\t# my $length=0;\n-\t# for (my $i=0;$i<=$#match_table;$i++){\n-\t\t# my %match=%{$match_table[$i]};\n-\t\t# $nb_covered+=$match{"Similarity"};\n-\t\t# $length = $match{"Subject_length"}\n-\t# }\n-\t# # if ($match{"Subject_length"} == 0){\n-\t\t# # print $key,"\\n",$match{"Ligne"},"\\n",$match{"Subject"},"\\n";\n-\t\t# # exit(0);\n-\t# # }\n-\t# my $similarity = sprintf("%.2f",$nb_covered / $length);\n-\t\n-\t# print "TEST : ",$key,"\\t",$similarity,"\\t",$nb_covered,"\\t",$length,"\\n";\n-\t\n-\t\n-\t# if ($similarity > 1){\n-\t\t# for (my $i=0;$i<=$#match_table;$i++){\n-\t\t\t# my %match=%{$match_table[$i]};\n-\t\t\t# print "----- : ",$match{"Ligne"},"\\n";\n-\t\t\t# exit(0);\n-\t\t# }\t\n-\t# }\n-\t# $match_joined{"Query"}=$match{"Query"};\n-\t# $match_joined{"Subject"}=$match{"Subject"};\n-\t# $match_joined{"Subject_id"}=$match{"Subject_id"};\n-\t# $match_joined{"Similarity"}=$similarity;\n-\t# $match_joined{"Query_length"}=$match{"Query_length"};\n-\t# $match_joined{"Subject_length"}=$match{"Subject_length"};\n-\t# push(@all_match_joined,\\%match_joined);\n-\n-}\n-\n-close (IB);\n-\n-my %all_match_joined_best;\n-foreach my $key (sort sortkey keys %all_match_joined){\n-\tmy %match = %{$all_match_joined{$key}};\n-\tprint OJ $match{"Display"},"\\n";\n-\tmy $shortkey = $match{"Query"};\n-\tif ($all_match_joined_best{$shortkey}){\n-\t}\n-\telse {\n-\t\t$all_match_joined_best{$shortkey} = \\%match;\n-\t\tprint OB $match{"Display"},"\\n";\n-\t}\n-\t\n-}\n-\n-# for (my $i=0;$i<=$#all_match_joined;$i++){\n-\t# my $match_joined = %{$all_match_joined[$i]};\n-\t# print 
$match_joined{"Query"},"\\t",$match_joined{"Subject"},"\\t",$match_joined{"Subject_id"},"\\t",$match_joined{"Similarity"},"\\t",$match_joined{"Query_length"},"\\t",$match_joined{"Subject_length"},"\\n";\n-# }\n-\n-\n-sub mysort{\n-\tmy %matcha=%{$a};\n-\tmy %matchb=%{$b};\n-\t\n-\t#print "TEST : ",$matcha{"Query_start"}, " / ", $matchb{"Query_start"},"\\n";\n-\t\n-\t$matcha{"Query_start"} <=> $matchb{"Query_start"}\n-\t||\n-\t$matcha{"Query_end"} <=> $matchb{"Query_end"}\n-\t\n-}\n-\n-sub sortkey {\n-\tmy @fieldsa = split (/\\#/,$a);\n-\tmy @fieldsb = split (/\\#/,$b);\n-\t\n-\t#print "$a\\n$b\\n";\n-\t#print $fieldsa[0]," cmp ",$fieldsb[0],"\\n";\n-\t#exit(0);\n-\n-\t$fieldsa[0] cmp $fieldsb[0]\n-\t||\n-\t$fieldsa[1] <=> $fieldsb[1]\n-\t||\n-\t$fieldsb[2] <=> $fieldsa[2]\n-\t||\n-\t$fieldsb[3] <=> $fieldsa[3]\n-}\n' |
b |
diff -r c52e74b98773 -r 8dfa09868059 genephys/parseblasttab.xml --- a/genephys/parseblasttab.xml Wed Aug 20 12:42:40 2014 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
b |
@@ -1,17 +0,0 @@ -<tool id="parseblasttab" name="parseblasttab" version="0.01"> -<description>Parse Blast result (Tabular) to merge feature</description> -<command interpreter="perl"> - parseblasttab.pl $input_blast $output_merge $output_best -</command> -<inputs> - <param name="input_blast" type="data" format="txt" label="Select a suitable input BLASTTAB () file from your history"/> -</inputs> -<outputs> - <data name="output_merge" format="txt" label="${tool.name} MERGE on ${on_string}"/> - <data name="output_best" format="txt" label="${tool.name} BEST on ${on_string}"/> -</outputs> - -<help> - -</help> -</tool> |
b |
diff -r c52e74b98773 -r 8dfa09868059 genephys/repository_dependencies.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/genephys/repository_dependencies.xml Fri Oct 24 05:54:20 2014 -0400 |
b |
@@ -0,0 +1,4 @@ +<?xml version="1.0"?> +<repositories description="GenePhys repository"> + <repository changeset_revision="623f727cdff1" name="ncbi_blast_plus" owner="devteam" toolshed="http://toolshed.g2.bx.psu.edu" /> +</repositories> |