# HG changeset patch # User mcharles # Date 1408018326 14400 # Node ID ad321ff1b67da995e577ef6f5936c33c31c0d012 # Parent 1d37c16ab67423ed12a7c33a9b46eba99d450e32 Uploaded diff -r 1d37c16ab674 -r ad321ff1b67d rapsodyn/Galaxy-Workflow-Pipeline_v0.1.ga --- a/rapsodyn/Galaxy-Workflow-Pipeline_v0.1.ga Tue Aug 12 08:41:47 2014 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,491 +0,0 @@ -{ - "a_galaxy_workflow": "true", - "annotation": "", - "format-version": "0.1", - "name": "Pipeline v0.1", - "steps": { - "0": { - "annotation": "", - "id": 0, - "input_connections": {}, - "inputs": [ - { - "description": "", - "name": "Input Dataset" - } - ], - "name": "Input dataset", - "outputs": [], - "position": { - "left": 200, - "top": 310 - }, - "tool_errors": null, - "tool_id": null, - "tool_state": "{\"name\": \"Input Dataset\"}", - "tool_version": null, - "type": "data_input", - "user_outputs": [] - }, - "1": { - "annotation": "", - "id": 1, - "input_connections": {}, - "inputs": [ - { - "description": "", - "name": "Input Dataset" - } - ], - "name": "Input dataset", - "outputs": [], - "position": { - "left": 205, - "top": 430 - }, - "tool_errors": null, - "tool_id": null, - "tool_state": "{\"name\": \"Input Dataset\"}", - "tool_version": null, - "type": "data_input", - "user_outputs": [] - }, - "2": { - "annotation": "", - "id": 2, - "input_connections": {}, - "inputs": [ - { - "description": "", - "name": "Input Dataset" - } - ], - "name": "Input dataset", - "outputs": [], - "position": { - "left": 904, - "top": 218 - }, - "tool_errors": null, - "tool_id": null, - "tool_state": "{\"name\": \"Input Dataset\"}", - "tool_version": null, - "type": "data_input", - "user_outputs": [] - }, - "3": { - "annotation": "", - "id": 3, - "input_connections": { - "input_file": { - "id": 0, - "output_name": "output" - } - }, - "inputs": [], - "name": "FASTQ Groomer", - "outputs": [ - { - "name": "output_file", - "type": "fastqsanger" - } - ], - "position": { - "left": 354, - "top": 310 - }, - "post_job_actions": { - "HideDatasetActionoutput_file": { - "action_arguments": {}, - "action_type": "HideDatasetAction", - "output_name": "output_file" - } - }, - "tool_errors": null, - "tool_id": "toolshed.g2.bx.psu.edu/repos/devteam/fastq_groomer/fastq_groomer/1.0.4", - "tool_state": "{\"__page__\": 0, \"input_file\": \"null\", \"input_type\": \"\\\"illumina\\\"\", \"__rerun_remap_job_id__\": null, \"options_type\": \"{\\\"options_type_selector\\\": \\\"basic\\\", \\\"__current_case__\\\": 0}\", \"chromInfo\": \"\\\"/home/galaxy/galaxy-dist/tool-data/shared/ucsc/chrom/?.len\\\"\"}", - "tool_version": "1.0.4", - "type": "tool", - "user_outputs": [] - }, - "4": { - "annotation": "", - "id": 4, - "input_connections": { - "input_file": { - "id": 1, - "output_name": "output" - } - }, - "inputs": [], - "name": "FASTQ Groomer", - "outputs": [ - { - "name": "output_file", - "type": "fastqsanger" - } - ], - "position": { - "left": 354, - "top": 430 - }, - "post_job_actions": { - "HideDatasetActionoutput_file": { - "action_arguments": {}, - "action_type": "HideDatasetAction", - "output_name": "output_file" - } - }, - "tool_errors": null, - "tool_id": "toolshed.g2.bx.psu.edu/repos/devteam/fastq_groomer/fastq_groomer/1.0.4", - "tool_state": "{\"__page__\": 0, \"input_file\": \"null\", \"input_type\": \"\\\"illumina\\\"\", \"__rerun_remap_job_id__\": null, \"options_type\": \"{\\\"options_type_selector\\\": \\\"basic\\\", \\\"__current_case__\\\": 0}\", \"chromInfo\": \"\\\"/home/galaxy/galaxy-dist/tool-data/shared/ucsc/chrom/?.len\\\"\"}", - "tool_version": "1.0.4", - "type": "tool", - "user_outputs": [] - }, - "5": { - "annotation": "", - "id": 5, - "input_connections": { - "readtype|input_paired1": { - "id": 3, - "output_name": "output_file" - }, - "readtype|input_paired2": { - "id": 4, - "output_name": "output_file" - } - }, - "inputs": [], - "name": "Sickle", - "outputs": [ - { - "name": "output_single", - "type": "input" - }, - { - "name": "output_paired1", - "type": "fastq" - }, - { - "name": "output_paired2", - "type": "fastq" - }, - { - "name": "output_paired_single", - "type": "fastq" - } - ], - "position": { - "left": 574, - "top": 310 - }, - "post_job_actions": { - "HideDatasetActionoutput_paired1": { - "action_arguments": {}, - "action_type": "HideDatasetAction", - "output_name": "output_paired1" - }, - "HideDatasetActionoutput_paired2": { - "action_arguments": {}, - "action_type": "HideDatasetAction", - "output_name": "output_paired2" - }, - "HideDatasetActionoutput_paired_single": { - "action_arguments": {}, - "action_type": "HideDatasetAction", - "output_name": "output_paired_single" - }, - "HideDatasetActionoutput_single": { - "action_arguments": {}, - "action_type": "HideDatasetAction", - "output_name": "output_single" - } - }, - "tool_errors": null, - "tool_id": "testtoolshed.g2.bx.psu.edu/repos/jjohnson/sickle/sickle/1.0.0", - "tool_state": "{\"__page__\": 0, \"length_threshold\": \"\\\"30\\\"\", \"no_five_prime\": \"\\\"False\\\"\", \"discard_n\": \"\\\"False\\\"\", \"__rerun_remap_job_id__\": null, \"readtype\": \"{\\\"single_or_paired\\\": \\\"pe\\\", \\\"input_paired1\\\": null, \\\"input_paired2\\\": null, \\\"__current_case__\\\": 1}\", \"chromInfo\": \"\\\"/home/galaxy/galaxy-dist/tool-data/shared/ucsc/chrom/?.len\\\"\", \"qual_threshold\": \"\\\"30\\\"\"}", - "tool_version": "1.0.0", - "type": "tool", - "user_outputs": [] - }, - "6": { - "annotation": "", - "id": 6, - "input_connections": { - "readtype|input_paired1": { - "id": 5, - "output_name": "output_paired1" - }, - "readtype|input_paired2": { - "id": 5, - "output_name": "output_paired2" - } - }, - "inputs": [], - "name": "Sickle", - "outputs": [ - { - "name": "output_single", - "type": "input" - }, - { - "name": "output_paired1", - "type": "fastq" - }, - { - "name": "output_paired2", - "type": "fastq" - }, - { - "name": "output_paired_single", - "type": "fastq" - } - ], - "position": { - "left": 794, - "top": 310 - }, - "post_job_actions": { - "HideDatasetActionoutput_paired1": { - "action_arguments": {}, - "action_type": "HideDatasetAction", - "output_name": "output_paired1" - }, - "HideDatasetActionoutput_paired2": { - "action_arguments": {}, - "action_type": "HideDatasetAction", - "output_name": "output_paired2" - }, - "HideDatasetActionoutput_paired_single": { - "action_arguments": {}, - "action_type": "HideDatasetAction", - "output_name": "output_paired_single" - }, - "HideDatasetActionoutput_single": { - "action_arguments": {}, - "action_type": "HideDatasetAction", - "output_name": "output_single" - } - }, - "tool_errors": null, - "tool_id": "testtoolshed.g2.bx.psu.edu/repos/jjohnson/sickle/sickle/1.0.0", - "tool_state": "{\"__page__\": 0, \"length_threshold\": \"\\\"30\\\"\", \"no_five_prime\": \"\\\"False\\\"\", \"discard_n\": \"\\\"True\\\"\", \"__rerun_remap_job_id__\": null, \"readtype\": \"{\\\"single_or_paired\\\": \\\"pe\\\", \\\"input_paired1\\\": null, \\\"input_paired2\\\": null, \\\"__current_case__\\\": 1}\", \"chromInfo\": \"\\\"/home/galaxy/galaxy-dist/tool-data/shared/ucsc/chrom/?.len\\\"\", \"qual_threshold\": \"\\\"30\\\"\"}", - "tool_version": "1.0.0", - "type": "tool", - "user_outputs": [] - }, - "7": { - "annotation": "", - "id": 7, - "input_connections": { - "genomeSource|ownFile": { - "id": 2, - "output_name": "output" - }, - "paired|input1": { - "id": 6, - "output_name": "output_paired1" - }, - "paired|input2": { - "id": 6, - "output_name": "output_paired2" - } - }, - "inputs": [], - "name": "Map with BWA for Illumina", - "outputs": [ - { - "name": "output", - "type": "sam" - } - ], - "position": { - "left": 1135, - "top": 363 - }, - "post_job_actions": {}, - "tool_errors": null, - "tool_id": "toolshed.g2.bx.psu.edu/repos/devteam/bwa_wrappers/bwa_wrapper/1.2.3", - "tool_state": "{\"genomeSource\": \"{\\\"refGenomeSource\\\": \\\"history\\\", \\\"ownFile\\\": null, \\\"__current_case__\\\": 1}\", \"__page__\": 0, \"__rerun_remap_job_id__\": null, \"paired\": \"{\\\"input2\\\": null, \\\"sPaired\\\": \\\"paired\\\", \\\"input1\\\": null, \\\"__current_case__\\\": 1}\", \"params\": \"{\\\"__current_case__\\\": 0, \\\"source_select\\\": \\\"pre_set\\\"}\", \"suppressHeader\": \"\\\"False\\\"\", \"chromInfo\": \"\\\"/home/galaxy/galaxy-dist/tool-data/shared/ucsc/chrom/?.len\\\"\"}", - "tool_version": "1.2.3", - "type": "tool", - "user_outputs": [] - }, - "8": { - "annotation": "", - "id": 8, - "input_connections": { - "input_sam_file": { - "id": 7, - "output_name": "output" - } - }, - "inputs": [], - "name": "filtersamunique", - "outputs": [ - { - "name": "output_file", - "type": "sam" - } - ], - "position": { - "left": 1250, - "top": 664.5 - }, - "post_job_actions": { - "HideDatasetActionoutput_file": { - "action_arguments": {}, - "action_type": "HideDatasetAction", - "output_name": "output_file" - } - }, - "tool_errors": null, - "tool_id": "filtersamunique", - "tool_state": "{\"__page__\": 0, \"__rerun_remap_job_id__\": null, \"input_sam_file\": \"null\"}", - "tool_version": "0.01", - "type": "tool", - "user_outputs": [] - }, - "9": { - "annotation": "", - "id": 9, - "input_connections": { - "input1": { - "id": 8, - "output_name": "output_file" - } - }, - "inputs": [], - "name": "Filter SAM or BAM", - "outputs": [ - { - "name": "output1", - "type": "sam" - } - ], - "position": { - "left": 1378, - "top": 398.5 - }, - "post_job_actions": {}, - "tool_errors": null, - "tool_id": "toolshed.g2.bx.psu.edu/repos/jjohnson/samtools_filter/samtools_filter/1.1.1", - "tool_state": "{\"__page__\": 0, \"bed_file\": \"null\", \"input1\": \"null\", \"__rerun_remap_job_id__\": null, \"read_group\": \"\\\"\\\"\", \"library\": \"\\\"\\\"\", \"regions\": \"\\\"\\\"\", \"header\": \"\\\"-h\\\"\", \"flag\": \"{\\\"filter\\\": \\\"yes\\\", \\\"skipBits\\\": null, \\\"__current_case__\\\": 1, \\\"reqBits\\\": \\\"0x0002\\\"}\", \"mapq\": \"\\\"\\\"\"}", - "tool_version": "1.1.1", - "type": "tool", - "user_outputs": [] - }, - "10": { - "annotation": "", - "id": 10, - "input_connections": { - "source|input1": { - "id": 9, - "output_name": "output1" - }, - "source|ref_file": { - "id": 2, - "output_name": "output" - } - }, - "inputs": [], - "name": "SAM-to-BAM", - "outputs": [ - { - "name": "output1", - "type": "bam" - } - ], - "position": { - "left": 1707, - "top": 342 - }, - "post_job_actions": { - "HideDatasetActionoutput1": { - "action_arguments": {}, - "action_type": "HideDatasetAction", - "output_name": "output1" - } - }, - "tool_errors": null, - "tool_id": "toolshed.g2.bx.psu.edu/repos/devteam/sam_to_bam/sam_to_bam/1.1.4", - "tool_state": "{\"source\": \"{\\\"index_source\\\": \\\"history\\\", \\\"ref_file\\\": null, \\\"input1\\\": null, \\\"__current_case__\\\": 1}\", \"__rerun_remap_job_id__\": null, \"chromInfo\": \"\\\"/home/galaxy/galaxy-dist/tool-data/shared/ucsc/chrom/?.len\\\"\", \"__page__\": 0}", - "tool_version": "1.1.4", - "type": "tool", - "user_outputs": [] - }, - "11": { - "annotation": "", - "id": 11, - "input_connections": { - "reference_source|input_bams_0|input_bam": { - "id": 10, - "output_name": "output1" - }, - "reference_source|ref_file": { - "id": 2, - "output_name": "output" - } - }, - "inputs": [], - "name": "MPileup", - "outputs": [ - { - "name": "output_mpileup", - "type": "pileup" - }, - { - "name": "output_log", - "type": "txt" - } - ], - "position": { - "left": 1938, - "top": 195 - }, - "post_job_actions": { - "HideDatasetActionoutput_log": { - "action_arguments": {}, - "action_type": "HideDatasetAction", - "output_name": "output_log" - } - }, - "tool_errors": null, - "tool_id": "toolshed.g2.bx.psu.edu/repos/devteam/samtools_mpileup/samtools_mpileup/0.0.3", - "tool_state": "{\"__page__\": 0, \"genotype_likelihood_computation_type\": \"{\\\"genotype_likelihood_computation_type_selector\\\": \\\"do_not_perform_genotype_likelihood_computation\\\", \\\"__current_case__\\\": 1}\", \"__rerun_remap_job_id__\": null, \"advanced_options\": \"{\\\"max_reads_per_bam\\\": \\\"250\\\", \\\"advanced_options_selector\\\": \\\"advanced\\\", \\\"extended_BAQ_computation\\\": \\\"False\\\", \\\"region_string\\\": \\\"\\\", \\\"output_per_sample_strand_bias_p_value\\\": \\\"False\\\", \\\"minimum_base_quality\\\": \\\"30\\\", \\\"disable_probabilistic_realignment\\\": \\\"False\\\", \\\"skip_anomalous_read_pairs\\\": \\\"False\\\", \\\"minimum_mapping_quality\\\": \\\"0\\\", \\\"output_per_sample_read_depth\\\": \\\"False\\\", \\\"__current_case__\\\": 0, \\\"position_list\\\": null, \\\"coefficient_for_downgrading\\\": \\\"0\\\"}\", \"reference_source\": \"{\\\"ref_file\\\": null, \\\"reference_source_selector\\\": \\\"history\\\", \\\"input_bams\\\": [{\\\"__index__\\\": 0, \\\"input_bam\\\": null}], \\\"__current_case__\\\": 1}\", \"chromInfo\": \"\\\"/home/galaxy/galaxy-dist/tool-data/shared/ucsc/chrom/?.len\\\"\"}", - "tool_version": "0.0.3", - "type": "tool", - "user_outputs": [] - }, - "12": { - "annotation": "", - "id": 12, - "input_connections": { - "input_file": { - "id": 11, - "output_name": "output_mpileup" - } - }, - "inputs": [], - "name": "mpileupfilter", - "outputs": [ - { - "name": "output_file", - "type": "pileup" - } - ], - "position": { - "left": 2258, - "top": 286.5 - }, - "post_job_actions": {}, - "tool_errors": null, - "tool_id": "mpileupfilter", - "tool_state": "{\"__page__\": 0, \"input_file\": \"null\", \"min_frequency\": \"\\\"80.0\\\"\", \"min_depth\": \"\\\"2\\\"\", \"__rerun_remap_job_id__\": null, \"min_forward_and_reverse\": \"\\\"0\\\"\", \"max_depth\": \"\\\"100\\\"\", \"min_distance\": \"\\\"0\\\"\"}", - "tool_version": "0.05", - "type": "tool", - "user_outputs": [] - } - } -} \ No newline at end of file diff -r 1d37c16ab674 -r ad321ff1b67d rapsodyn/Galaxy-Workflow-Rapsodyn_v1.3.ga --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/rapsodyn/Galaxy-Workflow-Rapsodyn_v1.3.ga Thu Aug 14 08:12:06 2014 -0400 @@ -0,0 +1,632 @@ +{ + "a_galaxy_workflow": "true", + "annotation": "", + "format-version": "0.1", + "name": "Rapsodyn v1.3", + "steps": { + "0": { + "annotation": "", + "id": 0, + "input_connections": {}, + "inputs": [ + { + "description": "", + "name": "READ1" + } + ], + "name": "Input dataset", + "outputs": [], + "position": { + "left": 320, + "top": 201 + }, + "tool_errors": null, + "tool_id": null, + "tool_state": "{\"name\": \"READ1\"}", + "tool_version": null, + "type": "data_input", + "user_outputs": [] + }, + "1": { + "annotation": "", + "id": 1, + "input_connections": {}, + "inputs": [ + { + "description": "", + "name": "READ2" + } + ], + "name": "Input dataset", + "outputs": [], + "position": { + "left": 320, + "top": 321 + }, + "tool_errors": null, + "tool_id": null, + "tool_state": "{\"name\": \"READ2\"}", + "tool_version": null, + "type": "data_input", + "user_outputs": [] + }, + "2": { + "annotation": "", + "id": 2, + "input_connections": {}, + "inputs": [ + { + "description": "", + "name": "ASSEMBLY" + } + ], + "name": "Input dataset", + "outputs": [], + "position": { + "left": 320, + "top": 441 + }, + "tool_errors": null, + "tool_id": null, + "tool_state": "{\"name\": \"ASSEMBLY\"}", + "tool_version": null, + "type": "data_input", + "user_outputs": [] + }, + "3": { + "annotation": "", + "id": 3, + "input_connections": {}, + "inputs": [ + { + "description": "", + "name": "DUBIOUS POSITION" + } + ], + "name": "Input dataset", + "outputs": [], + "position": { + "left": 320, + "top": 561 + }, + "tool_errors": null, + "tool_id": null, + "tool_state": "{\"name\": \"DUBIOUS POSITION\"}", + "tool_version": null, + "type": "data_input", + "user_outputs": [] + }, + "4": { + "annotation": "", + "id": 4, + "input_connections": { + "input_file": { + "id": 0, + "output_name": "output" + } + }, + "inputs": [], + "name": "FASTQ Groomer", + "outputs": [ + { + "name": "output_file", + "type": "fastqsanger" + } + ], + "position": { + "left": 540, + "top": 201 + }, + "post_job_actions": {}, + "tool_errors": null, + "tool_id": "toolshed.g2.bx.psu.edu/repos/devteam/fastq_groomer/fastq_groomer/1.0.4", + "tool_state": "{\"__page__\": 0, \"input_file\": \"null\", \"input_type\": \"\\\"illumina\\\"\", \"__rerun_remap_job_id__\": null, \"options_type\": \"{\\\"options_type_selector\\\": \\\"basic\\\", \\\"__current_case__\\\": 0}\", \"chromInfo\": \"\\\"/home/galaxy/galaxy-dist/tool-data/shared/ucsc/chrom/?.len\\\"\"}", + "tool_version": "1.0.4", + "type": "tool", + "user_outputs": [] + }, + "5": { + "annotation": "", + "id": 5, + "input_connections": { + "input_file": { + "id": 1, + "output_name": "output" + } + }, + "inputs": [], + "name": "FASTQ Groomer", + "outputs": [ + { + "name": "output_file", + "type": "fastqsanger" + } + ], + "position": { + "left": 540, + "top": 321 + }, + "post_job_actions": {}, + "tool_errors": null, + "tool_id": "toolshed.g2.bx.psu.edu/repos/devteam/fastq_groomer/fastq_groomer/1.0.4", + "tool_state": "{\"__page__\": 0, \"input_file\": \"null\", \"input_type\": \"\\\"illumina\\\"\", \"__rerun_remap_job_id__\": null, \"options_type\": \"{\\\"options_type_selector\\\": \\\"basic\\\", \\\"__current_case__\\\": 0}\", \"chromInfo\": \"\\\"/home/galaxy/galaxy-dist/tool-data/shared/ucsc/chrom/?.len\\\"\"}", + "tool_version": "1.0.4", + "type": "tool", + "user_outputs": [] + }, + "6": { + "annotation": "", + "id": 6, + "input_connections": { + "input_fasta": { + "id": 2, + "output_name": "output" + } + }, + "inputs": [], + "name": "fastaGroomerForMakeBlastdb", + "outputs": [ + { + "name": "output_fasta", + "type": "fasta" + } + ], + "position": { + "left": 540, + "top": 441 + }, + "post_job_actions": {}, + "tool_errors": null, + "tool_id": "fastaGroomerForMakeBlastdb", + "tool_state": "{\"input_fasta\": \"null\", \"__rerun_remap_job_id__\": null, \"chromInfo\": \"\\\"/home/galaxy/galaxy-dist/tool-data/shared/ucsc/chrom/?.len\\\"\", \"__page__\": 0}", + "tool_version": "0.01", + "type": "tool", + "user_outputs": [] + }, + "7": { + "annotation": "", + "id": 7, + "input_connections": { + "readtype|input_paired1": { + "id": 4, + "output_name": "output_file" + }, + "readtype|input_paired2": { + "id": 5, + "output_name": "output_file" + } + }, + "inputs": [], + "name": "Sickle", + "outputs": [ + { + "name": "output_single", + "type": "input" + }, + { + "name": "output_paired1", + "type": "fastq" + }, + { + "name": "output_paired2", + "type": "fastq" + }, + { + "name": "output_paired_single", + "type": "fastq" + } + ], + "position": { + "left": 760, + "top": 201 + }, + "post_job_actions": {}, + "tool_errors": null, + "tool_id": "testtoolshed.g2.bx.psu.edu/repos/jjohnson/sickle/sickle/1.0.0", + "tool_state": "{\"__page__\": 0, \"length_threshold\": \"\\\"30\\\"\", \"no_five_prime\": \"\\\"False\\\"\", \"discard_n\": \"\\\"False\\\"\", \"__rerun_remap_job_id__\": null, \"readtype\": \"{\\\"single_or_paired\\\": \\\"pe\\\", \\\"input_paired1\\\": null, \\\"input_paired2\\\": null, \\\"__current_case__\\\": 1}\", \"chromInfo\": \"\\\"/home/galaxy/galaxy-dist/tool-data/shared/ucsc/chrom/?.len\\\"\", \"qual_threshold\": \"\\\"30\\\"\"}", + "tool_version": "1.0.0", + "type": "tool", + "user_outputs": [] + }, + "8": { + "annotation": "", + "id": 8, + "input_connections": { + "input_file": { + "id": 6, + "output_name": "output_fasta" + } + }, + "inputs": [], + "name": "NCBI BLAST+ makeblastdb", + "outputs": [ + { + "name": "outfile", + "type": "data" + } + ], + "position": { + "left": 760, + "top": 321 + }, + "post_job_actions": {}, + "tool_errors": null, + "tool_id": "toolshed.g2.bx.psu.edu/repos/devteam/ncbi_blast_plus/ncbi_makeblastdb/0.1.00", + "tool_state": "{\"__page__\": 0, \"mask_data_file\": \"null\", \"input_file\": \"null\", \"dbtype\": \"\\\"nucl\\\"\", \"__rerun_remap_job_id__\": null, \"hash_index\": \"\\\"True\\\"\", \"tax\": \"{\\\"taxselect\\\": \\\"\\\", \\\"__current_case__\\\": 0}\", \"title\": \"\\\"\\\"\", \"chromInfo\": \"\\\"/home/galaxy/galaxy-dist/tool-data/shared/ucsc/chrom/?.len\\\"\", \"parse_seqids\": \"\\\"False\\\"\"}", + "tool_version": "0.1.00", + "type": "tool", + "user_outputs": [] + }, + "9": { + "annotation": "", + "id": 9, + "input_connections": { + "readtype|input_paired1": { + "id": 7, + "output_name": "output_paired1" + }, + "readtype|input_paired2": { + "id": 7, + "output_name": "output_paired2" + } + }, + "inputs": [], + "name": "Sickle", + "outputs": [ + { + "name": "output_single", + "type": "input" + }, + { + "name": "output_paired1", + "type": "fastq" + }, + { + "name": "output_paired2", + "type": "fastq" + }, + { + "name": "output_paired_single", + "type": "fastq" + } + ], + "position": { + "left": 980, + "top": 201 + }, + "post_job_actions": {}, + "tool_errors": null, + "tool_id": "testtoolshed.g2.bx.psu.edu/repos/jjohnson/sickle/sickle/1.0.0", + "tool_state": "{\"__page__\": 0, \"length_threshold\": \"\\\"30\\\"\", \"no_five_prime\": \"\\\"False\\\"\", \"discard_n\": \"\\\"True\\\"\", \"__rerun_remap_job_id__\": null, \"readtype\": \"{\\\"single_or_paired\\\": \\\"pe\\\", \\\"input_paired1\\\": null, \\\"input_paired2\\\": null, \\\"__current_case__\\\": 1}\", \"chromInfo\": \"\\\"/home/galaxy/galaxy-dist/tool-data/shared/ucsc/chrom/?.len\\\"\", \"qual_threshold\": \"\\\"30\\\"\"}", + "tool_version": "1.0.0", + "type": "tool", + "user_outputs": [] + }, + "10": { + "annotation": "", + "id": 10, + "input_connections": { + "genomeSource|ownFile": { + "id": 2, + "output_name": "output" + }, + "paired|input1": { + "id": 9, + "output_name": "output_paired1" + }, + "paired|input2": { + "id": 9, + "output_name": "output_paired2" + } + }, + "inputs": [], + "name": "Map with BWA for Illumina", + "outputs": [ + { + "name": "output", + "type": "sam" + } + ], + "position": { + "left": 1200, + "top": 201 + }, + "post_job_actions": {}, + "tool_errors": null, + "tool_id": "toolshed.g2.bx.psu.edu/repos/devteam/bwa_wrappers/bwa_wrapper/1.2.3", + "tool_state": "{\"genomeSource\": \"{\\\"refGenomeSource\\\": \\\"history\\\", \\\"ownFile\\\": null, \\\"__current_case__\\\": 1}\", \"__page__\": 0, \"__rerun_remap_job_id__\": null, \"paired\": \"{\\\"input2\\\": null, \\\"sPaired\\\": \\\"paired\\\", \\\"input1\\\": null, \\\"__current_case__\\\": 1}\", \"params\": \"{\\\"__current_case__\\\": 0, \\\"source_select\\\": \\\"pre_set\\\"}\", \"suppressHeader\": \"\\\"False\\\"\", \"chromInfo\": \"\\\"/home/galaxy/galaxy-dist/tool-data/shared/ucsc/chrom/?.len\\\"\"}", + "tool_version": "1.2.3", + "type": "tool", + "user_outputs": [] + }, + "11": { + "annotation": "", + "id": 11, + "input_connections": { + "input_sam_file": { + "id": 10, + "output_name": "output" + } + }, + "inputs": [], + "name": "filtersamunique", + "outputs": [ + { + "name": "output_file", + "type": "sam" + } + ], + "position": { + "left": 1420, + "top": 201 + }, + "post_job_actions": {}, + "tool_errors": null, + "tool_id": "filtersamunique", + "tool_state": "{\"__page__\": 0, \"__rerun_remap_job_id__\": null, \"chromInfo\": \"\\\"/home/galaxy/galaxy-dist/tool-data/shared/ucsc/chrom/?.len\\\"\", \"input_sam_file\": \"null\"}", + "tool_version": "0.01", + "type": "tool", + "user_outputs": [] + }, + "12": { + "annotation": "", + "id": 12, + "input_connections": { + "input1": { + "id": 11, + "output_name": "output_file" + } + }, + "inputs": [], + "name": "Filter SAM or BAM", + "outputs": [ + { + "name": "output1", + "type": "sam" + } + ], + "position": { + "left": 1640, + "top": 201 + }, + "post_job_actions": {}, + "tool_errors": null, + "tool_id": "toolshed.g2.bx.psu.edu/repos/jjohnson/samtools_filter/samtools_filter/1.1.1", + "tool_state": "{\"__page__\": 0, \"bed_file\": \"null\", \"input1\": \"null\", \"__rerun_remap_job_id__\": null, \"read_group\": \"\\\"\\\"\", \"library\": \"\\\"\\\"\", \"regions\": \"\\\"\\\"\", \"header\": \"\\\"-h\\\"\", \"flag\": \"{\\\"filter\\\": \\\"yes\\\", \\\"skipBits\\\": null, \\\"__current_case__\\\": 1, \\\"reqBits\\\": \\\"0x0002\\\"}\", \"mapq\": \"\\\"\\\"\", \"chromInfo\": \"\\\"/home/galaxy/galaxy-dist/tool-data/shared/ucsc/chrom/?.len\\\"\"}", + "tool_version": "1.1.1", + "type": "tool", + "user_outputs": [] + }, + "13": { + "annotation": "", + "id": 13, + "input_connections": { + "source|input1": { + "id": 12, + "output_name": "output1" + }, + "source|ref_file": { + "id": 2, + "output_name": "output" + } + }, + "inputs": [], + "name": "SAM-to-BAM", + "outputs": [ + { + "name": "output1", + "type": "bam" + } + ], + "position": { + "left": 1860, + "top": 201 + }, + "post_job_actions": {}, + "tool_errors": null, + "tool_id": "toolshed.g2.bx.psu.edu/repos/devteam/sam_to_bam/sam_to_bam/1.1.4", + "tool_state": "{\"source\": \"{\\\"index_source\\\": \\\"history\\\", \\\"ref_file\\\": null, \\\"input1\\\": null, \\\"__current_case__\\\": 1}\", \"__rerun_remap_job_id__\": null, \"chromInfo\": \"\\\"/home/galaxy/galaxy-dist/tool-data/shared/ucsc/chrom/?.len\\\"\", \"__page__\": 0}", + "tool_version": "1.1.4", + "type": "tool", + "user_outputs": [] + }, + "14": { + "annotation": "", + "id": 14, + "input_connections": { + "reference_source|input_bams_0|input_bam": { + "id": 13, + "output_name": "output1" + }, + "reference_source|ref_file": { + "id": 2, + "output_name": "output" + } + }, + "inputs": [], + "name": "MPileup", + "outputs": [ + { + "name": "output_mpileup", + "type": "pileup" + }, + { + "name": "output_log", + "type": "txt" + } + ], + "position": { + "left": 2080, + "top": 201 + }, + "post_job_actions": {}, + "tool_errors": null, + "tool_id": "toolshed.g2.bx.psu.edu/repos/devteam/samtools_mpileup/samtools_mpileup/0.0.3", + "tool_state": "{\"__page__\": 0, \"genotype_likelihood_computation_type\": \"{\\\"genotype_likelihood_computation_type_selector\\\": \\\"do_not_perform_genotype_likelihood_computation\\\", \\\"__current_case__\\\": 1}\", \"__rerun_remap_job_id__\": null, \"advanced_options\": \"{\\\"max_reads_per_bam\\\": \\\"250\\\", \\\"advanced_options_selector\\\": \\\"advanced\\\", \\\"extended_BAQ_computation\\\": \\\"False\\\", \\\"region_string\\\": \\\"\\\", \\\"output_per_sample_strand_bias_p_value\\\": \\\"False\\\", \\\"minimum_base_quality\\\": \\\"30\\\", \\\"disable_probabilistic_realignment\\\": \\\"False\\\", \\\"skip_anomalous_read_pairs\\\": \\\"False\\\", \\\"minimum_mapping_quality\\\": \\\"0\\\", \\\"output_per_sample_read_depth\\\": \\\"False\\\", \\\"__current_case__\\\": 0, \\\"position_list\\\": null, \\\"coefficient_for_downgrading\\\": \\\"0\\\"}\", \"reference_source\": \"{\\\"ref_file\\\": null, \\\"reference_source_selector\\\": \\\"history\\\", \\\"input_bams\\\": [{\\\"__index__\\\": 0, \\\"input_bam\\\": null}], \\\"__current_case__\\\": 1}\", \"chromInfo\": \"\\\"/home/galaxy/galaxy-dist/tool-data/shared/ucsc/chrom/?.len\\\"\"}", + "tool_version": "0.0.3", + "type": "tool", + "user_outputs": [] + }, + "15": { + "annotation": "", + "id": 15, + "input_connections": { + "input_file": { + "id": 14, + "output_name": "output_mpileup" + } + }, + "inputs": [], + "name": "mpileupfilter", + "outputs": [ + { + "name": "output_file", + "type": "pileup" + } + ], + "position": { + "left": 2300, + "top": 201 + }, + "post_job_actions": {}, + "tool_errors": null, + "tool_id": "mpileupfilter", + "tool_state": "{\"__page__\": 0, \"input_file\": \"null\", \"chromInfo\": \"\\\"/home/galaxy/galaxy-dist/tool-data/shared/ucsc/chrom/?.len\\\"\", \"min_frequency\": \"\\\"0.8\\\"\", \"min_depth\": \"\\\"2\\\"\", \"__rerun_remap_job_id__\": null, \"min_forward_and_reverse\": \"\\\"0\\\"\", \"max_depth\": \"\\\"50\\\"\", \"min_distance\": \"\\\"0\\\"\"}", + "tool_version": "0.05", + "type": "tool", + "user_outputs": [] + }, + "16": { + "annotation": "", + "id": 16, + "input_connections": { + "input_listA": { + "id": 15, + "output_name": "output_file" + }, + "input_listB": { + "id": 3, + "output_name": "output" + } + }, + "inputs": [], + "name": "listfiltering", + "outputs": [ + { + "name": "output_file", + "type": "pileup" + } + ], + "position": { + "left": 2520, + "top": 201 + }, + "post_job_actions": {}, + "tool_errors": null, + "tool_id": "listfiltering", + "tool_state": "{\"__page__\": 0, \"__rerun_remap_job_id__\": null, \"nb_col\": \"\\\"2\\\"\", \"input_listB\": \"null\", \"input_listA\": \"null\", \"chromInfo\": \"\\\"/home/galaxy/galaxy-dist/tool-data/shared/ucsc/chrom/?.len\\\"\"}", + "tool_version": "0.01", + "type": "tool", + "user_outputs": [] + }, + "17": { + "annotation": "", + "id": 17, + "input_connections": { + "input_assembly_file": { + "id": 2, + "output_name": "output" + }, + "input_variant_file": { + "id": 16, + "output_name": "output_file" + } + }, + "inputs": [], + "name": "extractseq", + "outputs": [ + { + "name": "output_file", + "type": "fasta" + } + ], + "position": { + "left": 2740, + "top": 201 + }, + "post_job_actions": {}, + "tool_errors": null, + "tool_id": "extractseq", + "tool_state": "{\"__page__\": 0, \"input_variant_file\": \"null\", \"__rerun_remap_job_id__\": null, \"input_assembly_file\": \"null\", \"window_length\": \"\\\"50\\\"\", \"chromInfo\": \"\\\"/home/galaxy/galaxy-dist/tool-data/shared/ucsc/chrom/?.len\\\"\"}", + "tool_version": "0.03", + "type": "tool", + "user_outputs": [] + }, + "18": { + "annotation": "", + "id": 18, + "input_connections": { + "db_opts|histdb": { + "id": 8, + "output_name": "outfile" + }, + "query": { + "id": 17, + "output_name": "output_file" + } + }, + "inputs": [], + "name": "NCBI BLAST+ blastn", + "outputs": [ + { + "name": "output1", + "type": "tabular" + } + ], + "position": { + "left": 2960, + "top": 201 + }, + "post_job_actions": {}, + "tool_errors": null, + "tool_id": "toolshed.g2.bx.psu.edu/repos/devteam/ncbi_blast_plus/ncbi_blastn_wrapper/0.1.00", + "tool_state": "{\"evalue_cutoff\": \"\\\"0.001\\\"\", \"__page__\": 0, \"adv_opts\": \"{\\\"identity_cutoff\\\": \\\"0.0\\\", \\\"adv_opts_selector\\\": \\\"advanced\\\", \\\"ungapped\\\": \\\"False\\\", \\\"filter_query\\\": \\\"False\\\", \\\"word_size\\\": \\\"0\\\", \\\"__current_case__\\\": 1, \\\"parse_deflines\\\": \\\"False\\\", \\\"strand\\\": \\\"-strand both\\\", \\\"max_hits\\\": \\\"0\\\"}\", \"__rerun_remap_job_id__\": null, \"db_opts\": \"{\\\"db_opts_selector\\\": \\\"histdb\\\", \\\"subject\\\": \\\"\\\", \\\"histdb\\\": null, \\\"__current_case__\\\": 1, \\\"database\\\": \\\"\\\"}\", \"query\": \"null\", \"blast_type\": \"\\\"megablast\\\"\", \"output\": \"{\\\"out_format\\\": \\\"5\\\", \\\"__current_case__\\\": 3}\", \"chromInfo\": \"\\\"/home/galaxy/galaxy-dist/tool-data/shared/ucsc/chrom/?.len\\\"\"}", + "tool_version": "0.1.00", + "type": "tool", + "user_outputs": [] + }, + "19": { + "annotation": "", + "id": 19, + "input_connections": { + "input_blastxml_file": { + "id": 18, + "output_name": "output1" + }, + "input_variant_file": { + "id": 16, + "output_name": "output_file" + } + }, + "inputs": [], + "name": "mpileupfilteronblastxml", + "outputs": [ + { + "name": "output_file", + "type": "pileup" + } + ], + "position": { + "left": 3180, + "top": 201 + }, + "post_job_actions": {}, + "tool_errors": null, + "tool_id": "mpileupfilteronblastxml", + "tool_state": "{\"__page__\": 0, \"input_variant_file\": \"null\", \"__rerun_remap_job_id__\": null, \"input_blastxml_file\": \"null\", \"window_length\": \"\\\"50\\\"\", \"nb_mismatch_max\": \"\\\"3\\\"\", \"chromInfo\": \"\\\"/home/galaxy/galaxy-dist/tool-data/shared/ucsc/chrom/?.len\\\"\"}", + "tool_version": "0.03", + "type": "tool", + "user_outputs": [] + } + } +} \ No newline at end of file diff -r 1d37c16ab674 -r ad321ff1b67d rapsodyn/extractseq.pl --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/rapsodyn/extractseq.pl Thu Aug 14 08:12:06 2014 -0400 @@ -0,0 +1,132 @@ +#!/usr/bin/perl +#V1.10 + +use strict; +use warnings; +use Getopt::Long; + +my $input_variant_file; +my $input_assembly_file; +my $WINDOWS_LENGTH = 50; + +GetOptions ( +"input_variant_file=s" => \$input_variant_file, +"input_assembly_file=s" => \$input_assembly_file, +"window_length=i" => \$WINDOWS_LENGTH +) or die("Error in command line arguments\n"); + +open(INV, $input_variant_file) or die ("Can't open $input_variant_file\n"); +open(INA, $input_assembly_file) or die ("Can't open $input_assembly_file\n"); + +my @variant_list; + + +### Retrieving the assembly +my %genome; + +my $current_header=""; +my $current_seq=""; +while (my $ligne = ){ + if ($ligne =~ /^\>(.*?)\s*$/){ + if ($current_header){ + $genome{$current_header} = $current_seq; + } + $current_header=$1; + $current_seq = ""; + } + else { + if ($ligne=~/^([ATGCNXatgcnx]+)\s*$/){ + $current_seq .= $1; + } + else { + print STDERR "Erreur Parsing n°2\n$ligne\n"; + } + } +} +#TRAITEMENT DU DERNIER +if ($current_header){ + $genome{$current_header} = $current_seq; + undef($current_seq); +} +close (INA); + + +### Retrieving the variant +while (my $ligne=){ + if ($ligne !~ /^\s*$/){ + my %variant; + my @fields = split (/\s+/,$ligne); + $variant{"ref"}=$fields[0]; + $variant{"position"}=$fields[1]; + $variant{"baseref"}=$fields[2]; + $variant{"depth"}=$fields[3]; + $variant{"pileup"}=$fields[4]; + + + my $start = &max($variant{"position"} - $WINDOWS_LENGTH,1); + my $stop = &min ($variant{"position"} + $WINDOWS_LENGTH,length($genome{$variant{"ref"}})); + my $length = $stop-$start+1; + + #print $variant{"position"}," / ",length($genome{$variant{"ref"}})," / ","$start / $stop / $length \n"; + + $variant{"SEQ"} = substr $genome{$variant{"ref"}},$start-1,$length; + + my $pileup = $variant{"pileup"}; + $pileup =~ s/\$//g; #the read start at this position + $pileup =~ s/\^.//g; #the read end at this position + my $descriptor = $variant{"position"}."_".$variant{"depth"}."_"; + if ($pileup=~/\+([0-9]+)([ACGTNacgtn]+)/){ + $descriptor .="I".$1."_".$2; + } + elsif ($pileup=~/\-([0-9]+)([ACGTNacgtn]+)/){ + $descriptor .="D".$1."_".$2; + } + elsif ($pileup=~/([ACGTNacgtn])/){ + $descriptor.="M1"."_".$1; + } + else { + $descriptor.="?_?"; + } + $variant{"desc"}=$descriptor; + + print ">",$variant{"ref"},"_",$descriptor,"\n",$variant{"SEQ"},"\n"; + + + + #print ">",$variant{"ref"},"_",$variant{"position"},"_",$variant{"depth"},"\n",$variant{"SEQ"},"\n"; + + push(@variant_list,\%variant); + } +} +close (INV); + + + + + + + + + +#*********** +sub min{ + my $first = shift; + my $second = shift; + if ($first <= $second){ + return $first; + } + else { + return $second; + } +} + +sub max { + my $first = shift; + my $second = shift; + if ($first >= $second){ + return $first; + } + else { + return $second; + } +} \ No newline at end of file diff -r 1d37c16ab674 -r ad321ff1b67d rapsodyn/extractseq.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/rapsodyn/extractseq.xml Thu Aug 14 08:12:06 2014 -0400 @@ -0,0 +1,20 @@ + +Extract Sequence around variant position + + extractseq.pl -input_variant_file $input_variant_file -input_assembly_file $input_assembly_file -window_length $window_length > $output_file + + + + + + + + + + + + + + + + diff -r 1d37c16ab674 -r ad321ff1b67d rapsodyn/filtersamunique.pl --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/rapsodyn/filtersamunique.pl Thu Aug 14 08:12:06 2014 -0400 @@ -0,0 +1,20 @@ +#!/usr/bin/perl +use strict; +use warnings; + +open(IN, $ARGV[0]) or die ("Can't open $ARGV[0]\n"); +while (my $line=){ + if ($line =~ /^\@/){ + #Header conservation + print $line; + } + else { + #Optionnal flag verification + if (($line =~ /XT\:A\:U/)&&($line =~ /X0\:i\:1/)&&($line =~ /X1\:i\:0\s/)){ + print $line; + } + } +} + + +close (IN); \ No newline at end of file diff -r 1d37c16ab674 -r ad321ff1b67d rapsodyn/filtersamunique.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/rapsodyn/filtersamunique.xml Thu Aug 14 08:12:06 2014 -0400 @@ -0,0 +1,18 @@ + +Filter SAM file for uniquelly match reads + + filtersamunique.pl $input_sam_file > $output_file + + + + + + + + + + + + + + diff -r 1d37c16ab674 -r ad321ff1b67d rapsodyn/filtersamunique/filtersamunique.pl --- a/rapsodyn/filtersamunique/filtersamunique.pl Tue Aug 12 08:41:47 2014 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,20 +0,0 @@ -#!/usr/bin/perl -use strict; -use warnings; - -open(IN, $ARGV[0]) or die ("Can't open $ARGV[0]\n"); -while (my $line=){ - if ($line =~ /^\@/){ - #Header conservation - print $line; - } - else { - #Optionnal flag verification - if (($line =~ /XT\:A\:U/)&&($line =~ /X0\:i\:1/)&&($line =~ /X1\:i\:0\s/)){ - print $line; - } - } -} - - -close (IN); \ No newline at end of file diff -r 1d37c16ab674 -r ad321ff1b67d rapsodyn/filtersamunique/filtersamunique.xml --- a/rapsodyn/filtersamunique/filtersamunique.xml Tue Aug 12 08:41:47 2014 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,18 +0,0 @@ - -Filter SAM file for uniquelly match reads - - filtersamunique.pl $input_sam_file > $output_file - - - - - - - - - - - - - - diff -r 1d37c16ab674 -r ad321ff1b67d rapsodyn/listfiltering.pl --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/rapsodyn/listfiltering.pl Thu Aug 14 08:12:06 2014 -0400 @@ -0,0 +1,61 @@ +#!/usr/bin/perl +use strict; +use Getopt::Long; + +my $inputfile; +my $headerfile; +my $nb_col=1; +my %header; + +if ($#ARGV<0){ + print "\n"; + print "perl 021_ListFiltering.pl -input_file -header_file -nb_col [1]\n"; + exit(0); +} + +GetOptions ( +"input_file=s" => \$inputfile, +"header_file=s" => \$headerfile, +"nb_col=i" => \$nb_col +) or die("Error in command line arguments\n"); + +open(HF, $headerfile) or die("Can't open $headerfile\n"); +while (my $line=){ + chomp($line); + my @fields = split(/\s+/,$line); + my $ref=""; + my $compt=0; + while ($compt<$nb_col){ + if ($ref){$ref.="\t";} + $ref.=$fields[$compt]; + $compt++; + } + # my $ref = "$fields[0]\t$fields[1]"; + $header{$ref}=$line; +} +close (HF); + + +open(IF, $inputfile) or die("Can't open $inputfile\n"); +while (my $line=){ + my @fields = split(/\s+/,$line); + my $ref=""; + my $compt=0; + while ($compt<$nb_col){ + if ($ref){$ref.="\t";} + $ref.=$fields[$compt]; + $compt++; + } + # my $ref = "$fields[0]\t$fields[1]"; + + if ($header{$ref}){ + # print $line; + # print $header{$ref},"\n"; + } + else { + print $line; + } + +} +close(IF); + diff -r 1d37c16ab674 -r ad321ff1b67d rapsodyn/listfiltering.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/rapsodyn/listfiltering.xml Thu Aug 14 08:12:06 2014 -0400 @@ -0,0 +1,20 @@ + +Compare listA and listB first X column and remove from listA the elements present in listB + + listfiltering.pl -input_file $input_listA -header_file $input_listB -nb_col $nb_col > $output_file + + + + + + + + + + + + + + + + diff -r 1d37c16ab674 -r ad321ff1b67d rapsodyn/listfiltering/listfiltering.pl --- a/rapsodyn/listfiltering/listfiltering.pl Tue Aug 12 08:41:47 2014 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,61 +0,0 @@ -#!/usr/bin/perl -use strict; -use Getopt::Long; - -my $inputfile; -my $headerfile; -my $nb_col=1; -my %header; - -if ($#ARGV<0){ - print "\n"; - print "perl 021_ListFiltering.pl -input_file -header_file -nb_col [1]\n"; - exit(0); -} - -GetOptions ( -"input_file=s" => \$inputfile, -"header_file=s" => \$headerfile, -"nb_col=i" => \$nb_col -) or die("Error in command line arguments\n"); - -open(HF, $headerfile) or die("Can't open $headerfile\n"); -while (my $line=){ - chomp($line); - my @fields = split(/\s+/,$line); - my $ref=""; - my $compt=0; - while ($compt<$nb_col){ - if ($ref){$ref.="\t";} - $ref.=$fields[$compt]; - $compt++; - } - # my $ref = "$fields[0]\t$fields[1]"; - $header{$ref}=$line; -} -close (HF); - - -open(IF, $inputfile) or die("Can't open $inputfile\n"); -while (my $line=){ - my @fields = split(/\s+/,$line); - my $ref=""; - my $compt=0; - while ($compt<$nb_col){ - if ($ref){$ref.="\t";} - $ref.=$fields[$compt]; - $compt++; - } - # my $ref = "$fields[0]\t$fields[1]"; - - if ($header{$ref}){ - # print $line; - # print $header{$ref},"\n"; - } - else { - print $line; - } - -} -close(IF); - diff -r 1d37c16ab674 -r ad321ff1b67d rapsodyn/listfiltering/listfiltering.xml --- a/rapsodyn/listfiltering/listfiltering.xml Tue Aug 12 08:41:47 2014 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,20 +0,0 @@ - -Compare listA and listB first X column and remove from listA the elements present in listB - - listfiltering.pl -input_file $input_listA -header_file $input_listB -nb_col $nb_col > $output_file - - - - - - - - - - - - - - - - diff -r 1d37c16ab674 -r ad321ff1b67d rapsodyn/mpileupfilter.pl --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/rapsodyn/mpileupfilter.pl Thu Aug 14 08:12:06 2014 -0400 @@ -0,0 +1,394 @@ +#!/usr/bin/perl +use strict; +use Getopt::Long; + +# +# Filter a pileup file on forward/reverse presence and %read having the variant +# The error code +# 1 : multiple variant type detected insertion/deletion/mutation +# 1i : inconsistency in insertion +# 1d : inconsistency in deletion +# 1m : inconsistency in mutation +# 2 : insufficient depth +# 3 : insufficient variant frequency +# 4 : variant position not covered by forward and reverse reads +# 5 : variant with other variant in neighbourhood +# 6 : too much depth +# 8 : parsing error (couldn't parse the mpileup line correctly) +# 9 : parsing error (couldn't parse the readbase string correctly) + + +my $inputfile; +my $logfile; +my $MIN_DISTANCE=0; +my $MIN_VARIANTFREQUENCY=0; +my $MIN_FORWARDREVERSE=0; +my $MIN_DEPTH=0; +my $MAX_DEPTH=500; +my $VERBOSE=0; +my $ONLY_UNFILTERED_VARIANT="OFF"; + +if ($#ARGV<0){ + print "\n"; + print "perl 020_FilterPileupv6 -input_file [OPTION]\n"; + print "-input_file \tinputfile in mpileup format\n"; + print "-log_file \tlogfile containing discarded mpileup lines and the errorcode associated\n"; + print "-min_depth \tminimum depth required [1]\n"; + print "-max_depth \tmaximim depth (position with more coverage will be discarded) [100]\n"; + print "-min_frequency \tminimum variant frequency (0->1) [1] (default 1 => 100% reads show the variant at this position)\n"; + print "-min_distance \tminimum distance between variant [0]\n"; + print "-min_forward_and_reverse \tminimum number of reads in forward and reverse covering the variant required [0]\n"; + print "\n"; + exit(0); +} + +GetOptions ( +"input_file=s" => \$inputfile, +"log_file=s" => \$logfile, +"min_depth=i" => \$MIN_DEPTH, +"max_depth=i" => \$MAX_DEPTH, +"min_frequency=f" => \$MIN_VARIANTFREQUENCY, +"min_distance=i" => \$MIN_DISTANCE, +"min_forward_and_reverse=i" => \$MIN_FORWARDREVERSE, +"variant_only=s" => \$ONLY_UNFILTERED_VARIANT, +"v=i" => \$VERBOSE +) or die("Error in command line arguments\n"); + + +open(IF, $inputfile) or die("Can't open $inputfile\n"); + +my @tbl_line; +my @tbl_variant_position; +my @tbl_variant_chr; +my @tbl_variant_refbase; +my @tbl_variant_coverage; +my @tbl_variant_readbase_string; +my @tbl_variant_quality_string; + +#Extraction des variants +my $nb_line=0; +while (my $line=){ + $nb_line++; + if (($nb_line % 1000000 == 0)&&($VERBOSE==1)){ + print "$nb_line\n"; + } + my $error_code=0; + if ($line=~/(.*?)\s+(\d+)\s+([ATGCN])\s+(\d+)\s+(.*?)\s+(.*?)$/){ + my $current_chromosome = $1; + my $current_position = $2; + my $current_refbase = $3; + my $current_coverage = $4; + my $current_readbase_string = $5; + my $current_quality_string = $6; + + #Suppression of mPileUp special character + $current_readbase_string =~ s/\$//g; #the read start at this position + $current_readbase_string =~ s/\^.//g; #the read end at this position followed by quality char + + if ($current_readbase_string =~ /[ATGCNatgcn\d]/){ + push(@tbl_line,$line); + push(@tbl_variant_chr,$current_chromosome); + push(@tbl_variant_position,$current_position); + push(@tbl_variant_refbase,$current_refbase); + push(@tbl_variant_coverage,$current_coverage); + push(@tbl_variant_readbase_string,$current_readbase_string); + push(@tbl_variant_quality_string,$current_quality_string); + if ($ONLY_UNFILTERED_VARIANT eq "ON"){ + print $line; + } + + } + else { + #Position with no variant + } + + } + else { + #Error Parsing + print STDERR "$line #8"; + } +} +close(IF); + +if ($ONLY_UNFILTERED_VARIANT eq "ON"){ + exit(0); +} + +####Checking the distance between variant and other filter + +if ($logfile){ + open(LF,">$logfile") or die ("Cant't open $logfile\n"); +} + +for (my $i=0;$i<=$#tbl_line;$i++){ + # print "ligne : $tbl_line[$i]\n"; + + my $error_code=0; + if ($i==0){ + #Comparing $i and $i+1 for neighbourhood filter; + if ($#tbl_line>0){ + if (($tbl_variant_chr[$i+1] eq $tbl_variant_chr[$i])&&($tbl_variant_position[$i]+$MIN_DISTANCE>=$tbl_variant_position[$i+1])){ + $error_code=5; + chomp($tbl_line[$i]); + if ($logfile){ + print LF "$tbl_line[$i]\tcode:$error_code\n"; + } + next; + } + } + + #Additionnal filters + $error_code = check_error($tbl_variant_chr[$i],$tbl_variant_position[$i],$tbl_variant_refbase[$i],$tbl_variant_coverage[$i],$tbl_variant_readbase_string[$i]); + + } + else { + #Compairing $i and $i-1 for neighbourhood filter + if (($tbl_variant_chr[$i-1] eq $tbl_variant_chr[$i])&&($tbl_variant_position[$i-1]+$MIN_DISTANCE>=$tbl_variant_position[$i])){ + $error_code=5; + chomp($tbl_line[$i]); + if ($logfile){ + print LF "$tbl_line[$i]\tcode:$error_code\n"; + } + next; + } + else { + #Additionnal filters + $error_code = check_error($tbl_variant_chr[$i],$tbl_variant_position[$i],$tbl_variant_refbase[$i],$tbl_variant_coverage[$i],$tbl_variant_readbase_string[$i]); + } + } + if ($error_code == 0){ + print $tbl_line[$i]; + } + else { + chomp($tbl_line[$i]); + if ($logfile){ + print LF "$tbl_line[$i]\tcode:$error_code\n"; + } + } +} + +if ($logfile){ + close (LF); +} + +sub check_error{ + my $current_chromosome = shift; + my $current_position = shift; + my $current_refbase = shift; + my $current_coverage = shift; + my $current_readbase_string = shift; + + # print "test : $current_readbase_string\n"; + + + + #Extraction of insertions + + ################################################################## + # my @IN = $current_readbase_string =~ m/\+[0-9]+[ACGTNacgtn]+/g; + # my @DEL = $current_readbase_string =~ m/\-[0-9]+[ACGTNacgtn]+/g; + # print "IN : @IN\n"; + # print "DEL :@DEL\n"; + #$current_readbase_string=~s/[\+\-][0-9]+[ACGTNacgtn]+//g; + ################################################################## + #!!! marche pas : exemple .+1Ct. correspond a . / +1C / t /. mais le match de l'expression vire +1Ct + ################################################################## + + # => parcours de boucle + my @readbase = split(//,$current_readbase_string); + my $cleaned_readbase_string=""; + my @IN; + my @DEL; + my $current_IN=""; + my $current_DEL=""; + my $current_size=0; + + for (my $i=0;$i<=$#readbase;$i++){ + if ($readbase[$i] eq "+"){ + #Ouverture de IN + $current_IN="+"; + + #Recuperation de la taille + my $sub = substr $current_readbase_string,$i; + if ($sub=~/^\+(\d+)/){ + $current_size = $1; + } + my $remaining_size = $current_size; + while (($remaining_size>0)&&($i<=$#readbase)){ + $i++; + $current_IN.=$readbase[$i]; + if ($readbase[$i]=~ /[ATGCNatgcn]/){ + $remaining_size--; + } + } + push(@IN,$current_IN); + } + elsif ($readbase[$i] eq "-"){ + #Ouverture de DEL + $current_DEL="-"; + + #Recuperation de la taille + my $sub = substr $current_readbase_string,$i; + if ($sub=~/^\-(\d+)/){ + $current_size = $1; + } + my $remaining_size = $current_size; + while (($remaining_size>0)&&($i<=$#readbase)){ + $i++; + $current_DEL.=$readbase[$i]; + if ($readbase[$i]=~ /[ATGCNatgcn]/){ + $remaining_size--; + } + } + push(@DEL,$current_DEL); + + } + else { + #Ajout a la string + $cleaned_readbase_string .= $readbase[$i]; + } + } + + + # print "IN : @IN\n"; + # print "DEL :@DEL\n"; + # print "$cleaned_readbase_string\n"; + + my @current_readbase_array = split(//,$cleaned_readbase_string); + + #Filtering : error detection + + if ($#current_readbase_array+1 != $current_coverage){ + return 9; + #parsing error (couldn't parse the readbase string correctly) + } + elsif ($current_coverage<$MIN_DEPTH){ + return 2; + # 2 : insufficient depth + } + elsif ($current_coverage>$MAX_DEPTH){ + return 6; + # 6 : too much depth + } + else { + if ($#IN>=0){ + if (($cleaned_readbase_string=~/[ACGTNacgtn]/)){ + return 1; + # 1 : variant type overload (multiple variant type detected insertion/deletion/mutation) + } + else { + ########## TEST de coherence des insertions ################ + # for (my $i=0;$i<=$#IN;$i++){ + # if (uc($IN[0]) ne uc($IN[$i])){ + # print uc($IN[0]),"\n"; + # print uc($IN[$i]),"\n"; + # return "1i"; + # } + # } + ########################################################### + + if($#IN+1 < $current_coverage*$MIN_VARIANTFREQUENCY){ + return 3; + # 3 : insufficient variant frequency + } + } + } + elsif ($#DEL>=0){ + if (($cleaned_readbase_string=~/[ACGTNacgtn]/)){ + return 1; + # 1 : variant type overload (multiple variant type detected insertion/deletion/mutation) + } + else { + ########## TEST de coherence des deletions ################ + # for (my $i=0;$i<=$#DEL;$i++){ + # if (uc($DEL[0]) ne uc($DEL[$i])){ + # print uc($DEL[0]),"\n"; + # print uc($DEL[$i]),"\n"; + # return "1d"; + # } + # } + ########################################################### + + if($#DEL+1 < $current_coverage*$MIN_VARIANTFREQUENCY){ + return 3; + # 3 : insufficient variant frequency + } + } + } + else { + my $nbA=0; + $nbA++ while ($current_readbase_string =~ m/A/g); + my $nbC=0; + $nbC++ while ($current_readbase_string =~ m/C/g); + my $nbT=0; + $nbT++ while ($current_readbase_string =~ m/T/g); + my $nbG=0; + $nbG++ while ($current_readbase_string =~ m/G/g); + my $nbN=0; + $nbN++ while ($current_readbase_string =~ m/N/g); + my $nba=0; + $nba++ while ($current_readbase_string =~ m/a/g); + my $nbc=0; + $nbc++ while ($current_readbase_string =~ m/c/g); + my $nbt=0; + $nbt++ while ($current_readbase_string =~ m/t/g); + my $nbg=0; + $nbg++ while ($current_readbase_string =~ m/g/g); + my $nbn=0; + $nbn++ while ($current_readbase_string =~ m/n/g); + + if (($nbA+$nba>0)&&($nbT+$nbt+$nbG+$nbg+$nbC+$nbc+$nbN+$nbn>0)){ + return "1m"; + } + if (($nbT+$nbt>0)&&($nbA+$nba+$nbG+$nbg+$nbC+$nbc+$nbN+$nbn>0)){ + return "1m"; + } + if (($nbG+$nbg>0)&&($nbA+$nba+$nbT+$nbt+$nbC+$nbc+$nbN+$nbn>0)){ + return "1m"; + } + if (($nbC+$nbc>0)&&($nbA+$nba+$nbT+$nbt+$nbG+$nbg+$nbN+$nbn>0)){ + return "1m"; + } + if (($nbN+$nbn>0)&&($nbA+$nba+$nbT+$nbt+$nbG+$nbg+$nbC+$nbc>0)){ + return "1m"; + } + + if ($nbA+$nba >= $current_coverage*$MIN_VARIANTFREQUENCY){ + if (($nbA<$MIN_FORWARDREVERSE)||($nba<$MIN_FORWARDREVERSE)){ + return 4; + # 4 : variant position not covered by forward and reverse reads + } + } + elsif ($nbT+$nbt >= $current_coverage*$MIN_VARIANTFREQUENCY){ + if (($nbT<$MIN_FORWARDREVERSE)||($nbt<$MIN_FORWARDREVERSE)){ + return 4; + # 4 : variant position not covered by forward and reverse reads + } + } + elsif ($nbG+$nbg >= $current_coverage*$MIN_VARIANTFREQUENCY){ + if (($nbG<$MIN_FORWARDREVERSE)||($nbg<$MIN_FORWARDREVERSE)){ + return 4; + # 4 : variant position not covered by forward and reverse reads + } + } + elsif ($nbC+$nbc >= $current_coverage*$MIN_VARIANTFREQUENCY){ + if (($nbC<$MIN_FORWARDREVERSE)||($nbc<$MIN_FORWARDREVERSE)){ + return 4; + # 4 : variant position not covered by forward and reverse reads + } + } + elsif ($nbN+$nbn >= $current_coverage*$MIN_VARIANTFREQUENCY){ + if (($nbN<$MIN_FORWARDREVERSE)||($nbn<$MIN_FORWARDREVERSE)){ + return 4; + # 4 : variant position not covered by forward and reverse reads + } + } + else { + return 3; + # 3 : insufficient variant frequency + } + } + } + + return 0; +} diff -r 1d37c16ab674 -r ad321ff1b67d rapsodyn/mpileupfilter.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/rapsodyn/mpileupfilter.xml Thu Aug 14 08:12:06 2014 -0400 @@ -0,0 +1,23 @@ + +Filter mpileup file entry + + mpileupfilter.pl -input_file $input_file -min_depth $min_depth -min_frequency $min_frequency -min_distance $min_distance -min_forward_and_reverse $min_forward_and_reverse -max_depth $max_depth > $output_file + + + + + + + + + + + + + + + + + + + diff -r 1d37c16ab674 -r ad321ff1b67d rapsodyn/mpileupfilter/mpileupfilter.pl --- a/rapsodyn/mpileupfilter/mpileupfilter.pl Tue Aug 12 08:41:47 2014 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,394 +0,0 @@ -#!/usr/bin/perl -use strict; -use Getopt::Long; - -# -# Filter a pileup file on forward/reverse presence and %read having the variant -# The error code -# 1 : multiple variant type detected insertion/deletion/mutation -# 1i : inconsistency in insertion -# 1d : inconsistency in deletion -# 1m : inconsistency in mutation -# 2 : insufficient depth -# 3 : insufficient variant frequency -# 4 : variant position not covered by forward and reverse reads -# 5 : variant with other variant in neighbourhood -# 6 : too much depth -# 8 : parsing error (couldn't parse the mpileup line correctly) -# 9 : parsing error (couldn't parse the readbase string correctly) - - -my $inputfile; -my $logfile; -my $MIN_DISTANCE=0; -my $MIN_VARIANTFREQUENCY=0; -my $MIN_FORWARDREVERSE=0; -my $MIN_DEPTH=0; -my $MAX_DEPTH=500; -my $VERBOSE=0; -my $ONLY_UNFILTERED_VARIANT="OFF"; - -if ($#ARGV<0){ - print "\n"; - print "perl 020_FilterPileupv6 -input_file [OPTION]\n"; - print "-input_file \tinputfile in mpileup format\n"; - print "-log_file \tlogfile containing discarded mpileup lines and the errorcode associated\n"; - print "-min_depth \tminimum depth required [1]\n"; - print "-max_depth \tmaximim depth (position with more coverage will be discarded) [100]\n"; - print "-min_frequency \tminimum variant frequency (0->1) [1] (default 1 => 100% reads show the variant at this position)\n"; - print "-min_distance \tminimum distance between variant [0]\n"; - print "-min_forward_and_reverse \tminimum number of reads in forward and reverse covering the variant required [0]\n"; - print "\n"; - exit(0); -} - -GetOptions ( -"input_file=s" => \$inputfile, -"log_file=s" => \$logfile, -"min_depth=i" => \$MIN_DEPTH, -"max_depth=i" => \$MAX_DEPTH, -"min_frequency=f" => \$MIN_VARIANTFREQUENCY, -"min_distance=i" => \$MIN_DISTANCE, -"min_forward_and_reverse=i" => \$MIN_FORWARDREVERSE, -"variant_only=s" => \$ONLY_UNFILTERED_VARIANT, -"v=i" => \$VERBOSE -) or die("Error in command line arguments\n"); - - -open(IF, $inputfile) or die("Can't open $inputfile\n"); - -my @tbl_line; -my @tbl_variant_position; -my @tbl_variant_chr; -my @tbl_variant_refbase; -my @tbl_variant_coverage; -my @tbl_variant_readbase_string; -my @tbl_variant_quality_string; - -#Extraction des variants -my $nb_line=0; -while (my $line=){ - $nb_line++; - if (($nb_line % 1000000 == 0)&&($VERBOSE==1)){ - print "$nb_line\n"; - } - my $error_code=0; - if ($line=~/(.*?)\s+(\d+)\s+([ATGCN])\s+(\d+)\s+(.*?)\s+(.*?)$/){ - my $current_chromosome = $1; - my $current_position = $2; - my $current_refbase = $3; - my $current_coverage = $4; - my $current_readbase_string = $5; - my $current_quality_string = $6; - - #Suppression of mPileUp special character - $current_readbase_string =~ s/\$//g; #the read start at this position - $current_readbase_string =~ s/\^.//g; #the read end at this position followed by quality char - - if ($current_readbase_string =~ /[ATGCNatgcn\d]/){ - push(@tbl_line,$line); - push(@tbl_variant_chr,$current_chromosome); - push(@tbl_variant_position,$current_position); - push(@tbl_variant_refbase,$current_refbase); - push(@tbl_variant_coverage,$current_coverage); - push(@tbl_variant_readbase_string,$current_readbase_string); - push(@tbl_variant_quality_string,$current_quality_string); - if ($ONLY_UNFILTERED_VARIANT eq "ON"){ - print $line; - } - - } - else { - #Position with no variant - } - - } - else { - #Error Parsing - print STDERR "$line #8"; - } -} -close(IF); - -if ($ONLY_UNFILTERED_VARIANT eq "ON"){ - exit(0); -} - -####Checking the distance between variant and other filter - -if ($logfile){ - open(LF,">$logfile") or die ("Cant't open $logfile\n"); -} - -for (my $i=0;$i<=$#tbl_line;$i++){ - # print "ligne : $tbl_line[$i]\n"; - - my $error_code=0; - if ($i==0){ - #Comparing $i and $i+1 for neighbourhood filter; - if ($#tbl_line>0){ - if (($tbl_variant_chr[$i+1] eq $tbl_variant_chr[$i])&&($tbl_variant_position[$i]+$MIN_DISTANCE>=$tbl_variant_position[$i+1])){ - $error_code=5; - chomp($tbl_line[$i]); - if ($logfile){ - print LF "$tbl_line[$i]\tcode:$error_code\n"; - } - next; - } - } - - #Additionnal filters - $error_code = check_error($tbl_variant_chr[$i],$tbl_variant_position[$i],$tbl_variant_refbase[$i],$tbl_variant_coverage[$i],$tbl_variant_readbase_string[$i]); - - } - else { - #Compairing $i and $i-1 for neighbourhood filter - if (($tbl_variant_chr[$i-1] eq $tbl_variant_chr[$i])&&($tbl_variant_position[$i-1]+$MIN_DISTANCE>=$tbl_variant_position[$i])){ - $error_code=5; - chomp($tbl_line[$i]); - if ($logfile){ - print LF "$tbl_line[$i]\tcode:$error_code\n"; - } - next; - } - else { - #Additionnal filters - $error_code = check_error($tbl_variant_chr[$i],$tbl_variant_position[$i],$tbl_variant_refbase[$i],$tbl_variant_coverage[$i],$tbl_variant_readbase_string[$i]); - } - } - if ($error_code == 0){ - print $tbl_line[$i]; - } - else { - chomp($tbl_line[$i]); - if ($logfile){ - print LF "$tbl_line[$i]\tcode:$error_code\n"; - } - } -} - -if ($logfile){ - close (LF); -} - -sub check_error{ - my $current_chromosome = shift; - my $current_position = shift; - my $current_refbase = shift; - my $current_coverage = shift; - my $current_readbase_string = shift; - - # print "test : $current_readbase_string\n"; - - - - #Extraction of insertions - - ################################################################## - # my @IN = $current_readbase_string =~ m/\+[0-9]+[ACGTNacgtn]+/g; - # my @DEL = $current_readbase_string =~ m/\-[0-9]+[ACGTNacgtn]+/g; - # print "IN : @IN\n"; - # print "DEL :@DEL\n"; - #$current_readbase_string=~s/[\+\-][0-9]+[ACGTNacgtn]+//g; - ################################################################## - #!!! marche pas : exemple .+1Ct. correspond a . / +1C / t /. mais le match de l'expression vire +1Ct - ################################################################## - - # => parcours de boucle - my @readbase = split(//,$current_readbase_string); - my $cleaned_readbase_string=""; - my @IN; - my @DEL; - my $current_IN=""; - my $current_DEL=""; - my $current_size=0; - - for (my $i=0;$i<=$#readbase;$i++){ - if ($readbase[$i] eq "+"){ - #Ouverture de IN - $current_IN="+"; - - #Recuperation de la taille - my $sub = substr $current_readbase_string,$i; - if ($sub=~/^\+(\d+)/){ - $current_size = $1; - } - my $remaining_size = $current_size; - while (($remaining_size>0)&&($i<=$#readbase)){ - $i++; - $current_IN.=$readbase[$i]; - if ($readbase[$i]=~ /[ATGCNatgcn]/){ - $remaining_size--; - } - } - push(@IN,$current_IN); - } - elsif ($readbase[$i] eq "-"){ - #Ouverture de DEL - $current_DEL="-"; - - #Recuperation de la taille - my $sub = substr $current_readbase_string,$i; - if ($sub=~/^\-(\d+)/){ - $current_size = $1; - } - my $remaining_size = $current_size; - while (($remaining_size>0)&&($i<=$#readbase)){ - $i++; - $current_DEL.=$readbase[$i]; - if ($readbase[$i]=~ /[ATGCNatgcn]/){ - $remaining_size--; - } - } - push(@DEL,$current_DEL); - - } - else { - #Ajout a la string - $cleaned_readbase_string .= $readbase[$i]; - } - } - - - # print "IN : @IN\n"; - # print "DEL :@DEL\n"; - # print "$cleaned_readbase_string\n"; - - my @current_readbase_array = split(//,$cleaned_readbase_string); - - #Filtering : error detection - - if ($#current_readbase_array+1 != $current_coverage){ - return 9; - #parsing error (couldn't parse the readbase string correctly) - } - elsif ($current_coverage<$MIN_DEPTH){ - return 2; - # 2 : insufficient depth - } - elsif ($current_coverage>$MAX_DEPTH){ - return 6; - # 6 : too much depth - } - else { - if ($#IN>=0){ - if (($cleaned_readbase_string=~/[ACGTNacgtn]/)){ - return 1; - # 1 : variant type overload (multiple variant type detected insertion/deletion/mutation) - } - else { - ########## TEST de coherence des insertions ################ - # for (my $i=0;$i<=$#IN;$i++){ - # if (uc($IN[0]) ne uc($IN[$i])){ - # print uc($IN[0]),"\n"; - # print uc($IN[$i]),"\n"; - # return "1i"; - # } - # } - ########################################################### - - if($#IN+1 < $current_coverage*$MIN_VARIANTFREQUENCY){ - return 3; - # 3 : insufficient variant frequency - } - } - } - elsif ($#DEL>=0){ - if (($cleaned_readbase_string=~/[ACGTNacgtn]/)){ - return 1; - # 1 : variant type overload (multiple variant type detected insertion/deletion/mutation) - } - else { - ########## TEST de coherence des deletions ################ - # for (my $i=0;$i<=$#DEL;$i++){ - # if (uc($DEL[0]) ne uc($DEL[$i])){ - # print uc($DEL[0]),"\n"; - # print uc($DEL[$i]),"\n"; - # return "1d"; - # } - # } - ########################################################### - - if($#DEL+1 < $current_coverage*$MIN_VARIANTFREQUENCY){ - return 3; - # 3 : insufficient variant frequency - } - } - } - else { - my $nbA=0; - $nbA++ while ($current_readbase_string =~ m/A/g); - my $nbC=0; - $nbC++ while ($current_readbase_string =~ m/C/g); - my $nbT=0; - $nbT++ while ($current_readbase_string =~ m/T/g); - my $nbG=0; - $nbG++ while ($current_readbase_string =~ m/G/g); - my $nbN=0; - $nbN++ while ($current_readbase_string =~ m/N/g); - my $nba=0; - $nba++ while ($current_readbase_string =~ m/a/g); - my $nbc=0; - $nbc++ while ($current_readbase_string =~ m/c/g); - my $nbt=0; - $nbt++ while ($current_readbase_string =~ m/t/g); - my $nbg=0; - $nbg++ while ($current_readbase_string =~ m/g/g); - my $nbn=0; - $nbn++ while ($current_readbase_string =~ m/n/g); - - if (($nbA+$nba>0)&&($nbT+$nbt+$nbG+$nbg+$nbC+$nbc+$nbN+$nbn>0)){ - return "1m"; - } - if (($nbT+$nbt>0)&&($nbA+$nba+$nbG+$nbg+$nbC+$nbc+$nbN+$nbn>0)){ - return "1m"; - } - if (($nbG+$nbg>0)&&($nbA+$nba+$nbT+$nbt+$nbC+$nbc+$nbN+$nbn>0)){ - return "1m"; - } - if (($nbC+$nbc>0)&&($nbA+$nba+$nbT+$nbt+$nbG+$nbg+$nbN+$nbn>0)){ - return "1m"; - } - if (($nbN+$nbn>0)&&($nbA+$nba+$nbT+$nbt+$nbG+$nbg+$nbC+$nbc>0)){ - return "1m"; - } - - if ($nbA+$nba >= $current_coverage*$MIN_VARIANTFREQUENCY){ - if (($nbA<$MIN_FORWARDREVERSE)||($nba<$MIN_FORWARDREVERSE)){ - return 4; - # 4 : variant position not covered by forward and reverse reads - } - } - elsif ($nbT+$nbt >= $current_coverage*$MIN_VARIANTFREQUENCY){ - if (($nbT<$MIN_FORWARDREVERSE)||($nbt<$MIN_FORWARDREVERSE)){ - return 4; - # 4 : variant position not covered by forward and reverse reads - } - } - elsif ($nbG+$nbg >= $current_coverage*$MIN_VARIANTFREQUENCY){ - if (($nbG<$MIN_FORWARDREVERSE)||($nbg<$MIN_FORWARDREVERSE)){ - return 4; - # 4 : variant position not covered by forward and reverse reads - } - } - elsif ($nbC+$nbc >= $current_coverage*$MIN_VARIANTFREQUENCY){ - if (($nbC<$MIN_FORWARDREVERSE)||($nbc<$MIN_FORWARDREVERSE)){ - return 4; - # 4 : variant position not covered by forward and reverse reads - } - } - elsif ($nbN+$nbn >= $current_coverage*$MIN_VARIANTFREQUENCY){ - if (($nbN<$MIN_FORWARDREVERSE)||($nbn<$MIN_FORWARDREVERSE)){ - return 4; - # 4 : variant position not covered by forward and reverse reads - } - } - else { - return 3; - # 3 : insufficient variant frequency - } - } - } - - return 0; -} diff -r 1d37c16ab674 -r ad321ff1b67d rapsodyn/mpileupfilter/mpileupfilter.xml --- a/rapsodyn/mpileupfilter/mpileupfilter.xml Tue Aug 12 08:41:47 2014 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,23 +0,0 @@ - -Filter mpileup file entry - - mpileupfilter.pl -input_file $input_file -min_depth $min_depth -min_frequency $min_frequency -min_distance $min_distance -min_forward_and_reverse $min_forward_and_reverse -max_depth $max_depth > $output_file - - - - - - - - - - - - - - - - - - - diff -r 1d37c16ab674 -r ad321ff1b67d rapsodyn/mpileupfilteronblastxml.pl --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/rapsodyn/mpileupfilteronblastxml.pl Thu Aug 14 08:12:06 2014 -0400 @@ -0,0 +1,287 @@ +#!/usr/bin/perl +use strict; +use warnings; +use Getopt::Long; + +my $input_variant_file; +my $input_blastxml_file; +my $window_length = 50; +my $nb_mismatch_max = 2; + +GetOptions ( +"input_variant_file=s" => \$input_variant_file, +"input_blastxml_file=s" => \$input_blastxml_file, +"window_length=i" => \$window_length, +"nb_mismatch_max=i" => \$nb_mismatch_max +) or die("Error in command line arguments\n"); + + +open(INB, $input_blastxml_file) or die ("Can't open $input_blastxml_file\n"); + + +my $iteration_stop=""; +my $hit_stop=""; +my $hsp_stop=""; +my $query_flag_start=""; +my $query_flag_stop=""; +my $subject_flag_start=""; +my $subject_flag_stop=""; +my $query_HSP_flag_from_start=""; +my $query_HSP_flag_from_stop=""; +my $query_HSP_flag_to_start=""; +my $query_HSP_flag_to_stop=""; +my $subject_HSP_flag_from_start=""; +my $subject_HSP_flag_from_stop=""; +my $subject_HSP_flag_to_start=""; +my $subject_HSP_flag_to_stop=""; +my $query_HSP_flag_seq_start=""; +my $query_HSP_flag_seq_stop=""; +my $subject_HSP_flag_seq_start=""; +my $subject_HSP_flag_seq_stop=""; +my $HSP_midline_flag_start=""; +my $HSP_midline_flag_stop=""; + +my %hash; + +my $compt=0; +my $current_query=""; +my $current_subject=""; +my $current_HSP_query_from; +my $current_HSP_query_to; +my $current_HSP_subject_from; +my $current_HSP_subject_to; +my $current_HSP_midline; +my $current_HSP_qseq; +my $current_HSP_hseq; +my $current_query_short; + +while (my $ligne = ) { + + if ($ligne=~/$subject_flag_start(.*?)$subject_flag_stop/){ + $current_subject=$1; + #print "--",$1,"\n"; + } + + if ($ligne=~/$query_flag_start(.*?)$query_flag_stop/){ + $current_query=$1; + $current_query_short=$current_query; + if ($current_query =~ /^(.*?_\d+)\_/){ + $current_query_short=$1; + } + if (!$hash{$current_query_short}){ + $hash{$current_query_short}=0; + } + + + #print "--",$1,"\n"; + } + if ($ligne=~/$query_HSP_flag_from_start(.*?)$query_HSP_flag_from_stop/){ + $current_HSP_query_from=$1; + #print "--",$1,"..."; + } + if ($ligne=~/$query_HSP_flag_to_start(.*?)$query_HSP_flag_to_stop/){ + $current_HSP_query_to=$1; + #print $1,"\n"; + } + if ($ligne=~/$subject_HSP_flag_from_start(.*?)$subject_HSP_flag_from_stop/){ + $current_HSP_subject_from=$1; + #print "--",$1,"..."; + } + if ($ligne=~/$subject_HSP_flag_to_start(.*?)$subject_HSP_flag_to_stop/){ + $current_HSP_subject_to=$1; + #print $1,"\n"; + } + if ($ligne=~/$query_HSP_flag_seq_start(.*?)$query_HSP_flag_seq_stop/){ + $current_HSP_qseq=$1; + #print "--",$1,"\n"; + } + if ($ligne=~/$subject_HSP_flag_seq_start(.*?)$subject_HSP_flag_seq_stop/){ + $current_HSP_hseq=$1; + #print "--",$1,"\n"; + } + if ($ligne=~/$HSP_midline_flag_start(.*?)$HSP_midline_flag_stop/){ + $current_HSP_midline=$1; + #print "--",$1,"\n"; + } + + if ($ligne=~/$hsp_stop/){ + if ($current_HSP_query_from){ + #print "\ntest1\n"; + #print "Query : $current_query\n"; + #print "Subject : $current_subject\n"; + #print "$current_HSP_query_from ... $current_HSP_query_to\n"; + #print "$current_HSP_subject_from ... $current_HSP_subject_to\n"; + for (my $i=1;$i<$current_HSP_query_from;$i++){ + $current_HSP_qseq = "N".$current_HSP_qseq; + $current_HSP_midline = " ".$current_HSP_midline; + $current_HSP_hseq = "N".$current_HSP_hseq; + } + for (my $i=$current_HSP_query_to+1;$i<=$window_length*2+1;$i++){ + $current_HSP_qseq .= "N"; + $current_HSP_midline .= " "; + $current_HSP_hseq .= "N"; + } + + my @qseq = split(//,$current_HSP_qseq); + my @midline = split(//,$current_HSP_midline); + my @hseq = split(//,$current_HSP_hseq); + + my $comptbase=0; + my $compt5p=0; + my $compt3p=0; + for (my $i=0;$i<=$#qseq;$i++){ + if ($qseq[$i] ne "-"){ + $comptbase++; # Va de 1 -> $window_length *2 +1 + } + if ($midline[$i] eq " "){ + if ($comptbase<=$window_length){ #1 -> $window_length + $compt5p++; + } + elsif ($comptbase>=$window_length+2){ #$window_length+2 -> $window_length *2 + 1; + $compt3p++; + } + else { #+1-$window_length*2+1 + + } + } + } + if (($compt3p<=$nb_mismatch_max)||($compt5p<=$nb_mismatch_max)){ + $hash{$current_query_short}++; + } + + #print "$current_HSP_qseq\n"; + #print "$current_HSP_midline\n"; + #print "$current_HSP_hseq\n"; + #print "$compt5p // $compt3p\n"; + #print $hash{$current_query_short},"\n"; + + } + + undef $current_HSP_query_from; + undef $current_HSP_query_to; + undef $current_HSP_subject_from; + undef $current_HSP_subject_to; + $current_HSP_midline=""; + $current_HSP_qseq=""; + $current_HSP_hseq=""; + } + + if ($ligne=~/$iteration_stop/){ + if ($current_HSP_query_from){ + #print "\ntest2\n"; + #print "Query : $current_query\n"; + #print "Subject : $current_subject\n"; + #print "$current_HSP_query_from ... $current_HSP_query_to\n"; + #print "$current_HSP_subject_from ... $current_HSP_subject_to\n"; + for (my $i=1;$i<$current_HSP_query_from;$i++){ + $current_HSP_qseq = "N".$current_HSP_qseq; + $current_HSP_midline = " ".$current_HSP_midline; + $current_HSP_hseq = "N".$current_HSP_hseq; + } + for (my $i=$current_HSP_query_to+1;$i<=$window_length*2+1;$i++){ + $current_HSP_qseq .= "N"; + $current_HSP_midline .= " "; + $current_HSP_hseq .= "N"; + } + + my @qseq = split(//,$current_HSP_qseq); + my @midline = split(//,$current_HSP_midline); + my @hseq = split(//,$current_HSP_hseq); + + my $comptbase=0; + my $compt5p=0; + my $compt3p=0; + for (my $i=0;$i<=$#qseq;$i++){ + if ($qseq[$i] ne "-"){ + $comptbase++; # Va de 1 -> $window_length *2 +1 + } + if ($midline[$i] eq " "){ + if ($comptbase<=$window_length){ #1 -> $window_length + $compt5p++; + } + elsif ($comptbase>=$window_length+2){ #$window_length+2 -> $window_length *2 + 1; + $compt3p++; + } + else { #+1-$window_length*2+1 + + } + } + } + if (($compt3p<=$nb_mismatch_max)||($compt5p<=$nb_mismatch_max)){ + $hash{$current_query_short}++; + } + + #print "$current_HSP_qseq\n"; + #print "$current_HSP_midline\n"; + #print "$current_HSP_hseq\n"; + #print "$compt5p // $compt3p\n"; + #print $hash{$current_query_short},"\n"; + } + $current_query=""; + $current_query_short=""; + $current_subject=""; + undef $current_HSP_query_from; + undef $current_HSP_query_to; + undef $current_HSP_subject_from; + undef $current_HSP_subject_to; + $current_HSP_midline=""; + $current_HSP_qseq=""; + $current_HSP_hseq=""; + } + +} + +close (INB); + +# foreach my $key (sort trinombre keys %hash){ + # print $key," ",$hash{$key},"\n"; + +# } +# exit(0); + +open(INV, $input_variant_file) or die ("Can't open $input_variant_file\n"); + +while (my $ligne = ) { + my @champs = split (/\s+/,$ligne); + my $header = $champs[0]."_".$champs[1]; + + if ($hash{$header}){ + if ($hash{$header}==1){ + print "$ligne"; + } + else { + #print $hash{$header}," $ligne"; + } + } + else { + print STDERR "No blast result for ",$header,"\n"; + } + + +} + +close(INV); + + + +sub trinombre { + my $chra=$a; + my $posa=0; + my $chrb=$b; + my $posb=0; + + if ($a =~/(.*?)\_(\d+)/){ + $chra=$1; + $posa=$2; + } + if ($b =~/(.*?)\_(\d+)/){ + $chrb=$1; + $posb=$2; + } + + + $chra cmp $chrb + || + $posa <=> $posb; +} + diff -r 1d37c16ab674 -r ad321ff1b67d rapsodyn/mpileupfilteronblastxml.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/rapsodyn/mpileupfilteronblastxml.xml Thu Aug 14 08:12:06 2014 -0400 @@ -0,0 +1,21 @@ + +Filter mpileup with blast results + + mpileupfilteronblastxml.pl -input_variant_file $input_variant_file -input_blastxml_file $input_blastxml_file -window_length $window_length -nb_mismatch_max $nb_mismatch_max > $output_file + + + + + + + + + + + + + + + + + diff -r 1d37c16ab674 -r ad321ff1b67d rapsodyn/mpileupfilteronblastxml/mpileupfilteronblastxml.pl --- a/rapsodyn/mpileupfilteronblastxml/mpileupfilteronblastxml.pl Tue Aug 12 08:41:47 2014 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,287 +0,0 @@ -#!/usr/bin/perl -use strict; -use warnings; -use Getopt::Long; - -my $input_variant_file; -my $input_blastxml_file; -my $window_length = 50; -my $nb_mismatch_max = 2; - -GetOptions ( -"input_variant_file=s" => \$input_variant_file, -"input_blastxml_file=s" => \$input_blastxml_file, -"window_length=i" => \$window_length, -"nb_mismatch_max=i" => \$nb_mismatch_max -) or die("Error in command line arguments\n"); - - -open(INB, $input_blastxml_file) or die ("Can't open $input_blastxml_file\n"); - - -my $iteration_stop=""; -my $hit_stop=""; -my $hsp_stop=""; -my $query_flag_start=""; -my $query_flag_stop=""; -my $subject_flag_start=""; -my $subject_flag_stop=""; -my $query_HSP_flag_from_start=""; -my $query_HSP_flag_from_stop=""; -my $query_HSP_flag_to_start=""; -my $query_HSP_flag_to_stop=""; -my $subject_HSP_flag_from_start=""; -my $subject_HSP_flag_from_stop=""; -my $subject_HSP_flag_to_start=""; -my $subject_HSP_flag_to_stop=""; -my $query_HSP_flag_seq_start=""; -my $query_HSP_flag_seq_stop=""; -my $subject_HSP_flag_seq_start=""; -my $subject_HSP_flag_seq_stop=""; -my $HSP_midline_flag_start=""; -my $HSP_midline_flag_stop=""; - -my %hash; - -my $compt=0; -my $current_query=""; -my $current_subject=""; -my $current_HSP_query_from; -my $current_HSP_query_to; -my $current_HSP_subject_from; -my $current_HSP_subject_to; -my $current_HSP_midline; -my $current_HSP_qseq; -my $current_HSP_hseq; -my $current_query_short; - -while (my $ligne = ) { - - if ($ligne=~/$subject_flag_start(.*?)$subject_flag_stop/){ - $current_subject=$1; - #print "--",$1,"\n"; - } - - if ($ligne=~/$query_flag_start(.*?)$query_flag_stop/){ - $current_query=$1; - $current_query_short=$current_query; - if ($current_query =~ /^(.*?_\d+)\_/){ - $current_query_short=$1; - } - if (!$hash{$current_query_short}){ - $hash{$current_query_short}=0; - } - - - #print "--",$1,"\n"; - } - if ($ligne=~/$query_HSP_flag_from_start(.*?)$query_HSP_flag_from_stop/){ - $current_HSP_query_from=$1; - #print "--",$1,"..."; - } - if ($ligne=~/$query_HSP_flag_to_start(.*?)$query_HSP_flag_to_stop/){ - $current_HSP_query_to=$1; - #print $1,"\n"; - } - if ($ligne=~/$subject_HSP_flag_from_start(.*?)$subject_HSP_flag_from_stop/){ - $current_HSP_subject_from=$1; - #print "--",$1,"..."; - } - if ($ligne=~/$subject_HSP_flag_to_start(.*?)$subject_HSP_flag_to_stop/){ - $current_HSP_subject_to=$1; - #print $1,"\n"; - } - if ($ligne=~/$query_HSP_flag_seq_start(.*?)$query_HSP_flag_seq_stop/){ - $current_HSP_qseq=$1; - #print "--",$1,"\n"; - } - if ($ligne=~/$subject_HSP_flag_seq_start(.*?)$subject_HSP_flag_seq_stop/){ - $current_HSP_hseq=$1; - #print "--",$1,"\n"; - } - if ($ligne=~/$HSP_midline_flag_start(.*?)$HSP_midline_flag_stop/){ - $current_HSP_midline=$1; - #print "--",$1,"\n"; - } - - if ($ligne=~/$hsp_stop/){ - if ($current_HSP_query_from){ - #print "\ntest1\n"; - #print "Query : $current_query\n"; - #print "Subject : $current_subject\n"; - #print "$current_HSP_query_from ... $current_HSP_query_to\n"; - #print "$current_HSP_subject_from ... $current_HSP_subject_to\n"; - for (my $i=1;$i<$current_HSP_query_from;$i++){ - $current_HSP_qseq = "N".$current_HSP_qseq; - $current_HSP_midline = " ".$current_HSP_midline; - $current_HSP_hseq = "N".$current_HSP_hseq; - } - for (my $i=$current_HSP_query_to+1;$i<=$window_length*2+1;$i++){ - $current_HSP_qseq .= "N"; - $current_HSP_midline .= " "; - $current_HSP_hseq .= "N"; - } - - my @qseq = split(//,$current_HSP_qseq); - my @midline = split(//,$current_HSP_midline); - my @hseq = split(//,$current_HSP_hseq); - - my $comptbase=0; - my $compt5p=0; - my $compt3p=0; - for (my $i=0;$i<=$#qseq;$i++){ - if ($qseq[$i] ne "-"){ - $comptbase++; # Va de 1 -> $window_length *2 +1 - } - if ($midline[$i] eq " "){ - if ($comptbase<=$window_length){ #1 -> $window_length - $compt5p++; - } - elsif ($comptbase>=$window_length+2){ #$window_length+2 -> $window_length *2 + 1; - $compt3p++; - } - else { #+1-$window_length*2+1 - - } - } - } - if (($compt3p<=$nb_mismatch_max)||($compt5p<=$nb_mismatch_max)){ - $hash{$current_query_short}++; - } - - #print "$current_HSP_qseq\n"; - #print "$current_HSP_midline\n"; - #print "$current_HSP_hseq\n"; - #print "$compt5p // $compt3p\n"; - #print $hash{$current_query_short},"\n"; - - } - - undef $current_HSP_query_from; - undef $current_HSP_query_to; - undef $current_HSP_subject_from; - undef $current_HSP_subject_to; - $current_HSP_midline=""; - $current_HSP_qseq=""; - $current_HSP_hseq=""; - } - - if ($ligne=~/$iteration_stop/){ - if ($current_HSP_query_from){ - #print "\ntest2\n"; - #print "Query : $current_query\n"; - #print "Subject : $current_subject\n"; - #print "$current_HSP_query_from ... $current_HSP_query_to\n"; - #print "$current_HSP_subject_from ... $current_HSP_subject_to\n"; - for (my $i=1;$i<$current_HSP_query_from;$i++){ - $current_HSP_qseq = "N".$current_HSP_qseq; - $current_HSP_midline = " ".$current_HSP_midline; - $current_HSP_hseq = "N".$current_HSP_hseq; - } - for (my $i=$current_HSP_query_to+1;$i<=$window_length*2+1;$i++){ - $current_HSP_qseq .= "N"; - $current_HSP_midline .= " "; - $current_HSP_hseq .= "N"; - } - - my @qseq = split(//,$current_HSP_qseq); - my @midline = split(//,$current_HSP_midline); - my @hseq = split(//,$current_HSP_hseq); - - my $comptbase=0; - my $compt5p=0; - my $compt3p=0; - for (my $i=0;$i<=$#qseq;$i++){ - if ($qseq[$i] ne "-"){ - $comptbase++; # Va de 1 -> $window_length *2 +1 - } - if ($midline[$i] eq " "){ - if ($comptbase<=$window_length){ #1 -> $window_length - $compt5p++; - } - elsif ($comptbase>=$window_length+2){ #$window_length+2 -> $window_length *2 + 1; - $compt3p++; - } - else { #+1-$window_length*2+1 - - } - } - } - if (($compt3p<=$nb_mismatch_max)||($compt5p<=$nb_mismatch_max)){ - $hash{$current_query_short}++; - } - - #print "$current_HSP_qseq\n"; - #print "$current_HSP_midline\n"; - #print "$current_HSP_hseq\n"; - #print "$compt5p // $compt3p\n"; - #print $hash{$current_query_short},"\n"; - } - $current_query=""; - $current_query_short=""; - $current_subject=""; - undef $current_HSP_query_from; - undef $current_HSP_query_to; - undef $current_HSP_subject_from; - undef $current_HSP_subject_to; - $current_HSP_midline=""; - $current_HSP_qseq=""; - $current_HSP_hseq=""; - } - -} - -close (INB); - -# foreach my $key (sort trinombre keys %hash){ - # print $key," ",$hash{$key},"\n"; - -# } -# exit(0); - -open(INV, $input_variant_file) or die ("Can't open $input_variant_file\n"); - -while (my $ligne = ) { - my @champs = split (/\s+/,$ligne); - my $header = $champs[0]."_".$champs[1]; - - if ($hash{$header}){ - if ($hash{$header}==1){ - print "$ligne"; - } - else { - #print $hash{$header}," $ligne"; - } - } - else { - print STDERR "No blast result for ",$header,"\n"; - } - - -} - -close(INV); - - - -sub trinombre { - my $chra=$a; - my $posa=0; - my $chrb=$b; - my $posb=0; - - if ($a =~/(.*?)\_(\d+)/){ - $chra=$1; - $posa=$2; - } - if ($b =~/(.*?)\_(\d+)/){ - $chrb=$1; - $posb=$2; - } - - - $chra cmp $chrb - || - $posa <=> $posb; -} - diff -r 1d37c16ab674 -r ad321ff1b67d rapsodyn/mpileupfilteronblastxml/mpileupfilteronblastxml.xml --- a/rapsodyn/mpileupfilteronblastxml/mpileupfilteronblastxml.xml Tue Aug 12 08:41:47 2014 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,21 +0,0 @@ - -Filter mpileup with blast results - - mpileupfilteronblastxml.pl -input_variant_file $input_variant_file -input_blastxml_file $input_blastxml_file -window_length $window_length -nb_mismatch_max $nb_mismatch_max > $output_file - - - - - - - - - - - - - - - - - diff -r 1d37c16ab674 -r ad321ff1b67d rapsodyn/repository_dependencies.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/rapsodyn/repository_dependencies.xml Thu Aug 14 08:12:06 2014 -0400 @@ -0,0 +1,10 @@ + + + + + + + + + +