Mercurial > repos > mcharles > rapsosnp
changeset 15:56d328bce3a7 draft default tip
Uploaded
line wrap: on
line diff
# rapsodyn/CreateMatrixMultiple.pl -- body of the script from line 11 onwards.
#
# The statement that ends just above this section,
#     ) or die("Error in command line arguments\n");
# starts outside this section and is not reproduced here.
# NOTE(review): $input_matrix_files is presumably filled in by that option
# parsing -- confirm against the top of the file.
#
# Purpose: merge several single-genotype SNP matrices (tab-separated, header
# "Chrom Pos Ref <genotype>") into one matrix printed on STDOUT.  Only
# positions where at least one genotype call differs from the reference base
# are printed, ordered by chromosome name (string order) and then by
# position (numeric order).  A genotype with no line for a position gets "NA".
#
# Malformed input now terminates the script through die(), i.e. with the
# message on STDERR and a non-zero exit status, instead of exit(0).

my @files = defined $input_matrix_files
    ? split(/,/, $input_matrix_files)
    : ();

print_merged_matrix(\@files, \*STDOUT);

# Split one data line of a matrix file into (chromosome, position,
# reference base, variant call).
#   $line - raw line as read from the file (not chomped, not blank)
# Returns the four captured values as a list.
# Dies with "Unable to detect <column> in matrix file" on the first column
# that does not match its expected pattern.
sub parse_matrix_line {
    my ($line) = @_;

    # Runs of tabs count as a single separator, as in the original parser.
    my @fields = split(/\t+/, $line);

    my @columns = (
        [ 'chromosome',     qr/^\s*([\w\-]+)\s*$/   ],
        [ 'position',       qr/^\s*(\d+)\s*$/       ],
        [ 'reference base', qr/^\s*([ATGCNX])\s*$/i ],
        [ 'variant',        qr/^\s*([\w\/]+)\s*$/i  ],
    );

    my @values;
    for my $i (0 .. $#columns) {
        my ($label, $pattern) = @{ $columns[$i] };

        # A short line leaves trailing columns undefined; treat them as
        # empty so they fail the pattern without an "uninitialized" warning.
        my $field = defined $fields[$i] ? $fields[$i] : '';

        if ($field =~ $pattern) {
            push @values, $1;
        }
        else {
            die "Unable to detect $label in matrix file\n$line\n";
        }
    }

    return @values;
}

# Read one single-genotype matrix file.
#   $path - file to read
# Returns ($genotype_name, \%variant_of, \%ref_of); both hashes are keyed by
# "chromosome#position".
# Dies if the file cannot be opened, if the header line is not recognised,
# or if a data line is malformed.
sub read_matrix_file {
    my ($path) = @_;

    # Three-argument open with a lexical handle: the file name can no longer
    # be interpreted as an open mode or a pipe.
    open(my $fh, '<', $path) or die("Can't open file $path: $!\n");

    my $header = <$fh>;
    $header = '' unless defined $header;    # empty file: report a bad header

    my $genotype;
    if ($header =~ /Chrom\s*Pos\s*Ref\s*(.*?)\s*$/) {
        $genotype = $1;
    }
    else {
        close $fh;
        die "Unable to recognize header in matrix file\n$header\n";
    }

    my %variant_of;
    my %ref_of;
    while (my $line = <$fh>) {
        next if $line =~ /^\s*$/;           # skip blank lines

        my ($chr, $pos, $ref, $variant) = parse_matrix_line($line);
        $variant_of{"$chr#$pos"} = $variant;
        $ref_of{"$chr#$pos"}     = $ref;
    }
    close $fh;

    return ($genotype, \%variant_of, \%ref_of);
}

# Merge the given matrix files and print the combined matrix.
#   $files_ref - array reference of matrix file paths; one output column per
#                file, in the given order
#   $out       - file handle that receives the merged matrix
# Returns nothing.  Dies on unreadable or malformed input.
sub print_merged_matrix {
    my ($files_ref, $out) = @_;

    my @genotype_names;
    my @variants_by_file;
    my %ref_at;

    for my $path (@{$files_ref}) {
        my ($genotype, $variant_of, $ref_of) = read_matrix_file($path);
        push @genotype_names,   $genotype;
        push @variants_by_file, $variant_of;

        # When several files describe the same position, the reference base
        # of the file read last wins (same as the original single hash).
        @ref_at{ keys %{$ref_of} } = values %{$ref_of};
    }

    print {$out} join("\t", 'Chrom', 'Pos', 'Ref', @genotype_names), "\n";

    my @rows;
    for my $key (keys %ref_at) {
        my ($chr, $pos) = split(/\#/, $key);
        my $ref = $ref_at{$key};

        my $is_variant = 0;
        my @calls;
        for my $variant_of (@variants_by_file) {
            # exists(), not truthiness: a call of "0" is a real value and
            # must not be reported as "NA".
            if (exists $variant_of->{$key}) {
                my $call = $variant_of->{$key};
                push @calls, $call;
                $is_variant = 1 if $call ne $ref;
            }
            else {
                push @calls, 'NA';
            }
        }

        next unless $is_variant;
        push @rows, [ $chr, $pos, join("\t", $chr, $pos, $ref, @calls) . "\n" ];
    }

    # Chromosome by string order, then position numerically; the sort keys
    # are kept beside each line so nothing is re-split during comparison.
    for my $row (sort { $a->[0] cmp $b->[0] || $a->[1] <=> $b->[1] } @rows) {
        print {$out} $row->[2];
    }

    return;
}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/rapsodyn/Galaxy-Workflow-rapsosnp_v1.5.ga Thu Jan 29 08:54:06 2015 -0500 @@ -0,0 +1,2918 @@ +{ + "a_galaxy_workflow": "true", + "annotation": "", + "format-version": "0.1", + "name": "rapsosnp v1.5", + "steps": { + "0": { + "annotation": "", + "id": 0, + "input_connections": {}, + "inputs": [ + { + "description": "", + "name": "READ1" + } + ], + "name": "Input dataset", + "outputs": [], + "position": { + "left": 200, + "top": 805 + }, + "tool_errors": null, + "tool_id": null, + "tool_state": "{\"name\": \"READ1\"}", + "tool_version": null, + "type": "data_input", + "user_outputs": [] + }, + "1": { + "annotation": "", + "id": 1, + "input_connections": {}, + "inputs": [ + { + "description": "", + "name": "READ2" + } + ], + "name": "Input dataset", + "outputs": [], + "position": { + "left": 203, + "top": 905 + }, + "tool_errors": null, + "tool_id": null, + "tool_state": "{\"name\": \"READ2\"}", + "tool_version": null, + "type": "data_input", + "user_outputs": [] + }, + "2": { + "annotation": "", + "id": 2, + "input_connections": {}, + "inputs": [ + { + "description": "", + "name": "ASSEMBLY" + } + ], + "name": "Input dataset", + "outputs": [], + "position": { + "left": 3465, + "top": 399 + }, + "tool_errors": null, + "tool_id": null, + "tool_state": "{\"name\": \"ASSEMBLY\"}", + "tool_version": null, + "type": "data_input", + "user_outputs": [] + }, + "3": { + "annotation": "", + "id": 3, + "input_connections": {}, + "inputs": [ + { + "description": "", + "name": "EXCLUDED POSITIONS" + } + ], + "name": "Input dataset", + "outputs": [], + "position": { + "left": 3421.25, + "top": 1189.75 + }, + "tool_errors": null, + "tool_id": null, + "tool_state": "{\"name\": \"EXCLUDED POSITIONS\"}", + "tool_version": null, + "type": "data_input", + "user_outputs": [] + }, + "4": { + "annotation": "", + "id": 4, + "input_connections": { + "input_file": { + "id": 0, + "output_name": "output" + } + }, + "inputs": [], + "name": 
"SplitFileUpTo10x", + "outputs": [ + { + "name": "output_file1", + "type": "txt" + }, + { + "name": "output_file2", + "type": "txt" + }, + { + "name": "output_file3", + "type": "txt" + }, + { + "name": "output_file4", + "type": "txt" + }, + { + "name": "output_file5", + "type": "txt" + }, + { + "name": "output_file6", + "type": "txt" + }, + { + "name": "output_file7", + "type": "txt" + }, + { + "name": "output_file8", + "type": "txt" + }, + { + "name": "output_file9", + "type": "txt" + }, + { + "name": "output_file10", + "type": "txt" + } + ], + "position": { + "left": 389, + "top": 644 + }, + "post_job_actions": {}, + "tool_errors": null, + "tool_id": "SplitFileUpTo10x", + "tool_state": "{\"out_format\": \"\\\"fastq\\\"\", \"line_number\": \"\\\"4\\\"\", \"file_number\": \"\\\"4\\\"\", \"__page__\": 0, \"__rerun_remap_job_id__\": null, \"input_file\": \"null\"}", + "tool_version": "1.00", + "type": "tool", + "user_outputs": [] + }, + "5": { + "annotation": "", + "id": 5, + "input_connections": { + "input_file": { + "id": 1, + "output_name": "output" + } + }, + "inputs": [], + "name": "SplitFileUpTo10x", + "outputs": [ + { + "name": "output_file1", + "type": "txt" + }, + { + "name": "output_file2", + "type": "txt" + }, + { + "name": "output_file3", + "type": "txt" + }, + { + "name": "output_file4", + "type": "txt" + }, + { + "name": "output_file5", + "type": "txt" + }, + { + "name": "output_file6", + "type": "txt" + }, + { + "name": "output_file7", + "type": "txt" + }, + { + "name": "output_file8", + "type": "txt" + }, + { + "name": "output_file9", + "type": "txt" + }, + { + "name": "output_file10", + "type": "txt" + } + ], + "position": { + "left": 392, + "top": 1175 + }, + "post_job_actions": {}, + "tool_errors": null, + "tool_id": "SplitFileUpTo10x", + "tool_state": "{\"out_format\": \"\\\"fastq\\\"\", \"line_number\": \"\\\"4\\\"\", \"file_number\": \"\\\"4\\\"\", \"__page__\": 0, \"__rerun_remap_job_id__\": null, \"input_file\": \"null\"}", + "tool_version": 
"1.00", + "type": "tool", + "user_outputs": [] + }, + "6": { + "annotation": "", + "id": 6, + "input_connections": { + "input_fasta": { + "id": 2, + "output_name": "output" + } + }, + "inputs": [], + "name": "fastaGroomerForMakeBlastdb", + "outputs": [ + { + "name": "output_fasta", + "type": "fasta" + } + ], + "position": { + "left": 4557.25, + "top": 278.75 + }, + "post_job_actions": {}, + "tool_errors": null, + "tool_id": "fastaGroomerForMakeBlastdb", + "tool_state": "{\"input_fasta\": \"null\", \"__rerun_remap_job_id__\": null, \"__page__\": 0}", + "tool_version": "1.00", + "type": "tool", + "user_outputs": [] + }, + "7": { + "annotation": "", + "id": 7, + "input_connections": { + "input_read1_file": { + "id": 4, + "output_name": "output_file1" + }, + "input_read2_file": { + "id": 5, + "output_name": "output_file1" + } + }, + "inputs": [], + "name": "PrepareFastqLight", + "outputs": [ + { + "name": "output_read1_file", + "type": "fastqsanger" + }, + { + "name": "output_read2_file", + "type": "fastqsanger" + }, + { + "name": "log_file", + "type": "txt" + } + ], + "position": { + "left": 718.75, + "top": 626.25 + }, + "post_job_actions": {}, + "tool_errors": null, + "tool_id": "PrepareFastqLight", + "tool_state": "{\"__page__\": 0, \"quality_type\": \"\\\"auto\\\"\", \"min_length\": \"\\\"30\\\"\", \"__rerun_remap_job_id__\": null, \"min_quality\": \"\\\"30\\\"\", \"input_read1_file\": \"null\", \"input_read2_file\": \"null\"}", + "tool_version": "1.11", + "type": "tool", + "user_outputs": [] + }, + "8": { + "annotation": "", + "id": 8, + "input_connections": { + "input_read1_file": { + "id": 4, + "output_name": "output_file2" + }, + "input_read2_file": { + "id": 5, + "output_name": "output_file2" + } + }, + "inputs": [], + "name": "PrepareFastqLight", + "outputs": [ + { + "name": "output_read1_file", + "type": "fastqsanger" + }, + { + "name": "output_read2_file", + "type": "fastqsanger" + }, + { + "name": "log_file", + "type": "txt" + } + ], + "position": { + 
"left": 722.75, + "top": 839.25 + }, + "post_job_actions": {}, + "tool_errors": null, + "tool_id": "PrepareFastqLight", + "tool_state": "{\"__page__\": 0, \"quality_type\": \"\\\"auto\\\"\", \"min_length\": \"\\\"30\\\"\", \"__rerun_remap_job_id__\": null, \"min_quality\": \"\\\"30\\\"\", \"input_read1_file\": \"null\", \"input_read2_file\": \"null\"}", + "tool_version": "1.11", + "type": "tool", + "user_outputs": [] + }, + "9": { + "annotation": "", + "id": 9, + "input_connections": { + "input_read1_file": { + "id": 4, + "output_name": "output_file6" + }, + "input_read2_file": { + "id": 5, + "output_name": "output_file6" + } + }, + "inputs": [], + "name": "PrepareFastqLight", + "outputs": [ + { + "name": "output_read1_file", + "type": "fastqsanger" + }, + { + "name": "output_read2_file", + "type": "fastqsanger" + }, + { + "name": "log_file", + "type": "txt" + } + ], + "position": { + "left": 1028.75, + "top": 624.25 + }, + "post_job_actions": {}, + "tool_errors": null, + "tool_id": "PrepareFastqLight", + "tool_state": "{\"__page__\": 0, \"quality_type\": \"\\\"auto\\\"\", \"min_length\": \"\\\"30\\\"\", \"__rerun_remap_job_id__\": null, \"min_quality\": \"\\\"30\\\"\", \"input_read1_file\": \"null\", \"input_read2_file\": \"null\"}", + "tool_version": "1.11", + "type": "tool", + "user_outputs": [] + }, + "10": { + "annotation": "", + "id": 10, + "input_connections": { + "input_read1_file": { + "id": 4, + "output_name": "output_file3" + }, + "input_read2_file": { + "id": 5, + "output_name": "output_file3" + } + }, + "inputs": [], + "name": "PrepareFastqLight", + "outputs": [ + { + "name": "output_read1_file", + "type": "fastqsanger" + }, + { + "name": "output_read2_file", + "type": "fastqsanger" + }, + { + "name": "log_file", + "type": "txt" + } + ], + "position": { + "left": 721.75, + "top": 1059.25 + }, + "post_job_actions": {}, + "tool_errors": null, + "tool_id": "PrepareFastqLight", + "tool_state": "{\"__page__\": 0, \"quality_type\": \"\\\"auto\\\"\", 
\"min_length\": \"\\\"30\\\"\", \"__rerun_remap_job_id__\": null, \"min_quality\": \"\\\"30\\\"\", \"input_read1_file\": \"null\", \"input_read2_file\": \"null\"}", + "tool_version": "1.11", + "type": "tool", + "user_outputs": [] + }, + "11": { + "annotation": "", + "id": 11, + "input_connections": { + "input_read1_file": { + "id": 4, + "output_name": "output_file7" + }, + "input_read2_file": { + "id": 5, + "output_name": "output_file7" + } + }, + "inputs": [], + "name": "PrepareFastqLight", + "outputs": [ + { + "name": "output_read1_file", + "type": "fastqsanger" + }, + { + "name": "output_read2_file", + "type": "fastqsanger" + }, + { + "name": "log_file", + "type": "txt" + } + ], + "position": { + "left": 1033.75, + "top": 844.25 + }, + "post_job_actions": {}, + "tool_errors": null, + "tool_id": "PrepareFastqLight", + "tool_state": "{\"__page__\": 0, \"quality_type\": \"\\\"auto\\\"\", \"min_length\": \"\\\"30\\\"\", \"__rerun_remap_job_id__\": null, \"min_quality\": \"\\\"30\\\"\", \"input_read1_file\": \"null\", \"input_read2_file\": \"null\"}", + "tool_version": "1.11", + "type": "tool", + "user_outputs": [] + }, + "12": { + "annotation": "", + "id": 12, + "input_connections": { + "input_read1_file": { + "id": 4, + "output_name": "output_file4" + }, + "input_read2_file": { + "id": 5, + "output_name": "output_file4" + } + }, + "inputs": [], + "name": "PrepareFastqLight", + "outputs": [ + { + "name": "output_read1_file", + "type": "fastqsanger" + }, + { + "name": "output_read2_file", + "type": "fastqsanger" + }, + { + "name": "log_file", + "type": "txt" + } + ], + "position": { + "left": 723.75, + "top": 1274.25 + }, + "post_job_actions": {}, + "tool_errors": null, + "tool_id": "PrepareFastqLight", + "tool_state": "{\"__page__\": 0, \"quality_type\": \"\\\"auto\\\"\", \"min_length\": \"\\\"30\\\"\", \"__rerun_remap_job_id__\": null, \"min_quality\": \"\\\"30\\\"\", \"input_read1_file\": \"null\", \"input_read2_file\": \"null\"}", + "tool_version": "1.11", + 
"type": "tool", + "user_outputs": [] + }, + "13": { + "annotation": "", + "id": 13, + "input_connections": { + "input_read1_file": { + "id": 4, + "output_name": "output_file8" + }, + "input_read2_file": { + "id": 5, + "output_name": "output_file8" + } + }, + "inputs": [], + "name": "PrepareFastqLight", + "outputs": [ + { + "name": "output_read1_file", + "type": "fastqsanger" + }, + { + "name": "output_read2_file", + "type": "fastqsanger" + }, + { + "name": "log_file", + "type": "txt" + } + ], + "position": { + "left": 1033.75, + "top": 1068.25 + }, + "post_job_actions": {}, + "tool_errors": null, + "tool_id": "PrepareFastqLight", + "tool_state": "{\"__page__\": 0, \"quality_type\": \"\\\"auto\\\"\", \"min_length\": \"\\\"30\\\"\", \"__rerun_remap_job_id__\": null, \"min_quality\": \"\\\"30\\\"\", \"input_read1_file\": \"null\", \"input_read2_file\": \"null\"}", + "tool_version": "1.11", + "type": "tool", + "user_outputs": [] + }, + "14": { + "annotation": "", + "id": 14, + "input_connections": { + "input_read1_file": { + "id": 4, + "output_name": "output_file9" + }, + "input_read2_file": { + "id": 5, + "output_name": "output_file9" + } + }, + "inputs": [], + "name": "PrepareFastqLight", + "outputs": [ + { + "name": "output_read1_file", + "type": "fastqsanger" + }, + { + "name": "output_read2_file", + "type": "fastqsanger" + }, + { + "name": "log_file", + "type": "txt" + } + ], + "position": { + "left": 1037.75, + "top": 1281.25 + }, + "post_job_actions": {}, + "tool_errors": null, + "tool_id": "PrepareFastqLight", + "tool_state": "{\"__page__\": 0, \"quality_type\": \"\\\"auto\\\"\", \"min_length\": \"\\\"30\\\"\", \"__rerun_remap_job_id__\": null, \"min_quality\": \"\\\"30\\\"\", \"input_read1_file\": \"null\", \"input_read2_file\": \"null\"}", + "tool_version": "1.11", + "type": "tool", + "user_outputs": [] + }, + "15": { + "annotation": "", + "id": 15, + "input_connections": { + "input_read1_file": { + "id": 4, + "output_name": "output_file5" + }, + 
"input_read2_file": { + "id": 5, + "output_name": "output_file5" + } + }, + "inputs": [], + "name": "PrepareFastqLight", + "outputs": [ + { + "name": "output_read1_file", + "type": "fastqsanger" + }, + { + "name": "output_read2_file", + "type": "fastqsanger" + }, + { + "name": "log_file", + "type": "txt" + } + ], + "position": { + "left": 725.75, + "top": 1495.25 + }, + "post_job_actions": {}, + "tool_errors": null, + "tool_id": "PrepareFastqLight", + "tool_state": "{\"__page__\": 0, \"quality_type\": \"\\\"auto\\\"\", \"min_length\": \"\\\"30\\\"\", \"__rerun_remap_job_id__\": null, \"min_quality\": \"\\\"30\\\"\", \"input_read1_file\": \"null\", \"input_read2_file\": \"null\"}", + "tool_version": "1.11", + "type": "tool", + "user_outputs": [] + }, + "16": { + "annotation": "", + "id": 16, + "input_connections": { + "input_read1_file": { + "id": 4, + "output_name": "output_file10" + }, + "input_read2_file": { + "id": 5, + "output_name": "output_file10" + } + }, + "inputs": [], + "name": "PrepareFastqLight", + "outputs": [ + { + "name": "output_read1_file", + "type": "fastqsanger" + }, + { + "name": "output_read2_file", + "type": "fastqsanger" + }, + { + "name": "log_file", + "type": "txt" + } + ], + "position": { + "left": 1039.75, + "top": 1496.25 + }, + "post_job_actions": {}, + "tool_errors": null, + "tool_id": "PrepareFastqLight", + "tool_state": "{\"__page__\": 0, \"quality_type\": \"\\\"auto\\\"\", \"min_length\": \"\\\"30\\\"\", \"__rerun_remap_job_id__\": null, \"min_quality\": \"\\\"30\\\"\", \"input_read1_file\": \"null\", \"input_read2_file\": \"null\"}", + "tool_version": "1.11", + "type": "tool", + "user_outputs": [] + }, + "17": { + "annotation": "", + "id": 17, + "input_connections": { + "input_file": { + "id": 6, + "output_name": "output_fasta" + } + }, + "inputs": [], + "name": "NCBI BLAST+ makeblastdb", + "outputs": [ + { + "name": "outfile", + "type": "data" + } + ], + "position": { + "left": 4974.25, + "top": 271.75 + }, + "post_job_actions": 
{}, + "tool_errors": null, + "tool_id": "toolshed.g2.bx.psu.edu/repos/devteam/ncbi_blast_plus/ncbi_makeblastdb/0.1.00", + "tool_state": "{\"__page__\": 0, \"mask_data_file\": \"null\", \"input_file\": \"null\", \"dbtype\": \"\\\"nucl\\\"\", \"__rerun_remap_job_id__\": null, \"hash_index\": \"\\\"True\\\"\", \"tax\": \"{\\\"taxselect\\\": \\\"\\\", \\\"__current_case__\\\": 0}\", \"title\": \"\\\"\\\"\", \"parse_seqids\": \"\\\"False\\\"\"}", + "tool_version": "0.1.00", + "type": "tool", + "user_outputs": [] + }, + "18": { + "annotation": "", + "id": 18, + "input_connections": { + "genomeSource|ownFile": { + "id": 2, + "output_name": "output" + }, + "paired|input1": { + "id": 7, + "output_name": "output_read1_file" + }, + "paired|input2": { + "id": 7, + "output_name": "output_read2_file" + } + }, + "inputs": [], + "name": "Map with BWA for Illumina", + "outputs": [ + { + "name": "output", + "type": "sam" + } + ], + "position": { + "left": 1378, + "top": 625 + }, + "post_job_actions": {}, + "tool_errors": null, + "tool_id": "toolshed.g2.bx.psu.edu/repos/devteam/bwa_wrappers/bwa_wrapper/1.2.3", + "tool_state": "{\"genomeSource\": \"{\\\"refGenomeSource\\\": \\\"history\\\", \\\"ownFile\\\": null, \\\"__current_case__\\\": 1}\", \"__page__\": 0, \"__rerun_remap_job_id__\": null, \"paired\": \"{\\\"input2\\\": null, \\\"sPaired\\\": \\\"paired\\\", \\\"input1\\\": null, \\\"__current_case__\\\": 1}\", \"params\": \"{\\\"__current_case__\\\": 0, \\\"source_select\\\": \\\"pre_set\\\"}\", \"suppressHeader\": \"\\\"False\\\"\"}", + "tool_version": "1.2.3", + "type": "tool", + "user_outputs": [] + }, + "19": { + "annotation": "", + "id": 19, + "input_connections": { + "genomeSource|ownFile": { + "id": 2, + "output_name": "output" + }, + "paired|input1": { + "id": 8, + "output_name": "output_read1_file" + }, + "paired|input2": { + "id": 8, + "output_name": "output_read2_file" + } + }, + "inputs": [], + "name": "Map with BWA for Illumina", + "outputs": [ + { + "name": 
"output", + "type": "sam" + } + ], + "position": { + "left": 1391, + "top": 852 + }, + "post_job_actions": {}, + "tool_errors": null, + "tool_id": "toolshed.g2.bx.psu.edu/repos/devteam/bwa_wrappers/bwa_wrapper/1.2.3", + "tool_state": "{\"genomeSource\": \"{\\\"refGenomeSource\\\": \\\"history\\\", \\\"ownFile\\\": null, \\\"__current_case__\\\": 1}\", \"__page__\": 0, \"__rerun_remap_job_id__\": null, \"paired\": \"{\\\"input2\\\": null, \\\"sPaired\\\": \\\"paired\\\", \\\"input1\\\": null, \\\"__current_case__\\\": 1}\", \"params\": \"{\\\"__current_case__\\\": 0, \\\"source_select\\\": \\\"pre_set\\\"}\", \"suppressHeader\": \"\\\"False\\\"\"}", + "tool_version": "1.2.3", + "type": "tool", + "user_outputs": [] + }, + "20": { + "annotation": "", + "id": 20, + "input_connections": { + "genomeSource|ownFile": { + "id": 2, + "output_name": "output" + }, + "paired|input1": { + "id": 9, + "output_name": "output_read1_file" + }, + "paired|input2": { + "id": 9, + "output_name": "output_read2_file" + } + }, + "inputs": [], + "name": "Map with BWA for Illumina", + "outputs": [ + { + "name": "output", + "type": "sam" + } + ], + "position": { + "left": 1660, + "top": 622 + }, + "post_job_actions": {}, + "tool_errors": null, + "tool_id": "toolshed.g2.bx.psu.edu/repos/devteam/bwa_wrappers/bwa_wrapper/1.2.3", + "tool_state": "{\"genomeSource\": \"{\\\"refGenomeSource\\\": \\\"history\\\", \\\"ownFile\\\": null, \\\"__current_case__\\\": 1}\", \"__page__\": 0, \"__rerun_remap_job_id__\": null, \"paired\": \"{\\\"input2\\\": null, \\\"sPaired\\\": \\\"paired\\\", \\\"input1\\\": null, \\\"__current_case__\\\": 1}\", \"params\": \"{\\\"__current_case__\\\": 0, \\\"source_select\\\": \\\"pre_set\\\"}\", \"suppressHeader\": \"\\\"False\\\"\"}", + "tool_version": "1.2.3", + "type": "tool", + "user_outputs": [] + }, + "21": { + "annotation": "", + "id": 21, + "input_connections": { + "genomeSource|ownFile": { + "id": 2, + "output_name": "output" + }, + "paired|input1": { + "id": 10, 
+ "output_name": "output_read1_file" + }, + "paired|input2": { + "id": 10, + "output_name": "output_read2_file" + } + }, + "inputs": [], + "name": "Map with BWA for Illumina", + "outputs": [ + { + "name": "output", + "type": "sam" + } + ], + "position": { + "left": 1383, + "top": 1060 + }, + "post_job_actions": {}, + "tool_errors": null, + "tool_id": "toolshed.g2.bx.psu.edu/repos/devteam/bwa_wrappers/bwa_wrapper/1.2.3", + "tool_state": "{\"genomeSource\": \"{\\\"refGenomeSource\\\": \\\"history\\\", \\\"ownFile\\\": null, \\\"__current_case__\\\": 1}\", \"__page__\": 0, \"__rerun_remap_job_id__\": null, \"paired\": \"{\\\"input2\\\": null, \\\"sPaired\\\": \\\"paired\\\", \\\"input1\\\": null, \\\"__current_case__\\\": 1}\", \"params\": \"{\\\"__current_case__\\\": 0, \\\"source_select\\\": \\\"pre_set\\\"}\", \"suppressHeader\": \"\\\"False\\\"\"}", + "tool_version": "1.2.3", + "type": "tool", + "user_outputs": [] + }, + "22": { + "annotation": "", + "id": 22, + "input_connections": { + "genomeSource|ownFile": { + "id": 2, + "output_name": "output" + }, + "paired|input1": { + "id": 11, + "output_name": "output_read1_file" + }, + "paired|input2": { + "id": 11, + "output_name": "output_read2_file" + } + }, + "inputs": [], + "name": "Map with BWA for Illumina", + "outputs": [ + { + "name": "output", + "type": "sam" + } + ], + "position": { + "left": 1663, + "top": 845 + }, + "post_job_actions": {}, + "tool_errors": null, + "tool_id": "toolshed.g2.bx.psu.edu/repos/devteam/bwa_wrappers/bwa_wrapper/1.2.3", + "tool_state": "{\"genomeSource\": \"{\\\"refGenomeSource\\\": \\\"history\\\", \\\"ownFile\\\": null, \\\"__current_case__\\\": 1}\", \"__page__\": 0, \"__rerun_remap_job_id__\": null, \"paired\": \"{\\\"input2\\\": null, \\\"sPaired\\\": \\\"paired\\\", \\\"input1\\\": null, \\\"__current_case__\\\": 1}\", \"params\": \"{\\\"__current_case__\\\": 0, \\\"source_select\\\": \\\"pre_set\\\"}\", \"suppressHeader\": \"\\\"False\\\"\"}", + "tool_version": "1.2.3", + 
"type": "tool", + "user_outputs": [] + }, + "23": { + "annotation": "", + "id": 23, + "input_connections": { + "genomeSource|ownFile": { + "id": 2, + "output_name": "output" + }, + "paired|input1": { + "id": 12, + "output_name": "output_read1_file" + }, + "paired|input2": { + "id": 12, + "output_name": "output_read2_file" + } + }, + "inputs": [], + "name": "Map with BWA for Illumina", + "outputs": [ + { + "name": "output", + "type": "sam" + } + ], + "position": { + "left": 1391, + "top": 1277 + }, + "post_job_actions": {}, + "tool_errors": null, + "tool_id": "toolshed.g2.bx.psu.edu/repos/devteam/bwa_wrappers/bwa_wrapper/1.2.3", + "tool_state": "{\"genomeSource\": \"{\\\"refGenomeSource\\\": \\\"history\\\", \\\"ownFile\\\": null, \\\"__current_case__\\\": 1}\", \"__page__\": 0, \"__rerun_remap_job_id__\": null, \"paired\": \"{\\\"input2\\\": null, \\\"sPaired\\\": \\\"paired\\\", \\\"input1\\\": null, \\\"__current_case__\\\": 1}\", \"params\": \"{\\\"__current_case__\\\": 0, \\\"source_select\\\": \\\"pre_set\\\"}\", \"suppressHeader\": \"\\\"False\\\"\"}", + "tool_version": "1.2.3", + "type": "tool", + "user_outputs": [] + }, + "24": { + "annotation": "", + "id": 24, + "input_connections": { + "genomeSource|ownFile": { + "id": 2, + "output_name": "output" + }, + "paired|input1": { + "id": 13, + "output_name": "output_read1_file" + }, + "paired|input2": { + "id": 13, + "output_name": "output_read2_file" + } + }, + "inputs": [], + "name": "Map with BWA for Illumina", + "outputs": [ + { + "name": "output", + "type": "sam" + } + ], + "position": { + "left": 1663, + "top": 1059 + }, + "post_job_actions": {}, + "tool_errors": null, + "tool_id": "toolshed.g2.bx.psu.edu/repos/devteam/bwa_wrappers/bwa_wrapper/1.2.3", + "tool_state": "{\"genomeSource\": \"{\\\"refGenomeSource\\\": \\\"history\\\", \\\"ownFile\\\": null, \\\"__current_case__\\\": 1}\", \"__page__\": 0, \"__rerun_remap_job_id__\": null, \"paired\": \"{\\\"input2\\\": null, \\\"sPaired\\\": \\\"paired\\\", 
\\\"input1\\\": null, \\\"__current_case__\\\": 1}\", \"params\": \"{\\\"__current_case__\\\": 0, \\\"source_select\\\": \\\"pre_set\\\"}\", \"suppressHeader\": \"\\\"False\\\"\"}", + "tool_version": "1.2.3", + "type": "tool", + "user_outputs": [] + }, + "25": { + "annotation": "", + "id": 25, + "input_connections": { + "genomeSource|ownFile": { + "id": 2, + "output_name": "output" + }, + "paired|input1": { + "id": 14, + "output_name": "output_read1_file" + }, + "paired|input2": { + "id": 14, + "output_name": "output_read2_file" + } + }, + "inputs": [], + "name": "Map with BWA for Illumina", + "outputs": [ + { + "name": "output", + "type": "sam" + } + ], + "position": { + "left": 1671, + "top": 1281 + }, + "post_job_actions": {}, + "tool_errors": null, + "tool_id": "toolshed.g2.bx.psu.edu/repos/devteam/bwa_wrappers/bwa_wrapper/1.2.3", + "tool_state": "{\"genomeSource\": \"{\\\"refGenomeSource\\\": \\\"history\\\", \\\"ownFile\\\": null, \\\"__current_case__\\\": 1}\", \"__page__\": 0, \"__rerun_remap_job_id__\": null, \"paired\": \"{\\\"input2\\\": null, \\\"sPaired\\\": \\\"paired\\\", \\\"input1\\\": null, \\\"__current_case__\\\": 1}\", \"params\": \"{\\\"__current_case__\\\": 0, \\\"source_select\\\": \\\"pre_set\\\"}\", \"suppressHeader\": \"\\\"False\\\"\"}", + "tool_version": "1.2.3", + "type": "tool", + "user_outputs": [] + }, + "26": { + "annotation": "", + "id": 26, + "input_connections": { + "genomeSource|ownFile": { + "id": 2, + "output_name": "output" + }, + "paired|input1": { + "id": 15, + "output_name": "output_read1_file" + }, + "paired|input2": { + "id": 15, + "output_name": "output_read2_file" + } + }, + "inputs": [], + "name": "Map with BWA for Illumina", + "outputs": [ + { + "name": "output", + "type": "sam" + } + ], + "position": { + "left": 1397, + "top": 1497 + }, + "post_job_actions": {}, + "tool_errors": null, + "tool_id": "toolshed.g2.bx.psu.edu/repos/devteam/bwa_wrappers/bwa_wrapper/1.2.3", + "tool_state": "{\"genomeSource\": 
\"{\\\"refGenomeSource\\\": \\\"history\\\", \\\"ownFile\\\": null, \\\"__current_case__\\\": 1}\", \"__page__\": 0, \"__rerun_remap_job_id__\": null, \"paired\": \"{\\\"input2\\\": null, \\\"sPaired\\\": \\\"paired\\\", \\\"input1\\\": null, \\\"__current_case__\\\": 1}\", \"params\": \"{\\\"__current_case__\\\": 0, \\\"source_select\\\": \\\"pre_set\\\"}\", \"suppressHeader\": \"\\\"False\\\"\"}", + "tool_version": "1.2.3", + "type": "tool", + "user_outputs": [] + }, + "27": { + "annotation": "", + "id": 27, + "input_connections": { + "genomeSource|ownFile": { + "id": 2, + "output_name": "output" + }, + "paired|input1": { + "id": 16, + "output_name": "output_read1_file" + }, + "paired|input2": { + "id": 16, + "output_name": "output_read2_file" + } + }, + "inputs": [], + "name": "Map with BWA for Illumina", + "outputs": [ + { + "name": "output", + "type": "sam" + } + ], + "position": { + "left": 1667, + "top": 1494 + }, + "post_job_actions": {}, + "tool_errors": null, + "tool_id": "toolshed.g2.bx.psu.edu/repos/devteam/bwa_wrappers/bwa_wrapper/1.2.3", + "tool_state": "{\"genomeSource\": \"{\\\"refGenomeSource\\\": \\\"history\\\", \\\"ownFile\\\": null, \\\"__current_case__\\\": 1}\", \"__page__\": 0, \"__rerun_remap_job_id__\": null, \"paired\": \"{\\\"input2\\\": null, \\\"sPaired\\\": \\\"paired\\\", \\\"input1\\\": null, \\\"__current_case__\\\": 1}\", \"params\": \"{\\\"__current_case__\\\": 0, \\\"source_select\\\": \\\"pre_set\\\"}\", \"suppressHeader\": \"\\\"False\\\"\"}", + "tool_version": "1.2.3", + "type": "tool", + "user_outputs": [] + }, + "28": { + "annotation": "", + "id": 28, + "input_connections": { + "input_sam_file": { + "id": 18, + "output_name": "output" + } + }, + "inputs": [], + "name": "filtersam_mapped_and_unique", + "outputs": [ + { + "name": "output_sam_file", + "type": "sam" + }, + { + "name": "log_file", + "type": "txt" + } + ], + "position": { + "left": 1984.75, + "top": 621.75 + }, + "post_job_actions": {}, + "tool_errors": null, + 
"tool_id": "filtersam_mapped_and_unique", + "tool_state": "{\"__page__\": 0, \"__rerun_remap_job_id__\": null, \"input_sam_file\": \"null\"}", + "tool_version": "1.10", + "type": "tool", + "user_outputs": [] + }, + "29": { + "annotation": "", + "id": 29, + "input_connections": { + "input_sam_file": { + "id": 19, + "output_name": "output" + } + }, + "inputs": [], + "name": "filtersam_mapped_and_unique", + "outputs": [ + { + "name": "output_sam_file", + "type": "sam" + }, + { + "name": "log_file", + "type": "txt" + } + ], + "position": { + "left": 1986.75, + "top": 841.75 + }, + "post_job_actions": {}, + "tool_errors": null, + "tool_id": "filtersam_mapped_and_unique", + "tool_state": "{\"__page__\": 0, \"__rerun_remap_job_id__\": null, \"input_sam_file\": \"null\"}", + "tool_version": "1.10", + "type": "tool", + "user_outputs": [] + }, + "30": { + "annotation": "", + "id": 30, + "input_connections": { + "input_sam_file": { + "id": 20, + "output_name": "output" + } + }, + "inputs": [], + "name": "filtersam_mapped_and_unique", + "outputs": [ + { + "name": "output_sam_file", + "type": "sam" + }, + { + "name": "log_file", + "type": "txt" + } + ], + "position": { + "left": 2287.75, + "top": 623.75 + }, + "post_job_actions": {}, + "tool_errors": null, + "tool_id": "filtersam_mapped_and_unique", + "tool_state": "{\"__page__\": 0, \"__rerun_remap_job_id__\": null, \"input_sam_file\": \"null\"}", + "tool_version": "1.10", + "type": "tool", + "user_outputs": [] + }, + "31": { + "annotation": "", + "id": 31, + "input_connections": { + "input_sam_file": { + "id": 21, + "output_name": "output" + } + }, + "inputs": [], + "name": "filtersam_mapped_and_unique", + "outputs": [ + { + "name": "output_sam_file", + "type": "sam" + }, + { + "name": "log_file", + "type": "txt" + } + ], + "position": { + "left": 1990.75, + "top": 1046.75 + }, + "post_job_actions": {}, + "tool_errors": null, + "tool_id": "filtersam_mapped_and_unique", + "tool_state": "{\"__page__\": 0, 
\"__rerun_remap_job_id__\": null, \"input_sam_file\": \"null\"}", + "tool_version": "1.10", + "type": "tool", + "user_outputs": [] + }, + "32": { + "annotation": "", + "id": 32, + "input_connections": { + "input_sam_file": { + "id": 22, + "output_name": "output" + } + }, + "inputs": [], + "name": "filtersam_mapped_and_unique", + "outputs": [ + { + "name": "output_sam_file", + "type": "sam" + }, + { + "name": "log_file", + "type": "txt" + } + ], + "position": { + "left": 2289.75, + "top": 840.75 + }, + "post_job_actions": {}, + "tool_errors": null, + "tool_id": "filtersam_mapped_and_unique", + "tool_state": "{\"__page__\": 0, \"__rerun_remap_job_id__\": null, \"input_sam_file\": \"null\"}", + "tool_version": "1.10", + "type": "tool", + "user_outputs": [] + }, + "33": { + "annotation": "", + "id": 33, + "input_connections": { + "input_sam_file": { + "id": 23, + "output_name": "output" + } + }, + "inputs": [], + "name": "filtersam_mapped_and_unique", + "outputs": [ + { + "name": "output_sam_file", + "type": "sam" + }, + { + "name": "log_file", + "type": "txt" + } + ], + "position": { + "left": 1991.75, + "top": 1275.75 + }, + "post_job_actions": {}, + "tool_errors": null, + "tool_id": "filtersam_mapped_and_unique", + "tool_state": "{\"__page__\": 0, \"__rerun_remap_job_id__\": null, \"input_sam_file\": \"null\"}", + "tool_version": "1.10", + "type": "tool", + "user_outputs": [] + }, + "34": { + "annotation": "", + "id": 34, + "input_connections": { + "input_sam_file": { + "id": 24, + "output_name": "output" + } + }, + "inputs": [], + "name": "filtersam_mapped_and_unique", + "outputs": [ + { + "name": "output_sam_file", + "type": "sam" + }, + { + "name": "log_file", + "type": "txt" + } + ], + "position": { + "left": 2293.75, + "top": 1041.75 + }, + "post_job_actions": {}, + "tool_errors": null, + "tool_id": "filtersam_mapped_and_unique", + "tool_state": "{\"__page__\": 0, \"__rerun_remap_job_id__\": null, \"input_sam_file\": \"null\"}", + "tool_version": "1.10", + 
"type": "tool", + "user_outputs": [] + }, + "35": { + "annotation": "", + "id": 35, + "input_connections": { + "input_sam_file": { + "id": 25, + "output_name": "output" + } + }, + "inputs": [], + "name": "filtersam_mapped_and_unique", + "outputs": [ + { + "name": "output_sam_file", + "type": "sam" + }, + { + "name": "log_file", + "type": "txt" + } + ], + "position": { + "left": 2298.75, + "top": 1272.75 + }, + "post_job_actions": {}, + "tool_errors": null, + "tool_id": "filtersam_mapped_and_unique", + "tool_state": "{\"__page__\": 0, \"__rerun_remap_job_id__\": null, \"input_sam_file\": \"null\"}", + "tool_version": "1.10", + "type": "tool", + "user_outputs": [] + }, + "36": { + "annotation": "", + "id": 36, + "input_connections": { + "input_sam_file": { + "id": 26, + "output_name": "output" + } + }, + "inputs": [], + "name": "filtersam_mapped_and_unique", + "outputs": [ + { + "name": "output_sam_file", + "type": "sam" + }, + { + "name": "log_file", + "type": "txt" + } + ], + "position": { + "left": 1991.75, + "top": 1490.75 + }, + "post_job_actions": {}, + "tool_errors": null, + "tool_id": "filtersam_mapped_and_unique", + "tool_state": "{\"__page__\": 0, \"__rerun_remap_job_id__\": null, \"input_sam_file\": \"null\"}", + "tool_version": "1.10", + "type": "tool", + "user_outputs": [] + }, + "37": { + "annotation": "", + "id": 37, + "input_connections": { + "input_sam_file": { + "id": 27, + "output_name": "output" + } + }, + "inputs": [], + "name": "filtersam_mapped_and_unique", + "outputs": [ + { + "name": "output_sam_file", + "type": "sam" + }, + { + "name": "log_file", + "type": "txt" + } + ], + "position": { + "left": 2308.75, + "top": 1488.75 + }, + "post_job_actions": {}, + "tool_errors": null, + "tool_id": "filtersam_mapped_and_unique", + "tool_state": "{\"__page__\": 0, \"__rerun_remap_job_id__\": null, \"input_sam_file\": \"null\"}", + "tool_version": "1.10", + "type": "tool", + "user_outputs": [] + }, + "38": { + "annotation": "", + "id": 38, + 
"input_connections": { + "input_files": [ + { + "id": 37, + "output_name": "output_sam_file" + }, + { + "id": 36, + "output_name": "output_sam_file" + }, + { + "id": 35, + "output_name": "output_sam_file" + }, + { + "id": 34, + "output_name": "output_sam_file" + }, + { + "id": 33, + "output_name": "output_sam_file" + }, + { + "id": 32, + "output_name": "output_sam_file" + }, + { + "id": 31, + "output_name": "output_sam_file" + }, + { + "id": 30, + "output_name": "output_sam_file" + }, + { + "id": 29, + "output_name": "output_sam_file" + }, + { + "id": 28, + "output_name": "output_sam_file" + } + ] + }, + "inputs": [], + "name": "MergeSamFiles", + "outputs": [ + { + "name": "output_file", + "type": "sam" + } + ], + "position": { + "left": 2785.25, + "top": 992.75 + }, + "post_job_actions": {}, + "tool_errors": null, + "tool_id": "MergeSamFiles", + "tool_state": "{\"__page__\": 0, \"__rerun_remap_job_id__\": null, \"input_files\": \"null\"}", + "tool_version": "1.00", + "type": "tool", + "user_outputs": [] + }, + "39": { + "annotation": "", + "id": 39, + "input_connections": { + "source|input1": { + "id": 38, + "output_name": "output_file" + }, + "source|ref_file": { + "id": 2, + "output_name": "output" + } + }, + "inputs": [], + "name": "SAM-to-BAM", + "outputs": [ + { + "name": "output1", + "type": "bam" + } + ], + "position": { + "left": 3099.25, + "top": 993.75 + }, + "post_job_actions": {}, + "tool_errors": null, + "tool_id": "toolshed.g2.bx.psu.edu/repos/devteam/sam_to_bam/sam_to_bam/1.1.4", + "tool_state": "{\"source\": \"{\\\"index_source\\\": \\\"history\\\", \\\"ref_file\\\": null, \\\"input1\\\": null, \\\"__current_case__\\\": 1}\", \"__rerun_remap_job_id__\": null, \"__page__\": 0}", + "tool_version": "1.1.4", + "type": "tool", + "user_outputs": [] + }, + "40": { + "annotation": "", + "id": 40, + "input_connections": { + "reference_source|input_bams_0|input_bam": { + "id": 39, + "output_name": "output1" + }, + "reference_source|ref_file": { + "id": 2, + 
"output_name": "output" + } + }, + "inputs": [], + "name": "MPileup", + "outputs": [ + { + "name": "output_mpileup", + "type": "pileup" + }, + { + "name": "output_log", + "type": "txt" + } + ], + "position": { + "left": 3313.25, + "top": 935.75 + }, + "post_job_actions": {}, + "tool_errors": null, + "tool_id": "toolshed.g2.bx.psu.edu/repos/devteam/samtools_mpileup/samtools_mpileup/0.0.3", + "tool_state": "{\"__page__\": 0, \"advanced_options\": \"{\\\"max_reads_per_bam\\\": \\\"250\\\", \\\"advanced_options_selector\\\": \\\"advanced\\\", \\\"extended_BAQ_computation\\\": \\\"False\\\", \\\"region_string\\\": \\\"\\\", \\\"output_per_sample_strand_bias_p_value\\\": \\\"False\\\", \\\"minimum_base_quality\\\": \\\"0\\\", \\\"disable_probabilistic_realignment\\\": \\\"False\\\", \\\"skip_anomalous_read_pairs\\\": \\\"False\\\", \\\"minimum_mapping_quality\\\": \\\"0\\\", \\\"output_per_sample_read_depth\\\": \\\"False\\\", \\\"__current_case__\\\": 0, \\\"position_list\\\": null, \\\"coefficient_for_downgrading\\\": \\\"0\\\"}\", \"__rerun_remap_job_id__\": null, \"genotype_likelihood_computation_type\": \"{\\\"genotype_likelihood_computation_type_selector\\\": \\\"do_not_perform_genotype_likelihood_computation\\\", \\\"__current_case__\\\": 1}\", \"reference_source\": \"{\\\"ref_file\\\": null, \\\"reference_source_selector\\\": \\\"history\\\", \\\"input_bams\\\": [{\\\"__index__\\\": 0, \\\"input_bam\\\": null}], \\\"__current_case__\\\": 1}\"}", + "tool_version": "0.0.3", + "type": "tool", + "user_outputs": [] + }, + "41": { + "annotation": "", + "id": 41, + "input_connections": { + "exclude|input_exclusion_file": { + "id": 3, + "output_name": "output" + }, + "input_pileup_file": { + "id": 40, + "output_name": "output_mpileup" + } + }, + "inputs": [], + "name": "PileupVariant", + "outputs": [ + { + "name": "output_pileup_file", + "type": "pileup" + }, + { + "name": "log_file", + "type": "txt" + } + ], + "position": { + "left": 3612.25, + "top": 944.75 + }, + 
"post_job_actions": {}, + "tool_errors": null, + "tool_id": "PileupVariant", + "tool_state": "{\"exclude\": \"{\\\"do\\\": \\\"YES\\\", \\\"input_exclusion_file\\\": null, \\\"__current_case__\\\": 0}\", \"__rerun_remap_job_id__\": null, \"input_pileup_file\": \"null\", \"__page__\": 0}", + "tool_version": "1.10", + "type": "tool", + "user_outputs": [] + }, + "42": { + "annotation": "", + "id": 42, + "input_connections": { + "input_file": { + "id": 41, + "output_name": "output_pileup_file" + } + }, + "inputs": [], + "name": "SplitFileUpTo10x", + "outputs": [ + { + "name": "output_file1", + "type": "txt" + }, + { + "name": "output_file2", + "type": "txt" + }, + { + "name": "output_file3", + "type": "txt" + }, + { + "name": "output_file4", + "type": "txt" + }, + { + "name": "output_file5", + "type": "txt" + }, + { + "name": "output_file6", + "type": "txt" + }, + { + "name": "output_file7", + "type": "txt" + }, + { + "name": "output_file8", + "type": "txt" + }, + { + "name": "output_file9", + "type": "txt" + }, + { + "name": "output_file10", + "type": "txt" + } + ], + "position": { + "left": 3899.25, + "top": 723.75 + }, + "post_job_actions": {}, + "tool_errors": null, + "tool_id": "SplitFileUpTo10x", + "tool_state": "{\"out_format\": \"\\\"pileup\\\"\", \"line_number\": \"\\\"4\\\"\", \"file_number\": \"\\\"4\\\"\", \"__page__\": 0, \"__rerun_remap_job_id__\": null, \"input_file\": \"null\"}", + "tool_version": "1.00", + "type": "tool", + "user_outputs": [] + }, + "43": { + "annotation": "", + "id": 43, + "input_connections": { + "input_assembly_file": { + "id": 2, + "output_name": "output" + }, + "input_variant_file": { + "id": 42, + "output_name": "output_file1" + } + }, + "inputs": [], + "name": "extractseq", + "outputs": [ + { + "name": "output_file", + "type": "fasta" + } + ], + "position": { + "left": 4231.25, + "top": 470.75 + }, + "post_job_actions": {}, + "tool_errors": null, + "tool_id": "extractseq", + "tool_state": "{\"__page__\": 0, 
\"input_variant_file\": \"null\", \"__rerun_remap_job_id__\": null, \"window_length\": \"\\\"50\\\"\", \"input_assembly_file\": \"null\"}", + "tool_version": "1.10", + "type": "tool", + "user_outputs": [] + }, + "44": { + "annotation": "", + "id": 44, + "input_connections": { + "input_assembly_file": { + "id": 2, + "output_name": "output" + }, + "input_variant_file": { + "id": 42, + "output_name": "output_file2" + } + }, + "inputs": [], + "name": "extractseq", + "outputs": [ + { + "name": "output_file", + "type": "fasta" + } + ], + "position": { + "left": 4234.25, + "top": 663.75 + }, + "post_job_actions": {}, + "tool_errors": null, + "tool_id": "extractseq", + "tool_state": "{\"__page__\": 0, \"input_variant_file\": \"null\", \"__rerun_remap_job_id__\": null, \"window_length\": \"\\\"50\\\"\", \"input_assembly_file\": \"null\"}", + "tool_version": "1.10", + "type": "tool", + "user_outputs": [] + }, + "45": { + "annotation": "", + "id": 45, + "input_connections": { + "input_assembly_file": { + "id": 2, + "output_name": "output" + }, + "input_variant_file": { + "id": 42, + "output_name": "output_file3" + } + }, + "inputs": [], + "name": "extractseq", + "outputs": [ + { + "name": "output_file", + "type": "fasta" + } + ], + "position": { + "left": 4240.25, + "top": 863.75 + }, + "post_job_actions": {}, + "tool_errors": null, + "tool_id": "extractseq", + "tool_state": "{\"__page__\": 0, \"input_variant_file\": \"null\", \"__rerun_remap_job_id__\": null, \"window_length\": \"\\\"50\\\"\", \"input_assembly_file\": \"null\"}", + "tool_version": "1.10", + "type": "tool", + "user_outputs": [] + }, + "46": { + "annotation": "", + "id": 46, + "input_connections": { + "input_assembly_file": { + "id": 2, + "output_name": "output" + }, + "input_variant_file": { + "id": 42, + "output_name": "output_file4" + } + }, + "inputs": [], + "name": "extractseq", + "outputs": [ + { + "name": "output_file", + "type": "fasta" + } + ], + "position": { + "left": 4246.25, + "top": 1097.75 + }, 
+ "post_job_actions": {}, + "tool_errors": null, + "tool_id": "extractseq", + "tool_state": "{\"__page__\": 0, \"input_variant_file\": \"null\", \"__rerun_remap_job_id__\": null, \"window_length\": \"\\\"50\\\"\", \"input_assembly_file\": \"null\"}", + "tool_version": "1.10", + "type": "tool", + "user_outputs": [] + }, + "47": { + "annotation": "", + "id": 47, + "input_connections": { + "input_variant_file": { + "id": 42, + "output_name": "output_file5" + } + }, + "inputs": [], + "name": "extractseq", + "outputs": [ + { + "name": "output_file", + "type": "fasta" + } + ], + "position": { + "left": 4244.25, + "top": 1330.75 + }, + "post_job_actions": {}, + "tool_errors": null, + "tool_id": "extractseq", + "tool_state": "{\"__page__\": 0, \"input_variant_file\": \"null\", \"__rerun_remap_job_id__\": null, \"window_length\": \"\\\"50\\\"\", \"input_assembly_file\": \"null\"}", + "tool_version": "1.10", + "type": "tool", + "user_outputs": [] + }, + "48": { + "annotation": "", + "id": 48, + "input_connections": { + "input_assembly_file": { + "id": 2, + "output_name": "output" + }, + "input_variant_file": { + "id": 42, + "output_name": "output_file6" + } + }, + "inputs": [], + "name": "extractseq", + "outputs": [ + { + "name": "output_file", + "type": "fasta" + } + ], + "position": { + "left": 4569.25, + "top": 472.75 + }, + "post_job_actions": {}, + "tool_errors": null, + "tool_id": "extractseq", + "tool_state": "{\"__page__\": 0, \"input_variant_file\": \"null\", \"__rerun_remap_job_id__\": null, \"window_length\": \"\\\"50\\\"\", \"input_assembly_file\": \"null\"}", + "tool_version": "1.10", + "type": "tool", + "user_outputs": [] + }, + "49": { + "annotation": "", + "id": 49, + "input_connections": { + "input_assembly_file": { + "id": 2, + "output_name": "output" + }, + "input_variant_file": { + "id": 42, + "output_name": "output_file7" + } + }, + "inputs": [], + "name": "extractseq", + "outputs": [ + { + "name": "output_file", + "type": "fasta" + } + ], + "position": 
{ + "left": 4572.25, + "top": 658.75 + }, + "post_job_actions": {}, + "tool_errors": null, + "tool_id": "extractseq", + "tool_state": "{\"__page__\": 0, \"input_variant_file\": \"null\", \"__rerun_remap_job_id__\": null, \"window_length\": \"\\\"50\\\"\", \"input_assembly_file\": \"null\"}", + "tool_version": "1.10", + "type": "tool", + "user_outputs": [] + }, + "50": { + "annotation": "", + "id": 50, + "input_connections": { + "input_assembly_file": { + "id": 2, + "output_name": "output" + }, + "input_variant_file": { + "id": 42, + "output_name": "output_file8" + } + }, + "inputs": [], + "name": "extractseq", + "outputs": [ + { + "name": "output_file", + "type": "fasta" + } + ], + "position": { + "left": 4577.25, + "top": 871.75 + }, + "post_job_actions": {}, + "tool_errors": null, + "tool_id": "extractseq", + "tool_state": "{\"__page__\": 0, \"input_variant_file\": \"null\", \"__rerun_remap_job_id__\": null, \"window_length\": \"\\\"50\\\"\", \"input_assembly_file\": \"null\"}", + "tool_version": "1.10", + "type": "tool", + "user_outputs": [] + }, + "51": { + "annotation": "", + "id": 51, + "input_connections": { + "input_assembly_file": { + "id": 2, + "output_name": "output" + }, + "input_variant_file": { + "id": 42, + "output_name": "output_file9" + } + }, + "inputs": [], + "name": "extractseq", + "outputs": [ + { + "name": "output_file", + "type": "fasta" + } + ], + "position": { + "left": 4582.25, + "top": 1094.75 + }, + "post_job_actions": {}, + "tool_errors": null, + "tool_id": "extractseq", + "tool_state": "{\"__page__\": 0, \"input_variant_file\": \"null\", \"__rerun_remap_job_id__\": null, \"window_length\": \"\\\"50\\\"\", \"input_assembly_file\": \"null\"}", + "tool_version": "1.10", + "type": "tool", + "user_outputs": [] + }, + "52": { + "annotation": "", + "id": 52, + "input_connections": { + "input_assembly_file": { + "id": 2, + "output_name": "output" + }, + "input_variant_file": { + "id": 42, + "output_name": "output_file10" + } + }, + "inputs": 
[], + "name": "extractseq", + "outputs": [ + { + "name": "output_file", + "type": "fasta" + } + ], + "position": { + "left": 4585.25, + "top": 1330.75 + }, + "post_job_actions": {}, + "tool_errors": null, + "tool_id": "extractseq", + "tool_state": "{\"__page__\": 0, \"input_variant_file\": \"null\", \"__rerun_remap_job_id__\": null, \"window_length\": \"\\\"50\\\"\", \"input_assembly_file\": \"null\"}", + "tool_version": "1.10", + "type": "tool", + "user_outputs": [] + }, + "53": { + "annotation": "", + "id": 53, + "input_connections": { + "db_opts|histdb": { + "id": 17, + "output_name": "outfile" + }, + "query": { + "id": 43, + "output_name": "output_file" + } + }, + "inputs": [], + "name": "NCBI BLAST+ blastn", + "outputs": [ + { + "name": "output1", + "type": "tabular" + } + ], + "position": { + "left": 5000.75, + "top": 478.75 + }, + "post_job_actions": {}, + "tool_errors": null, + "tool_id": "toolshed.g2.bx.psu.edu/repos/devteam/ncbi_blast_plus/ncbi_blastn_wrapper/0.1.00", + "tool_state": "{\"evalue_cutoff\": \"\\\"0.001\\\"\", \"__page__\": 0, \"adv_opts\": \"{\\\"identity_cutoff\\\": \\\"0.0\\\", \\\"adv_opts_selector\\\": \\\"advanced\\\", \\\"ungapped\\\": \\\"False\\\", \\\"filter_query\\\": \\\"False\\\", \\\"word_size\\\": \\\"0\\\", \\\"__current_case__\\\": 1, \\\"parse_deflines\\\": \\\"False\\\", \\\"strand\\\": \\\"-strand both\\\", \\\"max_hits\\\": \\\"0\\\"}\", \"__rerun_remap_job_id__\": null, \"blast_type\": \"\\\"megablast\\\"\", \"db_opts\": \"{\\\"db_opts_selector\\\": \\\"histdb\\\", \\\"subject\\\": \\\"\\\", \\\"histdb\\\": null, \\\"__current_case__\\\": 1, \\\"database\\\": \\\"\\\"}\", \"output\": \"{\\\"out_format\\\": \\\"cols\\\", \\\"std_cols\\\": [\\\"qseqid\\\", \\\"qstart\\\", \\\"qend\\\"], \\\"ids_cols\\\": null, \\\"tax_cols\\\": null, \\\"__current_case__\\\": 2, \\\"misc_cols\\\": null, \\\"ext_cols\\\": [\\\"qseq\\\", \\\"sseq\\\"]}\", \"query\": \"null\"}", + "tool_version": "0.1.00", + "type": "tool", + "user_outputs": 
[] + }, + "54": { + "annotation": "", + "id": 54, + "input_connections": { + "db_opts|histdb": { + "id": 17, + "output_name": "outfile" + }, + "query": { + "id": 44, + "output_name": "output_file" + } + }, + "inputs": [], + "name": "NCBI BLAST+ blastn", + "outputs": [ + { + "name": "output1", + "type": "tabular" + } + ], + "position": { + "left": 4996.75, + "top": 682.75 + }, + "post_job_actions": {}, + "tool_errors": null, + "tool_id": "toolshed.g2.bx.psu.edu/repos/devteam/ncbi_blast_plus/ncbi_blastn_wrapper/0.1.00", + "tool_state": "{\"evalue_cutoff\": \"\\\"0.001\\\"\", \"__page__\": 0, \"adv_opts\": \"{\\\"identity_cutoff\\\": \\\"0.0\\\", \\\"adv_opts_selector\\\": \\\"advanced\\\", \\\"ungapped\\\": \\\"False\\\", \\\"filter_query\\\": \\\"False\\\", \\\"word_size\\\": \\\"0\\\", \\\"__current_case__\\\": 1, \\\"parse_deflines\\\": \\\"False\\\", \\\"strand\\\": \\\"-strand both\\\", \\\"max_hits\\\": \\\"0\\\"}\", \"__rerun_remap_job_id__\": null, \"blast_type\": \"\\\"megablast\\\"\", \"db_opts\": \"{\\\"db_opts_selector\\\": \\\"histdb\\\", \\\"subject\\\": \\\"\\\", \\\"histdb\\\": null, \\\"__current_case__\\\": 1, \\\"database\\\": \\\"\\\"}\", \"output\": \"{\\\"out_format\\\": \\\"cols\\\", \\\"std_cols\\\": [\\\"qseqid\\\", \\\"qstart\\\", \\\"qend\\\"], \\\"ids_cols\\\": null, \\\"tax_cols\\\": null, \\\"__current_case__\\\": 2, \\\"misc_cols\\\": null, \\\"ext_cols\\\": [\\\"qseq\\\", \\\"sseq\\\"]}\", \"query\": \"null\"}", + "tool_version": "0.1.00", + "type": "tool", + "user_outputs": [] + }, + "55": { + "annotation": "", + "id": 55, + "input_connections": { + "db_opts|histdb": { + "id": 17, + "output_name": "outfile" + }, + "query": { + "id": 45, + "output_name": "output_file" + } + }, + "inputs": [], + "name": "NCBI BLAST+ blastn", + "outputs": [ + { + "name": "output1", + "type": "tabular" + } + ], + "position": { + "left": 4997.75, + "top": 873.75 + }, + "post_job_actions": {}, + "tool_errors": null, + "tool_id": 
"toolshed.g2.bx.psu.edu/repos/devteam/ncbi_blast_plus/ncbi_blastn_wrapper/0.1.00", + "tool_state": "{\"evalue_cutoff\": \"\\\"0.001\\\"\", \"__page__\": 0, \"adv_opts\": \"{\\\"identity_cutoff\\\": \\\"0.0\\\", \\\"adv_opts_selector\\\": \\\"advanced\\\", \\\"ungapped\\\": \\\"False\\\", \\\"filter_query\\\": \\\"False\\\", \\\"word_size\\\": \\\"0\\\", \\\"__current_case__\\\": 1, \\\"parse_deflines\\\": \\\"False\\\", \\\"strand\\\": \\\"-strand both\\\", \\\"max_hits\\\": \\\"0\\\"}\", \"__rerun_remap_job_id__\": null, \"blast_type\": \"\\\"megablast\\\"\", \"db_opts\": \"{\\\"db_opts_selector\\\": \\\"histdb\\\", \\\"subject\\\": \\\"\\\", \\\"histdb\\\": null, \\\"__current_case__\\\": 1, \\\"database\\\": \\\"\\\"}\", \"output\": \"{\\\"out_format\\\": \\\"cols\\\", \\\"std_cols\\\": [\\\"qseqid\\\", \\\"qstart\\\", \\\"qend\\\"], \\\"ids_cols\\\": null, \\\"tax_cols\\\": null, \\\"__current_case__\\\": 2, \\\"misc_cols\\\": null, \\\"ext_cols\\\": [\\\"qseq\\\", \\\"sseq\\\"]}\", \"query\": \"null\"}", + "tool_version": "0.1.00", + "type": "tool", + "user_outputs": [] + }, + "56": { + "annotation": "", + "id": 56, + "input_connections": { + "db_opts|histdb": { + "id": 17, + "output_name": "outfile" + }, + "query": { + "id": 46, + "output_name": "output_file" + } + }, + "inputs": [], + "name": "NCBI BLAST+ blastn", + "outputs": [ + { + "name": "output1", + "type": "tabular" + } + ], + "position": { + "left": 4990.75, + "top": 1090.75 + }, + "post_job_actions": {}, + "tool_errors": null, + "tool_id": "toolshed.g2.bx.psu.edu/repos/devteam/ncbi_blast_plus/ncbi_blastn_wrapper/0.1.00", + "tool_state": "{\"evalue_cutoff\": \"\\\"0.001\\\"\", \"__page__\": 0, \"adv_opts\": \"{\\\"identity_cutoff\\\": \\\"0.0\\\", \\\"adv_opts_selector\\\": \\\"advanced\\\", \\\"ungapped\\\": \\\"False\\\", \\\"filter_query\\\": \\\"False\\\", \\\"word_size\\\": \\\"0\\\", \\\"__current_case__\\\": 1, \\\"parse_deflines\\\": \\\"False\\\", \\\"strand\\\": \\\"-strand both\\\", 
\\\"max_hits\\\": \\\"0\\\"}\", \"__rerun_remap_job_id__\": null, \"blast_type\": \"\\\"megablast\\\"\", \"db_opts\": \"{\\\"db_opts_selector\\\": \\\"histdb\\\", \\\"subject\\\": \\\"\\\", \\\"histdb\\\": null, \\\"__current_case__\\\": 1, \\\"database\\\": \\\"\\\"}\", \"output\": \"{\\\"out_format\\\": \\\"cols\\\", \\\"std_cols\\\": [\\\"qseqid\\\", \\\"qstart\\\", \\\"qend\\\"], \\\"ids_cols\\\": null, \\\"tax_cols\\\": null, \\\"__current_case__\\\": 2, \\\"misc_cols\\\": null, \\\"ext_cols\\\": [\\\"qseq\\\", \\\"sseq\\\"]}\", \"query\": \"null\"}", + "tool_version": "0.1.00", + "type": "tool", + "user_outputs": [] + }, + "57": { + "annotation": "", + "id": 57, + "input_connections": { + "db_opts|histdb": { + "id": 17, + "output_name": "outfile" + }, + "query": { + "id": 47, + "output_name": "output_file" + } + }, + "inputs": [], + "name": "NCBI BLAST+ blastn", + "outputs": [ + { + "name": "output1", + "type": "tabular" + } + ], + "position": { + "left": 5000.75, + "top": 1337.75 + }, + "post_job_actions": {}, + "tool_errors": null, + "tool_id": "toolshed.g2.bx.psu.edu/repos/devteam/ncbi_blast_plus/ncbi_blastn_wrapper/0.1.00", + "tool_state": "{\"evalue_cutoff\": \"\\\"0.001\\\"\", \"__page__\": 0, \"adv_opts\": \"{\\\"identity_cutoff\\\": \\\"0.0\\\", \\\"adv_opts_selector\\\": \\\"advanced\\\", \\\"ungapped\\\": \\\"False\\\", \\\"filter_query\\\": \\\"False\\\", \\\"word_size\\\": \\\"0\\\", \\\"__current_case__\\\": 1, \\\"parse_deflines\\\": \\\"False\\\", \\\"strand\\\": \\\"-strand both\\\", \\\"max_hits\\\": \\\"0\\\"}\", \"__rerun_remap_job_id__\": null, \"blast_type\": \"\\\"megablast\\\"\", \"db_opts\": \"{\\\"db_opts_selector\\\": \\\"histdb\\\", \\\"subject\\\": \\\"\\\", \\\"histdb\\\": null, \\\"__current_case__\\\": 1, \\\"database\\\": \\\"\\\"}\", \"output\": \"{\\\"out_format\\\": \\\"cols\\\", \\\"std_cols\\\": [\\\"qseqid\\\", \\\"qstart\\\", \\\"qend\\\"], \\\"ids_cols\\\": null, \\\"tax_cols\\\": null, \\\"__current_case__\\\": 2, 
\\\"misc_cols\\\": null, \\\"ext_cols\\\": [\\\"qseq\\\", \\\"sseq\\\"]}\", \"query\": \"null\"}", + "tool_version": "0.1.00", + "type": "tool", + "user_outputs": [] + }, + "58": { + "annotation": "", + "id": 58, + "input_connections": { + "db_opts|histdb": { + "id": 17, + "output_name": "outfile" + }, + "query": { + "id": 48, + "output_name": "output_file" + } + }, + "inputs": [], + "name": "NCBI BLAST+ blastn", + "outputs": [ + { + "name": "output1", + "type": "tabular" + } + ], + "position": { + "left": 5279.75, + "top": 471.75 + }, + "post_job_actions": {}, + "tool_errors": null, + "tool_id": "toolshed.g2.bx.psu.edu/repos/devteam/ncbi_blast_plus/ncbi_blastn_wrapper/0.1.00", + "tool_state": "{\"evalue_cutoff\": \"\\\"0.001\\\"\", \"__page__\": 0, \"adv_opts\": \"{\\\"identity_cutoff\\\": \\\"0.0\\\", \\\"adv_opts_selector\\\": \\\"advanced\\\", \\\"ungapped\\\": \\\"False\\\", \\\"filter_query\\\": \\\"False\\\", \\\"word_size\\\": \\\"0\\\", \\\"__current_case__\\\": 1, \\\"parse_deflines\\\": \\\"False\\\", \\\"strand\\\": \\\"-strand both\\\", \\\"max_hits\\\": \\\"0\\\"}\", \"__rerun_remap_job_id__\": null, \"blast_type\": \"\\\"megablast\\\"\", \"db_opts\": \"{\\\"db_opts_selector\\\": \\\"histdb\\\", \\\"subject\\\": \\\"\\\", \\\"histdb\\\": null, \\\"__current_case__\\\": 1, \\\"database\\\": \\\"\\\"}\", \"output\": \"{\\\"out_format\\\": \\\"cols\\\", \\\"std_cols\\\": [\\\"qseqid\\\", \\\"qstart\\\", \\\"qend\\\"], \\\"ids_cols\\\": null, \\\"tax_cols\\\": null, \\\"__current_case__\\\": 2, \\\"misc_cols\\\": null, \\\"ext_cols\\\": [\\\"qseq\\\", \\\"sseq\\\"]}\", \"query\": \"null\"}", + "tool_version": "0.1.00", + "type": "tool", + "user_outputs": [] + }, + "59": { + "annotation": "", + "id": 59, + "input_connections": { + "db_opts|histdb": { + "id": 17, + "output_name": "outfile" + }, + "query": { + "id": 49, + "output_name": "output_file" + } + }, + "inputs": [], + "name": "NCBI BLAST+ blastn", + "outputs": [ + { + "name": "output1", + "type": 
"tabular" + } + ], + "position": { + "left": 5288.75, + "top": 677.75 + }, + "post_job_actions": {}, + "tool_errors": null, + "tool_id": "toolshed.g2.bx.psu.edu/repos/devteam/ncbi_blast_plus/ncbi_blastn_wrapper/0.1.00", + "tool_state": "{\"evalue_cutoff\": \"\\\"0.001\\\"\", \"__page__\": 0, \"adv_opts\": \"{\\\"identity_cutoff\\\": \\\"0.0\\\", \\\"adv_opts_selector\\\": \\\"advanced\\\", \\\"ungapped\\\": \\\"False\\\", \\\"filter_query\\\": \\\"False\\\", \\\"word_size\\\": \\\"0\\\", \\\"__current_case__\\\": 1, \\\"parse_deflines\\\": \\\"False\\\", \\\"strand\\\": \\\"-strand both\\\", \\\"max_hits\\\": \\\"0\\\"}\", \"__rerun_remap_job_id__\": null, \"blast_type\": \"\\\"megablast\\\"\", \"db_opts\": \"{\\\"db_opts_selector\\\": \\\"histdb\\\", \\\"subject\\\": \\\"\\\", \\\"histdb\\\": null, \\\"__current_case__\\\": 1, \\\"database\\\": \\\"\\\"}\", \"output\": \"{\\\"out_format\\\": \\\"cols\\\", \\\"std_cols\\\": [\\\"qseqid\\\", \\\"qstart\\\", \\\"qend\\\"], \\\"ids_cols\\\": null, \\\"tax_cols\\\": null, \\\"__current_case__\\\": 2, \\\"misc_cols\\\": null, \\\"ext_cols\\\": [\\\"qseq\\\", \\\"sseq\\\"]}\", \"query\": \"null\"}", + "tool_version": "0.1.00", + "type": "tool", + "user_outputs": [] + }, + "60": { + "annotation": "", + "id": 60, + "input_connections": { + "db_opts|histdb": { + "id": 17, + "output_name": "outfile" + }, + "query": { + "id": 50, + "output_name": "output_file" + } + }, + "inputs": [], + "name": "NCBI BLAST+ blastn", + "outputs": [ + { + "name": "output1", + "type": "tabular" + } + ], + "position": { + "left": 5297.75, + "top": 874.75 + }, + "post_job_actions": {}, + "tool_errors": null, + "tool_id": "toolshed.g2.bx.psu.edu/repos/devteam/ncbi_blast_plus/ncbi_blastn_wrapper/0.1.00", + "tool_state": "{\"evalue_cutoff\": \"\\\"0.001\\\"\", \"__page__\": 0, \"adv_opts\": \"{\\\"identity_cutoff\\\": \\\"0.0\\\", \\\"adv_opts_selector\\\": \\\"advanced\\\", \\\"ungapped\\\": \\\"False\\\", \\\"filter_query\\\": \\\"False\\\", 
\\\"word_size\\\": \\\"0\\\", \\\"__current_case__\\\": 1, \\\"parse_deflines\\\": \\\"False\\\", \\\"strand\\\": \\\"-strand both\\\", \\\"max_hits\\\": \\\"0\\\"}\", \"__rerun_remap_job_id__\": null, \"blast_type\": \"\\\"megablast\\\"\", \"db_opts\": \"{\\\"db_opts_selector\\\": \\\"histdb\\\", \\\"subject\\\": \\\"\\\", \\\"histdb\\\": null, \\\"__current_case__\\\": 1, \\\"database\\\": \\\"\\\"}\", \"output\": \"{\\\"out_format\\\": \\\"cols\\\", \\\"std_cols\\\": [\\\"qseqid\\\", \\\"qstart\\\", \\\"qend\\\"], \\\"ids_cols\\\": null, \\\"tax_cols\\\": null, \\\"__current_case__\\\": 2, \\\"misc_cols\\\": null, \\\"ext_cols\\\": [\\\"qseq\\\", \\\"sseq\\\"]}\", \"query\": \"null\"}", + "tool_version": "0.1.00", + "type": "tool", + "user_outputs": [] + }, + "61": { + "annotation": "", + "id": 61, + "input_connections": { + "db_opts|histdb": { + "id": 17, + "output_name": "outfile" + }, + "query": { + "id": 51, + "output_name": "output_file" + } + }, + "inputs": [], + "name": "NCBI BLAST+ blastn", + "outputs": [ + { + "name": "output1", + "type": "tabular" + } + ], + "position": { + "left": 5304.75, + "top": 1091.75 + }, + "post_job_actions": {}, + "tool_errors": null, + "tool_id": "toolshed.g2.bx.psu.edu/repos/devteam/ncbi_blast_plus/ncbi_blastn_wrapper/0.1.00", + "tool_state": "{\"evalue_cutoff\": \"\\\"0.001\\\"\", \"__page__\": 0, \"adv_opts\": \"{\\\"identity_cutoff\\\": \\\"0.0\\\", \\\"adv_opts_selector\\\": \\\"advanced\\\", \\\"ungapped\\\": \\\"False\\\", \\\"filter_query\\\": \\\"False\\\", \\\"word_size\\\": \\\"0\\\", \\\"__current_case__\\\": 1, \\\"parse_deflines\\\": \\\"False\\\", \\\"strand\\\": \\\"-strand both\\\", \\\"max_hits\\\": \\\"0\\\"}\", \"__rerun_remap_job_id__\": null, \"blast_type\": \"\\\"megablast\\\"\", \"db_opts\": \"{\\\"db_opts_selector\\\": \\\"histdb\\\", \\\"subject\\\": \\\"\\\", \\\"histdb\\\": null, \\\"__current_case__\\\": 1, \\\"database\\\": \\\"\\\"}\", \"output\": \"{\\\"out_format\\\": \\\"cols\\\", 
\\\"std_cols\\\": [\\\"qseqid\\\", \\\"qstart\\\", \\\"qend\\\"], \\\"ids_cols\\\": null, \\\"tax_cols\\\": null, \\\"__current_case__\\\": 2, \\\"misc_cols\\\": null, \\\"ext_cols\\\": [\\\"qseq\\\", \\\"sseq\\\"]}\", \"query\": \"null\"}", + "tool_version": "0.1.00", + "type": "tool", + "user_outputs": [] + }, + "62": { + "annotation": "", + "id": 62, + "input_connections": { + "db_opts|histdb": { + "id": 17, + "output_name": "outfile" + }, + "query": { + "id": 52, + "output_name": "output_file" + } + }, + "inputs": [], + "name": "NCBI BLAST+ blastn", + "outputs": [ + { + "name": "output1", + "type": "tabular" + } + ], + "position": { + "left": 5309.75, + "top": 1328.75 + }, + "post_job_actions": {}, + "tool_errors": null, + "tool_id": "toolshed.g2.bx.psu.edu/repos/devteam/ncbi_blast_plus/ncbi_blastn_wrapper/0.1.00", + "tool_state": "{\"evalue_cutoff\": \"\\\"0.001\\\"\", \"__page__\": 0, \"adv_opts\": \"{\\\"identity_cutoff\\\": \\\"0.0\\\", \\\"adv_opts_selector\\\": \\\"advanced\\\", \\\"ungapped\\\": \\\"False\\\", \\\"filter_query\\\": \\\"False\\\", \\\"word_size\\\": \\\"0\\\", \\\"__current_case__\\\": 1, \\\"parse_deflines\\\": \\\"False\\\", \\\"strand\\\": \\\"-strand both\\\", \\\"max_hits\\\": \\\"0\\\"}\", \"__rerun_remap_job_id__\": null, \"blast_type\": \"\\\"megablast\\\"\", \"db_opts\": \"{\\\"db_opts_selector\\\": \\\"histdb\\\", \\\"subject\\\": \\\"\\\", \\\"histdb\\\": null, \\\"__current_case__\\\": 1, \\\"database\\\": \\\"\\\"}\", \"output\": \"{\\\"out_format\\\": \\\"cols\\\", \\\"std_cols\\\": [\\\"qseqid\\\", \\\"qstart\\\", \\\"qend\\\"], \\\"ids_cols\\\": null, \\\"tax_cols\\\": null, \\\"__current_case__\\\": 2, \\\"misc_cols\\\": null, \\\"ext_cols\\\": [\\\"qseq\\\", \\\"sseq\\\"]}\", \"query\": \"null\"}", + "tool_version": "0.1.00", + "type": "tool", + "user_outputs": [] + }, + "63": { + "annotation": "", + "id": 63, + "input_connections": { + "input_blast_file": { + "id": 53, + "output_name": "output1" + }, + 
"input_variant_file": { + "id": 42, + "output_name": "output_file1" + } + }, + "inputs": [], + "name": "ParseBlastForUniqueMatch", + "outputs": [ + { + "name": "output_variant_file", + "type": "pileup" + }, + { + "name": "log_file", + "type": "txt" + } + ], + "position": { + "left": 5611.89990234375, + "top": 356.3000030517578 + }, + "post_job_actions": {}, + "tool_errors": null, + "tool_id": "ParseBlastForUniqueMatch", + "tool_state": "{\"input_variant_file\": \"null\", \"__page__\": 0, \"__rerun_remap_job_id__\": null, \"input_blast_file\": \"null\", \"window_length\": \"\\\"50\\\"\", \"nb_mismatch_max\": \"\\\"3\\\"\"}", + "tool_version": "1.10", + "type": "tool", + "user_outputs": [] + }, + "64": { + "annotation": "", + "id": 64, + "input_connections": { + "input_blast_file": { + "id": 54, + "output_name": "output1" + }, + "input_variant_file": { + "id": 42, + "output_name": "output_file2" + } + }, + "inputs": [], + "name": "ParseBlastForUniqueMatch", + "outputs": [ + { + "name": "output_variant_file", + "type": "pileup" + }, + { + "name": "log_file", + "type": "txt" + } + ], + "position": { + "left": 5608.89990234375, + "top": 598.3000030517578 + }, + "post_job_actions": {}, + "tool_errors": null, + "tool_id": "ParseBlastForUniqueMatch", + "tool_state": "{\"input_variant_file\": \"null\", \"__page__\": 0, \"__rerun_remap_job_id__\": null, \"input_blast_file\": \"null\", \"window_length\": \"\\\"50\\\"\", \"nb_mismatch_max\": \"\\\"3\\\"\"}", + "tool_version": "1.10", + "type": "tool", + "user_outputs": [] + }, + "65": { + "annotation": "", + "id": 65, + "input_connections": { + "input_blast_file": { + "id": 55, + "output_name": "output1" + }, + "input_variant_file": { + "id": 42, + "output_name": "output_file3" + } + }, + "inputs": [], + "name": "ParseBlastForUniqueMatch", + "outputs": [ + { + "name": "output_variant_file", + "type": "pileup" + }, + { + "name": "log_file", + "type": "txt" + } + ], + "position": { + "left": 5614.89990234375, + "top": 
837.2999877929688 + }, + "post_job_actions": {}, + "tool_errors": null, + "tool_id": "ParseBlastForUniqueMatch", + "tool_state": "{\"input_variant_file\": \"null\", \"__page__\": 0, \"__rerun_remap_job_id__\": null, \"input_blast_file\": \"null\", \"window_length\": \"\\\"50\\\"\", \"nb_mismatch_max\": \"\\\"3\\\"\"}", + "tool_version": "1.10", + "type": "tool", + "user_outputs": [] + }, + "66": { + "annotation": "", + "id": 66, + "input_connections": { + "input_blast_file": { + "id": 56, + "output_name": "output1" + }, + "input_variant_file": { + "id": 42, + "output_name": "output_file4" + } + }, + "inputs": [], + "name": "ParseBlastForUniqueMatch", + "outputs": [ + { + "name": "output_variant_file", + "type": "pileup" + }, + { + "name": "log_file", + "type": "txt" + } + ], + "position": { + "left": 5613.89990234375, + "top": 1074.2999877929688 + }, + "post_job_actions": {}, + "tool_errors": null, + "tool_id": "ParseBlastForUniqueMatch", + "tool_state": "{\"input_variant_file\": \"null\", \"__page__\": 0, \"__rerun_remap_job_id__\": null, \"input_blast_file\": \"null\", \"window_length\": \"\\\"50\\\"\", \"nb_mismatch_max\": \"\\\"3\\\"\"}", + "tool_version": "1.10", + "type": "tool", + "user_outputs": [] + }, + "67": { + "annotation": "", + "id": 67, + "input_connections": { + "input_blast_file": { + "id": 57, + "output_name": "output1" + }, + "input_variant_file": { + "id": 42, + "output_name": "output_file5" + } + }, + "inputs": [], + "name": "ParseBlastForUniqueMatch", + "outputs": [ + { + "name": "output_variant_file", + "type": "pileup" + }, + { + "name": "log_file", + "type": "txt" + } + ], + "position": { + "left": 5619.89990234375, + "top": 1313.2999877929688 + }, + "post_job_actions": {}, + "tool_errors": null, + "tool_id": "ParseBlastForUniqueMatch", + "tool_state": "{\"input_variant_file\": \"null\", \"__page__\": 0, \"__rerun_remap_job_id__\": null, \"input_blast_file\": \"null\", \"window_length\": \"\\\"50\\\"\", \"nb_mismatch_max\": 
\"\\\"3\\\"\"}", + "tool_version": "1.10", + "type": "tool", + "user_outputs": [] + }, + "68": { + "annotation": "", + "id": 68, + "input_connections": { + "input_blast_file": { + "id": 58, + "output_name": "output1" + }, + "input_variant_file": { + "id": 42, + "output_name": "output_file6" + } + }, + "inputs": [], + "name": "ParseBlastForUniqueMatch", + "outputs": [ + { + "name": "output_variant_file", + "type": "pileup" + }, + { + "name": "log_file", + "type": "txt" + } + ], + "position": { + "left": 5937.89990234375, + "top": 354.3000030517578 + }, + "post_job_actions": {}, + "tool_errors": null, + "tool_id": "ParseBlastForUniqueMatch", + "tool_state": "{\"input_variant_file\": \"null\", \"__page__\": 0, \"__rerun_remap_job_id__\": null, \"input_blast_file\": \"null\", \"window_length\": \"\\\"50\\\"\", \"nb_mismatch_max\": \"\\\"3\\\"\"}", + "tool_version": "1.10", + "type": "tool", + "user_outputs": [] + }, + "69": { + "annotation": "", + "id": 69, + "input_connections": { + "input_blast_file": { + "id": 59, + "output_name": "output1" + }, + "input_variant_file": { + "id": 42, + "output_name": "output_file7" + } + }, + "inputs": [], + "name": "ParseBlastForUniqueMatch", + "outputs": [ + { + "name": "output_variant_file", + "type": "pileup" + }, + { + "name": "log_file", + "type": "txt" + } + ], + "position": { + "left": 5939.89990234375, + "top": 596.3000030517578 + }, + "post_job_actions": {}, + "tool_errors": null, + "tool_id": "ParseBlastForUniqueMatch", + "tool_state": "{\"input_variant_file\": \"null\", \"__page__\": 0, \"__rerun_remap_job_id__\": null, \"input_blast_file\": \"null\", \"window_length\": \"\\\"50\\\"\", \"nb_mismatch_max\": \"\\\"3\\\"\"}", + "tool_version": "1.10", + "type": "tool", + "user_outputs": [] + }, + "70": { + "annotation": "", + "id": 70, + "input_connections": { + "input_blast_file": { + "id": 60, + "output_name": "output1" + }, + "input_variant_file": { + "id": 42, + "output_name": "output_file8" + } + }, + "inputs": [], + 
"name": "ParseBlastForUniqueMatch", + "outputs": [ + { + "name": "output_variant_file", + "type": "pileup" + }, + { + "name": "log_file", + "type": "txt" + } + ], + "position": { + "left": 5942.89990234375, + "top": 834.2999877929688 + }, + "post_job_actions": {}, + "tool_errors": null, + "tool_id": "ParseBlastForUniqueMatch", + "tool_state": "{\"input_variant_file\": \"null\", \"__page__\": 0, \"__rerun_remap_job_id__\": null, \"input_blast_file\": \"null\", \"window_length\": \"\\\"50\\\"\", \"nb_mismatch_max\": \"\\\"3\\\"\"}", + "tool_version": "1.10", + "type": "tool", + "user_outputs": [] + }, + "71": { + "annotation": "", + "id": 71, + "input_connections": { + "input_blast_file": { + "id": 61, + "output_name": "output1" + }, + "input_variant_file": { + "id": 42, + "output_name": "output_file9" + } + }, + "inputs": [], + "name": "ParseBlastForUniqueMatch", + "outputs": [ + { + "name": "output_variant_file", + "type": "pileup" + }, + { + "name": "log_file", + "type": "txt" + } + ], + "position": { + "left": 5943.89990234375, + "top": 1063.2999877929688 + }, + "post_job_actions": {}, + "tool_errors": null, + "tool_id": "ParseBlastForUniqueMatch", + "tool_state": "{\"input_variant_file\": \"null\", \"__page__\": 0, \"__rerun_remap_job_id__\": null, \"input_blast_file\": \"null\", \"window_length\": \"\\\"50\\\"\", \"nb_mismatch_max\": \"\\\"3\\\"\"}", + "tool_version": "1.10", + "type": "tool", + "user_outputs": [] + }, + "72": { + "annotation": "", + "id": 72, + "input_connections": { + "input_blast_file": { + "id": 62, + "output_name": "output1" + }, + "input_variant_file": { + "id": 42, + "output_name": "output_file10" + } + }, + "inputs": [], + "name": "ParseBlastForUniqueMatch", + "outputs": [ + { + "name": "output_variant_file", + "type": "pileup" + }, + { + "name": "log_file", + "type": "txt" + } + ], + "position": { + "left": 5937.89990234375, + "top": 1314.2999877929688 + }, + "post_job_actions": {}, + "tool_errors": null, + "tool_id": 
"ParseBlastForUniqueMatch", + "tool_state": "{\"input_variant_file\": \"null\", \"__page__\": 0, \"__rerun_remap_job_id__\": null, \"input_blast_file\": \"null\", \"window_length\": \"\\\"50\\\"\", \"nb_mismatch_max\": \"\\\"3\\\"\"}", + "tool_version": "1.10", + "type": "tool", + "user_outputs": [] + }, + "73": { + "annotation": "", + "id": 73, + "input_connections": { + "input_files": [ + { + "id": 72, + "output_name": "output_variant_file" + }, + { + "id": 71, + "output_name": "output_variant_file" + }, + { + "id": 70, + "output_name": "output_variant_file" + }, + { + "id": 69, + "output_name": "output_variant_file" + }, + { + "id": 68, + "output_name": "output_variant_file" + }, + { + "id": 67, + "output_name": "output_variant_file" + }, + { + "id": 66, + "output_name": "output_variant_file" + }, + { + "id": 65, + "output_name": "output_variant_file" + }, + { + "id": 64, + "output_name": "output_variant_file" + }, + { + "id": 63, + "output_name": "output_variant_file" + } + ] + }, + "inputs": [], + "name": "MergeMultiFile", + "outputs": [ + { + "name": "output_file", + "type": "txt" + } + ], + "position": { + "left": 6432.89990234375, + "top": 801.3000030517578 + }, + "post_job_actions": {}, + "tool_errors": null, + "tool_id": "MergeMultiFile", + "tool_state": "{\"out_format\": \"\\\"pileup\\\"\", \"__page__\": 0, \"__rerun_remap_job_id__\": null, \"input_files\": \"null\"}", + "tool_version": "1.00", + "type": "tool", + "user_outputs": [] + }, + "74": { + "annotation": "", + "id": 74, + "input_connections": { + "input_file": { + "id": 73, + "output_name": "output_file" + } + }, + "inputs": [], + "name": "mpileupfilterandstat", + "outputs": [ + { + "name": "output_file", + "type": "pileup" + }, + { + "name": "log_file", + "type": "txt" + } + ], + "position": { + "left": 6806.39990234375, + "top": 829.3000030517578 + }, + "post_job_actions": {}, + "tool_errors": null, + "tool_id": "mpileupfilterandstat", + "tool_state": "{\"__page__\": 0, \"stat\": 
\"{\\\"stat_dist_step\\\": \\\"50\\\", \\\"stat_min_depth_step\\\": \\\"4\\\", \\\"stat_max_depth_max\\\": \\\"500\\\", \\\"stat_min_depth_max\\\": \\\"20\\\", \\\"stat_freq_max\\\": \\\"1.0\\\", \\\"stat_min_depth_min\\\": \\\"4\\\", \\\"stat_max_depth_min\\\": \\\"250\\\", \\\"do_stat\\\": \\\"YES\\\", \\\"stat_dist_min\\\": \\\"0\\\", \\\"__current_case__\\\": 0, \\\"stat_max_depth_step\\\": \\\"250\\\", \\\"stat_dist_max\\\": \\\"50\\\", \\\"stat_freq_step\\\": \\\"0.1\\\", \\\"stat_freq_min\\\": \\\"0.8\\\"}\", \"input_file\": \"null\", \"min_frequency\": \"\\\"0.0\\\"\", \"min_depth\": \"\\\"1\\\"\", \"__rerun_remap_job_id__\": null, \"min_forward_and_reverse\": \"\\\"0\\\"\", \"max_depth\": \"\\\"500\\\"\", \"min_distance\": \"\\\"0\\\"\"}", + "tool_version": "1.00", + "type": "tool", + "user_outputs": [] + }, + "75": { + "annotation": "", + "id": 75, + "input_connections": { + "input_log_files": [ + { + "id": 74, + "output_name": "log_file" + }, + { + "id": 72, + "output_name": "log_file" + }, + { + "id": 71, + "output_name": "log_file" + }, + { + "id": 70, + "output_name": "log_file" + }, + { + "id": 69, + "output_name": "log_file" + }, + { + "id": 68, + "output_name": "log_file" + }, + { + "id": 67, + "output_name": "log_file" + }, + { + "id": 66, + "output_name": "log_file" + }, + { + "id": 65, + "output_name": "log_file" + }, + { + "id": 64, + "output_name": "log_file" + }, + { + "id": 63, + "output_name": "log_file" + }, + { + "id": 41, + "output_name": "log_file" + }, + { + "id": 37, + "output_name": "log_file" + }, + { + "id": 36, + "output_name": "log_file" + }, + { + "id": 35, + "output_name": "log_file" + }, + { + "id": 34, + "output_name": "log_file" + }, + { + "id": 33, + "output_name": "log_file" + }, + { + "id": 32, + "output_name": "log_file" + }, + { + "id": 31, + "output_name": "log_file" + }, + { + "id": 30, + "output_name": "log_file" + }, + { + "id": 29, + "output_name": "log_file" + }, + { + "id": 28, + "output_name": "log_file" + }, + 
{ + "id": 16, + "output_name": "log_file" + }, + { + "id": 15, + "output_name": "log_file" + }, + { + "id": 14, + "output_name": "log_file" + }, + { + "id": 13, + "output_name": "log_file" + }, + { + "id": 12, + "output_name": "log_file" + }, + { + "id": 11, + "output_name": "log_file" + }, + { + "id": 10, + "output_name": "log_file" + }, + { + "id": 9, + "output_name": "log_file" + }, + { + "id": 8, + "output_name": "log_file" + }, + { + "id": 7, + "output_name": "log_file" + } + ] + }, + "inputs": [], + "name": "MergeLogFiles", + "outputs": [ + { + "name": "output_file", + "type": "txt" + } + ], + "position": { + "left": 7106.89990234375, + "top": 1007.2999877929688 + }, + "post_job_actions": {}, + "tool_errors": null, + "tool_id": "MergeLogFiles", + "tool_state": "{\"__page__\": 0, \"__rerun_remap_job_id__\": null, \"input_log_files\": \"null\"}", + "tool_version": "1.00", + "type": "tool", + "user_outputs": [] + } + } +} \ No newline at end of file
--- a/rapsodyn/PrepareFastqLight.pl Mon Jan 26 18:10:52 2015 -0500 +++ b/rapsodyn/PrepareFastqLight.pl Thu Jan 29 08:54:06 2015 -0500 @@ -1,4 +1,5 @@ #!/usr/bin/perl +#v1.1.1 new check on read synchro #v1.1.0 manage empty files #v1.0.4 bug correction, last read not considered #v1.0.3 support rapsodyn header (.... 1:... / .... 2:...) @@ -132,17 +133,12 @@ my $ligne2_r2 =<READ2>; my $ligne3_r2 =<READ2>; my $ligne4_r2 =<READ2>; - # chomp($ligne1_r1); - # chomp($ligne2_r1); - # chomp($ligne3_r1); - # chomp($ligne4_r1); - # chomp($ligne2_r1); $compt++; $nb_read1++; $nb_read2++; -#@ 1 sec + if ((!$ligne1_r1)||(!$ligne2_r1)||(!$ligne3_r1)||(!$ligne4_r1)||(!$ligne1_r2)||(!$ligne2_r2)||(!$ligne3_r2)||(!$ligne4_r2)){ if ($VERBOSE eq "ON"){ print "Error in file format"; @@ -173,7 +169,7 @@ } $error2++; } -#@ 1 - 2 sec + else { my $length_seq1 = length(chomp($ligne2_r1)); @@ -190,26 +186,38 @@ my $repheader1=""; my $repheader2=""; - - if ($ligne1_r1 =~/^\@(.*?)[\s\/]/){ + my @tbl_header1; + my @tbl_header2; + if ($ligne1_r1 =~/^\@(.*?)\s*$/){ $header1 = $1; + @tbl_header1 = split(//,$header1); } - if ($ligne3_r1 =~/^\+(.*?)[\s\/]/){ + if ($ligne3_r1 =~/^\+(.*?)\s*$/){ $repheader1 = $1; } - if ($ligne1_r2 =~/^\@(.*?)[\s\/]/){ + if ($ligne1_r2 =~/^\@(.*?)\s*$/){ $header2 = $1; + @tbl_header2 = split(//,$header2); } - if ($ligne3_r2 =~/^\+(.*?)[\s\/]/){ + if ($ligne3_r2 =~/^\+(.*?)\s*$/){ $repheader2 = $1; } -#@ 2 sec + my $diffheader=0; + if ($#tbl_header1 == $#tbl_header2){ + for (my $i=0;$i<=$#tbl_header1;$i++){ + if ($tbl_header1[$i] ne $tbl_header2[$i]){ + $diffheader++; + } + } + } - ### Verification de la coherence sequence /qualité @ 1 sec - if (($TYPE eq "illumina")&&((!$header1)||(!$header2)||(!$repheader1)||(!$repheader2))){ + + + ### Verification de la coherence sequence /qualité + if ((!$header1)||(!$header2)){ if ($VERBOSE eq "ON"){ print "Error in header : empty\n"; print $ligne1_r1; @@ -224,24 +232,9 @@ } $error3++; } - elsif (($TYPE eq 
"sanger")&&((!$header1)||(!$header2))){ + elsif ((($repheader1)&&($header1 ne $repheader1))||(($repheader2)&&($header2 ne $repheader2))){ if ($VERBOSE eq "ON"){ - print "Error in header ref : empty\n"; - print $ligne1_r1; - print $ligne2_r1; - print $ligne3_r1; - print $ligne4_r1; - print $ligne1_r2; - print $ligne2_r2; - print $ligne3_r2; - print $ligne4_r2; - print "\n"; - } - $error3++; - } - elsif (($TYPE eq "illumina")&&(($header1 ne $repheader1)||($header2 ne $repheader2)||($header1 ne $header2))){ - if ($VERBOSE eq "ON"){ - print "Error in header : different\n"; + print "Error : difference in header and header repeat\n"; print $ligne1_r1; print $ligne2_r1; print $ligne3_r1; @@ -254,9 +247,24 @@ } $error4++; } - elsif (($TYPE eq "sanger")&&($header1 ne $header2)){ + elsif ($#tbl_header1 != $#tbl_header2){ if ($VERBOSE eq "ON"){ - print "Error in header : different\n"; + print "Error : difference in header size between reads\n"; + print $ligne1_r1; + print $ligne2_r1; + print $ligne3_r1; + print $ligne4_r1; + print $ligne1_r2; + print $ligne2_r2; + print $ligne3_r2; + print $ligne4_r2; + print "\n"; + } + $error4++; + } + elsif ($diffheader > 1 ){ # More than ...1 and ...2 difference in read1 and read2 header + if ($VERBOSE eq "ON"){ + print "Error can't establish synchro between reads, more than 1 difference between headers\n"; print $ligne1_r1; print $ligne2_r1; print $ligne3_r1;
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/rapsodyn/PrepareFastqLight.pl~ Thu Jan 29 08:54:06 2015 -0500 @@ -0,0 +1,512 @@ +#!/usr/bin/perl +#V1.0.2 added auto type detection +#V1.0.1 added log, option parameters +use strict; +use warnings; +use Getopt::Long; + +my $read1_file; +my $read2_file; +my $log_file; +my $output1_file; +my $output2_file; + +my $TYPE="sanger"; +my $MIN_LENGTH=30; +my $MIN_QUALITY=30; + +my $VERBOSE = "OFF"; + +GetOptions ( +"read1_file=s" => \$read1_file, +"read2_file=s" => \$read2_file, +"log_file=s" => \$log_file, +"output1_file=s" => \$output1_file, +"output2_file=s" => \$output2_file, +"type=s" => \$TYPE, +"min_length=i" => \$MIN_LENGTH, +"min_quality=i" => \$MIN_QUALITY, +"verbose=s" => \$VERBOSE +) or die("Error in command line arguments\n"); + + +my $nb_read1=0; +my $nb_base_read1=0; +my $nb_read2=0; +my $nb_base_read2=0; + +my $nb_read1_t=0; +my $nb_base_read1_t=0; +my $nb_read2_t=0; +my $nb_base_read2_t=0; + +my $nb_base_current_t=0; + +open(READ1, $read1_file) or die ("Can't open $read1_file\n"); +open(READ2, $read2_file) or die ("Can't open $read2_file\n"); +open(OUT1, ">$output1_file") or die ("Can't open $output1_file\n"); +open(OUT2, ">$output2_file") or die ("Can't open $output2_file\n"); +open (LF,">$log_file") or die("Can't open $log_file\n"); + + +my $error1=0; +my $error2=0; +my $error3=0; +my $error4=0; +my $error5=0; +my $error6=0; +my $error7=0; +my $error8=0; +my $error9=0; +my $error10=0; + +my $auto_type=""; +my %qual; +if ($TYPE eq "auto"){ + my $compt=0; + open(DETECT, $read1_file) or die ("Can't open $read1_file\n"); + while (my $ligne1_r1 =<DETECT>){ + my $ligne2_r1 =<DETECT>; + my $ligne3_r1 =<DETECT>; + my $ligne4_r1 =<DETECT>; + $compt++; + if ($ligne4_r1 =~ /^(.*)\s*$/i){ + my $qual = $1; + my @q = split(//,$qual); + for (my $i=0;$i<=$#q;$i++){ + my $num = ord($q[$i]); + if ($qual{$num}){ + $qual{$num}++; + } + else { + $qual{$num} = 1; + } + #range sanger / illumina 1.8+ : 33->94 + #range illumina 
1.3->1.7 : 64->105 + if ($num > 94){$auto_type = "illumina";last;} + if ($num < 64){$auto_type = "sanger";last;} + } + } + else { + print STDERR "Error in format detection : quality not recognized\n$ligne4_r1"; + exit(0); + } + + if ($auto_type ne ""){ + last; + } + + } + close (DETECT); + if ($auto_type eq ""){ + print STDERR "Error in format detection : type not recognized parsing read1\n"; + foreach my $key (sort {$a <=> $b} keys %qual){ + print "$key\t:\t",$qual{$key},"\n"; + } + exit(0); + } + else { + $TYPE = $auto_type; + } +} + + + + +while (my $ligne1_r1 =<READ1>){ + my $ligne2_r1 =<READ1>; + my $ligne3_r1 =<READ1>; + my $ligne4_r1 =<READ1>; + my $ligne1_r2 =<READ2>; + my $ligne2_r2 =<READ2>; + my $ligne3_r2 =<READ2>; + my $ligne4_r2 =<READ2>; + + $nb_read1++; + $nb_read2++; + +#@ 1 sec + if ((!$ligne1_r1)||(!$ligne2_r1)||(!$ligne3_r1)||(!$ligne4_r1)||(!$ligne1_r2)||(!$ligne2_r2)||(!$ligne3_r2)||(!$ligne4_r2)){ + if ($VERBOSE eq "ON"){ + print "Error in file format"; + if ($ligne1_r1){print $ligne1_r1;} + if ($ligne2_r1){print $ligne2_r1;} + if ($ligne3_r1){print $ligne3_r1;} + if ($ligne4_r1){print $ligne4_r1;} + if ($ligne1_r2){print $ligne1_r2;} + if ($ligne2_r2){print $ligne2_r2;} + if ($ligne3_r2){print $ligne3_r2;} + if ($ligne4_r2){print $ligne4_r2;} + print "\n"; + } + $error1++; + } + elsif(($ligne1_r1 !~/^\@/)||($ligne1_r2 !~/^\@/)||($ligne3_r1 !~/^\+/)||($ligne3_r2 !~/^\+/)){ + if ($VERBOSE eq "ON"){ + print "Error in header : format\n"; + print $ligne1_r1; + print $ligne2_r1; + print $ligne3_r1; + print $ligne4_r1; + print $ligne1_r2; + print $ligne2_r2; + print $ligne3_r2; + print $ligne4_r2; + print "\n"; + } + $error2++; + } +#@ 1 - 2 sec + else { + + my $length_seq1 = length($ligne2_r1); + my $length_qual1 =length($ligne4_r1); + my $seq1; + my $qual1; + + my $length_seq2 = length($ligne2_r2); + my $length_qual2 =length($ligne4_r2); + my $seq2; + my $qual2; + my $header1=""; + my $header2=""; + my $repheader1=""; + my $repheader2=""; + + + 
if ($ligne1_r1 =~/^\@(.*?)[\s\/]/){ + $header1 = $1; + } + + if ($ligne3_r1 =~/^\+(.*?)[\s\/]/){ + $repheader1 = $1; + } + + if ($ligne1_r2 =~/^\@(.*?)[\s\/]/){ + $header2 = $1; + } + + if ($ligne3_r2 =~/^\+(.*?)[\s\/]/){ + $repheader2 = $1; + } +#@ 2 sec + + ### Verification de la coherence sequence /qualité @ 1 sec + if (($TYPE eq "illumina")&&((!$header1)||(!$header2)||(!$repheader1)||(!$repheader2))){ + if ($VERBOSE eq "ON"){ + print "Error in header : empty\n"; + print $ligne1_r1; + print $ligne2_r1; + print $ligne3_r1; + print $ligne4_r1; + print $ligne1_r2; + print $ligne2_r2; + print $ligne3_r2; + print $ligne4_r2; + print "\n"; + } + $error3++; + } + elsif (($TYPE eq "sanger")&&((!$header1)||(!$header2))){ + if ($VERBOSE eq "ON"){ + print "Error in header refgsd : empty\n"; + print $ligne1_r1; + print $ligne2_r1; + print $ligne3_r1; + print $ligne4_r1; + print $ligne1_r2; + print $ligne2_r2; + print $ligne3_r2; + print $ligne4_r2; + print "\n"; + } + $error3++; + } + elsif (($TYPE eq "illumina")&&(($header1 ne $repheader1)||($header2 ne $repheader2)||($header1 ne $header2))){ + if ($VERBOSE eq "ON"){ + print "Error in header : different\n"; + print $ligne1_r1; + print $ligne2_r1; + print $ligne3_r1; + print $ligne4_r1; + print $ligne1_r2; + print $ligne2_r2; + print $ligne3_r2; + print $ligne4_r2; + print "\n"; + } + $error4++; + } + elsif (($TYPE eq "sanger")&&($header1 ne $header2)){ + if ($VERBOSE eq "ON"){ + print "Error in header : different\n"; + print $ligne1_r1; + print $ligne2_r1; + print $ligne3_r1; + print $ligne4_r1; + print $ligne1_r2; + print $ligne2_r2; + print $ligne3_r2; + print $ligne4_r2; + print "\n"; + } + $error4++; + } + elsif (($length_seq1 != $length_qual1)||($length_seq2 != $length_qual2)){ + if ($VERBOSE eq "ON"){ + print "Error in seq/qual length\n"; + print $ligne1_r1; + print $ligne2_r1; + print $ligne3_r1; + print $ligne4_r1; + print $ligne1_r2; + print $ligne2_r2; + print $ligne3_r2; + print $ligne4_r2; + print "\n"; + } + 
$error5++; + } +#@ 1 - 2 sec + else { + ### Parsing sequence & qualité + if ($ligne2_r1 =~ /^([ATGCNX]+)\s*$/i){ + $seq1 = $1; + $nb_base_read1 += length($seq1); + } + if ($ligne2_r2 =~ /^([ATGCNX]+)\s*$/i){ + $seq2 = $1; + $nb_base_read2 += length($seq2); + } + if ($ligne4_r1 =~ /^(.*)\s*$/i){ + $qual1 = $1; + } + if ($ligne4_r2 =~ /^(.*)\s*$/i){ + $qual2 = $1; + } +#@ 2 sec + ### Verification du parsing et de la coherence sequence /qualité (n°2) + if ((!$seq1)||(!$seq2)||(!$qual1)||(!$qual2)){ + if ($VERBOSE eq "ON"){ + print "Error parsing seq / quality \n"; + print $ligne1_r1; + print $ligne2_r1; + print $ligne3_r1; + print $ligne4_r1; + print $ligne1_r2; + print $ligne2_r2; + print $ligne3_r2; + print $ligne4_r2; + print "\n"; + } + $error6++; + } + elsif ((length($seq1) != length($qual1))||(length($seq2) != length($qual2))){ + if ($VERBOSE eq "ON"){ + print "Error in seq/qual length after parsing\n"; + print $ligne1_r1; + print $ligne2_r1; + print $ligne3_r1; + print $ligne4_r1; + print $ligne1_r2; + print $ligne2_r2; + print $ligne3_r2; + print $ligne4_r2; + print "\n"; + } + $error7++; + } +#@ <1 sec + else { + my $fastq_lines_r1=""; + my $fastq_lines_r2=""; + my $nb_base_current_read1_t = 0; + my $nb_base_current_read2_t = 0; + + $fastq_lines_r1 = &grooming_and_trimming($ligne1_r1,$seq1,$qual1); + $nb_base_current_read1_t = $nb_base_current_t; + if ($fastq_lines_r1){ + $fastq_lines_r2 = &grooming_and_trimming($ligne1_r2,$seq2,$qual2); + $nb_base_current_read2_t = $nb_base_current_t; + } + if ($fastq_lines_r2){ + print OUT1 $fastq_lines_r1; + print OUT2 $fastq_lines_r2; + + $nb_read1_t++; + $nb_read2_t++; + $nb_base_read1_t += $nb_base_current_read1_t; + $nb_base_read2_t += $nb_base_current_read2_t; + + + } + } + } + + +#@ 7 sec + } +} + +close (READ1); +close (READ2); +close (OUT1); +close (OUT2); + +print LF "\n####\t Fastq preparation \n"; +print LF "Fastq format : $TYPE\n"; +print LF "## Before preparation\n"; +print LF "#Read1 :\t$nb_read1\t#Base 
:\t$nb_base_read1\n"; +print LF "#Read2 :\t$nb_read2\t#Base :\t$nb_base_read2\n"; +print LF "## After preparation\n"; +print LF "#Read1 :\t$nb_read1_t\t#Base :\t$nb_base_read1_t\n"; +print LF "#Read2 :\t$nb_read2_t\t#Base :\t$nb_base_read2_t\n"; +close (LF); + + +sub grooming_and_trimming{ + my $header = shift; + my $seq = shift; + my $quality = shift; + my $quality_converted=""; + my $quality_ori=$quality; + + my $lengthseq = length($seq); + my $startTrim = 0; + my $stopTrim = length($quality)-1; + my $startnoN = $startTrim; + my $stopnoN = $stopTrim; + + + my $chercheN = $seq; + my @bad_position_N; + my @bad_position_Q; + my $current_index = index($chercheN,"N"); + my $abs_index = $current_index; + while ($current_index >=0){ + push (@bad_position_N,$abs_index); + + if ($current_index<length($seq)){ + $chercheN = substr($chercheN,$current_index+1); + $current_index = index($chercheN,"N"); + $abs_index = $current_index + $bad_position_N[$#bad_position_N]+1; + } + else { + last; + } + } + + my @q = split(//,$quality); + for (my $i=0;$i<=$#q;$i++){ + my $chr = $q[$i]; + my $num = ord($q[$i]); + if ($TYPE eq "illumina"){ + $num = $num - 31; # 31 comme la difference entre la plage sanger (33-> 93 / 0->60) et illumina (64->104 / 0->40) + $quality_converted .= chr($num); + } + + if ($num < $MIN_QUALITY + 33){ #33 comme le départ de la plage sanger + push(@bad_position_Q,$i); + } + } + if ($quality_converted){$quality = $quality_converted;} + + my @bad_position = (@bad_position_N, @bad_position_Q); + + if ($#bad_position>=0){ + @bad_position = sort {$a <=> $b} @bad_position; + my %coord=%{&extract_longer_string_coordinates_from_bad_position(0,$stopTrim,\@bad_position)}; + $startTrim = $coord{"start"}; + $stopTrim = $coord{"stop"}; +#print "$startTrim .. 
$stopTrim\n"; + + } + my $lengthTrim = $stopTrim - $startTrim +1; + + #if ($stats_length{$lengthTrim}){ + # $stats_length{$lengthTrim} = 1; + #} + #else { + # $stats_length{$lengthTrim}++; + #} + my $fastq_lines=""; + +# if ($header =~ /GA8\-EAS671_0005\:3\:1\:1043\:4432/){ +# print "HEAD:\t$header"; +# print "SEQ:\n$seq\n"; +# print "$quality_ori\n"; +# print "$quality\n"; +# for (my $i=0;$i<=$#bad_position;$i++){ +# print $bad_position[$i]."(".$q[$bad_position[$i]]." : ".ord($q[$bad_position[$i]]).")"."\t"; +# } +# print "\n"; +# print "$startTrim .. $stopTrim / $lengthTrim \n"; +# print $fastq_lines; +# print "\n"; +# } + + #for (my $i=$startTrim;$i<=$stopTrim;$i++){ + # if ($stats_quality{ord($q{$i])}){ + # $stats_quality{ord($q{$i])}=1; + # } + # else { + # $stats_quality{ord($q{$i])}++; + # } + #} + + if ($lengthTrim >= $MIN_LENGTH){ + $fastq_lines .= $header; + my $new_seq = substr($seq,$startTrim,$lengthTrim); + $nb_base_current_t = length($new_seq); + $fastq_lines .= $new_seq."\n"; + $fastq_lines .= "+\n"; + my $new_q = substr($quality,$startTrim,$lengthTrim); + $fastq_lines .= $new_q."\n"; + return $fastq_lines; + + } + else { + #print "Insufficient length after trimming\n"; + return ""; + } +} + +sub extract_longer_string_coordinates_from_bad_position{ + my $start=shift; + my $stop =shift; + my $refbad = shift; + my @bad_position = @$refbad; + my %coord; + + my $current_start = $start; + my $current_stop = $bad_position[0]-1; + if ($current_stop < $start){$current_stop = $start;} + + + #debut -> premier N + my $current_length = $current_stop - $current_start +1; + my $test_length; + + #entre les N + for (my $i=1;$i<=$#bad_position;$i++){ + $test_length = $bad_position[$i]+1-$bad_position[$i-1]-1; + if ( $test_length > $current_length){ + $current_start = $bad_position[$i-1]+1; + $current_stop = $bad_position[$i]-1; + $current_length = $current_stop - $current_start +1; + } + } + + #dernier N -> fin + $test_length = $stop-$bad_position[$#bad_position]+1; 
+ if ( $test_length > $current_length){ + $current_start = $bad_position[$#bad_position]+1; + if ($current_start > $stop){$current_start=$stop;} + $current_stop = $stop; + } + $coord{"start"}=$current_start; + $coord{"stop"}= $current_stop; + $coord{"lenght"}=$current_stop-$current_start+1; + + return \%coord; +}
--- a/rapsodyn/PrepareFastqLight.xml Mon Jan 26 18:10:52 2015 -0500 +++ b/rapsodyn/PrepareFastqLight.xml Thu Jan 29 08:54:06 2015 -0500 @@ -1,4 +1,4 @@ -<tool id="PrepareFastqLight" name="PrepareFastqLight" version="1.10"> +<tool id="PrepareFastqLight" name="PrepareFastqLight" version="1.11"> <description>Fastq preparation</description> <command interpreter="perl"> PrepareFastqLight.pl -read1_file $input_read1_file -read2_file $input_read2_file -output1 $output_read1_file -output2 $output_read2_file -log_file $log_file -type $quality_type -min_quality $min_quality -min_length $min_length
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/rapsodyn/PrepareFastqLight.xml~ Thu Jan 29 08:54:06 2015 -0500 @@ -0,0 +1,28 @@ +<tool id="PrepareFastqLight" name="PrepareFastqLight" version="1.02"> +<description>Fastq preparation</description> +<command interpreter="perl"> + PrepareFastqLight.pl -read1_file $input_read1_file -read2_file $input_read2_file -output1 $output_read1_file -output2 $output_read2_file -log_file $log_file -type $quality_type -min_quality $min_quality -min_length $min_length +</command> +<inputs> + <param name="input_read1_file" type="data" format="txt,fastq" label="Select a suitable FASTQ READ 1 file from your history"/> + <param name="input_read2_file" type="data" format="txt,fastq" label="Select a suitable FASTQ READ 2 file from your history"/> + <param name="quality_type" type="select" label="Select input quality format"> + <option value="auto" selected="true">Auto-detect</option> + <option value="sanger">Sanger</option> + <option value="illumina">Illumina 1.3-1.7</option> + </param> + <param name="min_quality" type="integer" value="30" label="Minimum quality for 5' and 3' trimming "/> + <param name="min_length" type="integer" value="30" label="Minimum sequence length after trimming"/> +</inputs> +<outputs> + <data name="output_read1_file" format="fastqsanger" label="${tool.name} on ${on_string}"/> + <data name="output_read2_file" format="fastqsanger" label="${tool.name} on ${on_string}"/> + <data name="log_file" format="txt" label="${tool.name} LOG on ${on_string}"/> +</outputs> + +<help> + + + +</help> +</tool>