# HG changeset patch # User peterjc # Date 1307481829 14400 # Node ID b79caa511ba269343a0a6d63826ba6380782e7f7 # Parent 10e963c79a454234514bb76ecc61a57d5be5d443 Migrated tool version 0.0.3 from old tool shed archive to new tool shed repository diff -r 10e963c79a45 -r b79caa511ba2 tools/fastq/fastq_filter_by_id.xml --- a/tools/fastq/fastq_filter_by_id.xml Tue Jun 07 17:23:26 2011 -0400 +++ b/tools/fastq/fastq_filter_by_id.xml Tue Jun 07 17:23:49 2011 -0400 @@ -1,4 +1,4 @@ - + from a tabular file fastq_filter_by_id.py $input_tabular $columns $input_fastq @@ -60,16 +60,14 @@ ID present in the tabular file column(s) specified. You can opt to have a single output file of just the matching records, or just the non-matching ones. -Note that the order of sequences in the original FASTA file is preserved. +Note that the order of sequences in the original FASTQ file is preserved. Also, if any sequences share an identifier, duplicates are not removed. **Example Usage** -You may have performed some kind of contamination search, for example running -BLASTN against a database of cloning vectors or bacteria, giving you a tabular -file containing read identifiers. You could use this tool to extract only the -reads without BLAST matches (i.e. those which do not match your contaminant -database). +You may have mapped your reads against a reference genome, and thus generated +a tabular file of the mapped reads. You could use this tool to divide the reads +into those which map onto the genome, and those which don't. 
diff -r 10e963c79a45 -r b79caa511ba2 tools/fastq/fastq_filter_by_id.xml~ --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tools/fastq/fastq_filter_by_id.xml~ Tue Jun 07 17:23:49 2011 -0400 @@ -0,0 +1,75 @@ + + from a tabular file + +fastq_filter_by_id.py $input_tabular $columns $input_fastq +#if $output_choice_cond.output_choice=="both" + $output_pos $output_neg +#elif $output_choice_cond.output_choice=="pos" + $output_pos - +#elif $output_choice_cond.output_choice=="neg" + - $output_neg +#end if + + + + + + + + + + + + + + + + + + + + + + + + + + + + + output_choice_cond["output_choice"] != "neg" + + + + + + + + + + output_choice_cond["output_choice"] != "pos" + + + + + + +**What it does** + +By default it divides a FASTQ file in two, those sequences with or without an +ID present in the tabular file column(s) specified. You can opt to have a +single output file of just the matching records, or just the non-matching ones. + +Note that the order of sequences in the original FASTQ file is preserved. +Also, if any sequences share an identifier, duplicates are not removed. + +**Example Usage** + +You may have performed some kind of contamination search, for example running +BLASTN against a database of cloning vectors or bacteria, giving you a tabular +file containing read identifiers. You could use this tool to extract only the +reads without BLAST matches (i.e. those which do not match your contaminant +database). + + +