Previous changeset 6:30775c836c77 (2017-03-22) Next changeset 8:1920e0508831 (2017-07-03) |
Commit message:
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/sra-tools commit d1347141d384ed404f674d7ce408b6769e763ea1 |
modified:
fastq_dump.xml sam_dump.xml sra_macros.xml sra_pileup.xml |
added:
test-data/fastq_dump_result.fastq.gz |
b |
diff -r 30775c836c77 -r c7620aa7e1f0 fastq_dump.xml --- a/fastq_dump.xml Wed Mar 22 05:23:31 2017 -0400 +++ b/fastq_dump.xml Wed May 10 10:45:41 2017 -0400 |
[ |
b'@@ -1,5 +1,5 @@\n-<tool id="fastq_dump" name="Extract reads" version="@VERSION@.1">\n- <description>in FASTQ/A format from NCBI SRA.</description>\n+<tool id="fastq_dump" name="Extract reads in Fastq/a" version="@VERSION@.2">\n+ <description>format from NCBI SRA</description>\n <macros>\n <import>sra_macros.xml</import>\n </macros>\n@@ -9,14 +9,21 @@\n <![CDATA[\n \n #if $input.input_select=="file_list":\n- for acc in `cat $input.file_list` ;\n- do\n+ \n+ for acc in `cat $input.file_list` ;\n+ do\n+ \n #elif $input.input_select=="accession_number":\n- acc="$input.accession" &&\n+\n+ ## Stripping leading and trailing spaces in case user typed them in \n+ acc="${input.accession}" &&\n+ \n #end if\n \n #if $input.input_select=="file_list" or $input.input_select=="accession_number":\n- [ ""\\$acc" =~ ^[E|S|D]RR[0-9]{1,}$" ] && (\n+ \n+ [ ""\\$acc" =~ ^[E|S|D]RR[0-9]{1,}$" ] && (\n+ \n #end if\n \n ## Need to set the home directory to the current working directory,\n@@ -74,38 +81,35 @@\n $adv.clip\n $adv.skip_technical\n \n- #if str( $outputformat ) == "fasta":\n- --fasta\n+ #if str( $outputformat ) == "fastqsanger.gz":\n+ --gzip\n+ #elif str( $outputformat ) == "fastqsanger.bz2": \n+ --bzip2\n #end if\n #if $input.input_select=="file":\n --stdout\n "$input.file" > "$output_file"\n- #elif $input.input_select=="file_list":\n- "\\$acc"\n- #else:\n- --stdout\n+ \n+ #elif $input.input_select=="accession_number":\n+ --stdout\n "\\$acc" > "$output_accession" )\n #end if\n \n #if $input.input_select=="file_list":\n- ) ; done\n-\n- ;\n+ ) ; done\n \n-\n-\n-\n+ ;\n \n- for i in `ls *.fast* | cut -f 1 -d \'_\' | uniq` ; do\n- count=`ls \\$i* | wc -l` ;\n- data=(\\$(ls -d \\$i*));\n+ for i in `ls *.fast* | cut -f 1 -d \'_\' | uniq` ; do\n+ count=`ls \\$i* | wc -l` ;\n+ data=(\\$(ls -d \\$i*));\n \n- if [ "\\$count" -eq 2 ]; then\n- mv "\\${data[0]}" "\\${data[0]}"_forward.$outputformat; mv "\\${data[1]}" "\\${data[1]}"_reverse.$outputformat ;\n- elif [ "\\$count" -eq 1 ]; 
then\n- mv "\\${data[0]}" "\\${data[0]}"__single.$outputformat ;\n- fi;\n- done\n+ if [ "\\$count" -eq 2 ]; then\n+ mv "\\${data[0]}" "\\${data[0]}"_forward.$outputformat; mv "\\${data[1]}" "\\${data[1]}"_reverse.$outputformat ;\n+ elif [ "\\$count" -eq 1 ]; then\n+ mv "\\${data[0]}" "\\${data[0]}"__single.$outputformat ;\n+ fi;\n+ done\n \n \n #end if\n@@ -115,129 +119,239 @@\n </command>\n <inputs>\n <expand macro="input_conditional"/>\n- <param name="outputformat" type="select" label="select output format">\n- <option value="fastqsanger">fastq</option>\n- <option value="fasta">fasta</option>\n+ <param name="outputformat" type="select" display="radio" label="Select output format" help="Compression will greatly reduce the amount of space occupied by downloaded data. Downstream applications such as a short-read mappers will accept compressed data as input. Consider this example: an uncoimpressed 400 Mb fastq datasets compresses to 100 Mb or 80 Mb by gzip or bzip2, respectively. " argument="--gzip --bzip2">\n+ <option value="fastqsanger.gz">gzip compressed fastq</option>\n+ <option value="fastqsanger">Uncompressed fastq</option>\n+ <option value="fastqsanger.bz2">bzip2 compressed fastq</option>\n </param>\n <section name="adv" title="Advanced Options" expanded="False">\n- <param name="minID" type="integer" label="minimum spot ID" optional="true"/>\n- <param name="maxID" type="integer" label="maximum spot ID" optional="true"/>\n- <param name="minlen" type="integer" label="minimum read length" optional="true"/>\n- <param name="split" type="boolean" checked'..b'e three ways in which you can download data:\n+\n+ 1. Data for single accession\n+ 2. Multiple datasets using a list of accessions\n+ 3. 
Extract data from already uploaded SRA dataset\n+\n+Below we discuss each in detail.\n+\n+------\n+\n+**Uploading data for a single accession**\n+\n+When you type a single accession number (e.g., `SRR1582967`) into the **Accession** box and click **Execute** the tool will fetch data for you. It is important to keep the following in mind:\n+\n+ - if data is paired-ended (or mate-paired) the tool will generate a single *interleaved* dataset, in which forward and reverse mates are alternating (see an example dataset below)\n+ - if data is single ended, a standard single fastq dataset will be produced\n+\n+-----\n+\n+**Uploading multiple datasets using a list of accessions**\n+\n+A more realistic scenario is when you want to upload a number of datasets at once. To do this you need a list of accessions, where there is only one accession per line (see below for information on how to generate such a file). Once you have this file:\n+\n+ 1. Upload it into your history using Galaxy\'s upload tool\n+ 2. Once the list of accessions is uploaded choose *List of SRA accessions, one per line* from **select input type** dropdown\n+ 3. Choose uploaded file within the **sra accession list** field\n+ 4. Click **Execute**\n+\n+.. class:: warningmark\n+\n+Fastq datasets produced by this option will be saved in Galaxy\'s history as a collection_ - a single history element containing multiple datasets. In fact, two collections will be produced: one containing paired-end data and another containing single-end data. Single-end or pair-end collections may be empty if the accessions provided in the list contain only SINGLE or PAIRED data, respectively.\n+\n+-----\n+\n+**Extract data from already uploaded SRA dataset**\n+\n+If an SRA dataset is present in the history, it can be converted into a fastq dataset by setting **select input type** drop-down to *SRA archive in current history*. 
Just like in the case of extracting data for single accession number the following applies:\n+\n+ - if data is paired-ended (or mate-pair) the tool will generate a single *interleaved* dataset, in which forward and reverse mates are alternating (see example below).\n+ - if data is single ended, a standard fastq dataset will be produced\n+\n+@ACCESSION_LIST_HOWTO@\n+\n+-----\n+\n+**Paired-end (and mate-pair) data in fastq format**\n \n- NB: Single-end or pair-end collections may be empty if given SRRs LibraryLayout contains only either SINGLE or PAIRED respectively\n- @SRATOOLS_ATTRRIBUTION@\n+Paired end datasets can be represented as two individual datasets:\n+\n+First dataset::\n+\n+ @1/1\n+ AGGGATGTGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTTA\n+ +\n+ EGGEGGGDFGEEEAEECGDEGGFEEGEFGBEEDDECFEFDD@CDD<ED\n+ @2/1\n+ AGGGATGTGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTTA\n+ +\n+ HHHHHHEGFHEEFEEHEEHHGGEGGGGEFGFGGGGHHHHFBEEEEEFG\n+\n+Second dataset::\n+\n+ @1/2\n+ CCTAACCCTAACCCTAACCCTAACCCTAACCCTAACCCTAACCCTAAC\n+ +\n+ GHHHDFDFGFGEGFBGEGGEGEGGGHGFGHFHFHHHHHHHEF?EFEFF\n+ @2/2\n+ CCTAACCCTAACCCTAACCCTAACCCTAACCCTAACCCTAACCCTAAC\n+ +\n+ HHHHHHHHHHHHHGHHHHHHGHHHHHHHHHHHFHHHFHHHHHHHHHHH\n+\n+Or a single *interleaved* dataset::\n+\n+ @1/1\n+ AGGGATGTGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTTA\n+ +\n+ EGGEGGGDFGEEEAEECGDEGGFEEGEFGBEEDDECFEFDD@CDD<ED\n+ @1/2\n+ CCTAACCCTAACCCTAACCCTAACCCTAACCCTAACCCTAACCCTAAC\n+ +\n+ GHHHDFDFGFGEGFBGEGGEGEGGGHGFGHFHFHHHHHHHEF?EFEFF\n+ @2/1\n+ AGGGATGTGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTTA\n+ +\n+ HHHHHHEGFHEEFEEHEEHHGGEGGGGEFGFGGGGHHHHFBEEEEEFG\n+ @2/2\n+ CCTAACCCTAACCCTAACCCTAACCCTAACCCTAACCCTAACCCTAAC\n+ +\n+ HHHHHHHHHHHHHGHHHHHHGHHHHHHHHHHHFHHHFHHHHHHHHHHH\n+\n+----\n+\n+\n+.. _fastq: https://en.wikipedia.org/wiki/FASTQ_format\n+.. _fastq-dump: https://ncbi.github.io/sra-tools/fastq-dump.html\n+.. _collection: https://galaxyproject.org/tutorials/collections/\n+.. 
_link: http://www.ncbi.nlm.nih.gov/Traces/sra/sra.cgi?view=studies\n+\n+@SRATOOLS_ATTRRIBUTION@\n+\n+]]>\n </help>\n <expand macro="citation"/>\n </tool>\n' |
b |
diff -r 30775c836c77 -r c7620aa7e1f0 sam_dump.xml --- a/sam_dump.xml Wed Mar 22 05:23:31 2017 -0400 +++ b/sam_dump.xml Wed May 10 10:45:41 2017 -0400 |
[ |
@@ -1,5 +1,5 @@ -<tool id="sam_dump" name="Extract reads" version="@VERSION@"> - <description>in SAM or BAM format from NCBI SRA.</description> +<tool id="sam_dump" name="Extract reads in BAM" version="@VERSION@.2"> + <description>format from NCBI SRA</description> <macros> <import>sra_macros.xml</import> </macros> @@ -11,7 +11,7 @@ for acc in `cat $input.file_list` ; do #elif $input.input_select=="accession_number": - acc="$input.accession" && + acc="${input.accession}" && #end if #if $input.input_select=="file_list" or $input.input_select=="accession_number": @@ -91,7 +91,7 @@ </command> <inputs> <expand macro="input_conditional"/> - <param name="outputformat" type="select" label="select output format"> + <param name="outputformat" type="select" display="radio" label="select output format" help="In vast majority of cases you want to download data in bam format. It is more compact and is accepted by all downstream tools."> <option value="bam">bam</option> <option value="sam">sam</option> </param> @@ -113,18 +113,18 @@ </section> </inputs> <outputs> - <collection name="output_collection" type='list'> + <collection name="output_collection" type="list" label="SAM/BAM data (fastq-dump)"> <filter>input['input_select'] == "file_list"</filter> <discover_datasets pattern="(?P<designation>.+)\.bam" directory="." ext='bam'/> <discover_datasets pattern="(?P<designation>.+)\.sam" directory="." 
ext='sam'/> </collection> - <data name="output_accession" format="bam" label="${input.accession}.${outputformat}"> + <data name="output_accession" format="bam" label="${input.accession} (sam-dump)"> <filter>input['input_select'] == "accession_number"</filter> <change_format> <when input="outputformat" value="sam" format="sam"/> </change_format> </data> - <data name="output_file" format="bam" label="${input.file.name}.${outputformat}"> + <data name="output_file" format="bam" label="${input.file.name} (sam-dump)"> <filter>input['input_select'] == "file"</filter> <change_format> <when input="outputformat" value="sam" format="sam"/> @@ -140,11 +140,59 @@ <output name="output_accession" file="sam_dump_result.sam" compare="contains" ftype="sam"/> </test> </tests> - <help> - This tool extracts reads from sra archives using sam-dump. - The sam-dump program is developed at NCBI, and is available at - http://www.ncbi.nlm.nih.gov/Traces/sra/sra.cgi?view=software. - @SRATOOLS_ATTRRIBUTION@ - </help> + <help><![CDATA[ +**What it does?** + +This tool extracts data (in BAM_ format) from the Short Read Archive (SRA) at the National Center for Biotechnology Information (NCBI). It is based on the sam-dump_ utility of the SRA Toolkit. + +**How to use it?** + +There are three ways in which you can download data: + + 1. Data for single accession + 2. Multiple datasets using a list of accessions + 3. Extract data from already uploaded SRA dataset + +Below we discuss each in detail. + +------ + +**Uploading data for a single accession** + +When you type a single accession number (e.g., `SRR1582967`) into **Accession** box and click **Execute** the tool will fetch data for you. As a result you will get a single BAM (or SAM) dataset in the history. + +----- + +**Uploading multiple datasets using a list of accessions** + +A more realistic scenario is when you want to upload a number of datasets at once. 
To do this you need a list of accessions, where there is only one accession per line (see below for information on how to generate such a file). Once you have this file: + + 1. Upload it into your history using Galaxy's upload tool + 2. Once the list of accessions is uploaded choose *List of SRA accessions, one per line* from **select input type** dropdown + 3. Choose uploaded file within the **sra accession list** field + 4. Click **Execute** + +.. class:: warningmark + +BAM datasets produced by this option will be saved in Galaxy's history as a collection_ - a single history element containing multiple datasets. + +----- + +**Extract data from already uploaded SRA dataset** + +If an SRA dataset is present in the history, it can be converted into a BAM dataset by setting **select input type** drop-down to *SRA archive in current history*. Just like in the case of extracting data for a single accession number, a single BAM dataset will be generated in the history. + +@ACCESSION_LIST_HOWTO@ + +----- + +.. _BAM: https://samtools.github.io/hts-specs/SAMv1.pdf +.. _sam-dump: http://ncbi.github.io/sra-tools/sam-dump.html +.. _collection: https://galaxyproject.org/tutorials/collections/ +.. _link: http://www.ncbi.nlm.nih.gov/Traces/sra/sra.cgi?view=studies + + +@SRATOOLS_ATTRRIBUTION@ + ]]></help> <expand macro="citation"/> </tool> |
b |
diff -r 30775c836c77 -r c7620aa7e1f0 sra_macros.xml --- a/sra_macros.xml Wed Mar 22 05:23:31 2017 -0400 +++ b/sra_macros.xml Wed May 10 10:45:41 2017 -0400 |
b |
@@ -1,19 +1,28 @@ <macros> - <token name="@VERSION@">2.8.0</token> + <token name="@VERSION@">2.8.1</token> <macro name="requirements"> <requirements> - <requirement type="package" version="2.8.0">sra-tools</requirement> + <requirement type="package" version="2.8.1">sra-tools</requirement> </requirements> </macro> <macro name="input_conditional"> <conditional name="input"> <param name="input_select" type="select" label="select input type"> <option value="accession_number">SRR accession</option> + <option value="file_list">List of SRA accession, one per line</option> <option value="file">SRA archive in current history</option> - <option value="file_list">List of SRA accession, one per line</option> </param> <when value="accession_number"> - <param name="accession" type="text" label="SRR accession" help="Must start with SRR,DRR or ERR, e.g. SRR925743 , ERR343809"/> + <param name="accession" type="text" label="Accession" help="Must start with SRR,DRR or ERR, e.g. SRR925743 ,ERR343809"> + <sanitizer> + <valid initial="string.printable"> + <remove value=" "/> + </valid> + <mapping initial="none"> + <add source=" " target=""/> + </mapping> + </sanitizer> + </param> </when> <when value="file"> <param format="sra" name="file" type="data" label="sra archive"/> @@ -24,39 +33,45 @@ </conditional> </macro> <macro name="alignments"> - <param name="alignments" type="select" value="both"> - <label>aligned or unaligned reads</label> + <param name="alignments" type="select" value="both" label="Output aligned or unaligned reads" help="Output reads according to their alignment status." 
argument="--aligned and --unaligned"> <option value="both">both</option> <option value="aligned">aligned only</option> <option value="unaligned">unaligned only</option> </param> </macro> <macro name="minMapq"> - <param name="minMapq" type="integer" min="0" max="42" label="minimum mapping quality" optional="true"/> + <param name="minMapq" type="integer" min="0" max="42" label="Minimum mapping quality" optional="true" help="Minimum mapping quality an alignment has to have, to be dumped." argument="--min-mapq"/> </macro> <macro name="region"> <param format="text" name="region" type="text" label="aligned region" optional="true" - help="Filter by position on genome. Can be either accession.version (ex: NC_000001.10), chromosome name (ex:chr1 or 1) or 1-based coordinates (ex: chr1:1-101)."/> + help="Filter by position on genome. Can be either accession.version (ex: NC_000001.10), chromosome name (ex:chr1 or 1) or 1-based coordinates (ex: chr1:1-101)." argument="--aligned-region"/> </macro> <macro name="matepairDist"> <param name="matepairDist" type="text" label="mate-pair distance (from-to|unknown)" optional="true" - help="Filter by distance between matepairs. Use unknown to find matepairs split between the references. Use from-to (inclusive) to limit matepair distance on the same reference"/> + help="Filter by distance between matepairs. Use unknown to find matepairs split between the references. Use from-to (inclusive) to limit matepair distance on the same reference" argument="--matepair-distance"/> </macro> <macro name="citation"> <citations> <citation type="doi">10.1093/nar/gkq1019</citation> </citations> </macro> - <token name="@SRATOOLS_ATTRRIBUTION@"> - Browse the NCBI SRA for SRR accessions at http://www.ncbi.nlm.nih.gov/Traces/sra/sra.cgi?view=studies. + <token name="@ACCESSION_LIST_HOWTO@"> +----- - Galaxy tool wrapper originally written by Matt Shirley (mdshw5 at gmail.com). 
+**How to generate accession lists** - Wrapper modified by Philip Mabon ( philip.mabon at phac-aspc.gc.ca ). + 1. Go to **SRA Run Selector** by clicking this link_ + 2. Find the study you are interested in by typing a search term within the **Search** box. This can be a word (e.g., *mitochondria*) or an accession you have gotten from a paper (e.g., *SRR1582967*). + 3. Once you click on the study of interest you will see the number of datasets in this study within the **Related SRA data** box + 4. Click on the Runs number + 5. On the page that opens you will see the **Accession List** button + 6. Clicking this button will produce a file that you will need to upload into Galaxy and use as the input to this tool. + </token> - Tool dependencies, clean-up and bug-fixes by Marius van den Beek (m.vandenbeek at gmail.com). - - For support and bug reports contact Matt Shirley or Marius van den Beek or go to https://github.com/galaxyproject/tools-iuc. - + <token name="@SRATOOLS_ATTRRIBUTION@"> +Galaxy tool wrapper originally written by Matt Shirley (mdshw5 at gmail.com). +Wrapper modified by Philip Mabon ( philip.mabon at phac-aspc.gc.ca ). +Tool dependencies, clean-up and bug-fixes by Marius van den Beek (m.vandenbeek at gmail.com). +For support and bug reports contact Matt Shirley or Marius van den Beek or go to https://github.com/galaxyproject/tools-iuc. </token> </macros> |
b |
diff -r 30775c836c77 -r c7620aa7e1f0 sra_pileup.xml --- a/sra_pileup.xml Wed Mar 22 05:23:31 2017 -0400 +++ b/sra_pileup.xml Wed May 10 10:45:41 2017 -0400 |
[ |
@@ -1,5 +1,5 @@ -<tool id="sra_pileup" name="Generate pileup format" version="@VERSION@"> - <description>from NCBI sra.</description> +<tool id="sra_pileup" name="Generate pileup format" version="@VERSION@.2"> + <description>from NCBI sra</description> <macros> <import>sra_macros.xml</import> </macros> @@ -7,6 +7,11 @@ <version_command>sra-pileup --version</version_command> <command detect_errors="exit_code"> <![CDATA[ + + #if $input.input_select=="accession_number": + acc="${input.accession}" && + #end if + ## Need to set the home directory to the current working directory, ## else the tool tries to write to home/.ncbi and fails when used ## with a cluster manager. @@ -16,7 +21,7 @@ #if ( str( $adv.region ) == "" ): ASCP_PATH=`command -v ascp` && ASCP_KEY=`dirname \$ASCP_PATH`/asperaweb_id_dsa.openssh || true && - prefetch -X 200G --ascp-path "\$ASCP_PATH|\$ASCP_KEY" "$input.accession" && + prefetch -X 200G --ascp-path "\$ASCP_PATH|\$ASCP_KEY" "\$acc" && ## Duplicate vdb-config, in case settings changed between prefetch and ## sra-pileup command. vdb-config -s "/repository/user/main/public/root=\$PWD" && @@ -31,7 +36,7 @@ #if $input.input_select == "file": "$input.file" > "$output_file" #elif $input.input_select == "accession_number": - "$input.accession" > "$output_accession" + "\$acc" > "$output_accession" #elif $input.input_select == "text": `cat "$input.text"` > "$output_text" #end if @@ -48,7 +53,16 @@ <param format="sra" name="file" type="data" label="sra archive"/> </when> <when value="accession_number"> - <param format="text" name="accession" type="text" label="SRR accession" help="Must start with SRR, e.g. SRR925743"/> + <param format="text" name="accession" type="text" label="SRR accession" help="Must start with SRR, e.g. 
SRR925743"> + <sanitizer> + <valid initial="string.printable"> + <remove value=" "/> + </valid> + <mapping initial="none"> + <add source=" " target=""/> + </mapping> + </sanitizer> + </param> </when> <when value="text"> <param format="txt" name="text" type="data" label="text file"/> @@ -60,13 +74,13 @@ </section> </inputs> <outputs> - <data format="pileup" name="output_accession" label="${input.accession}.pileup"> + <data format="pileup" name="output_accession" label="${input.accession} (sra-pileup)"> <filter>input['input_select'] == "accession_number"</filter> </data> - <data format="pileup" name="output_file" label="${input.file.name}.pileup"> + <data format="pileup" name="output_file" label="${input.file.name} (sra-pileup)"> <filter>input['input_select'] == "file"</filter> </data> - <data format="pileup" name="output_text" label="${input.text.name}.pileup"> + <data format="pileup" name="output_text" label="${input.text.name} (sra-pileup)"> <filter>input['input_select'] == "text"</filter> </data> </outputs> @@ -79,10 +93,14 @@ </test> </tests> <help> - This tool produces pileup format from sra archives using sra-pileup. - The sra-pileup program is developed at NCBI, and is available at - http://www.ncbi.nlm.nih.gov/Traces/sra/sra.cgi?view=software. - @SRATOOLS_ATTRRIBUTION@ + <![CDATA[ + +This tool produces pileup format from sra archives using sra-pileup. +The sra-pileup program is developed at NCBI, and is available at +http://www.ncbi.nlm.nih.gov/Traces/sra/sra.cgi?view=software. +@SRATOOLS_ATTRRIBUTION@ + +]]> </help> <expand macro="citation"/> </tool> |
b |
diff -r 30775c836c77 -r c7620aa7e1f0 test-data/fastq_dump_result.fastq.gz |
b |
Binary file test-data/fastq_dump_result.fastq.gz has changed |