Mercurial > repos > devteam > bamleftalign
changeset 0:e65540a91b87 draft default tip
Uploaded
author | devteam |
---|---|
date | Tue, 24 Jun 2014 11:53:09 -0400 |
parents | |
children | |
files | bamleftalign.xml test-data/left-align-input.bam test-data/left-align-output.bam test-data/leftalign.fa tool-data/sam_fa_indices.loc.sample tool-data/tool_data_table_conf.xml.sample tool_dependencies.xml |
diffstat | 7 files changed, 122 insertions(+), 0 deletions(-) [+] |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/bamleftalign.xml Tue Jun 24 11:53:09 2014 -0400 @@ -0,0 +1,75 @@ +<?xml version="1.0"?> +<tool id="bamleftalign" name="BamLeftAlign" version="0.0.2"> + <requirements> + <requirement type="package" version="freebayes-0.9.14_8a407cf5f4">freebayes</requirement> + <requirement type="package" version="0.1.18">samtools</requirement> + </requirements> + <description> indels in BAM datasets</description> + <command> + ##set up input files + #set $reference_fasta_filename = "localref.fa" + #if str( $reference_source.reference_source_selector ) == "history": + ln -s "${reference_source.ref_file}" "${reference_fasta_filename}" && + samtools faidx "${reference_fasta_filename}" 2>&1 || echo "Error running samtools faidx for leftalign" >&2 && + #else: + #set $reference_fasta_filename = str( $reference_source.ref_file.fields.path ) + #end if + + ##finished setting up inputs + + ##start leftalign commandline + samtools view -bh "${input_bam}" | bamleftalign + --fasta-reference "${reference_fasta_filename}" + -c + --max-iterations "${iterations}" + ##outputs + > "${output_bam}" + </command> + <inputs> + <conditional name="reference_source"> + <param name="reference_source_selector" type="select" label="Choose the source for the reference list"> + <option value="cached">Locally cached</option> + <option value="history">History</option> + </param> + <when value="cached"> + <param name="input_bam" type="data" format="bam" label="Select BAM dataset to leftalign"> + <validator type="unspecified_build" /> + <validator type="dataset_metadata_in_data_table" table_name="sam_fa_indexes" metadata_name="dbkey" metadata_column="1" message="Sequences are not currently available for the specified build." /> + </param> + <param name="ref_file" type="select" label="Using reference genome"> + <options from_data_table="sam_fa_indexes"> + <!--<filter type="data_meta" key="dbkey" ref="input_bam" column="value"/>--> + </options> + <validator type="no_options" message="A built-in reference genome is not available for the build associated with the selected input file"/> + </param> + </when> + <when value="history"> <!-- FIX ME!!!! --> + <param name="input_bam" type="data" format="bam" label="BAM dataset to re-align" /> + <param name="ref_file" type="data" format="fasta" label="Using reference file" /> + </when> + </conditional> + <param name="iterations" type="integer" size="3" value="5" label="Maximum number of iterations" help="Iterate the left-realignment no more than this many times" /> + </inputs> + <outputs> + <data format="bam" name="output_bam" label="${tool.name} on ${on_string} (alignments)" /> + </outputs> + <tests> + <test> + <param name="reference_source_selector" value="history" /> + <param name="ref_file" ftype="fasta" value="leftalign.fa"/> + <param name="input_bam" ftype="bam" value="left-align-input.bam"/> + <param name="iterations" value="5"/> + <output name="output_bam" file="left-align-output.bam" /> + </test> + </tests> + <stdio> + <exit_code range="1:" /> + </stdio> + <help> + +When calling indels, it is important to homogenize the positional distribution of insertions and deletions in the input by using left realignment. Left realignment will place all indels in homopolymer and microsatellite repeats at the same position, provided that doing so does not introduce mismatches between the read and reference other than the indel. This method is computationally inexpensive and handles the most common classes of alignment inconsistency. + +This is leftalign utility from FreeBayes package developed and maintained by Erik Garrison (https://github.com/ekg/freebayes). + + </help> +</tool>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/leftalign.fa Tue Jun 24 11:53:09 2014 -0400 @@ -0,0 +1,2 @@ +>phiX174 +GAGTTTTATCGCTTCCATGACGCAGAAGTTAACACTTTCGGATATTTCTGATGAGTCGAAAAATTATCTTGATAAAGCAGGAATTACTACTGCTTGTTTACGAATTAAATCGAAGTGGACTGCTGGCGGAAAATGAGAAAATTCGACCTATCCTTGCGCAGCTCGAGAAGCTCTTACTTTGCGACCTTTCGCCATCAACTAACGATTCTGTCAAAAACTGACGCGTTGGATGAGGAGAAGTGGCTTAATATGCTTGGCACGTTCGTCAAGGACTGGTTTAGATATGAGTCACATTTTGTTCATGGTAGAGATTCTCTTGTTGACATTTTAAAAGAGCGTGGATTACTATCTGAGTCCGATGCTGTTCAACCACTAATAGGTAAGAAATCATGAGTCAAGTTACTGAACAATCCGTACGTTTCCAGACCGCTTTGGCCTCTATTAAGCTCATTCAGGCTTCTGCCGTTTTGGATTTAACCGAAGATGATTTCGATTTTCTGACGAGTAACAAAGTTTGGATTGCTACTGACCGCTCTCGTGCTCGTCGCTGCGTTGAGGCTTGCGTTTATGGTACGCTGGACTTTGTGGGATACCCTCGCTTTCCTGCTCCTGTTGAGTTTATTGCTGCCGTCATTGCTTATTATGTTCATCCCGTCAACATTCAAACGGCCTGTCTCATCATGGAAGGCGCTGAATTTACGGAAAACATTATTAATGGCGTCGAGCGTCCGGTTAAAGCCGCTGAATTGTTCGCGTTTACCTTGCGTGTACGCGCAGGAAACACTGACGTTCTTACTGACGCAGAAGAAAACGTGCGTCAAAAATTACGTGCaGAAGGAGTGATGTAATGTCTAAAGGTAAAAAACGTTCTGGCGCTCGCCCTGGTCGTCCGCAGCCGTTGCGAGGTACTAAAGGCAAGCGTAAAGGCGCTCGTCTTTGGTATGTAGGTGGTCAACAATTTTAATTGCAGGGGCTTCGGCCCCTTACTTGAGGATAAATTATGTCTAATATTCAAACTGGCGCCGAGCGTATGCCGCATGACCTTTCCCATCTTGGCTTCCTTGCTGGTCAGATTGGTCGTCTTATTACCATTTCAACTACTCCGGTTATCGCTGGCGACTCCTTCGAGATGGACGCCGTTGGCGCTCTCCGTCTTTCTCCATTGCGTCGTGGCCTTGCTATTGACTCTACTGTAGACATTTTTACTTTTTATGTCCCTCATCGTCACGTTTATGGTGAACAGTGGATTAAGTTCATGAAGGATGGTGTTAATGCCACTCCTCTCCCGACTGTTAACACTACTGGTTATATTGACCATGCCGCTTTTCTTGGCACGATTAACCCTGATACCAATAAAATCCCTAAGCATTTGTTTCAGGGTTATTTGAATATCTATAACAACTATTTTAAAGCGCCGTGGATGCCTGACCGTACCGAGGCTAACCCTAATGAGCTTAATCAAGATGATGCTCGTTATGGTTTCCGTTGCTGCCATCTCAAAAACATTTGGACTGCTCCGCTTCCTCCTGAGACTGAGCTTTCTCGCCAAATGACGACTTCTACCACATCTATTGACATTATGGGTCTGCAAGCTGCTTATGCTAATTTGCATACTGACCAAGAACGTGATTACTTCATGCAGCGTTACCgTGATGTTATTTCTTCATTTGGAGGTAAAACCTCTTATGACGCTGACAACCGTCCTTTACTTGTCATGCGCTCTAATCTCTGGGCATCTGGCTATGATGTTGATGGAACTGACCAAACGTCGTTAGGCCAGTTTTCTGGTCGTGTTCAACAGACCTATAAACATTCTGTGCCGCGTTTCTTTGTTCCTGAGCATGGCACTATGTTTACTCTTGCGCTTGTTCGTTTTCCGCCTACTGCGACTAAAGAGATTCAGTACCTTAACGCTAAAGGTGCTTTGACTTATACCGATATTGCTGGCGACCCTGTTTTGTATGGCAACTTGCCGCCGCGTGAAATTTCTATGAAGGATGTTTTCCGTTCTGGTGATTCGTCTAAGAAGTTTAAGATTGCTGAGGGTCAGTGGTATCGTTATGCGCCTTCGTATGTTTCTCCTGCTTATCACCTTCTTGAAGGCTTCCCATTCATTCAGGAACCGCCTTCTGGTGATTTGCAAGAACGCGTACTTATTCGCCACCATGATTATGACCAGTGTTTCCAGTCCGTTCAGTTGTTGCAGTGGAATAGTCAGGTTAAATTTAATGTGACCGTTTATCGCAATCTGCCGACCACTCGCGATTCAATCATGACTTCGTGATAAAAGATTGAGTGTGAGGTTATAACGCCGAAGCGGTAAAAATTTTAATTTTTGCCGCTGAGGGGTTGACCAAGCGAAGCGCGGTAGGTTTTCTGCTTAGGAGTTTAATCATGTTTCAGACTTTTATTTCTCGCCATAATTCAAACTTTTTTTCTGATAAGCTGGTTCTCACTTCTGTTACTCCAGCTTCTTCGGCACCTGTTTTACAGACACCTAAAGCTACATCGTCAACGTTATATTTTGATAGTTTGACGGTTAATGCTGGTAATGGTGGTTTTCTTCATTGCATTCAGATGGATACATCTGTCAACGCCGCTAATCAGGTTGTTTCTGTTGGTGCTGATATTGCTTTTGATGCCGACCCTAAATTTTTTGCCTGTTTGGTTCGCTTTGAGTCTTCTTCGGTTCCGACTACCCTCCCGACTGCCTATGATGTTTATCCTTTGAATGGTCGCCATGATGGTGGTTATTATACCGTCAAGGACTGTGTGACTATTGACGTCCTTCCCCGTACGCCGGGCAATAAtGTTTATGTTGGTTTCATGGTTTGGTCTAACTTTACCGCTACTAAATGCCGCGGATTGGTTTCGCTGAATCAGGTTATTAAAGAGATTATTTGTCTCCAGCCACTTAAGTGAGGTGATTTATGTTTGGTGCTATTGCTGGCGGTATTGCTTCTGCTCTTGCTGGTGGCGCCATGTCTAAATTGTTTGGAGGCGGTCAAAAAGCCGCCTCCGGTGGCATTCAAGGTGATGTGCTTGCTACCGATAACAATACTGTAGGCATGGGTGATGCTGGTATTAAATCTGCCATTCAAGGCTCTAATGTTCCTAACCCTGATGAGGCCGCCCCTAGTTTTGTTTCTGGTGCTATGGCTAAAGCTGGTAAAGGACTTCTTGAAGGTACGTTGCAGGCTGGCACTTCTGCCGTTTCTGATAAGTTGCTTGATTTGGTTGGACTTGGTGGCAAGTCTGCCGCTGATAAAGGAAAGGATACTCGTGATTATCTTGCTGCTGCATTTCCTGAGCTTAATGCTTGGGAGCGTGCTGGTGCTGATGCTTCCTCTGCTGGTATGGTTGACGCCGGATTTGAGAATCAAAAAGAGCTTACTAAAATGCAACTGGACAATCAGAAAGAGATTGCCGAGATGCAAAATGAGACTCAAAAAGAGATTGCTGGCATTCAGTCGGCGACTTCACGCCAGAATACGAAAGACCAGGTATATGCACAAAATGAGATGCTTGCTTATCAACAGAAGGAGTCTACTGCTCGCGTTGCGTCTATTATGGAAAACACCAATCTTTCCAAGCAACAGCAGGTTTCCGAGATTATGCGCCAAATGCTTACTCAAGCTCAAACGGCTGGTCAGTATTTTACCAATGACCAAATCAAAGAAATGACTCGCAAGGTTAGTGCTGAGGTTGACTTAGTTCATCAGCAAACGCAGAATCAGCGGTATGGCTCTTCTCATATTGGCGCTACTGCAAAGGATATTTCTAATGTCGTCACTGATGCTGCTTCTGGTGTGGTTGATATTTTTCATGGTATTGATAAAGCTGTTGCCGATACTTGGAACAATTTCTGGAAAGACGGTAAAGCTGATGGTATTGGCTCTAATTTGTCTAGGAAATAACCGTCAGGATTGACACCCTCCCAATTGTATGTTTTCATGCCTCCAAATCTTGGAGGCTTTTTTATGGTTCGTTCTTATTACCCTTCTGAATGTCACGCTGATTATTTTGACTTTGAGCGTATCGAGGCTCTTAAACCTGCTATTGAGGCTTGTGGCATTTCTACTCTTTCTCAATCCCCAATGCTTGGCTTCCATAAGCAGATGGATAACCGCATCAAGCTCTTGGAAGAGATTCTGTCTTTTCGTATGCAGGGCGTTGAGTTCGATAATGGTGATATGTATGTTGACGGCCATAAGGCTGCTTCTGACGTTCGTGATGAGTTTGTATCTGTTACTGAGAAGTTAATGGATGAATTGGCACAATGCTACAATGTGCTCCCCCAACTTGATATTAATAACACTATAGACCACCGCCCCGAAGGGGACGAAAAATGGTTTTTAGAGAACGAGAAGACGGTTACGCAGTTTTGCCGCAAGCTGGCTGCTGAACGCCCTCTTAAGGATATTCGCGATGAGTATAATTACCCCAAAAAGAAAGGTATTAAGGATGAGTGTTCAAGATTGCTGGAGGCCTCCACTATGAAATCGCGTAGAGGCTTTaCTATTCAGCGTTTGATGAATGCAATGCGACAGGCTCATGCTGATGGTTGGTTTATCGTTTTTGACACTCTCACGTTGGCTGACGACCGATTAGAGGCGTTTTATGATAATCCCAATGCTTTGCGTGACTATTTTCGTGATATTGGTCGTATGGTTCTTGCTGCCGAGGGTCGCAAGGCTAATGATTCACACGCCGACTGCTATCAGTATTTTTGTGTGCCTGAGTATGGTACAGCTAATGGCCGTCTTCATTTCCATGCGGTGCAtTTTATGCGGACACTTCCTACAGGTAGCGTTGACCCTAATTTTGGTCGTCGGGTACGCAATCGCCGCCAGTTAAATAGCTTGCAAAATACGTGGCCTTATGGTTACAGTATGCCCATCGCAGTTCGCTACACGCAGGACGCTTTTTCACGTTCTGGTTGGTTGTGGCCTGTTGATGCTAAAGGTGAGCCGCTTAAAGCTACCAGTTATATGGCTGTTGGTTTCTATGTGGCTAAATACGTTAACAAAAAGTCAGATATGGACCTTGCTGCTAAAGGTCTAGGAGCTAAAGAATGGAACAACTCACTAAAAACCAAGCTGTCGCTACTTCCCAAGAAGCTGTTCAGAATCAGAATGAGCCGCAACTTCGGGATGAAAATGCTCACAATGACAAATCTGTCCACGGAGTGCTTAATCCAACTTACCAAGCTGGGTTACGACGCGACGCCGTTCAACCAGATATTGAAGCAGAACGCAAAAAGAGAGATGAGATTGAGGCTGGGAAAAGTTACTGTAGCCGACGTTTTGGCGGCGCAACCTGTGACGACAAATCTGCTCAAATTTATGCGCGCTTCGATAAAAATGATTGGCGTATCCAACCTGCA
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tool-data/sam_fa_indices.loc.sample Tue Jun 24 11:53:09 2014 -0400 @@ -0,0 +1,28 @@ +#This is a sample file distributed with Galaxy that enables tools +#to use a directory of Samtools indexed sequences data files. You will need +#to create these data files and then create a sam_fa_indices.loc file +#similar to this one (store it in this directory) that points to +#the directories in which those files are stored. The sam_fa_indices.loc +#file has this format (white space characters are TAB characters): +# +#index <seq> <location> +# +#So, for example, if you had hg18 indexed stored in +#/depot/data2/galaxy/sam/, +#then the sam_fa_indices.loc entry would look like this: +# +#index hg18 /depot/data2/galaxy/sam/hg18.fa +# +#and your /depot/data2/galaxy/sam/ directory +#would contain hg18.fa and hg18.fa.fai files: +# +#-rw-r--r-- 1 james universe 830134 2005-09-13 10:12 hg18.fa +#-rw-r--r-- 1 james universe 527388 2005-09-13 10:12 hg18.fa.fai +# +#Your sam_fa_indices.loc file should include an entry per line for +#each index set you have stored. The file in the path does actually +#exist, but it should never be directly used. Instead, the name serves +#as a prefix for the index file. For example: +# +#index hg18 /depot/data2/galaxy/sam/hg18.fa +#index hg19 /depot/data2/galaxy/sam/hg19.fa
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tool-data/tool_data_table_conf.xml.sample Tue Jun 24 11:53:09 2014 -0400 @@ -0,0 +1,8 @@ +<!-- Use the file tool_data_table_conf.xml.oldlocstyle if you don't want to update your loc files as changed in revision 4550:535d276c92bc--> +<tables> + <!-- Location of SAMTools indexes and other files --> + <table name="sam_fa_indexes" comment_char="#"> + <columns>line_type, value, path</columns> + <file path="tool-data/sam_fa_indices.loc" /> + </table> +</tables>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tool_dependencies.xml Tue Jun 24 11:53:09 2014 -0400 @@ -0,0 +1,9 @@ +<?xml version="1.0"?> +<tool_dependency> + <package name="freebayes" version="freebayes-0.9.14_8a407cf5f4"> + <repository changeset_revision="77a97e5d2658" name="freebayes_0_9_14_8a407cf5f4" owner="devteam" toolshed="http://toolshed.g2.bx.psu.edu" /> + </package> + <package name="samtools" version="0.1.18"> + <repository changeset_revision="171cd8bc208d" name="package_samtools_0_1_18" owner="devteam" toolshed="http://toolshed.g2.bx.psu.edu" /> + </package> +</tool_dependency>