Mercurial > repos > mvdbeek > bam_readtagger
changeset 31:301316cb596a draft
planemo upload for repository https://github.com/bardin-lab/readtagger/tree/master/galaxy commit 6e01a2e472ebbb07ce5181b836bae8bc5c7ecf36-dirty
| author | mvdbeek | 
|---|---|
| date | Wed, 21 Jun 2017 09:38:25 -0400 | 
| parents | db5c766503dd | 
| children | bf3d04937095 | 
| files | add_matesequence.xml allow_dovetailing.xml bam_readtagger.xml bwa_mem_index.loc.sample findcluster.xml macros.xml test-data/three_cluster_out.gff tool_data_table_conf.xml.sample update_mapq.xml write_supplementary_fastq.xml | 
| diffstat | 10 files changed, 117 insertions(+), 24 deletions(-) [+] | 
line wrap: on
 line diff
--- a/add_matesequence.xml Thu May 11 05:33:27 2017 -0400 +++ b/add_matesequence.xml Wed Jun 21 09:38:25 2017 -0400 @@ -1,7 +1,7 @@ -<tool id="add_matesequence" name="Add matesequence" version="0.3.24"> +<tool id="add_matesequence" name="Add matesequence" version="0.3.25"> <description>into tag field</description> <requirements> - <requirement type="package" version="0.3.24">readtagger</requirement> + <requirement type="package" version="0.3.25">readtagger</requirement> </requirements> <version_command>add_matesequence --version</version_command> <command detect_errors="aggressive"><![CDATA[
--- a/allow_dovetailing.xml Thu May 11 05:33:27 2017 -0400 +++ b/allow_dovetailing.xml Wed Jun 21 09:38:25 2017 -0400 @@ -1,7 +1,7 @@ -<tool id="allow_dovetailing" name="Allow dovetailing" version="0.3.24"> +<tool id="allow_dovetailing" name="Allow dovetailing" version="0.3.25"> <description>modifies proper_pair flag in bam files</description> <requirements> - <requirement type="package" version="0.3.24">readtagger</requirement> + <requirement type="package" version="0.3.25">readtagger</requirement> </requirements> <command detect_errors="aggressive"><![CDATA[ allow_dovetailing -i '$input' -o '$output'
--- a/bam_readtagger.xml Thu May 11 05:33:27 2017 -0400 +++ b/bam_readtagger.xml Wed Jun 21 09:38:25 2017 -0400 @@ -1,10 +1,10 @@ -<tool id="bam_readtagger" name="Tag alignment files" version="0.3.24"> +<tool id="bam_readtagger" name="Tag alignment files" version="0.3.25"> <description>from multiple bam files</description> <macros> <import>macros.xml</import> </macros> <requirements> - <requirement type="package" version="0.3.24">readtagger</requirement> + <requirement type="package" version="0.3.25">readtagger</requirement> </requirements> <command detect_errors="aggressive"><![CDATA[ readtagger -t '$tag_file' -s
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/bwa_mem_index.loc.sample Wed Jun 21 09:38:25 2017 -0400 @@ -0,0 +1,38 @@ +#This is a sample file distributed with Galaxy that enables tools +#to use a directory of BWA indexed sequences data files. You will need +#to create these data files and then create a bwa_mem_index.loc file +#similar to this one (store it in this directory) that points to +#the directories in which those files are stored. The bwa_mem_index.loc +#file has this format (longer white space characters are TAB characters): +# +#<unique_build_id> <dbkey> <display_name> <file_path> +# +#So, for example, if you had phiX indexed and stored in +#/depot/data2/galaxy/phiX/base/, +#then the bwa_mem_index.loc entry would look like this: +# +#phiX174 phiX phiX Pretty /depot/data2/galaxy/phiX/base/phiX.fa +# +#and your /depot/data2/galaxy/phiX/base/ directory +#would contain phiX.fa.* files: +# +#-rw-r--r-- 1 james universe 830134 2005-09-13 10:12 phiX.fa.amb +#-rw-r--r-- 1 james universe 527388 2005-09-13 10:12 phiX.fa.ann +#-rw-r--r-- 1 james universe 269808 2005-09-13 10:12 phiX.fa.bwt +#...etc... +# +#Your bwa_mem_index.loc file should include an entry per line for each +#index set you have stored. The "file" in the path does not actually +#exist, but it is the prefix for the actual index files. For example: +# +#phiX174 phiX phiX174 /depot/data2/galaxy/phiX/base/phiX.fa +#hg18canon hg18 hg18 Canonical /depot/data2/galaxy/hg18/base/hg18canon.fa +#hg18full hg18 hg18 Full /depot/data2/galaxy/hg18/base/hg18full.fa +#/orig/path/hg19.fa hg19 hg19 /depot/data2/galaxy/hg19/base/hg19.fa +#...etc... +# +#Note that for backwards compatibility with workflows, the unique ID of +#an entry must be the path that was in the original loc file, because that +#is the value stored in the workflow for that parameter. That is why the +#hg19 entry above looks odd. New genomes can be better-looking. +#
--- a/findcluster.xml Thu May 11 05:33:27 2017 -0400 +++ b/findcluster.xml Wed Jun 21 09:38:25 2017 -0400 @@ -1,7 +1,10 @@ -<tool id="findcluster" name="Find clusters of reads" version="0.3.24"> +<tool id="findcluster" name="Find clusters of reads" version="0.3.25"> <description>in bam files</description> + <macros> + <import>macros.xml</import> + </macros> <requirements> - <requirement type="package" version="0.3.24">readtagger</requirement> + <requirement type="package" version="0.3.25">readtagger</requirement> </requirements> <version_command>findcluster --version</version_command> <command detect_errors="aggressive"><![CDATA[ @@ -9,8 +12,19 @@ ln -f -s $input.metadata.bam_index input.bam.bai && findcluster --input_path input.bam - #if $reference_fasta: - --reference_fasta '$reference_fasta' + #if $transposon_source.ref_file: + #if str($transposon_source.reference_source_selector) == "history": + --transposon_reference_fasta '$transposon_source.ref_file' + #else : + --transposon_bwa_index '$transposon_source.ref_file.fields.path' + #end if + #end if + #if $genome_source.ref_file: + #if str($genome_source.reference_source_selector) == "history": + --genome_reference_fasta '$genome_source.ref_file' + #else : + --genome_bwa_index '$genome_source.ref_file.fields.path' + #end if #end if --output_bam '$output_bam' --output_gff '$output_gff' @@ -20,7 +34,9 @@ ]]></command> <inputs> <param name="input" argument="--input_path" type="data" format="bam"/> - <param argument="--reference_fasta" label="Reference Fasta" help="Reconstructed contigs at clusters will be blasted against this sequence." 
type="data" format="fasta" optional="True"/> + + <expand macro="reference_source_conditional" reference_type="transposon"/> + <expand macro="reference_source_conditional" reference_type="genome"/> </inputs> <outputs> <data name="output_bam" format="bam" label="findcluster BAM on $on_string"/> @@ -35,7 +51,8 @@ </test> <test> <param name="input" value="extended_and_annotated_roi.bam" ftype="bam"/> - <param name="reference_fasta" value="reference.fasta" ftype="fasta"/> + <param name="transposon_source|reference_source_selector" value="history"/> + <param name="transposon_source|ref_file" value="reference.fasta" ftype="fasta"/> <output name="output_bam" file="three_cluster_out.bam" ftype="bam" lines_diff="2"/> <output name="output_gff"> <assert_contents> @@ -51,24 +68,35 @@ Find clusters of reads that support a TE insertion. - Options: + Options: --input_path PATH Find cluster in this BAM file. + --region TEXT Find clusters in this Region (Format is + chrX:2000-1000). + --max_proper_pair_size INTEGER Maximum proper pairs size. If not given will + be inferred from the data. --output_bam PATH Write out BAM file with cluster information to this path. Reads will have an additional "CD" tag to indicate the cluster number --output_gff PATH Write out GFF file with cluster information to this path. + --output_fasta PATH Write out supporting evidence for clusters + to this path. --sample_name TEXT Sample name to use when writing out clusters in GFF file. Default is to infer the name from the input filename. --include_duplicates / --no-include_duplicates Include reads marked as duplicates when finding clusters. - --reference_fasta TEXT Blast cluster contigs against this fasta - file - --blastdb TEXT Blast cluster contigs against this blast - database + --transposon_reference_fasta TEXT + Transposon fasta to align clipped reads to. + Not necessary if BWA index is provided. 
+ --transposon_bwa_index TEXT Transposon BWA index to align clipped reads + to + --genome_reference_fasta TEXT Genome fasta to align clipped reads to. Not + necessary if BWA index is provided. + --genome_bwa_index TEXT Genome BWA index to align clipped reads to --threads INTEGER RANGE Threads to use for cap3 assembly step + --shm_dir PATH Path to shared memory folder --version Show the version and exit. --help Show this message and exit.
--- a/macros.xml Thu May 11 05:33:27 2017 -0400 +++ b/macros.xml Wed Jun 21 09:38:25 2017 -0400 @@ -26,4 +26,24 @@ <option value="X">X</option> <option value="Y">Y</option> </xml> + <xml name="reference_source_conditional" token_reference_type="reference_type"> + <conditional name="@REFERENCE_TYPE@_source"> + <param name="reference_source_selector" type="select" label="Will you select a @REFERENCE_TYPE@ reference genome from your history or use a built-in index?" help="Built-ins were indexed using default options. See `Indexes` section of help below"> + <option value="cached">Use a built-in @REFERENCE_TYPE@ genome index</option> + <option value="history">Use a genome from history and build index</option> + </param> + <when value="cached"> + <param name="ref_file" type="select" label="Using @REFERENCE_TYPE@ reference genome" help="Select @REFERENCE_TYPE@ genome from the list" optional="True"> + <options from_data_table="bwa_mem_indexes"> + <filter type="sort_by" column="2" /> + <validator type="no_options" message="No indexes are available" /> + </options> + <validator type="no_options" message="A built-in reference genome is not available for the build associated with the selected input file"/> + </param> + </when> + <when value="history"> + <param name="ref_file" type="data" format="fasta" label="Use the following dataset as the reference sequence" help="You can upload a FASTA sequence to the history and use it as reference" optional="True"/> + </when> + </conditional> + </xml> </macros>
--- a/test-data/three_cluster_out.gff Thu May 11 05:33:27 2017 -0400 +++ b/test-data/three_cluster_out.gff Wed Jun 21 09:38:25 2017 -0400 @@ -1,4 +1,3 @@ ##gff-version 3 -3R findcluster TE 13373515 13373524 22 + . ID=extended_and_annotated_roi.bam_0;genotype=homozygous;genotype_likelihoods=1.8828617809e-31,9.53673406912e-07,0.999999046327;left_insert=0,CTCGGAATGTATCTAACTAACAAACTCATATCAAATATAAGCAAGTGCGCCAATTCGTATGCATATGGACATATGGACATATACATATAGTAACATAATATGCTTCTCATATTACGTTTACATACTTACACTAATTGTACATACAATCTTGCACATGCATAAACACATCAAACCAGCTTACATTTTTACTTACACTTAAGCGCATGATTTGTTGTGCATCCATACCGTTATTTTTCC;left_support=11;non_support=0;right_insert=0,GACAACGAAGAAATAAAGATCCAAACTAAAAAAATACCTCGTGTTGATTCTGAAACTTCTTTAAAGGCGTTGATCTTAGTCAAACGACGGATCATTTGTTCGACTCGAATAGTAAAATACGTAAGTATATAGATAGTCTATATTAATTTTAAAAGCTCAAAGGGGCGCAAGTCTCTCTCATCAGTTGTGTCTTTAGTTTTTTTGATTTGGTTTGGTG;right_support=11;valid_TSD=False -3R findcluster TE 13374595 13374595 2 + . ID=extended_and_annotated_roi.bam_1;genotype=homozygous;genotype_likelihoods=0.0106846062525,0.329771797916,0.659543595832;left_insert=0,GTTCACCCGCGTCCGAGTTCCTGCTCCACTACTCCCTGGCTGCTGACTCACTGTTGTTATAGGGGTGGCTTCCCCTCTGTTCTTCCTGGGGGAATGCTGCATCTTCCCCAGCTCCAAAATGGCGG;left_support=2;non_support=0;right_insert=;right_support=0;valid_TSD=False -3R findcluster TE 13374677 13374677 1 + . ID=extended_and_annotated_roi.bam_2;genotype=heterozygous;genotype_likelihoods=0.212598425197,0.393700787402;left_insert=;left_support=0;non_support=0;right_insert=;right_support=1;valid_TSD=False +3R findcluster TE 13373515 13373524 22 + . 
ID=extended_and_annotated_roi.bam_0;genotype=homozygous;genotype_likelihoods=1.8828617809e-31,9.53673406912e-07,0.999999046327;left_insert=0,CTCGGAATGTATCTAACTAACAAACTCATATCAAATATAAGCAAGTGCGCCAATTCGTATGCATATGGACATATGGACATATACATATAGTAACATAATATGCTTCTCATATTACGTTTACATACTTACACTAATTGTACATACAATCTTGCACATGCATAAACACATCAAACCAGCTTACATTTTTACTTACACTTAAGCGCATGATTTGTTGTGCATCCATACCGTTATTTTTCC;left_mate_support=8;left_support=11;non_support=0;right_insert=0,GACAACGAAGAAATAAAGATCCAAACTAAAAAAATACCTCGTGTTGATTCTGAAACTTCTTTAAAGGCGTTGATCTTAGTCAAACGACGGATCATTTGTTCGACTCGAATAGTAAAATACGTAAGTATATAGATAGTCTATATTAATTTTAAAAGCTCAAAGGGGCGCAAGTCTCTCTCATCAGTTGTGTCTTTAGTTTTTTTGATTTGGTTTGGTG;right_mate_support=6;right_support=11;valid_TSD=False +3R findcluster TE 13374595 13374676 3 + . ID=extended_and_annotated_roi.bam_1;genotype=homozygous;genotype_likelihoods=0.00038864889331,0.199922270221,0.799689080885;left_insert=0,GTTCACCCGCGTCCGAGTTCCTGCTCCACTACTCCCTGGCTGCTGACTCACTGTTGTTATAGGGGTGGCTTCCCCTCTGTTCTTCCTGGGGGAATGCTGCATCTTCCCCAGCTCCAAAATGGCGG;left_mate_support=2;left_support=2;non_support=0;right_insert=;right_mate_support=1;right_support=1;valid_TSD=False
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tool_data_table_conf.xml.sample Wed Jun 21 09:38:25 2017 -0400 @@ -0,0 +1,8 @@ +<!-- Use the file tool_data_table_conf.xml.oldlocstyle if you don't want to update your loc files as changed in revision 4550:535d276c92bc--> +<tables> + <!-- Locations of indexes in the BWA mapper format for BWA versions 0.6 and higher including BWA MEM and ALN--> + <table name="bwa_mem_indexes" comment_char="#"> + <columns>value, dbkey, name, path</columns> + <file path="tool-data/bwa_mem_index.loc" /> + </table> +</tables>
--- a/update_mapq.xml Thu May 11 05:33:27 2017 -0400 +++ b/update_mapq.xml Wed Jun 21 09:38:25 2017 -0400 @@ -1,7 +1,7 @@ -<tool id="update_mapq" name="Update MAPQ score" version="0.3.24"> +<tool id="update_mapq" name="Update MAPQ score" version="0.3.25"> <description>of supplementary alignments</description> <requirements> - <requirement type="package" version="0.3.24">readtagger</requirement> + <requirement type="package" version="0.3.25">readtagger</requirement> </requirements> <version_command>update_mapq --version</version_command> <command detect_errors="aggressive"><