Mercurial > repos > devteam > gmaj
changeset 0:2cd5ee197ec7 draft default tip
Imported from capsule None
author | devteam |
---|---|
date | Mon, 27 Jan 2014 09:26:22 -0500 |
parents | |
children | |
files | GMAJ.py GMAJ.xml |
diffstat | 2 files changed, 233 insertions(+), 0 deletions(-) [+] |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/GMAJ.py Mon Jan 27 09:26:22 2014 -0500 @@ -0,0 +1,23 @@ +#!/usr/bin/env python + +""" +Script that Creates a zip file for use by GMAJ +""" +import sys, zipfile + +def __main__(): + #create a new zip file + out_file = zipfile.ZipFile( sys.argv[1], "w" ) + #add info files + out_file.write( sys.argv[3], "input.gmaj" ) #THIS FILE MUST BE ADDED FIRST + out_file.write( sys.argv[2], "input.maf" ) + + #add annotation files + for line in open( sys.argv[4] ): + try: + out_file.write( *[ field.strip() for field in line.split( "=", 1 ) ] ) + except: + continue + out_file.close() + +if __name__ == "__main__": __main__()
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/GMAJ.xml Mon Jan 27 09:26:22 2014 -0500 @@ -0,0 +1,210 @@ +<tool id="gmaj_1" name="GMAJ" version="2.0.1"> +<description>Multiple Alignment Viewer</description> + <command interpreter="python">GMAJ.py $out_file1 $maf_input $gmaj_file $filenames_file</command> + <inputs> + <param name="maf_input" type="data" format="maf" label="Alignment File" optional="False"> + <validator type="metadata" check="species_chromosomes" message="Metadata missing, click the pencil icon in the history item and use the auto-detect feature to correct this issue."/> + </param> + <param name="refseq" label="Reference Sequence" type="select"> + <option value="first" selected="true">First sequence in each block</option> + <option value="any">Any sequence</option> + </param> + <repeat name="annotations" title="Annotations"> + <conditional name="annotation_style"> + <param name="style" type="select" label="Annotation Style" help="If your data is not in a style similar to what is available from Galaxy (and the UCSC table browser), choose 'Basic'."> + <option value="galaxy" selected="true">Galaxy</option> + <option value="basic">Basic</option> + </param> + <when value="galaxy"> + <param name="species" type="select" label="Species" multiple="False"> + <options> + <filter type="data_meta" ref="maf_input" key="species" /> + </options> + </param> + <param name="exons_file" type="data" format="bed,gff" label="Exons File" optional="True"/> + <param name="highlights_file" type="data" format="bed,gff" label="Highlights File" optional="True"/> + <param name="underlays_file" type="data" format="bed,gff" label="Underlays File" optional="True"/> + <param name="repeats_file" type="data" format="bed,gff" label="Repeats File" optional="True"/> + <param name="links_file" type="data" format="bed,gff" label="Links File" optional="True"/> + </when> + <when value="basic"> + <param name="seq_name" label="Full Sequence Name" value="" type="text"> + <validator type="empty_field" message="You must supply the sequence name"/> + </param> + <param name="exons_file" type="data" format="bed,gff" label="Exons File" optional="True"/> + <param name="highlights_file" type="data" format="bed,gff" label="Highlights File" optional="True"/> + <param name="underlays_file" type="data" format="bed,gff" label="Underlays File" optional="True"/> + <param name="repeats_file" type="data" format="bed,gff" label="Repeats File" optional="True"/> + <param name="links_file" type="data" format="bed,gff" label="Links File" optional="True"/> + <param name="offset" label="Offset" value="0" type="integer"/> + </when> + </conditional> + </repeat> + <param name="nowarn" type="drill_down" display="checkbox" hierarchy="recurse" multiple="true" label="Choose Warnings to Suppress" separator=" " help="These do not affect behavior, only suppress warning messages."> + <options> + <option name="All" value="all"> + <option name="MAF File" value="maf"> + <option name="Invalid MAF version (maf_version)" value="maf_version"/> + <option name="Skipping unsupported paragraph (maf_paragraph)" value="maf_paragraph"/> + <option name="Unrecognized character found in alignment (bad_char_all)" value="bad_char_all"/> + <option name="Skipping all reconstruction scores: no species specified (recon_noseq)" value="recon_noseq"/> + <option name="Skipping reconstruction scores in blocks with missing row (recon_missing)" value="recon_missing"/> + <option name="The first row in some blocks is not the specified reference sequence (refseq_not_first)" value="refseq_not_first"/> + <option name="Skipping extra MAF File (unused_maf)" value="unused_maf"/> + </option> + <option name="Annotation Files" value="annotations"> + <option name="Semantic Assumptions" value="semantics"> + <option name="BED Format" value = "bed"> + <option name="BED12 blocks are exons (bed_blocks)" value="bed_blocks"/> + <option name="BED thickstart/thickend designate CDS (bed_thick)" value="bed_thick"/> + <option name="BED name is gene name when loading exons from BED12 (bed_name)" value="bed_name"/> + <option name="BED name is gene name when loading exons from exon BED (bed_name_full)" value="bed_name_full"/> + <option name="BED name's prefix is gene name when loading exons from exon BED (bed_name_prefix)" value="bed_name_prefix"/> + </option> + <option name="GFF group is gene name (gff_group)" value="gff_group"/> + </option> + <option name="Skipped Items" value="skipped"> + <option name="Skipping lines in unrecognized format (annot_format)" value="annot_format"/> + <option name="Skipping lines with no gene name when loading exons (gene_missing)" value="gene_missing"/> + <option name="Skipping lone CDS start/stop codons when strand is unknown (ambiguous_codon)" value="ambiguous_codon"/> + <option name="Skipping lines with invalid repeat types (unrec_repeat)" value="unrec_repeat"/> + <option name="Using 'Other' for missing or incomplete repeat types (repeat_type_missing)" value="repeat_type_missing"/> + <option name="Ignoring invalid strand fields (bad_strand)" value="bad_strand"/> + <option name="Ignoring invalid score fields (bad_score)" value="bad_score"/> + <option name="Ignoring invalid color fields (color_format)" value="color_format"/> + <option name="Ignoring malformed URLs (bad_url)" value="bad_url"/> + <option name="Score shading is not yet supported (score_shading)" value="score_shading"/> + </option> + <option name="Red Flags" value="red"> + <option name="Assuming that annotations in file ___ are for species ___ (seqname_fix_all)" value="seqname_fix_all"/> + <option name="BED start or end < 0 (bed_coord)" value="bed_coord"/> + <option name="GFF start or end < 1 (gff_coord)" value="gff_coord"/> + <option name="Missing item name for URL substitution (url_subst)" value="url_subst"/> + </option> + </option> + <option name="Miscellaneous" value="miscellaneous"> + <option name="No refseq specified; assuming 'first' (default_refseq)" value="default_refseq"/> + <option name="One or more bundle entries are not used in parameters file(unused_entry)" value="unused_entry"/> + <option name="Skipping blocks for export where reference sequence is hidden or all gaps (export_skip)" value="export_skip"/> + <option name="Possible parse error: token ends with an escaped quote (escaped_quote)" value="escaped_quote"/> + <option name="Draggable panel dividers will not be sticky (no_sticky)" value="no_sticky"/> + <option name="Selecting a large block may be very slow (big_block)" value="big_block"/> + </option> + </option> + </options> + </param> + </inputs> + <configfiles> + <configfile name="gmaj_file">#:gmaj + +title = "Galaxy: $maf_input.name" +alignfile = input.maf +refseq = $refseq +tabext = .bed .gff .gtf +#if $nowarn.value: +nowarn = $nowarn +#end if + +#set $seq_count = 0 +#for $annotation_count, $annotation in $enumerate( $annotations ): +#if $annotation.annotation_style.style == "galaxy": +#set $species_chromosomes = {} +#if $maf_input.dataset.metadata.species_chromosomes: +#for $line in open( $maf_input.dataset.metadata.species_chromosomes.file_name ): +#set $fields = $line.split( "\t" ) +#if $fields: +#set $spec = $fields.pop( 0 ) +#set $species_chromosomes[spec] = $fields +#end if +#end for +#end if +#if $species_chromosomes and $annotation.annotation_style['species'].value in $species_chromosomes and $species_chromosomes[$annotation.annotation_style['species'].value]: +#set $seq_names = [ "%s.%s" % ( $annotation.annotation_style['species'].value, $chrom ) for $chrom in $species_chromosomes[$annotation.annotation_style['species'].value]] +#else: +#set $seq_names = [$annotation.annotation_style['species']] +#end if +#else: +#set $seq_names = [$annotation.annotation_style['seq_name']] +#end if +#for $seq_name in $seq_names: +seq ${seq_count}: +seqname = $seq_name +#if $annotation.annotation_style['exons_file'].dataset: +exons = ${annotation_count}.exons.${annotation.annotation_style['exons_file'].extension} +#end if +#if $annotation.annotation_style['repeats_file'].dataset: +repeats = ${annotation_count}.repeats.${annotation.annotation_style['repeats_file'].extension} +#end if +#if $annotation.annotation_style['links_file'].dataset: +links = ${annotation_count}.links.${annotation.annotation_style['links_file'].extension} +#end if +#if $annotation.annotation_style['underlays_file'].dataset: +underlays = ${annotation_count}.underlays.${annotation.annotation_style['underlays_file'].extension} +#end if +#if $annotation.annotation_style['highlights_file'].dataset: +highlights = ${annotation_count}.highlights.${annotation.annotation_style['highlights_file'].extension} +#end if +#if $annotation.annotation_style.style == "basic": +offset = $annotation.annotation_style['offset'] +#end if + +#set $seq_count = $seq_count + 1 +#end for +#end for +</configfile> + <configfile name="filenames_file"> +#for $annotation_count, $annotation in $enumerate( $annotations ): +#if $annotation.annotation_style['exons_file'].dataset: +$annotation.annotation_style['exons_file'] = ${annotation_count}.exons.${annotation.annotation_style['exons_file'].extension} +#end if +#if $annotation.annotation_style['repeats_file'].dataset: +$annotation.annotation_style['repeats_file'] = ${annotation_count}.repeats.${annotation.annotation_style['repeats_file'].extension} +#end if +#if $annotation.annotation_style['links_file'].dataset: +$annotation.annotation_style['links_file'] = ${annotation_count}.links.${annotation.annotation_style['links_file'].extension} +#end if +#if $annotation.annotation_style['underlays_file'].dataset: +$annotation.annotation_style['underlays_file'] = ${annotation_count}.underlays.${annotation.annotation_style['underlays_file'].extension} +#end if +#if $annotation.annotation_style['highlights_file'].dataset: +$annotation.annotation_style['highlights_file'] = ${annotation_count}.highlights.${annotation.annotation_style['highlights_file'].extension} +#end if +#end for +</configfile> + </configfiles> + <outputs> + <data name="out_file1" format="gmaj.zip"/> + </outputs> +<help> +.. class:: infomark + +**Reference Sequence:** +The default option, "First sequence in each block", is the correct choice for the vast majority of MAF alignments. The alternative, "Any sequence", will allow you to flip the blocks to view them with any of the MAF sequences as the reference, but this is only appropriate if the file was generated by a sequence-symmetric alignment program such as TBA_. Using "Any sequence" with an ordinary MAF will **not** give the same results as if that alignment had been run with a different reference sequence. + +.. class:: infomark + +**Annotation Style:** +The default style, "Galaxy", specifies one set of annotations for each species in the MAF file; it assumes that if you have, say, exons for several chromosomes of one species, they are all together in one file. The other style, "Basic", is more flexible but cumbersome: a separate set of files is specified for each sequence (e.g. chromosome), and you must fill in the full sequence name as it appears in the MAF. The Basic style also allows you to provide a display offset that GMAJ will add to all of the position labels for that sequence. With either style, specifying more than one set of annotations for the same sequence will result in an error message from GMAJ. + +---- + +**What it does** + +GMAJ is an interactive viewer for MAF alignments, with support for optional annotation data. In addition to browsing the alignments, you can select and export them according to a variety of criteria and send the output back to your Galaxy history. + +For detailed information on GMAJ, click here_. + + +------ + +**Citation** + +If you use GMAJ, please cite `Blanchette M, Kent WJ, Riemer C, Elnitski L, Smit AF, Roskin KM, Baertsch R, Rosenbloom K, Clawson H, Green ED, Haussler D, Miller W. Aligning multiple genomic sequences with the threaded blockset aligner. Genome Res. 2004 Apr;14(4):708-15. <http://www.ncbi.nlm.nih.gov/pubmed/15060014>`_ and http://globin.cse.psu.edu/dist/gmaj/. + +If you use this tool in Galaxy, please cite `Blankenberg D, Taylor J, Nekrutenko A; The Galaxy Team. Making whole genome multiple alignments usable for biologists. Bioinformatics. 2011 Sep 1;27(17):2426-2428. <http://www.ncbi.nlm.nih.gov/pubmed/21775304>`_ + + +.. _here: /static/gmaj/docs/gmaj_readme.html +.. _TBA: http://www.bx.psu.edu/miller_lab/ + </help> +</tool>