Mercurial > repos > devteam > cuffcompare
changeset 4:cf928aeaaff7
Merge heads.
| author | Dave Bouvier <dave@bx.psu.edu> | 
|---|---|
| date | Wed, 08 Jan 2014 09:16:52 -0500 | 
| parents | 7fb01ea4a641 (current diff) 8b22e9adae34 (diff) | 
| children | 67695d7ff787 | 
| files | tool-data/sam_fa_indices.loc.sample tool_data_table_conf.xml.sample | 
| diffstat | 4 files changed, 45 insertions(+), 60 deletions(-) [+] | 
line wrap: on
 line diff
--- a/cuffcompare_wrapper.py Wed Dec 04 16:20:53 2013 -0500 +++ b/cuffcompare_wrapper.py Wed Jan 08 09:16:52 2014 -0500 @@ -8,20 +8,6 @@ sys.stderr.write( '%s\n' % msg ) sys.exit() -# Copied from sam_to_bam.py: -def check_seq_file( dbkey, cached_seqs_pointer_file ): - seq_path = '' - for line in open( cached_seqs_pointer_file ): - line = line.rstrip( '\r\n' ) - if line and not line.startswith( '#' ) and line.startswith( 'index' ): - fields = line.split( '\t' ) - if len( fields ) < 3: - continue - if fields[1] == dbkey: - seq_path = fields[2].strip() - break - return seq_path - def __main__(): #Parse Command Line parser = optparse.OptionParser() @@ -30,8 +16,7 @@ parser.add_option( '-s', dest='use_seq_data', action="store_true", help='Causes cuffcompare to look into for fasta files with the underlying genomic sequences (one file per contig) against which your reads were aligned for some optional classification functions. For example, Cufflinks transcripts consisting mostly of lower-case bases are classified as repeats. Note that <seq_dir> must contain one fasta file per reference chromosome, and each file must be named after the chromosome, and have a .fa or .fasta extension.') # Wrapper / Galaxy options. - parser.add_option( '', '--dbkey', dest='dbkey', help='The build of the reference dataset' ) - parser.add_option( '', '--index_dir', dest='index_dir', help='GALAXY_DATA_INDEX_DIR' ) + parser.add_option( '', '--index', dest='index', help='The path of the reference genome' ) parser.add_option( '', '--ref_file', dest='ref_file', help='The reference dataset from the history' ) # Outputs. @@ -60,21 +45,15 @@ # Set/link to sequence file. if options.use_seq_data: - if options.ref_file != 'None': + if options.ref_file: # Sequence data from history. # Create symbolic link to ref_file so that index will be created in working directory. seq_path = "ref.fa" os.symlink( options.ref_file, seq_path ) else: - # Sequence data from loc file. 
- cached_seqs_pointer_file = os.path.join( options.index_dir, 'sam_fa_indices.loc' ) - if not os.path.exists( cached_seqs_pointer_file ): - stop_err( 'The required file (%s) does not exist.' % cached_seqs_pointer_file ) - # If found for the dbkey, seq_path will look something like /galaxy/data/equCab2/sam_index/equCab2.fa, - # and the equCab2.fa file will contain fasta sequences. - seq_path = check_seq_file( options.dbkey, cached_seqs_pointer_file ) - if seq_path == '': - stop_err( 'No sequence data found for dbkey %s, so sequence data cannot be used.' % options.dbkey ) + if not os.path.exists( options.index ): + stop_err( 'Reference genome %s not present, request it by reporting this error.' % options.index ) + seq_path = options.index # Build command.
--- a/cuffcompare_wrapper.xml Wed Dec 04 16:20:53 2013 -0500 +++ b/cuffcompare_wrapper.xml Wed Jan 08 09:16:52 2014 -0500 @@ -1,4 +1,4 @@ -<tool id="cuffcompare" name="Cuffcompare" version="0.0.5"> +<tool id="cuffcompare" name="Cuffcompare" version="0.0.6"> <!-- Wrapper supports Cuffcompare versions v1.3.0 and newer --> <description>compare assembled transcripts to a reference annotation and track Cufflinks transcripts across multiple experiments</description> <requirements> @@ -18,14 +18,12 @@ ## Use sequence data? #if $seq_data.use_seq_data == "Yes": - -s + -s #if $seq_data.seq_source.index_source == "history": --ref_file=$seq_data.seq_source.ref_file #else: - --ref_file="None" + --index=${seq_data.seq_source.index.fields.path} #end if - --dbkey=${first_input.metadata.dbkey} - --index_dir=${GALAXY_DATA_INDEX_DIR} #end if ## Outputs. @@ -67,7 +65,14 @@ <option value="cached">Locally cached</option> <option value="history">History</option> </param> - <when value="cached"></when> + <when value="cached"> + <param name="index" type="select" label="Using reference genome"> + <options from_data_table="fasta_indexes"> + <filter type="data_meta" ref="first_input" key="dbkey" column="1" /> + <validator type="no_options" message="No reference genome is available for the build associated with the selected input dataset" /> + </options> + </param> + </when> <when value="history"> <param name="ref_file" type="data" format="fasta" label="Using reference file" /> </when>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tool-data/fasta_indexes.loc.sample Wed Jan 08 09:16:52 2014 -0500 @@ -0,0 +1,29 @@ +#This is a sample file distributed with Galaxy that enables tools +#to use a directory of Samtools indexed sequences data files. You will need +#to create these data files and then create a fasta_indexes.loc file +#similar to this one (store it in this directory) that points to +#the directories in which those files are stored. The fasta_indexes.loc +#file has this format (white space characters are TAB characters): +# +# <unique_build_id> <dbkey> <display_name> <file_base_path> +# +#So, for example, if you had hg19 Canonical indexed stored in +# +# /depot/data2/galaxy/hg19/sam/, +# +#then the fasta_indexes.loc entry would look like this: +# +#hg19canon hg19 Human (Homo sapiens): hg19 Canonical /depot/data2/galaxy/hg19/sam/hg19canon.fa +# +#and your /depot/data2/galaxy/hg19/sam/ directory +#would contain hg19canon.fa and hg19canon.fa.fai files. +# +#Your fasta_indexes.loc file should include an entry per line for +#each index set you have stored. The file in the path does actually +#exist, but it should never be directly used. Instead, the name serves +#as a prefix for the index file. For example: +# +#hg18canon hg18 Human (Homo sapiens): hg18 Canonical /depot/data2/galaxy/hg18/sam/hg18canon.fa +#hg18full hg18 Human (Homo sapiens): hg18 Full /depot/data2/galaxy/hg18/sam/hg18full.fa +#hg19canon hg19 Human (Homo sapiens): hg19 Canonical /depot/data2/galaxy/hg19/sam/hg19canon.fa +#hg19full hg19 Human (Homo sapiens): hg19 Full /depot/data2/galaxy/hg19/sam/hg19full.fa
--- a/tool-data/sam_fa_indices.loc.sample Wed Dec 04 16:20:53 2013 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,28 +0,0 @@ -#This is a sample file distributed with Galaxy that enables tools -#to use a directory of Samtools indexed sequences data files. You will need -#to create these data files and then create a sam_fa_indices.loc file -#similar to this one (store it in this directory) that points to -#the directories in which those files are stored. The sam_fa_indices.loc -#file has this format (white space characters are TAB characters): -# -#index <seq> <location> -# -#So, for example, if you had hg18 indexed stored in -#/depot/data2/galaxy/sam/, -#then the sam_fa_indices.loc entry would look like this: -# -#index hg18 /depot/data2/galaxy/sam/hg18.fa -# -#and your /depot/data2/galaxy/sam/ directory -#would contain hg18.fa and hg18.fa.fai files: -# -#-rw-r--r-- 1 james universe 830134 2005-09-13 10:12 hg18.fa -#-rw-r--r-- 1 james universe 527388 2005-09-13 10:12 hg18.fa.fai -# -#Your sam_fa_indices.loc file should include an entry per line for -#each index set you have stored. The file in the path does actually -#exist, but it should never be directly used. Instead, the name serves -#as a prefix for the index file. For example: -# -#index hg18 /depot/data2/galaxy/sam/hg18.fa -#index hg19 /depot/data2/galaxy/sam/hg19.fa
