# HG changeset patch # User Dave Bouvier # Date 1389190612 18000 # Node ID cf928aeaaff73d4ae15406b9f26ceda42f615747 # Parent 7fb01ea4a6417056c5229c55e720dd0978f3b80a# Parent 8b22e9adae34152bd136a0c8d574a65f604aa0b9 Merge heads. diff -r 7fb01ea4a641 -r cf928aeaaff7 cuffcompare_wrapper.py --- a/cuffcompare_wrapper.py Wed Dec 04 16:20:53 2013 -0500 +++ b/cuffcompare_wrapper.py Wed Jan 08 09:16:52 2014 -0500 @@ -8,20 +8,6 @@ sys.stderr.write( '%s\n' % msg ) sys.exit() -# Copied from sam_to_bam.py: -def check_seq_file( dbkey, cached_seqs_pointer_file ): - seq_path = '' - for line in open( cached_seqs_pointer_file ): - line = line.rstrip( '\r\n' ) - if line and not line.startswith( '#' ) and line.startswith( 'index' ): - fields = line.split( '\t' ) - if len( fields ) < 3: - continue - if fields[1] == dbkey: - seq_path = fields[2].strip() - break - return seq_path - def __main__(): #Parse Command Line parser = optparse.OptionParser() @@ -30,8 +16,7 @@ parser.add_option( '-s', dest='use_seq_data', action="store_true", help='Causes cuffcompare to look into for fasta files with the underlying genomic sequences (one file per contig) against which your reads were aligned for some optional classification functions. For example, Cufflinks transcripts consisting mostly of lower-case bases are classified as repeats. Note that must contain one fasta file per reference chromosome, and each file must be named after the chromosome, and have a .fa or .fasta extension.') # Wrapper / Galaxy options. - parser.add_option( '', '--dbkey', dest='dbkey', help='The build of the reference dataset' ) - parser.add_option( '', '--index_dir', dest='index_dir', help='GALAXY_DATA_INDEX_DIR' ) + parser.add_option( '', '--index', dest='index', help='The path of the reference genome' ) parser.add_option( '', '--ref_file', dest='ref_file', help='The reference dataset from the history' ) # Outputs. @@ -60,21 +45,15 @@ # Set/link to sequence file. 
if options.use_seq_data: - if options.ref_file != 'None': + if options.ref_file: # Sequence data from history. # Create symbolic link to ref_file so that index will be created in working directory. seq_path = "ref.fa" os.symlink( options.ref_file, seq_path ) else: - # Sequence data from loc file. - cached_seqs_pointer_file = os.path.join( options.index_dir, 'sam_fa_indices.loc' ) - if not os.path.exists( cached_seqs_pointer_file ): - stop_err( 'The required file (%s) does not exist.' % cached_seqs_pointer_file ) - # If found for the dbkey, seq_path will look something like /galaxy/data/equCab2/sam_index/equCab2.fa, - # and the equCab2.fa file will contain fasta sequences. - seq_path = check_seq_file( options.dbkey, cached_seqs_pointer_file ) - if seq_path == '': - stop_err( 'No sequence data found for dbkey %s, so sequence data cannot be used.' % options.dbkey ) + if not os.path.exists( options.index ): + stop_err( 'Reference genome %s not present, request it by reporting this error.' % options.index ) + seq_path = options.index # Build command. diff -r 7fb01ea4a641 -r cf928aeaaff7 cuffcompare_wrapper.xml --- a/cuffcompare_wrapper.xml Wed Dec 04 16:20:53 2013 -0500 +++ b/cuffcompare_wrapper.xml Wed Jan 08 09:16:52 2014 -0500 @@ -1,4 +1,4 @@ - + compare assembled transcripts to a reference annotation and track Cufflinks transcripts across multiple experiments @@ -18,14 +18,12 @@ ## Use sequence data? #if $seq_data.use_seq_data == "Yes": - -s + -s #if $seq_data.seq_source.index_source == "history": --ref_file=$seq_data.seq_source.ref_file #else: - --ref_file="None" + --index=${seq_data.seq_source.index.fields.path} #end if - --dbkey=${first_input.metadata.dbkey} - --index_dir=${GALAXY_DATA_INDEX_DIR} #end if ## Outputs. 
@@ -67,7 +65,14 @@ - + + + + + + + + diff -r 7fb01ea4a641 -r cf928aeaaff7 tool-data/fasta_indexes.loc.sample --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tool-data/fasta_indexes.loc.sample Wed Jan 08 09:16:52 2014 -0500 @@ -0,0 +1,29 @@ +#This is a sample file distributed with Galaxy that enables tools +#to use a directory of Samtools indexed sequences data files. You will need +#to create these data files and then create a fasta_indexes.loc file +#similar to this one (store it in this directory) that points to +#the directories in which those files are stored. The fasta_indexes.loc +#file has this format (white space characters are TAB characters): +# +#<unique_build_id> <dbkey> <display_name> <file_base_path> +# +#So, for example, if you had hg19 Canonical indexed stored in +# +# /depot/data2/galaxy/hg19/sam/, +# +#then the fasta_indexes.loc entry would look like this: +# +#hg19canon hg19 Human (Homo sapiens): hg19 Canonical /depot/data2/galaxy/hg19/sam/hg19canon.fa +# +#and your /depot/data2/galaxy/hg19/sam/ directory +#would contain hg19canon.fa and hg19canon.fa.fai files. +# +#Your fasta_indexes.loc file should include an entry per line for +#each index set you have stored. The file in the path does actually +#exist, but it should never be directly used. Instead, the name serves +#as a prefix for the index file.
For example: +# +#hg18canon hg18 Human (Homo sapiens): hg18 Canonical /depot/data2/galaxy/hg18/sam/hg18canon.fa +#hg18full hg18 Human (Homo sapiens): hg18 Full /depot/data2/galaxy/hg18/sam/hg18full.fa +#hg19canon hg19 Human (Homo sapiens): hg19 Canonical /depot/data2/galaxy/hg19/sam/hg19canon.fa +#hg19full hg19 Human (Homo sapiens): hg19 Full /depot/data2/galaxy/hg19/sam/hg19full.fa diff -r 7fb01ea4a641 -r cf928aeaaff7 tool-data/sam_fa_indices.loc.sample --- a/tool-data/sam_fa_indices.loc.sample Wed Dec 04 16:20:53 2013 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,28 +0,0 @@ -#This is a sample file distributed with Galaxy that enables tools -#to use a directory of Samtools indexed sequences data files. You will need -#to create these data files and then create a sam_fa_indices.loc file -#similar to this one (store it in this directory) that points to -#the directories in which those files are stored. The sam_fa_indices.loc -#file has this format (white space characters are TAB characters): -# -#index -# -#So, for example, if you had hg18 indexed stored in -#/depot/data2/galaxy/sam/, -#then the sam_fa_indices.loc entry would look like this: -# -#index hg18 /depot/data2/galaxy/sam/hg18.fa -# -#and your /depot/data2/galaxy/sam/ directory -#would contain hg18.fa and hg18.fa.fai files: -# -#-rw-r--r-- 1 james universe 830134 2005-09-13 10:12 hg18.fa -#-rw-r--r-- 1 james universe 527388 2005-09-13 10:12 hg18.fa.fai -# -#Your sam_fa_indices.loc file should include an entry per line for -#each index set you have stored. The file in the path does actually -#exist, but it should never be directly used. Instead, the name serves -#as a prefix for the index file. For example: -# -#index hg18 /depot/data2/galaxy/sam/hg18.fa -#index hg19 /depot/data2/galaxy/sam/hg19.fa diff -r 7fb01ea4a641 -r cf928aeaaff7 tool_data_table_conf.xml.sample