Mercurial > repos > devteam > sam_pileup
changeset 1:e7d863c5c5d6
Update sam_pileup to use the fasta_indexes data table.
author | Dave Bouvier <dave@bx.psu.edu> |
---|---|
date | Wed, 11 Dec 2013 12:54:32 -0500 |
parents | 95612c159681 |
children | 3ff8935743a9 |
files | sam_pileup.py sam_pileup.xml tool-data/fasta_indexes.loc.sample tool-data/sam_fa_indices.loc.sample tool-data/tool_data_table_conf.xml.sample tool_data_table_conf.xml.sample |
diffstat | 6 files changed, 51 insertions(+), 60 deletions(-) [+] |
line wrap: on
line diff
--- a/sam_pileup.py Mon Aug 26 14:21:12 2013 -0400 +++ b/sam_pileup.py Wed Dec 11 12:54:32 2013 -0500 @@ -8,9 +8,8 @@ -o, --output1=o: Output pileup -R, --ref=R: Reference file type -n, --ownFile=n: User-supplied fasta reference file - -d, --dbkey=d: dbkey of user-supplied file - -x, --indexDir=x: Index directory -b, --bamIndex=b: BAM index file + -g, --index=g: Path of the indexed reference genome -s, --lastCol=s: Print the mapping quality as the last column -i, --indels=i: Only output lines containing indels -M, --mapCap=M: Cap mapping quality @@ -31,24 +30,9 @@ sys.stderr.write( '%s\n' % msg ) sys.exit() -def check_seq_file( dbkey, GALAXY_DATA_INDEX_DIR ): - seqFile = '%s/sam_fa_indices.loc' % GALAXY_DATA_INDEX_DIR - seqPath = '' - for line in open( seqFile ): - line = line.rstrip( '\r\n' ) - if line and not line.startswith( '#' ) and line.startswith( 'index' ): - fields = line.split( '\t' ) - if len( fields ) < 3: - continue - if fields[1] == dbkey: - seqPath = fields[2].strip() - break - return seqPath - def __main__(): #Parse Command Line options, args = doc_optparse.parse( __doc__ ) - seqPath = check_seq_file( options.dbkey, options.indexDir ) # output version # of tool try: tmp = tempfile.NamedTemporaryFile().name @@ -77,7 +61,6 @@ tmpf1 = tempfile.NamedTemporaryFile( dir=tmpDir ) tmpf1_name = tmpf1.name tmpf1.close() - tmpf1fai_name = '%s.fai' % tmpf1_name #link bam and bam index to working directory (can't move because need to leave original) os.symlink( options.input1, tmpf0bam_name ) os.symlink( options.bamIndex, tmpf0bambai_name ) @@ -100,9 +83,9 @@ try: #index reference if necessary and prepare pileup command if options.ref == 'indexed': - if not os.path.exists( "%s.fai" % seqPath ): - raise Exception, "No sequences are available for '%s', request them by reporting this error." 
% options.dbkey - cmd = cmd % ( opts, seqPath, tmpf0bam_name, options.output1 ) + if not os.path.exists( "%s.fai" % options.index ): + raise Exception, "Indexed genome %s not present, request it by reporting this error." % options.index + cmd = cmd % ( opts, options.index, tmpf0bam_name, options.output1 ) elif options.ref == 'history': os.symlink( options.ownFile, tmpf1_name ) cmdIndex = 'samtools faidx %s' % ( tmpf1_name )
--- a/sam_pileup.xml Mon Aug 26 14:21:12 2013 -0400 +++ b/sam_pileup.xml Wed Dec 11 12:54:32 2013 -0500 @@ -1,4 +1,4 @@ -<tool id="sam_pileup" name="Generate pileup" version="1.1.1"> +<tool id="sam_pileup" name="Generate pileup" version="1.1.2"> <description>from BAM dataset</description> <requirements> <requirement type="package" version="0.1.16">samtools</requirement> @@ -11,7 +11,7 @@ #if $refOrHistory.reference == "history": --ownFile=$refOrHistory.ownFile #else: - --ownFile="None" + --index=${refOrHistory.index.fields.path} #end if --dbkey=${input1.metadata.dbkey} --indexDir=${GALAXY_DATA_INDEX_DIR} @@ -41,7 +41,14 @@ <when value="indexed"> <param name="input1" type="data" format="bam" label="Select the BAM file to generate the pileup file for"> <validator type="unspecified_build" /> - <validator type="dataset_metadata_in_file" filename="sam_fa_indices.loc" metadata_name="dbkey" metadata_column="1" message="Sequences are not currently available for the specified build." line_startswith="index" /> + <validator type="dataset_metadata_in_data_table" table_name="fasta_indexes" metadata_name="dbkey" metadata_column="1" message="Sequences are not currently available for the specified build." /> + + </param> + <param name="index" type="select" label="Using reference genome"> + <options from_data_table="fasta_indexes"> + <filter type="data_meta" ref="input1" key="dbkey" column="1" /> + <validator type="no_options" message="No reference genome is available for the build associated with the selected input dataset" /> + </options> </param> </when> <when value="history"> @@ -100,6 +107,7 @@ --> <param name="reference" value="indexed" /> <param name="input1" value="sam_pileup_in1.bam" ftype="bam" dbkey="equCab2" /> + <param name="index" value="chr_m" /> <param name="lastCol" value="no" /> <param name="indels" value="no" /> <param name="mapCap" value="60" />
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tool-data/fasta_indexes.loc.sample Wed Dec 11 12:54:32 2013 -0500 @@ -0,0 +1,29 @@ +#This is a sample file distributed with Galaxy that enables tools +#to use a directory of Samtools indexed sequences data files. You will need +#to create these data files and then create a fasta_indexes.loc file +#similar to this one (store it in this directory) that points to +#the directories in which those files are stored. The fasta_indexes.loc +#file has this format (white space characters are TAB characters): +# +# <unique_build_id> <dbkey> <display_name> <file_base_path> +# +#So, for example, if you had the hg19 Canonical index stored in +# +# /depot/data2/galaxy/hg19/sam/, +# +#then the fasta_indexes.loc entry would look like this: +# +#hg19canon hg19 Human (Homo sapiens): hg19 Canonical /depot/data2/galaxy/hg19/sam/hg19canon.fa +# +#and your /depot/data2/galaxy/hg19/sam/ directory +#would contain hg19canon.fa and hg19canon.fa.fai files. +# +#Your fasta_indexes.loc file should include an entry per line for +#each index set you have stored. The file in the path does actually +#exist, but it should never be directly used. Instead, the name serves +#as a prefix for the index file. For example: +# +#hg18canon hg18 Human (Homo sapiens): hg18 Canonical /depot/data2/galaxy/hg18/sam/hg18canon.fa +#hg18full hg18 Human (Homo sapiens): hg18 Full /depot/data2/galaxy/hg18/sam/hg18full.fa +#hg19canon hg19 Human (Homo sapiens): hg19 Canonical /depot/data2/galaxy/hg19/sam/hg19canon.fa +#hg19full hg19 Human (Homo sapiens): hg19 Full /depot/data2/galaxy/hg19/sam/hg19full.fa
--- a/tool-data/sam_fa_indices.loc.sample Mon Aug 26 14:21:12 2013 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,28 +0,0 @@ -#This is a sample file distributed with Galaxy that enables tools -#to use a directory of Samtools indexed sequences data files. You will need -#to create these data files and then create a sam_fa_indices.loc file -#similar to this one (store it in this directory) that points to -#the directories in which those files are stored. The sam_fa_indices.loc -#file has this format (white space characters are TAB characters): -# -#index <seq> <location> -# -#So, for example, if you had hg18 indexed stored in -#/depot/data2/galaxy/sam/, -#then the sam_fa_indices.loc entry would look like this: -# -#index hg18 /depot/data2/galaxy/sam/hg18.fa -# -#and your /depot/data2/galaxy/sam/ directory -#would contain hg18.fa and hg18.fa.fai files: -# -#-rw-r--r-- 1 james universe 830134 2005-09-13 10:12 hg18.fa -#-rw-r--r-- 1 james universe 527388 2005-09-13 10:12 hg18.fa.fai -# -#Your sam_fa_indices.loc file should include an entry per line for -#each index set you have stored. The file in the path does actually -#exist, but it should never be directly used. Instead, the name serves -#as a prefix for the index file. For example: -# -#index hg18 /depot/data2/galaxy/sam/hg18.fa -#index hg19 /depot/data2/galaxy/sam/hg19.fa
--- a/tool-data/tool_data_table_conf.xml.sample Mon Aug 26 14:21:12 2013 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,8 +0,0 @@ -<!-- Use the file tool_data_table_conf.xml.oldlocstyle if you don't want to update your loc files as changed in revision 4550:535d276c92bc--> -<tables> - <!-- Location of SAMTools indexes and other files --> - <table name="sam_fa_indexes" comment_char="#"> - <columns>line_type, value, path</columns> - <file path="tool-data/sam_fa_indices.loc" /> - </table> -</tables> \ No newline at end of file
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tool_data_table_conf.xml.sample Wed Dec 11 12:54:32 2013 -0500 @@ -0,0 +1,7 @@ +<!-- Use the file tool_data_table_conf.xml.oldlocstyle if you don't want to update your loc files as changed in revision 4550:535d276c92bc--> +<tables> + <table name="fasta_indexes" comment_char="#"> + <columns>value, dbkey, name, path</columns> + <file path="tool-data/fasta_indexes.loc" /> + </table> +</tables>