Mercurial > repos > urgi-team > teiso
changeset 1:15d6811e6bf5 draft
Uploaded
author | urgi-team |
---|---|
date | Tue, 24 May 2016 08:59:28 -0400 |
parents | 3d22562b4489 |
children | 775f119b9ac2 |
files | TEiso/ClosestToStartSite_Wrapper.py TEiso/ClosestToStartSite_Wrapper.xml TEiso/CufflinksGTFToBed_Wrapper.py TEiso/CufflinksGTFToBed_Wrapper.xml TEiso/GFFToBed_Wrapper.py TEiso/GFFToBed_Wrapper.xml |
diffstat | 6 files changed, 125 insertions(+), 164 deletions(-) [+] |
line wrap: on
line diff
--- a/TEiso/ClosestToStartSite_Wrapper.py Fri Apr 29 09:11:18 2016 -0400 +++ b/TEiso/ClosestToStartSite_Wrapper.py Tue May 24 08:59:28 2016 -0400 @@ -3,7 +3,6 @@ import subprocess, tempfile, sys, os, glob, shutil, time from optparse import OptionParser -from commons.core.utils.RepetOptionParser import RepetOptionParser class ClosestToStartSiteWrapper(object): @@ -18,14 +17,16 @@ def setAttributesFromCmdLine(self): - self._toolVersion = "1.0" - description = "ClosestToStartSite version %s" % self._toolVersion - epilog = "\n parses a bed file and create a bed file to create a report about positions of features A to features B. \n" - epilog += "example: ClosestToStartSite.py -i <inputFile> -o <outputFile>\n" - parser = RepetOptionParser(description = description, epilog = epilog, version = self._toolVersion) - parser.add_option("-i", "--inputFile", dest = "inputFile", action = "store", type = "string", help = "Input GTF File name(transcript.gtf of Cufflinks).", default = "") + description = "ClosestToStartSite" + epilog = "\nParser a bed file and create a bed file to create a report about positions of features A to features B. \n" + epilog +="it can also add the class code of features A. 
\n" + epilog += "example: ClosestToStartSite.py -i <inputFile> -c <cuff_in.tmap> -o <outputFile>\n" + parser = OptionParser(description = description, version = "1.0") + parser.add_option("-i", "--inputFile", dest = "inputFile", action = "store", type = "string", help = "input bed file", default = "") + parser.add_option("-c", "--cuffcom_tmap", dest = "cuffcom_tmap", action = "store", type = "string", help = "input gtf file", default = "") parser.add_option("-o", "--outputFile", dest = "outputFile", action = "store", type = "string", help = "output Bed File name", default = "") - parser.add_option("-v", "--verbosity", dest = "verbosity", action = "store", type = "int", help = "Verbosity [optional] [default: 3]",default = 3) + #parser.add_option("-t", "--outputFileclasscode", dest = "outputFile_classcode", action = "store", type = "string", help = "output Bed File name with class code.", default = "") + parser.add_option("-v", "--verbosity", dest = "verbosity", action = "store", type = "int", help = "verbosity [optional] [default: 3]",default = 3) options = parser.parse_args()[0] self._setAttributesFromOptions(options) @@ -36,6 +37,11 @@ prg = "ClosestToStartSite.py" args = "" args += "-i %s" % self._options.inputFile + args += " " + args += "-o %s" % self._options.outputFile + if self._options.cuffcom_tmap != "": + args += " " + args += "-c %s" % self._options.cuffcom_tmap cmd = "%s %s" %(prg, args) print cmd
--- a/TEiso/ClosestToStartSite_Wrapper.xml Fri Apr 29 09:11:18 2016 -0400 +++ b/TEiso/ClosestToStartSite_Wrapper.xml Tue May 24 08:59:28 2016 -0400 @@ -1,132 +1,69 @@ <tool id="ClosestToStartSite" name="ClosestToStartSite" version="1.0"> - <description>ClosestToStartSite parses a bed file and create a bed file to create a report about positions of features A to features B.</description> + <description>ClosestToStartSite parses a bed file and generate a report about positions of features A to features B. It can also add the class code of features A.</description> <requirements> <requirement type="package" version="1.0">TEiso_Tools</requirement> </requirements> <version_command> ClosestToStartSite.py --version </version_command> - <command interpreter="python"> - ClosestToStartSite_Wrapper.py -i $inputFile -o $outputFile + <command interpreter="python"> + #if $ClassCode.get_class_code + ClosestToStartSite_Wrapper.py -i $inputFile -c $cuffcom_tmap -o $outputFile + #else + ClosestToStartSite_Wrapper.py -i $inputFile -o $outputFile + #end if </command> <inputs> <param name="inputFile" type="data" format="bed" label="indicate a bed file."/> + <conditional name="ClassCode"> + <param name="get_class_code" type="boolean" label="get information of class code from cuff_in.tmap" truevalue="yes" falsevalue="no" checked="off" /> + <when value="no"/> + <when value="yes"> + <param name="cuffcom_tmap" type="data" format="tabular" label="indicate cuff_in.tmap" help="default = 1"/> + </when> + </conditional> </inputs> <outputs> <data name="outputFile" format="bed" label="${tool.name} on ${on_string} (BED)"/> </outputs> <help><![CDATA[ -**ClosestToStartSite_wrapper parses a bed file and create a bed file to create a report about positions of features A to features B.** - - -**what it does :** - -parses a bed file and create a bed file to create a report about positions of features A to features B - ------ - -**input format :** - -.. 
class:: infomark - -**output format :** - -fake 140 532 CUFF.1.1 CUFF.1 + 26875.607 - ------ -****** "B_close_TSS" -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +**ClosestToStartSite parses a bed file and generate a report about positions of features A (according to its TSS) to features B. It can also add the class code of features A.** + +**what it does :** - F[1] gene F[2] - =========================> - ------------ - F[8] F[9] - - - F[1] F[2] - <========================= - --------------- +generate a report about positions of features B to features A (according to its TSS). In input file, there are the information of the two features. ClosestToStartSite reports only the cases that features Bis near to TSS or has overlap with TSS of features A. it can be also run cuffcopare to features A, and add the class code information to output file. + + +----- + +**example :** -****** "B_overlap_TSS" -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - - F[1] gene F[2] - =========================> - ------------- - F[8] F[9] - - gene - F[1]=========================>F[2] - - F[8]---------------F[9] - - - - F[1]=============================>F[2] - F[8]---------------F[9] - - - F[1]<=============================F[2] - --------------------------- - F[8] F[9] - - - F[1]<=============================F[2] - F[8]---------------F[9] - - - F[1]<=============================F[2] - F[8]---------------F[9] +Bed input file: :: + + 2L 239751 240346 CUFF.36.1 CUFF.36 - 2L 239932 241306 RXX + 415 TE_overlap_TSS + 2L 113233 113411 CUFF.14.1 CUFF.14 - 2L 113496 113509 RLX - -854 TE_near_TSS + + +---- + +output format: :: + -****** "B-inclus-A" -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - - - - F[1] gene F[2] - ============================== - ------------- - F[8] F[9] - -****** "A-inclus-B" -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - - - F[1]======================F[2] - F[8]----------------------------------------------------F[9] - - - - - 
F[1]==================================>F[2] - F[8]----------------------------------------------------------F[9] - - - - F[1]<==================================F[2] - F[8]----------------------------------------------------------F[9] - - - -****** "A-inclus-B-inTSS" -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - - F[1]<==================================F[2] - [8]----------------------------------------------------------F[9] - - - F[1]==================================>F[2] - F[8]----------------------------------------------------------F[9] - - ------ + 2L 239751 240346 CUFF.36.1 CUFF.36 - 2L 239932 241306 RXX + 415 TE_overlap_TSS + 2L 113233 113411 CUFF.14.1 CUFF.14 - 2L 113496 113509 RLX - -854 TE_near_TSS + -**reference :** +output format with calss code: :: + + + 2L 239751 240346 CUFF.36.1 CUFF.36 - 2L 239932 241306 RXX + 415 TE_overlap_TSS c gene-id-1 + 2L 113233 113411 CUFF.14.1 CUFF.14 - 2L 113496 113509 RLX - -854 TE_near_TSS = gene-id-2 + ]]> </help>
--- a/TEiso/CufflinksGTFToBed_Wrapper.py Fri Apr 29 09:11:18 2016 -0400 +++ b/TEiso/CufflinksGTFToBed_Wrapper.py Tue May 24 08:59:28 2016 -0400 @@ -1,9 +1,7 @@ #!/usr/bin/env python - - -import subprocess, tempfile, sys, os, glob, shutil, time +import subprocess, tempfile, sys, os from optparse import OptionParser -from commons.core.utils.RepetOptionParser import RepetOptionParser +#from commons.core.utils.RepetOptionParser import RepetOptionParser class CufflinksGTFToBedWrapper(object): @@ -18,11 +16,12 @@ def setAttributesFromCmdLine(self): - self._toolVersion = "1.0" - description = "CufflinksGTFToBed version %s" % self._toolVersion + #self._toolVersion = "1.0" + description = "CufflinksGTFToBed " epilog = "\n parses a GTF file of Cufflinks and create a bed file. \n" epilog += "example: CufflinksGTFToBed.py -i <inputFile> -o <outputFile>\n" - parser = RepetOptionParser(description = description, epilog = epilog, version = self._toolVersion) + #parser = RepetOptionParser(description = description, epilog = epilog, version = self._toolVersion) + parser = OptionParser(description = description, version = "1.0") parser.add_option("-i", "--inputFile", dest = "inputFile", action = "store", type = "string", help = "Input GTF File name(transcript.gtf of Cufflinks).", default = "") parser.add_option("-o", "--outputFile", dest = "outputFile", action = "store", type = "string", help = "output Bed File name", default = "") parser.add_option("-v", "--verbosity", dest = "verbosity", action = "store", type = "int", help = "Verbosity [optional] [default: 3]",default = 3) @@ -33,9 +32,12 @@ self._options = options def run(self): + tmp = "%s_tmp" % ((os.path.splitext(self._options.outputFile)[0])) prg = "CufflinksGTFToBed.py" args = "" args += "-i %s" % self._options.inputFile + args += " " + args += "-o %s" % tmp cmd = "%s %s" %(prg, args) print cmd @@ -61,6 +63,12 @@ raise Exception, stderr except Exception, e: self.stop_err( 'Error in TranscriptToBed:\n' + str( e ) ) + try: + 
cmdsort= "bedtools sort -i %s > %s" % (tmp, self._options.outputFile) + os.system(cmdsort) + except Exception, e: + self.stop_err( 'Error in bedtools sort:\n' + str( e ) ) + if __name__ == "__main__": iWrapper = CufflinksGTFToBedWrapper()
--- a/TEiso/CufflinksGTFToBed_Wrapper.xml Fri Apr 29 09:11:18 2016 -0400 +++ b/TEiso/CufflinksGTFToBed_Wrapper.xml Tue May 24 08:59:28 2016 -0400 @@ -16,36 +16,35 @@ <data name="outputFile" format="bed" label="${tool.name} on ${on_string} (BED)"/> </outputs> <help><![CDATA[ - + + + **CufflinksGTFToBed_wrapper converts a result GTF file of Cufflinks into a bed file.** - **what it does :** -converts a result GTF file of Cufflinks into a bed file. - +Converts a GTF file of transcripts into a bed file. The input file contains all the information about the transcripts and their exons. CufflinksGTFToBed takes only the transcripts to convert into a bed file. + It can take: Chromosome, Start, End, strand, Isoform ID, Gene ID, value of FPKM ----- -**input format :** +**example :** -.. class:: infomark -fake Cufflinks transcript 140 532 1000 + . gene_id "CUFF.1"; -transcript_id "CUFF.1.1"; FPKM "26875.6073354154"; frac "1.000000"; conf_lo "24989.806681"; conf_hi "28761.407990"; cov "752.375132"; ------ +Transcript input file: :: -**output format :** - -fake 140 532 CUFF.1.1 CUFF.1 + 26875.607 - ------ + 2L cufflinks transcript 10487 18076 372 - . gene_id "CUFF.1"; transcript_id "CUFF.1.1"; FPKM "1.3627628649"; frac "0.197041"; conf_lo "0.805777"; conf_hi "1.919748"; cov "1.706083"; ------ + + +---- -**reference :** +output format: :: + + 2L 10487 18076 CUFF.1.1 CUFF.1 - 1.363 + ]]> </help>
--- a/TEiso/GFFToBed_Wrapper.py Fri Apr 29 09:11:18 2016 -0400 +++ b/TEiso/GFFToBed_Wrapper.py Tue May 24 08:59:28 2016 -0400 @@ -3,7 +3,7 @@ import subprocess, tempfile, sys, os, glob, shutil, time from optparse import OptionParser -from commons.core.utils.RepetOptionParser import RepetOptionParser +#from commons.core.utils.RepetOptionParser import RepetOptionParser class GFFToBedWrapper(object): @@ -18,12 +18,13 @@ def setAttributesFromCmdLine(self): - self._toolVersion = "1.0" - description = "GFFToBed version %s" % self._toolVersion - epilog = "\n parses a GFF file and create a bed file. \n" + ## self._toolVersion = "1.0" + description = "GFFToBed version" + epilog = "\n parses a GFF3 file and create a bed file. \n" epilog += "example: GFFToBed.py -i <inputFile> -o <outputFile>\n" - parser = RepetOptionParser(description = description, epilog = epilog, version = self._toolVersion) - parser.add_option("-i", "--inputFile", dest = "inputFile", action = "store", type = "string", help = "Input GFF File name.", default = "") + #parser = RepetOptionParser(description = description, epilog = epilog, version = self._toolVersion) + parser = OptionParser(description = description, version = "1.0") + parser.add_option("-i", "--inputFile", dest = "inputFile", action = "store", type = "string", help = "Input GFF3 File name.", default = "") parser.add_option("-o", "--outputFile", dest = "outputFile", action = "store", type = "string", help = "output Bed File name", default = "") parser.add_option("-v", "--verbosity", dest = "verbosity", action = "store", type = "int", help = "Verbosity [optional] [default: 3]",default = 3) options = parser.parse_args()[0] @@ -33,12 +34,15 @@ self._options = options def run(self): - prg = "GFFToBed.py" + tmp = "%s_tmp" % ((os.path.splitext(self._options.outputFile)[0])) + prg = "GFFToBed.py" args = "" args += "-i %s" % self._options.inputFile + args += " " + args += "-o %s" % tmp cmd = "%s %s" %(prg, args) print cmd - + try: tmp_err = 
tempfile.NamedTemporaryFile().name tmp_stderr = open( tmp_err, 'wb' ) @@ -61,7 +65,16 @@ raise Exception, stderr except Exception, e: self.stop_err( 'Error in GFFToBed:\n' + str( e ) ) - + + + try: + cmdsort= "bedtools sort -i %s > %s" % (tmp, self._options.outputFile) + os.system(cmdsort) + except Exception, e: + self.stop_err( 'Error in bedtools sort:\n' + str( e ) ) + + + if __name__ == "__main__": iWrapper = GFFToBedWrapper() iWrapper.setAttributesFromCmdLine()
--- a/TEiso/GFFToBed_Wrapper.xml Fri Apr 29 09:11:18 2016 -0400 +++ b/TEiso/GFFToBed_Wrapper.xml Tue May 24 08:59:28 2016 -0400 @@ -1,5 +1,5 @@ <tool id="GFFToBed" name="GFFToBed" version="1.0"> - <description>GFFToBed can convert a result GTF file of Cufflinks into a bed file.</description> + <description>GFFToBed can convert a result GTF file into a bed file.</description> <requirements> <requirement type="package" version="1.0">TEiso_Tools</requirement> </requirements> @@ -7,7 +7,7 @@ GFFToBed.py --version </version_command> <command interpreter="python"> - GFFToBedWrapper_Wrapper.py -i $inputFile -o $outputFile + GFFToBed_Wrapper.py -i $inputFile -o $outputFile </command> <inputs> <param name="inputFile" type="data" format="gff" label="indicate a transcript GTF file of cufflinks."/> @@ -16,37 +16,35 @@ <data name="outputFile" format="bed" label="${tool.name} on ${on_string} (BED)"/> </outputs> <help><![CDATA[ - -**GFFToBed_wrapper converts a result GTF file of Cufflinks into a bed file.** + +**GFFToBed_wrapper converts a result GTF file into a bed file.** **what it does :** -converts a result GTF file of Cufflinks into a bed file. - -It can take: Chromosome, Start, End, ID, Target, strand +converts a result GFF3 file into a bed file + +it can take: Chromosome, Start, End, ID, Target, strand. ----- -**input format :** +**example :** + +a GFF3 file of the Transposable elements as input file: :: -.. class:: infomark - -2L DmelCaf1_2_2_REPET_TEs match 47519 52563 0.0 + . ID=ms1_dmel_chr2L_RIX-comp_DmelCaf1_2_2-B-P58.20-Map11;Target=RIX-comp_DmelCaf1_2_2-B-P58.20-Map11 6 5050;Identity=99.8 - + 2L DmelCaf1_2_2_REPET_TEs match 47519 52563 0.0 + . ID=ms1_dmel_chr2L_RIX-comp_DmelCaf1_2_2-B-P58.20-Map11;Target=RIX-comp_DmelCaf1_2_2-B-P58.20-Map11 6 5050;Identity=99.8 + 2L DmelCaf1_2_2_REPET_TEs match 16205 16490 0.0 + . 
ID=ms2_dmel_chr2L_RXX-LARD_DmelCaf1_2_2-L-B578-Map1;Target=RXX-LARD_DmelCaf1_2_2-L-B578-Map1 2551 2828;Identity=97.5 + 2R DmelCaf1_2_2_REPET_TEs match 24134 24428 0.0 - . ID=ms4350_dmel_chr2R_RXX-LARD-chim_DmelCaf1_2_2-L-B414-Map1;Target=RXX-LARD-chim_DmelCaf1_2_2-L-B414-Map1 2894 3273 ----- -**output format :** +output format: :: -2L 47519 52563 ms1_dmel_chr2L_RIX-comp_DmelCaf1_2_2-B-P58.20-Map11 RIX-comp_DmelCaf1_2_2-B-P58.20-Map11 6 5050 + - ------ + 2L 47519 52563 ms1_dmel_chr2L_RIX-comp_DmelCaf1_2_2-B-P58.20-Map11 RIX-comp_DmelCaf1_2_2-B-P58.20-Map11 6 5050 + + 2L 16205 16490 ms2_dmel_chr2L_RXX-LARD_DmelCaf1_2_2-L-B578-Map1 RXX-LARD_DmelCaf1_2_2-L-B578-Map1 2551 2828 + + 2R 24134 24428 ms4350_dmel_chr2R_RXX-LARD-chim_DmelCaf1_2_2-L-B414-Map1 RXX-LARD-chim_DmelCaf1_2_2-L-B414-Map1 2894 3273 - ------ - -**reference :** ]]> </help>