Previous changeset 0:69ea2a13947f (2015-06-21) Next changeset 2:bb0d4cd765c5 (2015-09-29) |
Commit message:
planemo upload for repository https://bitbucket.org/drosofff/gedtools/ commit 1cc2b50091f512593c502176619998f5908fc8e8 |
modified:
BlastParser_and_hits.py BlastParser_and_hits.xml |
added:
test-data/al_sequences.fa test-data/un_sequences.fa |
b |
diff -r 69ea2a13947f -r 1964514aabde BlastParser_and_hits.py --- a/BlastParser_and_hits.py Sun Jun 21 14:31:29 2015 -0400 +++ b/BlastParser_and_hits.py Mon Sep 14 12:18:46 2015 -0400 |
[ |
@@ -14,6 +14,11 @@ the_parser.add_argument('--tabularOutput', action="store", type=str, help="tabular output file of blast analysis") the_parser.add_argument('--flanking', action="store", type=int, help="number of flanking nucleotides added to the hit sequences") the_parser.add_argument('--mode', action="store", choices=["verbose", "short"], type=str, help="reporting (verbose) or not reporting (short) oases contigs") + the_parser.add_argument('--filter_relativeCov', action="store", type=float, default=0, help="filter out relative coverages below the specified ratio (float number)") + the_parser.add_argument('--filter_maxScore', action="store", type=float, default=0, help="filter out maximum BitScore below the specified float number") + the_parser.add_argument('--filter_meanScore', action="store", type=float, default=0, help="filter out maximum BitScore below the specified float number") + the_parser.add_argument('--al_sequences', action="store", type=str, help="sequences that have been blast aligned") + the_parser.add_argument('--un_sequences', action="store", type=str, help="sequences that have not been blast aligned") args = the_parser.parse_args() if not all ( (args.sequences, args.blast, args.fastaOutput, args.tabularOutput) ): the_parser.error('argument(s) missing, call the -h option of the script') @@ -122,12 +127,14 @@ leftCoordinate = 1 return getseq (fastadict, FastaHeader, leftCoordinate, rightCoordinate, polarity) -def outputParsing (F, Fasta, results, Xblastdict, fastadict, mode="verbose"): +def outputParsing (F, Fasta, results, Xblastdict, fastadict, filter_relativeCov=0, filter_maxScore=0, filter_meanScore=0, mode="verbose"): F= open(F, "w") Fasta=open(Fasta, "w") if mode == "verbose": print >>F, "# SeqId\t%Identity\tAlignLength\tStartSubject\tEndSubject\t%QueryHitCov\tE-value\tBitScore\n" for subject in sorted (results, key=lambda x: results[x]["meanBitScores"], reverse=True): + if results[subject]["RelativeSubjectCoverage"]<filter_relativeCov or results[subject]["maxBitScores"]<filter_maxScore or results[subject]["meanBitScores"]<filter_meanScore: + continue print >> F, "#\n# %s" % subject print >> F, "# Suject Length: %s" % (results[subject]["subjectLength"]) print >> F, "# Total Subject Coverage: %s" % (results[subject]["TotalCoverage"]) @@ -149,6 +156,8 @@ else: print >>F, "# subject\tsubject length\tTotal Subject Coverage\tRelative Subject Coverage\tMaximum Bit Score\tMean Bit Score" for subject in sorted (results, key=lambda x: results[x]["meanBitScores"], reverse=True): + if results[subject]["RelativeSubjectCoverage"]<filter_relativeCov or results[subject]["maxBitScores"]<filter_maxScore or results[subject]["meanBitScores"]<filter_meanScore: + continue line = [] line.append(subject) line.append(results[subject]["subjectLength"]) @@ -164,14 +173,33 @@ F.close() Fasta.close() - +def sort_sequences (fastadict, blastdict, matched_sequences, unmatched_sequences): + '''to output the sequences that matched and did not matched in the blast''' + blasted_transcripts = [] + for subject in blastdict: + for transcript in blastdict[subject]: + blasted_transcripts.append(transcript) + blasted_transcripts = list( set( blasted_transcripts)) + F_matched = open (matched_sequences, "w") + F_unmatched = open (unmatched_sequences, "w") + for transcript in fastadict: + if transcript in blasted_transcripts: + print >> F_matched, ">%s\n%s" % (transcript, insert_newlines(fastadict[transcript]) ) + else: + print >> F_unmatched, ">%s\n%s" % (transcript, insert_newlines(fastadict[transcript]) ) + F_matched.close() + F_unmatched.close() + return def __main__ (): args = Parser() fastadict = getfasta (args.sequences) Xblastdict = getblast (args.blast) + sort_sequences (fastadict, Xblastdict, args.al_sequences, args.un_sequences) results = defaultdict(dict) for subject in Xblastdict: results[subject]["HitDic"], results[subject]["subjectLength"], results[subject]["TotalCoverage"], results[subject]["RelativeSubjectCoverage"], results[subject]["maxBitScores"], results[subject]["meanBitScores"] = subjectCoverage(fastadict, Xblastdict, subject, args.flanking) - outputParsing (args.tabularOutput, args.fastaOutput, results, Xblastdict, fastadict, args.mode) + outputParsing (args.tabularOutput, args.fastaOutput, results, Xblastdict, fastadict, + filter_relativeCov=args.filter_relativeCov, filter_maxScore=args.filter_maxScore, + filter_meanScore=args.filter_meanScore, mode=args.mode) if __name__=="__main__": __main__() |
b |
diff -r 69ea2a13947f -r 1964514aabde BlastParser_and_hits.xml --- a/BlastParser_and_hits.xml Sun Jun 21 14:31:29 2015 -0400 +++ b/BlastParser_and_hits.xml Mon Sep 14 12:18:46 2015 -0400 |
b |
@@ -1,4 +1,4 @@ -<tool id="BlastParser_and_hits" name="Parse blast output and compile hits" version="2.1.0"> +<tool id="BlastParser_and_hits" name="Parse blast output and compile hits" version="2.3.0"> <description>for virus discovery</description> <requirements></requirements> <command interpreter="python"> @@ -9,6 +9,15 @@ --fastaOutput $fastaOutput --flanking $flanking --mode $mode + ## Additional parameters. + #if $additional_filters.use_filters == "yes": + --filter_relativeCov $additional_filters.filter_relativeCov + --filter_maxScore $additional_filters.filter_maxScore + --filter_meanScore $additional_filters.filter_meanScore + #end if + --al_sequences $al_sequences + --un_sequences $un_sequences + </command> <inputs> <param name="sequences" type="data" format="fasta" label="fasta sequences that have been blasted" /> @@ -18,10 +27,25 @@ <option value="verbose" default="true">verbose</option> <option value="short">do not report oases contigs</option> </param> + <conditional name="additional_filters"> + <param name="use_filters" type="select" label="Use Additional Filters?"> + <option value="no">No</option> + <option value="yes">Yes</option> + </param> + <when value="no"> + </when> + <when value="yes"> + <param name="filter_relativeCov" type="float" value="0" max="1" label="Minimum Relative Subject Coverage" help=""/> + <param name="filter_maxScore" type="float" value="0" label="Minimum maximum BitScore" help=""/> + <param name="filter_meanScore" type="float" value="0" label="Minimum mean BitScore" help=""/> + </when> + </conditional> </inputs> <outputs> <data name="tabularOutput" format="tabular" label="blast analysis, by subjects"/> <data name="fastaOutput" format="fasta" label="hits"/> + <data name="al_sequences" format="fasta" label="Blast aligned sequences"/> + <data name="un_sequences" format="fasta" label="Blast unaligned sequences"/> </outputs> <tests> @@ -29,9 +53,12 @@ <param ftype="fasta" name="sequences" value="input.fa" /> <param ftype="tabular" name="blast" value="blast.tab" /> <param name="flanking" value="5" /> + <param name="use_filters" value="no" /> <param name="mode" value="verbose" /> <output name="tabularOutput" ftype="tabular" file="output.tab" /> <output name="fastaOutput" ftype="fasta" file="output.fa" /> + <output name="al_sequences" ftype="fasta" file="al_sequences.fa" /> + <output name="un_sequences" ftype="fasta" file="un_sequences.fa" /> </test> </tests> @@ -39,7 +66,7 @@ **What it does** -Parse blast outputs for viruses genome assembly. Outputs analysis and hit sequences for further assembly +Parse blast outputs for viruses genome assembly. Outputs analysis and hit sequences for further assembly. Output also the contig sequences which have or not been blast aligned with the indicated cut-off </help> </tool> |
b |
diff -r 69ea2a13947f -r 1964514aabde test-data/al_sequences.fa --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/al_sequences.fa Mon Sep 14 12:18:46 2015 -0400 |
b |
b'@@ -0,0 +1,1228 @@\n+>Locus_81_Transcript_1/1_Confidence_1.000_Length_155\n+GAGGTCCCTGCGCACGTTGTTCGGTGCGTCCTGATAGCTCATTCCCATTGATTAGCGCGA\n+GTTTGCTTGGCATCGTTAAGCTTAGGTAACGATTCATCCACGGAGCCCGGCATAACGCCG\n+TCTTAATCTTCTCCCAGCATGATCCGTGTAACTCG\n+>Locus_10_Transcript_6/8_Confidence_0.333_Length_1121\n+CGTTATCCAGCTGGATTATGTTGTTATTGTGTAGAAAATATAGGAGTCTCGAAGACTCTT\n+TAGAAAGCATACATGTAGTTGCCTCCCTCGTTCTGGATGTACTGAACGGCCTGCTTGCAT\n+ACTGATGTTAAGTGCAGAGCTATGTTTTCTCTTCCTCGCCAAAAAGAAAAACCACTTTTG\n+ACGACTATCTTACTTATCCAGGGTGTTAGTATTGGATTCAATATCCCTGTTGGTTCAAGA\n+GGGACTTTACCTGTTGCAATGTTAATTGCATCAGTCCTATGCCATCCTTTCCTTTCATAT\n+TCCGTCATAGTTTTGTAAGCTTCAGGATACCATTTGGCCAAACATTCTTTTAGACTAGGT\n+ATCTCAATCTTAGCTTTCTTCGCGATTTGATGTTGTCTTATAAATTCGCTGAATATTGGC\n+ATTCCGTTTGGACCAGCCTCATGAGATTGAATCATACGTCTAGGCCAAATGTTATTTTCG\n+CACGTTGGTCCTTCTGTTTTGTAAAACCTGTATGTGGGTACAACCGTCTTCTCCCATACC\n+GGTTTCAACTTCCTTAAATTTTTGATATAATTGCTGGAAATGTGTTTTTGCGGTCCAGGA\n+ATGTCGTCGGCTGCTATTTTCATTGAAAATTCGACTTGTTGATAAGCCGTCTTTTCCTCA\n+TCTGTCATTTGCTTCCACGTCAAGTTGATGTTTTCACTAAACAGTCCAGGAACATCGAAC\n+GTTGGGATGTCAGCAAGTGGTAGCTTGCTAGTAGTTTTCCACCCTTTCCATTCGTATATT\n+CCGAAACCACCCAGTCGTTTAGGCAGGTGCAGCCAGTGATAGCTCTGTCCCGTGTATTTA\n+CTCCACTTAATTTTGTTTGCTTGATGTAACCAATCCAATTGTTTCTTGCTTCTCCTTTCT\n+AATAAATAGATATTATTAGCGGTTGTCTCAACTTGTTGATTTGCAGTCCATGGTTGTGGG\n+TTCCACGGTTTCCTTTGTGTCACGCTAGGGATAGCCCGATTTGTCCAGCCTCTGACGCCT\n+TGAGTAGATATTTCTGTCCTTAAGAATTCGCAACAATTTTGCATAATTCCAAACTTGCTG\n+TTTTCTCCAACGGCGTTGATTGCCTGATAGCTATACCTAAA\n+>Locus_20_Transcript_1/1_Confidence_0.000_Length_191\n+CCCCCCCCACCTTCGCAATCACAGACAAATTAAACGTAAAACAAACGACATGGATGCATC\n+CAACCCTATTATTTCAAGCGATAGGAGCACTGTGAATCAGGTTTATGCGAAGATGGTGAG\n+GAGAAAACGGCAAGATCTTGCCTATCTCCAGGAGTTGGTGAGGACAGCCCATCGAGAGAT\n+TCTCCCCCAAG\n+>Locus_1_Transcript_5/7_Confidence_0.231_Length_163\n+AATAATGAGGCGGAATCGCCGTCGTGGACGTGGCAAGGCAAAGCTAACGGTAAGCGAAGG\n+AAGGGCCCCCGAGGAGGCAAAATCAGCACTCGAAGAACGCCTCCGAAAGCTGGAGCTCAG\n+CCACAGCCTTCCAACAACCGGAAGTGACCCCCCACCCGCTAAA\n+>Locus_15_Transcript_1/1_Confidence_0.000_Length_436\n+AGCCAGAGCATCCTGTCTAGCACCCCTCTGCGTCCCTGGTGCATCTCGGTGTACATTCTT\n+AACTTACCTATTTAATCACCCATGCGAGAACGATACTACGAAGATACTGAACTCAATAAC\n+CTCAGCTACAGTGAACAAGATAAACAGCTAAAACAAATATGTCAAACGAGTACCTTAGAT\n+CAATCCTCATGCCTGAGAGAGGTCCCTCCAGTATACCAGACGACAACGTCCGCCGTCATT\n+GCGTACGACAAGAAACAATCACTGCGAACATTGTGGTCGGATCTTCTGGAAAAGGGGCCT\n+TTGTTCTGTTCCCTAACAATCCTAGCAGCCTTATTGGCGCTCATTTTAAGTATGATGACC\n+AGGGCAAATCTTACAAATACACCCAATCACTCGTTGTCGCCCAACGCCTCAATGAGTCCT\n+ATAATTACGGAAGAAA\n+>Locus_58_Transcript_1/2_Confidence_0.333_Length_476\n+CCGAGTCTCCTCGGAGATTGCCTTTGCAGCGTTTCTTGGTATCGGTTGGTCCAACTCTTC\n+CAGGAATGGGTTCGTGTTCATGAATGGGTTCATGCTCGCGGACCCGGCTGTCCAGTGCGG\n+GAGGGTCTGTTTTGGGGTTCTTCTCTTTACGTCTCCCATATCTTTTATCTTTTTGTCCAG\n+AGATGTCAGGGTCTTTACTTGAACTAATCCTGGTGAGTTCCCAAGTAGTGGAATCCCAAG\n+AGAGTCCGTCAATTGGAGTTTCAGGTTGATGCCAAAAACTTCTCCCACACTGGACCCGAC\n+AACGGATCCTGTGAAAGCTGCAAATGGTGCCTTCCCCAAAACTCCACTCTTTAGTAAGTC\n+GTCTGCTGCATAAATGGCAAGCTGGTGGGACGTTCCCGTTACCTTGTTTGGTGATGTCGA\n+GGATGCGAGTCTTCCTTCTAATAATTGATAGCATGGTAGGAGGATGTAATCGCTTC\n+>Locus_48_Transcript_1/1_Confidence_0.000_Length_372\n+ATTGACGCGGCTCACCTCCGATTATCTCGAAAACCGCCCTAAGTTGTAGTTGTTGGGACG\n+GCTTGTTCAGACGCTCAATCGTTTCCTCGATTGACAATGGGTCAAGGTTGTTGAAAGGCC\n+CGTTCATCAACCGTACGAAAGTGTCGGCGATCCTCGCGATACGGTCGCTTGGTTTCTTGT\n+CGTTGGCGACAAAGGTTACCCTGCGTTCAATTGATTCAGACATTGTCTCCCAGCGTTTAA\n+TCATTGGCATCATCATACAGTCACTCACGATAGGCAAGGTGTATTGGCGTGCGCTCACTT\n+CTGGTACATCTGCGTCACTGGTTACTGGCCAATGGACGCGTGGCATTGTAGGTTTATACA\n+CAGTTGGACTAA\n+>Locus_4_Transcript_4/5_Confidence_0.333_Length_1170\n+GTCTTTTTCTTTCTCATTTGGTTCATGCAATTTTTCACCTGCGCAGGTATTTTTCTGTCT\n+TGTTGTTGATCTTGTTGTTGCCTTTGGTATCTGTCTATTTATTGCTTTAAGTTCTCCGAT\n+TGTTATTCGGGACAATGCGTCTGCGACATGATTATCCCTCCCCTTGAGATATTCTACTGT\n+GAATTCAAACTCCTCCAAGTCTAGTCTCATTCTGGTTAATTTTGAACTGGGGTTTCTCAT\n+TGAAAAGAGATGTGAACGCGATTAAGACTCATGGGAATGGTTTTGGCATCATGAAACCTG\n+ATAGTAGTTGGGAAATTGCTCCACCTCAACCCAAGGAAAAATACCTCAGATATTACGCAA\n+ATGGTGAATTCGTTGATATGAAAAACCTCGTTAACGAGAAACACCCCGTCATCGTTAACG\n+ATTATTGTGAATTTGCCCTGGAACATGAAATGTATCGTATTCTCCAACCTATGGACCCTT\n+CCAATTTTGCACCTCCACGGGAAACGGGAACAA'..b'AGAAAATTTAAGACAGAAGTACTCGATGAGCTTTGGGGAAGTGGTTGGC\n+AGGAACGAAATAAGATGAACCAGTATGAATGGTTGTCTTACTGCTGGGCAAATAATGTCA\n+CTAAGGTCGATACTCAAACCGTGCTTCTATCTTATGACATCAAATGGCAACAACTACCTG\n+CTGATATGAAAATGGCTATTCTCGGCGATTCGCGAGCTGATCTTGAAGCTCAAAAAACTC\n+ACAATAAAGTGATGCATGCATACAACGGTAACCCTTTGTGTCAGGGATTTCAAGAAGTTG\n+AAGCTTCAAAAACCTTCCTCAACATCGCGGAAGAGAGTAATTCAGTTCTGAAACCATATA\n+CTGGACTGGAAGCTGAGAAATACATCACCAACATTGTAGGAGACATGAATCCGAATCAAT\n+CAAGGATCTTCGATCAGGACAGGCTTAGAGGTAACCAATACAATGCCAATGGGGCTGTGG\n+TTCATAATGCTGTATCAACTATTCCGTTTACAAACCTCATTCCTAGGACGATTCGATCTG\n+ATGATGACGTCCTTGAGAAGTCGGCCAACAGATTACAGGTTACAGAGACAAACGTTACGG\n+ATTACTACGTTAATCCGATTGAGCCAACTGAATTATCCAAAACAATAAGTGACCAGATCA\n+AAAACAATCAATCATCTAACTGGCGACGAGATAACACGTCATTGGCTGGTTTCAATAGTT\n+TCGACATTGCGACAGTCAACACTGCACTAATTGCAAGAGGTCTAAGCACTGAATCAATGA\n+CTCTCAAGTTAGAGCTATTGCACGGAATAATGGCTATGCAGGTTGAAGCACCAATGATCA\n+ATTCCAGCACTTATTCGATCGTAGATAATCATACAATCCCGACCGTAACTGACAGGGCCG\n+TCATAGGCATCAATGACTCGCCTGTGTTTGGCGAGGACTGTGGTGGTGATCTTCCTGAAT\n+ATCCTTTCGGCGGCGGAACCGGTACAATTGCCTTTCACCTAACATTGCAAACTGTTCCTG\n+AAGAGAGGAGAGATAAGGCAATCTTCTGCCCTCCTGGTTTGTTGCAAGCAGCCCGAGATG\n+GAGCAGAGGCATTGGCCCTATTTGTTTTGTCGATGTCTGAATGGCCTTTCGGTATTTATA\n+CTGTCACCAAGAGAACAACCGATGAGAAGGGACTGAATCCTGCGGATCAGGTTTACGTGC\n+CGATGGAGACCATAACCCGTGTAGGTGGAGATAGAGTATTGGACGTTGTACTTCCTCGAA\n+GGTATGCGGTTGCAAATCCAACGACTCAAGGAAATGCTAATGCTCTAGCAGTTATACAGC\n+CTCAAGCTGGGCCTTTAGATAACGGTGCGGACGGATTAGCCGCTGGTGAATTATTGGATG\n+TCAATTTCATCGGCGCCGACGGCATTACTGAATATCCATTGACATATTACTTGTATACCT\n+GGGCACTTCGATTTGATATAACGACGATTAGGCAATACATTGGTAGAATGGCAGCGTTAA\n+TTGGAGTGAAACACCAACTATGGGCTAGTCATGAAATCAGAGTAGCTTTGTGTCAAGTTG\n+CACCCAAAATGGTGGTCGGAGTTACAGGTTCGGGAGACCTGCCAAGAGGATCAGCCGCTG\n+CAAGCGAGGTATGTTACTCAAGCTTATTGGAGGTCTCACGCTCTGAAGAAGATTTTCCGC\n+TGCTTGGTCAGGTTCAAGCCGATTTCAGAGTCTTTGAAACCAATACAAGCACATGGAATA\n+AAGTAGTTTTGGGATTGGCAACAGCACCAAACGTGACAAGTGAACAAAACATGCATGTAC\n+CATTCGTTGTTGGCGATCCGAGATCTAACGCGTGGGACCGACTCGAAGCAGTACCAATTG\n+CTGCTGCTTGGCAGATGTACTACCATTCAAGGGGCGTAACTACTGCCGCTTGGAATGATG\n+CGTACACTAACGTAAATAACGTTTGGTTGCAAAAGATGGCCCGTGATAGCTTCTCAACGA\n+CCCAAAGCACTGGGACGATACTGCCTGCCAGATATGGTAAGATAGTCAAAAACCTGATGA\n+GAAACATGTTTGAAAGAGAACCTGCCAAAGTAGTAACAAGCGTGGGAGGCGATGAATATG\n+AGATAACCCATTTTGAGCGCTGGTTACCGGGTAATAGATATGCTTCCGTGTTTGAACAAG\n+ATGAAACTGAAGTTAATCTGTTTCCTCCAACTTTATTACCGGATATTTGGGTTCAATATC\n+CAGCGACTCACACCCCAATCATGTGTGCTTCGTTCCCACCCGTTTTCGGCCAAGACTCAA\n+CACAAGGGTTCGGCAAAGAATCACAACTTATACCCTTTCGAAACGCAAACAACAATCTTG\n+TAGCACCATACGTTGAAGCTTTCGTTGCCAATCAAGCTTATTTCCCGATAGGATCGGGCC\n+CAAACATCAACGACAAGGTTCTGTGGAATAGTAGATTGTGGATGACAAGTGGTTTCGTTC\n+AGTACTTGGATTACGCTGGCAACGCAATCAACGAAGTGGTTCCTGCAGCAGGGCTACCTT\n+TGGGTAGGTCAATCCCATTACTGCCAGGGGAAGTTCAGCCGGTTGGTAACACCAACATGA\n+GCACAAGCTGCGTTCCTCGTTATTCTGTGGACGGTCGCCGAATTTTTACTTATGTCAACA\n+CAGCTCAATCCGTTCCTTTGATACAAGCGTGTAATAGAGCCAATAGATTGGCCAGATCGG\n+CATGGCTATTATTGCATGTCTACATCGAACCAGAGTTGCAGCTATTGAGCGATGAAGTGG\n+TGGACATATTCGACCAACTGACAAGCAAGACTTTTTTAGATGTAGCAAAATCGGCTGCGG\n+ACAGTGCGGAGGGCAACATTCCGGCAACGAAGGTATTGACAGACCTCCAGGCAGTGGATT\n+CAGCAACGCTGCCGAGTACTTTGGATCCATCCACAAATATGCTCCAACCAGCTCCTTTAC\n+TCGGCGAACCTACGACAAATTAACTCGATACATTCATGATGGGGTAAGTCGAAGTAGTCA\n+GAGAGAAATTTCATCATTGTCGGATGATTTAGTTAGATTTAGTACTCTCACAGATTTTAC\n+TGTCATCGATTTTATTACTAAGAAATTAGATTTAGAACGTCCGATTAGTACTCAAGGAGA\n+TTTATTGGCGTTAGAACCTAGATGCAAGGGTGACCTCGCTGTAGCAAGACTTAAGATTAA\n+GGACATAATAGGTAGGATTGATAAGGATATTAGAACGTGGTGTGAAGCAAATTTATGCCA\n+TTTGGATGCAATCTTGGTAACGAACTTGATAATCTGGGGACAAATCTGGGGGTTGGAAAT\n+TCTGAAAGCTTTACATTCAACAGGAATATTAAACGATTTCGATACTTTCGCAACAAAAGG\n+AAGCAAGATTAGTGCATTTGTCAAGCGTTTTCCCTTCGACAAGGATGATGCCAAAGCTAG\n+ATGGGCAGAGATCAACACGCTGACGGGCTATTTGCAGAATGATTTTGGAAACTTCGATTA\n+CGATAAGGAATTTGAAGCTTTAGCTACCGGAGATAGTAACCACCCCGCTTGGTGGGAAGA\n+AGTATTCACTAAGAAAATAAAAGAGTTGATGACACATCAAGAACACAAAAAGTATATCAG\n+TTTTGAAAAATATGTTAAGGAAGGATATTGGATAACATCTGGCAGCAGCAGCATCGGCAA\n+AGTCAATTGGTCATATGACGGAGACTTGGGCAAATTTAAGGCTAGAAAGAACATGTTATT\n+AGATCTATATACGCCGGACGAAATCTACAAAATGGCTGTAGAATGGGACGGGAAACTAGA\n+GAATAGAGTGTTCATCAAAGATGAATTGGCAAAGAGAAGATTGGCAGTGGCAAGCAATAT\n+TGAAGCATATTTGAATCAAGGGTATATATTTTATCTATTTG\n' |
b |
diff -r 69ea2a13947f -r 1964514aabde test-data/un_sequences.fa --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/un_sequences.fa Mon Sep 14 12:18:46 2015 -0400 |
b |
b'@@ -0,0 +1,484 @@\n+>Locus_8_Transcript_1/4_Confidence_0.400_Length_658\n+TTTGCTTTTTTCTTTTATTCTCTGCCTTCCATCTTCACTATGACCATCTCATCATTAAGG\n+CAAATCCGAAGAGGCTCTAAGACAATCGGATCGATTTATGCAAATGATACTCCAATCTTC\n+GAGCGTTTCTTCCTGTGCCTCTCCGCCACCACGAAGAATAGCAACTCGTCATTATGGCAA\n+ACAAAGATCGAAGACCAGTTTTCACCTCAATCTAATGATGTTTGCCTCATTTATCCTGAT\n+CCGCTGTGATCCGCTTGTAATCAATCCCCATCCATCCTCAGCCAGGGGCAAGTTCAAGTT\n+GCCAGCTCTCCGCAGCGATCTCCTCCACGGTTCCTTTATGATCTCGAGTTTCATCCTTTC\n+CACTCCACCTGGCTTGAGTGGGTGGCTCAACATAATGGGGGGATCTGCCGTTATTCGGTT\n+GTATCCCGATCGCAGCCGCAGCTGCAAATAAAATAAGAGTCGACTTCAACACTCGACTTG\n+GCTTAATGCAAAGTGCCGCTGAGGAGAGATGCGTGGGTGGGCCCACATTGGGAAGCCAAC\n+TGGGTCTGCTCTAAGTCGCAGAGTGCAGATAAACACTGCCAAAAAATATGTGACTAAAAT\n+CGAATTTATTTCCTGGGTCGTCGATTAGGAAATAAGAATGAGAGGTAAATGCGCAGTC\n+>Locus_32_Transcript_4/5_Confidence_0.286_Length_242\n+GGTGGTGCAATTTGGAGTCTGGCCGCGCGATTAGCTAAGCGCTGCGACCATATAATCGCA\n+TTGACAACTCCGATTTGTGTGCGGGAGTCGAGGAAGTGCTCCACTTTATGGCAGTCACAA\n+TTTCATGTTTGAATGTTTCGCCTCGATAGGATTCGGAATGGGGATTTTCGAATTGCGATT\n+TGGTCCGCTGGCCTGGCCTGGACTGGCTGGCCGCTTGTCCGATTCTATCGAATGCGTGTC\n+TC\n+>Locus_5_Transcript_3/6_Confidence_0.222_Length_630\n+CGCCTGCATTAAAGCACTCCCACAGATTTACGGTCTCACCTTCCCCCTTCAAAAGATTGC\n+AGGCAACTTCTGCCAAGTGTTCAGCTCCTTTTTAGAGCATCGCAATGGATCTTGGGGCAG\n+CTGAGACTTTTCCGGTATTCCACTCGACAGCAACTTTCATGTTCTCGGCAGATGCGCGGC\n+AAATGATAACATAAAATGCGTTTTCCATTTCGAGGCATAATTTCGTGGCGCGGGCAAAAC\n+CATTTTCCGCATTTTCTCGGCCAATTTTCGTTGCCTCTGCGTGCTGTTCACAACTTGGCG\n+GTGAAAGAGGCACTATGAAAGAATCAAAAGCAGTCAACTCCTGTTCAGCAGCAAACAAAC\n+AGCAAAGAGGCGGGAAAAACAATATTGTATTTTGCGGAAGGGGAAAGGTACGAGGTCCCC\n+CCCGGGCTCCAGAAGTCTGCTGGAGCAATAAAAATAATAATCGAACTGCAGCCGCATTGC\n+GTTGGCCGCAAGGTCATTGCCCATTGGCTCTGATCGATTGGGCCAACAAGGTGGGCAAGA\n+GGGGCGATTCGAGCCAAGAGAAATGAGAGAGGGCAGAAACTGCCGTTGGTTAGCGATGGT\n+AATTTGATAATTGGCAAAACTGTCTGCTCG\n+>Locus_3_Transcript_3/5_Confidence_0.375_Length_130\n+TCCCATCGGCGGGGGATGGGAGGAAGGATGCCGGTGATGCGCCTTACGACACCTTAAGGT\n+GTCATAGACACGGTGAGGTGGCAATTAGGGCGCACCGTGGGGCACCCTTCACCGTCAACC\n+CTTCTTATCC\n+>Locus_8_Transcript_4/4_Confidence_0.200_Length_135\n+GCAACGAATATCTTTAATTACGCTCTATAATCAAATATAGAGAGGGTCTTAAATATTCCC\n+AGAAGAGGCCATTTCAAGTCACTAACAATTTGACTTTGCTTTTTTCTTTTATTCTCTGCC\n+CTCCATCTTCACCAT\n+>Locus_32_Transcript_2/5_Confidence_0.286_Length_124\n+CGCAGGAAAGGTGAAGGTGGTGCAATTTGGAGTCTGGCCGCGCGATTAGCTAAGCGCTGC\n+GACCATATAATCGCATTGACAACTCCGATTTGTGTGCGGGAGTCGAGGAAGTGCTCCACT\n+TTAT\n+>Locus_39_Transcript_1/2_Confidence_0.667_Length_187\n+ATTAAAACAGAGGAGACAGATGGCCCTTGCGAGCGTTGACTGTCTCTGATTTCTGCCCAG\n+TGCTCTGAATGTCAAAGTGAAGAAATTCAAGTAAGCGCGGGTCAACGGCGGGAGTAACTA\n+TGACTCTCTTAAGGTAGCCAAATGCCTCTTCATCTTGGAGACCAGCTGCGGATATTGGTA\n+CGGCCTG\n+>Locus_11_Transcript_2/2_Confidence_0.333_Length_1110\n+GCTTATCTTTCATATGGTCCATAGTAAATACATTAAAAACTAACGACCTTGGGACACGGC\n+CGCTGACACTGCGAAGGATGTCAAAAAAAAGAACCTGATTTTTAATTTCGACTGGACGAC\n+AGTTGAAATAGTGGGAGCGGTTTCCATCCCAGTATTCAAAACGGCGGTTTGTGATGATGT\n+TTACATAATCCTCAAATTTGTGCTCGTATCCTGCTTGTGTGTCGTTCTCAAACCAAAAGC\n+GTATAAAGACAGTGCCGGAACGCTTGAATTTCTTGAAATAACATTCCTGTTCTGAAAGGT\n+ATCCGTCGGTTTCGACAAGCACTACAGGATCAAAAATCATACACCCATATGCGATATTTG\n+CTCGTGAAATATGCATTATTTTTCCAATGTCTGGTGTAGAAATGTCATAGATAGAATGCA\n+AGAACATCAATCGTTCTGAAGTTACTGTACAATATTGAGCCGGTTTACGGCAAATGACTT\n+TCGGGTCTTCATTGAGATGCAGTTTTAGAACTTCCGAATGGTGTTTGGTGAGACCCTCTT\n+GTTTGTATGATCGAAGTTCAAATAAGTAACGACTGTGACGGTAATCGTCGTTAGCATCGA\n+GTAAGGGGTAACATGTATGGGTATATTTCTCTGCTGCATTCAGATGGGTCAAAGGGTTAC\n+CCCCAATATCTTTTAGTACACAATCAAATCCGTTGATTGGTACAGTGCCTCGTTCAATCC\n+TCATTCGATACTTCATGTAACGTTCGCTCAATAAGCGATGTGCACGTGCGAATGCATGAG\n+GTGCACTATCCGTAGCTTTCGAATAATCTAGGTTGAACATGCTGAATGATTTATTCAGTT\n+TGGATTCTTGATTCGATGTGAGATGTTGCTTGATGTATACTTTTTGGAAGTTGCGCTTCG\n+CAAGTATTTCTTGCTTAGCTTTGCAAATTCTTTTGTGTGCATAGGCTTCTTGGAACAATG\n+CATCACTGCCCAGACTCTTTGCAAGTTGAGCTTGTGTGACAGCATGTGTTGATGGAAATT\n+CAGAAGCGACGTTATCAAGAACAAGGTCAATTGCGGCACTATTTTCAATCGAATCAAAAG\n+TGACGTCTGAACGATCAACAGACGTCAAAA\n+>Locus_25_Transcript_2/3_Confidence_0.200_Length_363\n+ATTACTTGAATTTTTCAACACTTCAATACAAGATTCCTGACAACCACATTGTACCTTCTC\n+CACTAAGTGGTTGCAATCCTATGGTAATCGACGATCAACTCTTTAACAAGCACATGAACT\n+GATGGACTAACTAATGGGCAAGCAAAGTGTTTTAGGCAT'..b'GGCATTTTGTTTTGCATTATCCAAGATTCTGTGC\n+TGGCGCAAACAACAAGCAACTCATCTGCCACTTTTCACACCCTTGAAGCGGAGTTTTCAC\n+TTCTCTTTGGGTGAGCTTTAGCCGCAAATGGAGGAGCCGCCTCATAAATCATCGCCCAGG\n+CCACCGGTGGGCGTCGTCCAACGTCCAATGCTAGAAGTGTGCGCAGGCGCAGCTGCTGCT\n+CACCCCCTCCTCCCCTTAATTTCCCCAACTCATTGTCAGCTGCAAAGGTGCCAAAGAAGT\n+GTACAAACTTCGCGACTGATTTGGGGGGATTTGGCCAGCCGTA\n+>Locus_6_Transcript_1/3_Confidence_0.400_Length_598\n+AAATGCGCAGTCTGCTCCAATATCTTATGGTTTGAAACAAAACAAAAGCAGTATACCATC\n+GACTATATATTAAGTAGATAAAACTATTTTCCAGTGAGAGATTGTGCTTCATTAACACTC\n+GCCGCTCGTTCGCCTGAGTTTCCTTTTGTTTGCCGCAGTTTTTGACACCAAACTTGGCGT\n+CACTTCAAAGCCCTGGCGATTAATCTTGAGTGCTGGTGGGGAAGGAGTGGGGCTTCTCCA\n+GTGCCAGATCTTCCAGATCCGGTCGTGTCTGCAATTTGCAGGCATTTTGTTTTGCATTAT\n+CCAAGATTCTGTGCTGGCGCAAACAACAAGCAACTCATCTGCCACTTTTCACACCCTTGA\n+AGCGGAGTTTTCACTTCTCTTTGGGTGAGCTTTAGCCGCAAATGGAGGAGCCGCCTCATA\n+AATCATCGCCCAGGCCACCGGTGGGCGTCGTCCAACGTCCAATGCTAGAAGTGTGCGCAG\n+GCGCAGCTGCTGCTCACCCCCTCCTCCCCTTAATTTCCCCAACTCATTGTCAGCTGCAAA\n+GGTGCCAAAGAAGTGTACAAACTTCGCGACTGATTTGGGGGGATTTGGCCAGCCGTAC\n+>Locus_22_Transcript_1/1_Confidence_0.000_Length_312\n+ATCGAAAAATTTGTCAAATCTGCTAAGAAAAAACTAACAGATGAATGTTTCCCAGTCGAA\n+GCTTGCAACGAACATGAACCTGAATTCGATGAATCAGATTTAGGTACAGGAATAACCTAT\n+TCACCCATTTATGCAGTCGTCAAAGTACAAAAATGTGAACTTCCTGCAACTCCCGTGCCG\n+TTTGATGAACCTGTCGAGAAGGATAAACCAGATACAGAAAGGATAGAGGTTGGTGATATA\n+CGCAAATCTATGGATGAATTCACACGCTACCTTAAATTCACACATGATTCAGAAATTAAC\n+AATATGAAATCA\n+>Locus_37_Transcript_2/3_Confidence_0.400_Length_145\n+TTATTTAGGTCACAAGTGTACTGACAAAGGTATATTGCCAGATGACTCCAAATATGAGGT\n+AATAAAGAACTGCCCCAAACCAGTAAACGCAGACGAAGCTAGACGCTTCGTGGCATTTTG\n+CAATTATTACAGAGGATTTATTAAG\n+>Locus_3_Transcript_1/5_Confidence_0.125_Length_388\n+CCACATAATACAAATAAATTTCAGGCATCGGAAAAATATATAAGGAGATTGCAGCCAGCA\n+CCTTGGGGAACCAGCAAAGATCTCAGCTCGTTATGGCCATCGACATTGTCCGGCATCTGA\n+CAAATTTTTGGATTTTGCTTCACGGCGATCATCTCTCAATCTCAATTTTGGACCCAATCT\n+CGATCGCCATCTTCCCCGGCTGCCAATCCATTTGGTCTGTGGCCAGTAGCTGTGGGGCTT\n+GATTGCGTCGATCGGCCTGTGTGGAATTGGAATTGGAAGTGGAGCTCTAGTTAACAGTTG\n+AGACCTGGACACCGAATGCGTGTCTCCGCCCCCGGGAACTGCAGAAGCAACAACTGCAGC\n+CGCAAACAATTGACAGAGACAAGCGCCT\n+>Locus_13_Transcript_2/3_Confidence_0.600_Length_223\n+CTTCTGAGAAATCAAAGCCGGTGGTCTCGAAAGGCGGAAGAGTGAGGATTTTGGTGGGCG\n+ATAGGTGCCAAGTGTTAAGGGTTGTCGTGGGTTGATAGTCGTTAAACGTCAGCGAGGGTC\n+AGTGGCTTACCAGGGTCTAAGGATATTTGAGATGTGGACTTTTGCTTAGAGAGGAAAATG\n+CAAGTGAAAGAGTCAAGCTATAATTGTAAATGGTGAACTACAA\n+>Locus_23_Transcript_1/1_Confidence_0.000_Length_197\n+AATTGAGATTAAATGACAAAACTCCGGTCTATATCAAAAACTATAGAATGCCAGAAAGTC\n+AAAAACCAGAAATTCAAAGGCAAGTTGACAAATTAATAAAAGATGGCATCGTCGAACCAT\n+CTATTTCAGAATATAATAGCCCTCTTCTCTTGGTACCCAAGAAATCACTGCCTAACTCGG\n+AGGAAAAGAGATGGCGA\n+>Locus_41_Transcript_1/2_Confidence_0.667_Length_155\n+CAACTGGACTCCAATTTCGACTCGGATGCGGCCCAGGTGGCCAGCTGCAGTTGCTGGGCG\n+GCAATAAAACATTTACCACCGAATTAGCCCAGTCGGAGAGTAGTTCAAGTAGTTTAAGTG\n+CAAGACCACTTAAAATTCAGTTACGACTGCTGCCC\n+>Locus_71_Transcript_1/2_Confidence_0.333_Length_170\n+GATTAATGAAAACATCTTTGGCAAATGCTTTCGCAGTCGGACGTCTCGCTACGGTCCAAG\n+AATTTCACCTCTCGCGTCGTAATACTAATGCCCCCAAACTGCTTCTATTAATCATTACCT\n+CTTGATCTGAAAACCAATGAAAGCAGAACAGAGGTCTTATTTCATTATCC\n+>Locus_64_Transcript_1/1_Confidence_0.000_Length_172\n+AGGGATGTTGTTTCCGTAAAGCGCCACGGTTCCTGTGGTGTCTCGTGCGCTCTATTCGGC\n+CCTTGAAAAACCGAGGGAGGCTATTTGAATTTCGTGCCAGGCCGTACCGATATCCGCAGC\n+AGGTCTCCAAGGTGAACAGCCTCTAGTCGATAGAATAATGTAGGTAAGGGAA\n+>Locus_5_Transcript_6/6_Confidence_0.111_Length_262\n+AGGGATTAATCCCTAATCAAAGATCCATTCTTACACTGAAGTTTGGTTAAGATTGAAGCT\n+ATAGCTTGAGCGGAGTTAAGCCATCTTCAAGGAAACTTCTTCTTACGAATCGCAGCGATC\n+CCTAAACCTATTTCGAGTTGACCCCAATTTCCAGACCTCCAATGGTTATTACGGTTAACC\n+AGAGATTACCCAACCGACTAACCGACTAACATTTGCCACTTCATTGCCCTGCCACCTTCC\n+GCCGAACGCCGAACGCCGAACG\n+>Locus_50_Transcript_1/1_Confidence_0.000_Length_132\n+TATAATGGATCATATCCGCGAATATATCACTGACATGACCCATTTTTAAGTTGTTGGTGA\n+ACAAGTCATGTTGAAGAATTTCCGCGCTGACAAAGCAAATAAGCACGAGGAACACAAACA\n+TTTTGATATGAC\n+>Locus_61_Transcript_1/2_Confidence_0.667_Length_164\n+CACCGTCCTGCTGTCTATATCAACCAACGCCTTTCATGGGGTCTCATGAGCGGGAAGTTT\n+GGCACTTTAACCCGACGTTTGGTTCATCCCACAGCGCCAGTTCTGCTTACCAAAAGTGGC\n+CCACTGGGCACATTATATCATAACCTTGAACTTCATATCAGGAA\n' |