| Next changeset 1:8acb72264319 (2016-03-21) |
|
Commit message:
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/sam_to_fastq commit 0651eb8c86d890e4b223fec82ab3980932710030 |
|
added:
sam_to_fastq.py sam_to_fastq.xml test-data/input.sam test-data/output.fastq |
| b |
| diff -r 000000000000 -r 55107d4a728c sam_to_fastq.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/sam_to_fastq.py Mon Mar 21 17:33:26 2016 -0400 |
| [ |
#!/usr/bin/python
#
"""Extract the aligned reads of a SAM file and write them as FASTQ records."""
import sys
import argparse

# Bit of the SAM FLAG field that is set when the read itself is unmapped.
UNMAPPED_FLAG = 0x4


def Parser():
    """Parse the command line and return the resulting namespace.

    Returns:
        argparse.Namespace with two attributes: ``input`` (path of the SAM
        file to read) and ``output`` (path of the FASTQ file to write).
    """
    the_parser = argparse.ArgumentParser(
        description="Extract aligned reads from a SAM file in FASTQ format")
    # Both paths are mandatory: without them main() would only fail later
    # with an obscure error when trying to open None.
    the_parser.add_argument(
        '--input', action="store", type=str, required=True,
        help="input SAM file")
    the_parser.add_argument(
        '--output', action="store", type=str, required=True,
        help="output FASTQ file")
    args = the_parser.parse_args()
    return args


def print_fastq_sequence(samline, file):
    """Write one SAM alignment line to ``file`` as a four-line FASTQ record.

    The read name (column 1), sequence (column 10) and quality string
    (column 11) are written verbatim, exactly as stored in the SAM line.

    Args:
        samline: a tab-separated SAM alignment line, with or without its
            line terminator.
        file: a writable text file-like object.

    Raises:
        IndexError: if the line has fewer than 11 tab-separated columns.
    """
    # Strip only the line terminator: slicing off the last character would
    # eat a quality value when the final line of the file has no newline.
    samfields = samline.rstrip("\r\n").split("\t")
    # The trailing "\n" terminates the record; without it the quality string
    # of one read runs into the "@name" header of the next one.
    file.write('@%s\n%s\n+\n%s\n' % (samfields[0], samfields[9], samfields[10]))


def main(input, output):
    """Copy every aligned read of a SAM file into a FASTQ file.

    Header lines (starting with "@") and blank lines are skipped, as are
    reads whose FLAG has the "unmapped" bit (0x4) set.

    Args:
        input: path of the SAM file to read.
        output: path of the FASTQ file to create (overwritten if present).

    Raises:
        IndexError: if an alignment line has fewer than 11 columns.
        ValueError: if the FLAG column of an alignment line is not an integer.
    """
    # A single "with" owns both handles, so each file is opened exactly once
    # and is closed even if a malformed line raises.
    with open(input, "r") as infile, open(output, "w") as outfile:
        for line in infile:
            if line.startswith("@") or not line.strip():
                continue
            # FLAG is a bit field: test the unmapped bit rather than comparing
            # with the literal "4", so that unmapped reads carrying other bits
            # (e.g. 77 or 141 for unmapped mates) are excluded too.
            flag = int(line.split("\t", 2)[1])
            if flag & UNMAPPED_FLAG:
                continue
            print_fastq_sequence(line, outfile)


if __name__ == "__main__":
    args = Parser()
    main(args.input, args.output)
| b |
| diff -r 000000000000 -r 55107d4a728c sam_to_fastq.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/sam_to_fastq.xml Mon Mar 21 17:33:26 2016 -0400 |
| b |
| @@ -0,0 +1,28 @@ +<tool id="sam_to_fastq" name="SAM to FASTQ" version="0.1"> + <description></description> + <command interpreter="python">sam_to_fastq.py --input $input --output $output</command> + <inputs> + <param name="input" type="data" format="sam" label="SAM file for extraction of aligned reads in FASTQ format"/> + </inputs> + + <outputs> + <data format="fastqsanger" name="output" label="FASTQ extraction" /> +</outputs> + + <tests> + <test> + <param ftype="sam" name="input" value="input.sam" /> + <output file="output.fastq" name="output" /> + </test> + </tests> + + +<help> + +**What it does** + +Extract sequence and sequence quality of aligned reads in a SAM alignment file and return a FASTQ file containing those reads + +</help> + +</tool> |
| b |
| diff -r 000000000000 -r 55107d4a728c test-data/input.sam --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/input.sam Mon Mar 21 17:33:26 2016 -0400 |
| b |
| b'@@ -0,0 +1,201 @@\n+@HD\tVN:1.0\tSO:unsorted\n+@SQ\tSN:chr1\tLN:249250621\n+@SQ\tSN:chr2\tLN:243199373\n+@SQ\tSN:chr3\tLN:198022430\n+@SQ\tSN:chr4\tLN:191154276\n+@SQ\tSN:chr5\tLN:180915260\n+@SQ\tSN:chr6\tLN:171115067\n+@SQ\tSN:chr7\tLN:159138663\n+@SQ\tSN:chr8\tLN:146364022\n+@SQ\tSN:chr9\tLN:141213431\n+@SQ\tSN:chr10\tLN:135534747\n+@SQ\tSN:chr11\tLN:135006516\n+@SQ\tSN:chr12\tLN:133851895\n+@SQ\tSN:chr13\tLN:115169878\n+@SQ\tSN:chr14\tLN:107349540\n+@SQ\tSN:chr15\tLN:102531392\n+@SQ\tSN:chr16\tLN:90354753\n+@SQ\tSN:chr17\tLN:81195210\n+@SQ\tSN:chr18\tLN:78077248\n+@SQ\tSN:chr19\tLN:59128983\n+@SQ\tSN:chr20\tLN:63025520\n+@SQ\tSN:chr21\tLN:48129895\n+@SQ\tSN:chr22\tLN:51304566\n+@SQ\tSN:chrX\tLN:155270560\n+@SQ\tSN:chrY\tLN:59373566\n+@SQ\tSN:chrM\tLN:16571\n+@SQ\tSN:chr1_gl000191_random\tLN:106433\n+@SQ\tSN:chr1_gl000192_random\tLN:547496\n+@SQ\tSN:chr4_gl000193_random\tLN:189789\n+@SQ\tSN:chr4_gl000194_random\tLN:191469\n+@SQ\tSN:chr7_gl000195_random\tLN:182896\n+@SQ\tSN:chr8_gl000196_random\tLN:38914\n+@SQ\tSN:chr8_gl000197_random\tLN:37175\n+@SQ\tSN:chr9_gl000198_random\tLN:90085\n+@SQ\tSN:chr9_gl000199_random\tLN:169874\n+@SQ\tSN:chr9_gl000200_random\tLN:187035\n+@SQ\tSN:chr9_gl000201_random\tLN:36148\n+@SQ\tSN:chr11_gl000202_random\tLN:40103\n+@SQ\tSN:chr17_gl000203_random\tLN:37498\n+@SQ\tSN:chr17_gl000204_random\tLN:81310\n+@SQ\tSN:chr17_gl000205_random\tLN:174588\n+@SQ\tSN:chr17_gl000206_random\tLN:41001\n+@SQ\tSN:chr18_gl000207_random\tLN:4262\n+@SQ\tSN:chr19_gl000208_random\tLN:92689\n+@SQ\tSN:chr19_gl000209_random\tLN:159169\n+@SQ\tSN:chr21_gl000210_random\tLN:27682\n+@SQ\tSN:chrUn_gl000211\tLN:166566\n+@SQ\tSN:chrUn_gl000212\tLN:186858\n+@SQ\tSN:chrUn_gl000213\tLN:164239\n+@SQ\tSN:chrUn_gl000214\tLN:137718\n+@SQ\tSN:chrUn_gl000215\tLN:172545\n+@SQ\tSN:chrUn_gl000216\tLN:172294\n+@SQ\tSN:chrUn_gl000217\tLN:172149\n+@SQ\tSN:chrUn_gl000218\tLN:161147\n+@SQ\tSN:chrUn_gl000219\tLN:179198\n+@SQ\tSN:chrUn_gl000220\tLN:161802\n+@SQ\tSN:chrUn_gl000221\tLN
:155397\n+@SQ\tSN:chrUn_gl000222\tLN:186861\n+@SQ\tSN:chrUn_gl000223\tLN:180455\n+@SQ\tSN:chrUn_gl000224\tLN:179693\n+@SQ\tSN:chrUn_gl000225\tLN:211173\n+@SQ\tSN:chrUn_gl000226\tLN:15008\n+@SQ\tSN:chrUn_gl000227\tLN:128374\n+@SQ\tSN:chrUn_gl000228\tLN:129120\n+@SQ\tSN:chrUn_gl000229\tLN:19913\n+@SQ\tSN:chrUn_gl000230\tLN:43691\n+@SQ\tSN:chrUn_gl000231\tLN:27386\n+@SQ\tSN:chrUn_gl000232\tLN:40652\n+@SQ\tSN:chrUn_gl000233\tLN:45941\n+@SQ\tSN:chrUn_gl000234\tLN:40531\n+@SQ\tSN:chrUn_gl000235\tLN:34474\n+@SQ\tSN:chrUn_gl000236\tLN:41934\n+@SQ\tSN:chrUn_gl000237\tLN:45867\n+@SQ\tSN:chrUn_gl000238\tLN:39939\n+@SQ\tSN:chrUn_gl000239\tLN:33824\n+@SQ\tSN:chrUn_gl000240\tLN:41933\n+@SQ\tSN:chrUn_gl000241\tLN:42152\n+@SQ\tSN:chrUn_gl000242\tLN:43523\n+@SQ\tSN:chrUn_gl000243\tLN:43341\n+@SQ\tSN:chrUn_gl000244\tLN:39929\n+@SQ\tSN:chrUn_gl000245\tLN:36651\n+@SQ\tSN:chrUn_gl000246\tLN:38154\n+@SQ\tSN:chrUn_gl000247\tLN:36422\n+@SQ\tSN:chrUn_gl000248\tLN:39786\n+@SQ\tSN:chrUn_gl000249\tLN:38502\n+@PG\tID:bowtie2\tPN:bowtie2\tVN:2.2.6\tCL:"/home/galaxy/tool_dependency/bowtie2/2.2.6/iuc/package_bowtie_2_2_6/0d9cd7487cc9/bin/bowtie2-align-s --wrapper basic-0 -p 8 -x /home/galaxy/galaxy-dist/tool-data/hg19/bowtie2_index/hg19.fa -k 1 --passthrough -U 
/home/galaxy/galaxy-dist/database/files/000/162/dataset_162397.dat"\n+1/1\t4\t*\t0\t0\t*\t*\t0\t0\tGGGTTCTTGGCATCAGTTGCTGAATGAAGCCTCTCTGATGTAGCTTGGTCTTCTTCAGGGACTCCTAACAGTGGAGTGGAGGCTGTCTATGACTCTTTTAC\tBBCDFFFFHHHHHJJJJJJJJJJIJIJJJJJIIIIJIHIIIIJJJJJJIIIJIJJJIIJJJJJJJJJGIGHGHJHDEHHHFFDDCAAC>@@;ACCDDDDCC\tYT:Z:UU\n+4/2\t4\t*\t0\t0\t*\t*\t0\t0\tCACATCAACATGATTCATTACCATCATTTCATCAATAATCACTTTATTTATCTTGTTTCAACTAAAAATTTCTTTACAATCATTTCCTCTATCACCCTCAT\tCCCFFFFDHHHHDIJJJJJJJJJJIJJJJJJJJJIJJJJJJJJJJJJJJJGIJJIJJJJJJJJJJJJHIIJJJJJJIIIJJJJJJHHHHHHHFFFFFDDED\tYT:Z:UU\n+1/2\t4\t*\t0\t0\t*\t*\t0\t0\tCTGCTGCTTCTATGATTGCCATCTCCTGGAACTCAGACCCCTTCTAGAGTCTTTCTCCATTTAAAAAAGTTGTCTCATATGTATATTACATCCCAACCACA\tCCCFFFFFHHHHHJJJJJJJJJJJJJJJJJJJJGJJJJJJJJJJJJJJJIHIIJJJJJJJJJJJIJJJJHIIHHHHHHFFFFEDEFFEEEEEEDDDDDDDD\tYT:Z:UU\n+5/1\t4\t*\t0\t0\t*\t*\t0\t0\tGTTTCTATCTTACTAATTTGTCTTGAGTTTTATTATTTCTTGCCATGTAAACATTTATGTGTTAGTTTTTTTGTGAGTGTGCATGTATACATGCATGTGGG\tCCCFFFFFHHHHHJJIJJJJJIJJJHJHJJJHIJIJJJIJJJJJJJJJJJIJEIJJIIJIIJJJIIJJJJJJJHGHEEHHFFFFFFFFFEEEEECEDACDD\tYT:Z:UU\n+5/2\t4\t*\t0\t0\t'..b'GGCGCGGCGACGGGGATCAGGCTTCCTCGGCCCCGG\tCCCFFFFFHFHHHJJJJJJJHFDDDDDDDDDDDD69BDD05)07<>CCD58BDD-5?BBDDDD&5&0)0&5>-5&&09>&&)8<@C38CCC@>@52<<@99\tYT:Z:UU\n+50/2\t4\t*\t0\t0\t*\t*\t0\t0\tGGGAGCGGCGACGACGGCGGGAGCGGCCTCAAGAGTCTGAGGAGAGCCAGGAGGCTCCCCTACGGGCCCCACCGCCCCCCGACCCCGGGGGCGGAGGGGCG\tCCCFFFFFHHHHHJJJIIJDD@8@BBBB3?CC>><3>A>ACDCBA<ADDDAB<@<BBBDDCB?7BB@B@DBBDDDBDDDD99B@BD9<@BD-9@5<@@D##\tYT:Z:UU\n+48/1\t4\t*\t0\t0\t*\t*\t0\t0\tCCAGCCTGTTGTGATAGGTTTTGAGGGGGTTATGGGTAAGAAGTTAACTGCTTGTTCAATTTTATGTATGACTTACACTGAAGTGAAGGAATGCTGGATAA\tCCCFFFFFHHHHHHJJJJDHIJIGHIJJJDHFHIJJBGEGHGGHIJIJJCHIJJGHHHFHFFFFFFFFEEEEEEDCDDDDDDDCDDEDCDDDDDDDDDDDC\tYT:Z:UU\n+52/2\t4\t*\t0\t0\t*\t*\t0\t0\tCTTAAATGAACATAAATATATAAAGTCTTTTTGTTTGTTTTGTATTTAGTAGAGTTTAAAATCTTGGCTCTTACTGTGGTACAAGCCCAAAATAAGAACAA\t@CCDDFFFGHHHHIJIJIJJJJJJJJJIJJJJJIJJJJJJJIGGIJJJJIFGGIHIJJHIJJIJIGIJJJJJJIJJHIIIJHHGHHFFDFECDEEEDDDD;\tYT:Z:UU\n+51/1\t4\t*\t0\t0\t*\t*\t0\t0\tTGGGCATCG
AGGCTTGGCACTCAGATAGGAGGAGGAGGAATGGCTTTCTCCTGTTTTCTCTGGCATCACCCCTGCTGCCAGTCTCCTTTGATCCTGCTGCT\tCCCFFFFFHHHHHJJJJJJJJJJJIJIIJGIJGHJDFHGIIIIJJJJJIJJJIIJJJHHHHHHFFFFFEEEDDDDDDDDDDDDDDDDDDDCDDDDDDDDDD\tYT:Z:UU\n+49/2\t4\t*\t0\t0\t*\t*\t0\t0\tGCATGGTCATGGTATCTCTTCACAGCAATAAAAACCCTACTAATAACTAAGACAATCCAATCTCTTATCTGTCATAAAACCAGCTCAAAATGAATTAAAGG\t@C@FFFEEHHHHHFIJJJJJJJJJJJJJJIJJJIJIJJJJJJJJIJJJJJJJJJHJJJJJJJJJJJJJJJIJJJJJIHHHHHFFFFEDEEEEDDEEDCCDD\tYT:Z:UU\n+53/2\t4\t*\t0\t0\t*\t*\t0\t0\tCCGGAGGATTCAACCCGGCGGCGCGCGTCCGGCCGTGCCGGTGGTCCCGGCGGATCTTTCCCGCTCCCCGTTCCTCCCGACCCCTTCACCCGCGCGTCGTT\t@BCFFFFFHHHGGJJJJJJJJJJHFDDDDDDDDDDDDDDDD@BDBDDDDDBBD@DDDDDDDDDDDDDDDD@BDDDDDDDBDDDD?BD:ACDBBBDDDDDD<\tYT:Z:UU\n+55/1\t4\t*\t0\t0\t*\t*\t0\t0\tGATCCTCTTGCCTCTGGTGCTGCCAGCACCACCACCCAGCAAGAAAACTTTTAAAGACAGTCTCATTAGCTGAGCATGGTCTTGAACTTAACATATAGCTA\tCCCFFFFFHHHHHJJJJJJJJJJJJJJJJJJJJIJJJJIJJJJJJJJJJJJJJJJJJJJJIIJJJJJEIHHHHHFCEFFCCEEEEDDDDDDDDDDEEDCCE\tYT:Z:UU\n+56/1\t4\t*\t0\t0\t*\t*\t0\t0\tCTGCGGTTATGAGTACGACCAGGCGTGAGACTAATGCGTTCCGTGGGATTTTCAAGGACTGTCAGGGACGCACCGGACACAACACAAGTGTTGTGCTCTAC\t@CCFFFFFHHHHHIHJJJJJIJJJJIJIIHJJJIJJIJIJJIJFHIJJJHHHHHHFFFFFDEEEEEDDDDDBDDBDBBDDCDBDDDD?>ACDBCDDDDDDC\tYT:Z:UU\n+56/2\t4\t*\t0\t0\t*\t*\t0\t0\tCGCTCAAGCGTGTTACCCATACCTCACCGTTGATGTTAAAGTGATGCATCAACGAGTAGGCAGGCGTGGAGGTTGTGAAGAAGCCTTGGCAGTGATGCCGG\tCCCFFFFFHHHHHJJJJJIJIJJJJJJJJJJJJJJIIEIIIGGGIJIIJIHGIIIJGHIJJJJHHHBDDACD@BD@BDEDDDDDDDCCDDDD<CCCDDDDD\tYT:Z:UU\n+40/1\t4\t*\t0\t0\t*\t*\t0\t0\tCCTCTCTCTCTCTCCCTCTCTCTCTCTCTGCCCTTCTGTGCCTCTACTACCCTCTTAACTCCCCTCTCCGTGCCCTGAATAAACTCTATTCTATACTGTAC\tBBBFFFFFHHHHHJJJJJJJJJJJJJJJIIJIGGHIJJGHGIJJIEIJGIGIJIIJHFHIJIIIJJJJJIAHHFFEDDCEECCEEDDDDEEEEEEDECDDD\tYT:Z:UU\n+57/2\t4\t*\t0\t0\t*\t*\t0\t0\tTGCACCGGCGGCGGGGTGAGGTGTAGGGGGATGGGCGGGACGCTGGGAGGAGCCAATGACCCGTGCGAACGTTCAGGTCGATGGCAGCTGCGACGTGCGTC\t#####################################################################################################\tYT:Z:UU\n+58/1\t4\t*\t0\t0\t*\t*\t0\t0\tAGCAGGAAGTAGCCCAAGAGATACGACGCCCTCATTCCCTT
TTCATCATCGGCTTCCCCTTCCCTTTTTCTTTCCTTCTCTTTATAATATAAAAAAGGGAG\t??@DADDDDHHHAGHIBB;FACDB@DHIF@??G>DBBEBFG?C=@=F4==F4ADGHCHH;;3@DC>;;>CCCCC(-(5:A>3:;>>;BCDDE;@><<BBBB\tYT:Z:UU\n+59/1\t4\t*\t0\t0\t*\t*\t0\t0\tGCCCAAGGTACACGCCCCTCAGCTCTGCCCTACATAGAAATGGGGCTTGGACAGGAGGTACTGTCCAGTCTAGGTGCTGGCCAGCGGAAAAAATCCTCAGT\tBCCFFFFFFFHHHJJJJJJJJJJJJIJJJJJJJJJIIJJJJJJJJJJJJJIIIJJIIHAEHFFFFFFECCECCECACDDD?ACB?=B9BDCA<8>CCCA>:\tYT:Z:UU\n+59/2\t4\t*\t0\t0\t*\t*\t0\t0\tGTGTTTATGAATCAAGGCAGGCTAGGTGGAACCTGTTCAAGGCCGCTGGATGCTGAAGGATGGGATGGTCGAGTCACGTGTTCATCGCCTCCCTTTTCTCC\tBBBDFFFFHHHHHJHIJJJJJJJJJJCFHIJJJJIGHGGIJJJJJJJIJHHHIJJJJJJHHHHFFFFDDEDDDDDDDDBB?BDDEEDDDDDDDDDDDDDDC\tYT:Z:UU\n+55/2\t4\t*\t0\t0\t*\t*\t0\t0\tTCTGACCTGTTATTCTCCATATATTTATATTTATATTTCTCCATATATTTATGAATATTATATGAGGTATATAGTGTGGAAGTAAAGGCCTATAATTCCAG\tCCCFFFFFHHHHHJJJJJJJJJJJJJIJIJJJIIGIJJJJJIIJJJJJJJJIJIJJJJJJJJJJIJJFHIJJJJIJIIJJJJJJJIIIJJHHHHFGFFFFB\tYT:Z:UU\n+54/1\t16\tchrUn_gl000220\t158967\t255\t54M3D11M4D36M\t*\t0\t0\tCTAGTAGCTGGTTCCCTCCGAAGTTTCCCTCAGGATAGCTGGCGCTCTCGCAGAGACTCGAAACCCGCAGTTTTATCCGGTAAAGCGAATGATTAGAGGTC\tDDDDDDDDDDDDBDBDDDDDDDDDDDDDDDDEDDDDDDDDDDDDFFHHJJJJJJJJJIHGIHGJJJJJJJJJJJJJJJJJJJJJJJJJHHHHGFFFFFCCC\tAS:i:-58\tXN:i:0\tXM:i:5\tXO:i:2\tXG:i:7\tNM:i:12\tMD:Z:54^CCC3G1A0C0C0C2^GCCA36\tYT:Z:UU\n' |
| b |
| diff -r 000000000000 -r 55107d4a728c test-data/output.fastq --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/output.fastq Mon Mar 21 17:33:26 2016 -0400 |
| b |
| @@ -0,0 +1,20 @@ +@34/1 +GTTTCCCTCAGGATAGCTGGCGCTCTCGCAGAGACTCGAAACCCGCAGTTTTATCCGGTAAAGCGAATGATTAGAGGTCTTGGGGCCGAAACGATCTCAAC ++ +DBDBDDDDDEDDDDDDDDDDDDDDDDDDDDDDDDDDDDDD?FFHHHJJJJJJIHJJJJIJJJJIJJJJJJIIJIIJJJHJJIJJJJIJHHHHHFFFFFCCC +@37/2 +GCGCAGTCCGCCCGGAGGATTCAACCCGGCGGCGCGCGTCCGGCCGTGCCGGTGGTCCCGGCGGATCTTTCCCGCTCCCCGTTCCTCCCGCCCCCTTCACC ++ +@@CFFFFFHHHHGGJHIJHIJGHHJJJAFEBEE?@BBDDBD6BDD5:B8B<88@@5@C?@B7<B87;CAAAADD<&&&8590<CCCDCBD&&0590<?CCC +@43/1 +TGGTTCCCTCCGAAGTTTCCCTCAGGATAGCTGGCGCTCTCGCAGAGACTCGAAACCCGCAGTTTTATCCGGTAAAGCGAATGATTAGAGGTCTTGGGGCC ++ +BDBBDDDDDDDDDDDDDDDDDDDEDDDDDBBDDDDDDDDDDFFFFFHHGHHJIHGJJJIJJJJJJJJJJJJJJJJJJJJJJJJJJJJJHHHHHFFFFFCCC +@36/2 +CCGAAGTTTCCCTCAGGATAGCTGGCGCTCTCGCAGAGACTCGAAACCCGCAGTTTTATCCGGTAAAGCGAATGATTAGAGGTCTTGGGGCCGAAACGATC ++ +BCCFFFFFHFFHFJJIJIJJIJJJJJIIGHGIJIIHIIDIJJHIIIJIIJBHDBDFFEEEEDDDBBDDDD@BDDDDEDDCCBACACCCDDDDDDBBDDBDD +@54/1 +CTAGTAGCTGGTTCCCTCCGAAGTTTCCCTCAGGATAGCTGGCGCTCTCGCAGAGACTCGAAACCCGCAGTTTTATCCGGTAAAGCGAATGATTAGAGGTC ++ +DDDDDDDDDDDDBDBDDDDDDDDDDDDDDDDEDDDDDDDDDDDDFFHHJJJJJJJJJIHGIHGJJJJJJJJJJJJJJJJJJJJJJJJJHHHHGFFFFFCCC |