Repository 'sam_to_fastq'
hg clone https://toolshed.g2.bx.psu.edu/repos/drosofff/sam_to_fastq

Changeset 0:55107d4a728c (2016-03-21)
Next changeset 1:8acb72264319 (2016-03-21)
Commit message:
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/sam_to_fastq commit 0651eb8c86d890e4b223fec82ab3980932710030
added:
sam_to_fastq.py
sam_to_fastq.xml
test-data/input.sam
test-data/output.fastq
b
diff -r 000000000000 -r 55107d4a728c sam_to_fastq.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/sam_to_fastq.py Mon Mar 21 17:33:26 2016 -0400
[
@@ -0,0 +1,33 @@
+#!/usr/bin/python
+#
+import sys
+import argparse
+
+def Parser():
+    """Parse the command-line arguments of the script.
+
+    Both options are mandatory: without them main() would be handed None
+    and fail inside open() with an unhelpful TypeError, so argparse is
+    asked to reject the call up front with a usage message.
+
+    Returns:
+        argparse.Namespace with two string attributes: ``input`` (path of
+        the SAM file to read) and ``output`` (path of the FASTQ file to
+        write).
+    """
+    the_parser = argparse.ArgumentParser(
+        description="Extract the aligned reads of a SAM file as FASTQ")
+    the_parser.add_argument(
+        '--input', action="store", type=str, required=True,
+        help="input SAM file")
+    the_parser.add_argument(
+        '--output', action="store", type=str, required=True,
+        help="output FASTQ file")
+    return the_parser.parse_args()
+    
+        
+def print_fastq_sequence(samline, file):
+    """Write one SAM alignment line to ``file`` as a four-line FASTQ record.
+
+    Args:
+        samline: a tab-separated SAM alignment line, with or without its
+            line terminator. Field 1 is used as the read name, field 10 as
+            the sequence and field 11 as the quality string.
+        file: object with a ``write`` method that receives the record.
+
+    Raises:
+        IndexError: if the line has fewer than 11 tab-separated fields.
+    """
+    # Strip only the line terminator: a blind samline[:-1] would eat the last
+    # quality character of a final line that has no trailing newline.
+    samfields = samline.rstrip("\r\n").split("\t")
+    # The record has to end with a newline, otherwise the quality string runs
+    # straight into the '@name' header of the next record.
+    file.write(
+        '@%s\n%s\n+\n%s\n' % (samfields[0], samfields[9], samfields[10]))
+
+def main(input, output):
+    """Copy the aligned reads of a SAM file to a FASTQ file.
+
+    Header lines (starting with '@') and blank lines are skipped, and so
+    are reads whose FLAG has the 0x4 "segment unmapped" bit set. Every
+    remaining line is handed to print_fastq_sequence().
+
+    Args:
+        input: path of the SAM file to read.
+        output: path of the FASTQ file to write (created or truncated).
+
+    Raises:
+        IndexError: if an alignment line has fewer than two fields.
+        ValueError: if the FLAG field of an alignment line is not an integer.
+    """
+    # One 'with' owns both handles; the former extra open() calls created two
+    # file objects that were never closed.
+    with open(input, "r") as infile, open(output, "w") as outfile:
+        for line in infile:
+            # Blank lines have no FLAG column and would raise IndexError below.
+            if line.startswith("@") or not line.strip():
+                continue
+            flag = int(line.split("\t")[1])
+            # Test the bit instead of comparing with the string "4", so that
+            # unmapped reads carrying other bits as well (e.g. paired-end
+            # flags 69, 77, 133, 141) are filtered out too.
+            if not flag & 0x4:
+                print_fastq_sequence(line, outfile)
+
+if __name__ == "__main__":
+    # Script entry point: read the two paths from the command line, then
+    # run the conversion.
+    options = Parser()
+    main(input=options.input, output=options.output)
b
diff -r 000000000000 -r 55107d4a728c sam_to_fastq.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/sam_to_fastq.xml Mon Mar 21 17:33:26 2016 -0400
b
@@ -0,0 +1,28 @@
+<tool id="sam_to_fastq" name="SAM to FASTQ" version="0.1">
+    <description>extracts the aligned reads of a SAM file</description>
+    <!-- Dataset paths are quoted so that the shell passes each of them as a single argument. -->
+    <command interpreter="python">sam_to_fastq.py --input '$input' --output '$output'</command>
+    <inputs>
+        <param name="input" type="data" format="sam" label="SAM file for extraction of aligned reads in FASTQ format"/>
+    </inputs>
+
+    <outputs>
+        <data format="fastqsanger" name="output" label="FASTQ extraction" />
+    </outputs>
+
+    <tests>
+        <test>
+            <param ftype="sam" name="input" value="input.sam" />
+            <output file="output.fastq" name="output" />
+        </test>
+    </tests>
+
+    <help>
+
+**What it does**
+
+Extracts the sequence and the sequence quality of the aligned reads of a SAM alignment file and returns a FASTQ file containing those reads.
+
+    </help>
+
+</tool>
b
diff -r 000000000000 -r 55107d4a728c test-data/input.sam
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/input.sam Mon Mar 21 17:33:26 2016 -0400
b
b'@@ -0,0 +1,201 @@\n+@HD\tVN:1.0\tSO:unsorted\n+@SQ\tSN:chr1\tLN:249250621\n+@SQ\tSN:chr2\tLN:243199373\n+@SQ\tSN:chr3\tLN:198022430\n+@SQ\tSN:chr4\tLN:191154276\n+@SQ\tSN:chr5\tLN:180915260\n+@SQ\tSN:chr6\tLN:171115067\n+@SQ\tSN:chr7\tLN:159138663\n+@SQ\tSN:chr8\tLN:146364022\n+@SQ\tSN:chr9\tLN:141213431\n+@SQ\tSN:chr10\tLN:135534747\n+@SQ\tSN:chr11\tLN:135006516\n+@SQ\tSN:chr12\tLN:133851895\n+@SQ\tSN:chr13\tLN:115169878\n+@SQ\tSN:chr14\tLN:107349540\n+@SQ\tSN:chr15\tLN:102531392\n+@SQ\tSN:chr16\tLN:90354753\n+@SQ\tSN:chr17\tLN:81195210\n+@SQ\tSN:chr18\tLN:78077248\n+@SQ\tSN:chr19\tLN:59128983\n+@SQ\tSN:chr20\tLN:63025520\n+@SQ\tSN:chr21\tLN:48129895\n+@SQ\tSN:chr22\tLN:51304566\n+@SQ\tSN:chrX\tLN:155270560\n+@SQ\tSN:chrY\tLN:59373566\n+@SQ\tSN:chrM\tLN:16571\n+@SQ\tSN:chr1_gl000191_random\tLN:106433\n+@SQ\tSN:chr1_gl000192_random\tLN:547496\n+@SQ\tSN:chr4_gl000193_random\tLN:189789\n+@SQ\tSN:chr4_gl000194_random\tLN:191469\n+@SQ\tSN:chr7_gl000195_random\tLN:182896\n+@SQ\tSN:chr8_gl000196_random\tLN:38914\n+@SQ\tSN:chr8_gl000197_random\tLN:37175\n+@SQ\tSN:chr9_gl000198_random\tLN:90085\n+@SQ\tSN:chr9_gl000199_random\tLN:169874\n+@SQ\tSN:chr9_gl000200_random\tLN:187035\n+@SQ\tSN:chr9_gl000201_random\tLN:36148\n+@SQ\tSN:chr11_gl000202_random\tLN:40103\n+@SQ\tSN:chr17_gl000203_random\tLN:37498\n+@SQ\tSN:chr17_gl000204_random\tLN:81310\n+@SQ\tSN:chr17_gl000205_random\tLN:174588\n+@SQ\tSN:chr17_gl000206_random\tLN:41001\n+@SQ\tSN:chr18_gl000207_random\tLN:4262\n+@SQ\tSN:chr19_gl000208_random\tLN:92689\n+@SQ\tSN:chr19_gl000209_random\tLN:159169\n+@SQ\tSN:chr21_gl000210_random\tLN:27682\n+@SQ\tSN:chrUn_gl000211\tLN:166566\n+@SQ\tSN:chrUn_gl000212\tLN:186858\n+@SQ\tSN:chrUn_gl000213\tLN:164239\n+@SQ\tSN:chrUn_gl000214\tLN:137718\n+@SQ\tSN:chrUn_gl000215\tLN:172545\n+@SQ\tSN:chrUn_gl000216\tLN:172294\n+@SQ\tSN:chrUn_gl000217\tLN:172149\n+@SQ\tSN:chrUn_gl000218\tLN:161147\n+@SQ\tSN:chrUn_gl000219\tLN:179198\n+@SQ\tSN:chrUn_gl000220\tLN:161802\n+@SQ\tSN:chrUn_gl000221\tLN:1
55397\n+@SQ\tSN:chrUn_gl000222\tLN:186861\n+@SQ\tSN:chrUn_gl000223\tLN:180455\n+@SQ\tSN:chrUn_gl000224\tLN:179693\n+@SQ\tSN:chrUn_gl000225\tLN:211173\n+@SQ\tSN:chrUn_gl000226\tLN:15008\n+@SQ\tSN:chrUn_gl000227\tLN:128374\n+@SQ\tSN:chrUn_gl000228\tLN:129120\n+@SQ\tSN:chrUn_gl000229\tLN:19913\n+@SQ\tSN:chrUn_gl000230\tLN:43691\n+@SQ\tSN:chrUn_gl000231\tLN:27386\n+@SQ\tSN:chrUn_gl000232\tLN:40652\n+@SQ\tSN:chrUn_gl000233\tLN:45941\n+@SQ\tSN:chrUn_gl000234\tLN:40531\n+@SQ\tSN:chrUn_gl000235\tLN:34474\n+@SQ\tSN:chrUn_gl000236\tLN:41934\n+@SQ\tSN:chrUn_gl000237\tLN:45867\n+@SQ\tSN:chrUn_gl000238\tLN:39939\n+@SQ\tSN:chrUn_gl000239\tLN:33824\n+@SQ\tSN:chrUn_gl000240\tLN:41933\n+@SQ\tSN:chrUn_gl000241\tLN:42152\n+@SQ\tSN:chrUn_gl000242\tLN:43523\n+@SQ\tSN:chrUn_gl000243\tLN:43341\n+@SQ\tSN:chrUn_gl000244\tLN:39929\n+@SQ\tSN:chrUn_gl000245\tLN:36651\n+@SQ\tSN:chrUn_gl000246\tLN:38154\n+@SQ\tSN:chrUn_gl000247\tLN:36422\n+@SQ\tSN:chrUn_gl000248\tLN:39786\n+@SQ\tSN:chrUn_gl000249\tLN:38502\n+@PG\tID:bowtie2\tPN:bowtie2\tVN:2.2.6\tCL:"/home/galaxy/tool_dependency/bowtie2/2.2.6/iuc/package_bowtie_2_2_6/0d9cd7487cc9/bin/bowtie2-align-s --wrapper basic-0 -p 8 -x /home/galaxy/galaxy-dist/tool-data/hg19/bowtie2_index/hg19.fa -k 1 --passthrough -U 
/home/galaxy/galaxy-dist/database/files/000/162/dataset_162397.dat"\n+1/1\t4\t*\t0\t0\t*\t*\t0\t0\tGGGTTCTTGGCATCAGTTGCTGAATGAAGCCTCTCTGATGTAGCTTGGTCTTCTTCAGGGACTCCTAACAGTGGAGTGGAGGCTGTCTATGACTCTTTTAC\tBBCDFFFFHHHHHJJJJJJJJJJIJIJJJJJIIIIJIHIIIIJJJJJJIIIJIJJJIIJJJJJJJJJGIGHGHJHDEHHHFFDDCAAC>@@;ACCDDDDCC\tYT:Z:UU\n+4/2\t4\t*\t0\t0\t*\t*\t0\t0\tCACATCAACATGATTCATTACCATCATTTCATCAATAATCACTTTATTTATCTTGTTTCAACTAAAAATTTCTTTACAATCATTTCCTCTATCACCCTCAT\tCCCFFFFDHHHHDIJJJJJJJJJJIJJJJJJJJJIJJJJJJJJJJJJJJJGIJJIJJJJJJJJJJJJHIIJJJJJJIIIJJJJJJHHHHHHHFFFFFDDED\tYT:Z:UU\n+1/2\t4\t*\t0\t0\t*\t*\t0\t0\tCTGCTGCTTCTATGATTGCCATCTCCTGGAACTCAGACCCCTTCTAGAGTCTTTCTCCATTTAAAAAAGTTGTCTCATATGTATATTACATCCCAACCACA\tCCCFFFFFHHHHHJJJJJJJJJJJJJJJJJJJJGJJJJJJJJJJJJJJJIHIIJJJJJJJJJJJIJJJJHIIHHHHHHFFFFEDEFFEEEEEEDDDDDDDD\tYT:Z:UU\n+5/1\t4\t*\t0\t0\t*\t*\t0\t0\tGTTTCTATCTTACTAATTTGTCTTGAGTTTTATTATTTCTTGCCATGTAAACATTTATGTGTTAGTTTTTTTGTGAGTGTGCATGTATACATGCATGTGGG\tCCCFFFFFHHHHHJJIJJJJJIJJJHJHJJJHIJIJJJIJJJJJJJJJJJIJEIJJIIJIIJJJIIJJJJJJJHGHEEHHFFFFFFFFFEEEEECEDACDD\tYT:Z:UU\n+5/2\t4\t*\t0\t0\t'..b'GGCGCGGCGACGGGGATCAGGCTTCCTCGGCCCCGG\tCCCFFFFFHFHHHJJJJJJJHFDDDDDDDDDDDD69BDD05)07<>CCD58BDD-5?BBDDDD&5&0)0&5>-5&&09>&&)8<@C38CCC@>@52<<@99\tYT:Z:UU\n+50/2\t4\t*\t0\t0\t*\t*\t0\t0\tGGGAGCGGCGACGACGGCGGGAGCGGCCTCAAGAGTCTGAGGAGAGCCAGGAGGCTCCCCTACGGGCCCCACCGCCCCCCGACCCCGGGGGCGGAGGGGCG\tCCCFFFFFHHHHHJJJIIJDD@8@BBBB3?CC>><3>A>ACDCBA<ADDDAB<@<BBBDDCB?7BB@B@DBBDDDBDDDD99B@BD9<@BD-9@5<@@D##\tYT:Z:UU\n+48/1\t4\t*\t0\t0\t*\t*\t0\t0\tCCAGCCTGTTGTGATAGGTTTTGAGGGGGTTATGGGTAAGAAGTTAACTGCTTGTTCAATTTTATGTATGACTTACACTGAAGTGAAGGAATGCTGGATAA\tCCCFFFFFHHHHHHJJJJDHIJIGHIJJJDHFHIJJBGEGHGGHIJIJJCHIJJGHHHFHFFFFFFFFEEEEEEDCDDDDDDDCDDEDCDDDDDDDDDDDC\tYT:Z:UU\n+52/2\t4\t*\t0\t0\t*\t*\t0\t0\tCTTAAATGAACATAAATATATAAAGTCTTTTTGTTTGTTTTGTATTTAGTAGAGTTTAAAATCTTGGCTCTTACTGTGGTACAAGCCCAAAATAAGAACAA\t@CCDDFFFGHHHHIJIJIJJJJJJJJJIJJJJJIJJJJJJJIGGIJJJJIFGGIHIJJHIJJIJIGIJJJJJJIJJHIIIJHHGHHFFDFECDEEEDDDD;\tYT:Z:UU\n+51/1\t4\t*\t0\t0\t*\t*\t0\t0\tTGGGCATCG
AGGCTTGGCACTCAGATAGGAGGAGGAGGAATGGCTTTCTCCTGTTTTCTCTGGCATCACCCCTGCTGCCAGTCTCCTTTGATCCTGCTGCT\tCCCFFFFFHHHHHJJJJJJJJJJJIJIIJGIJGHJDFHGIIIIJJJJJIJJJIIJJJHHHHHHFFFFFEEEDDDDDDDDDDDDDDDDDDDCDDDDDDDDDD\tYT:Z:UU\n+49/2\t4\t*\t0\t0\t*\t*\t0\t0\tGCATGGTCATGGTATCTCTTCACAGCAATAAAAACCCTACTAATAACTAAGACAATCCAATCTCTTATCTGTCATAAAACCAGCTCAAAATGAATTAAAGG\t@C@FFFEEHHHHHFIJJJJJJJJJJJJJJIJJJIJIJJJJJJJJIJJJJJJJJJHJJJJJJJJJJJJJJJIJJJJJIHHHHHFFFFEDEEEEDDEEDCCDD\tYT:Z:UU\n+53/2\t4\t*\t0\t0\t*\t*\t0\t0\tCCGGAGGATTCAACCCGGCGGCGCGCGTCCGGCCGTGCCGGTGGTCCCGGCGGATCTTTCCCGCTCCCCGTTCCTCCCGACCCCTTCACCCGCGCGTCGTT\t@BCFFFFFHHHGGJJJJJJJJJJHFDDDDDDDDDDDDDDDD@BDBDDDDDBBD@DDDDDDDDDDDDDDDD@BDDDDDDDBDDDD?BD:ACDBBBDDDDDD<\tYT:Z:UU\n+55/1\t4\t*\t0\t0\t*\t*\t0\t0\tGATCCTCTTGCCTCTGGTGCTGCCAGCACCACCACCCAGCAAGAAAACTTTTAAAGACAGTCTCATTAGCTGAGCATGGTCTTGAACTTAACATATAGCTA\tCCCFFFFFHHHHHJJJJJJJJJJJJJJJJJJJJIJJJJIJJJJJJJJJJJJJJJJJJJJJIIJJJJJEIHHHHHFCEFFCCEEEEDDDDDDDDDDEEDCCE\tYT:Z:UU\n+56/1\t4\t*\t0\t0\t*\t*\t0\t0\tCTGCGGTTATGAGTACGACCAGGCGTGAGACTAATGCGTTCCGTGGGATTTTCAAGGACTGTCAGGGACGCACCGGACACAACACAAGTGTTGTGCTCTAC\t@CCFFFFFHHHHHIHJJJJJIJJJJIJIIHJJJIJJIJIJJIJFHIJJJHHHHHHFFFFFDEEEEEDDDDDBDDBDBBDDCDBDDDD?>ACDBCDDDDDDC\tYT:Z:UU\n+56/2\t4\t*\t0\t0\t*\t*\t0\t0\tCGCTCAAGCGTGTTACCCATACCTCACCGTTGATGTTAAAGTGATGCATCAACGAGTAGGCAGGCGTGGAGGTTGTGAAGAAGCCTTGGCAGTGATGCCGG\tCCCFFFFFHHHHHJJJJJIJIJJJJJJJJJJJJJJIIEIIIGGGIJIIJIHGIIIJGHIJJJJHHHBDDACD@BD@BDEDDDDDDDCCDDDD<CCCDDDDD\tYT:Z:UU\n+40/1\t4\t*\t0\t0\t*\t*\t0\t0\tCCTCTCTCTCTCTCCCTCTCTCTCTCTCTGCCCTTCTGTGCCTCTACTACCCTCTTAACTCCCCTCTCCGTGCCCTGAATAAACTCTATTCTATACTGTAC\tBBBFFFFFHHHHHJJJJJJJJJJJJJJJIIJIGGHIJJGHGIJJIEIJGIGIJIIJHFHIJIIIJJJJJIAHHFFEDDCEECCEEDDDDEEEEEEDECDDD\tYT:Z:UU\n+57/2\t4\t*\t0\t0\t*\t*\t0\t0\tTGCACCGGCGGCGGGGTGAGGTGTAGGGGGATGGGCGGGACGCTGGGAGGAGCCAATGACCCGTGCGAACGTTCAGGTCGATGGCAGCTGCGACGTGCGTC\t#####################################################################################################\tYT:Z:UU\n+58/1\t4\t*\t0\t0\t*\t*\t0\t0\tAGCAGGAAGTAGCCCAAGAGATACGACGCCCTCATTCCCTT
TTCATCATCGGCTTCCCCTTCCCTTTTTCTTTCCTTCTCTTTATAATATAAAAAAGGGAG\t??@DADDDDHHHAGHIBB;FACDB@DHIF@??G>DBBEBFG?C=@=F4==F4ADGHCHH;;3@DC>;;>CCCCC(-(5:A>3:;>>;BCDDE;@><<BBBB\tYT:Z:UU\n+59/1\t4\t*\t0\t0\t*\t*\t0\t0\tGCCCAAGGTACACGCCCCTCAGCTCTGCCCTACATAGAAATGGGGCTTGGACAGGAGGTACTGTCCAGTCTAGGTGCTGGCCAGCGGAAAAAATCCTCAGT\tBCCFFFFFFFHHHJJJJJJJJJJJJIJJJJJJJJJIIJJJJJJJJJJJJJIIIJJIIHAEHFFFFFFECCECCECACDDD?ACB?=B9BDCA<8>CCCA>:\tYT:Z:UU\n+59/2\t4\t*\t0\t0\t*\t*\t0\t0\tGTGTTTATGAATCAAGGCAGGCTAGGTGGAACCTGTTCAAGGCCGCTGGATGCTGAAGGATGGGATGGTCGAGTCACGTGTTCATCGCCTCCCTTTTCTCC\tBBBDFFFFHHHHHJHIJJJJJJJJJJCFHIJJJJIGHGGIJJJJJJJIJHHHIJJJJJJHHHHFFFFDDEDDDDDDDDBB?BDDEEDDDDDDDDDDDDDDC\tYT:Z:UU\n+55/2\t4\t*\t0\t0\t*\t*\t0\t0\tTCTGACCTGTTATTCTCCATATATTTATATTTATATTTCTCCATATATTTATGAATATTATATGAGGTATATAGTGTGGAAGTAAAGGCCTATAATTCCAG\tCCCFFFFFHHHHHJJJJJJJJJJJJJIJIJJJIIGIJJJJJIIJJJJJJJJIJIJJJJJJJJJJIJJFHIJJJJIJIIJJJJJJJIIIJJHHHHFGFFFFB\tYT:Z:UU\n+54/1\t16\tchrUn_gl000220\t158967\t255\t54M3D11M4D36M\t*\t0\t0\tCTAGTAGCTGGTTCCCTCCGAAGTTTCCCTCAGGATAGCTGGCGCTCTCGCAGAGACTCGAAACCCGCAGTTTTATCCGGTAAAGCGAATGATTAGAGGTC\tDDDDDDDDDDDDBDBDDDDDDDDDDDDDDDDEDDDDDDDDDDDDFFHHJJJJJJJJJIHGIHGJJJJJJJJJJJJJJJJJJJJJJJJJHHHHGFFFFFCCC\tAS:i:-58\tXN:i:0\tXM:i:5\tXO:i:2\tXG:i:7\tNM:i:12\tMD:Z:54^CCC3G1A0C0C0C2^GCCA36\tYT:Z:UU\n'
b
diff -r 000000000000 -r 55107d4a728c test-data/output.fastq
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/output.fastq Mon Mar 21 17:33:26 2016 -0400
b
@@ -0,0 +1,20 @@
+@34/1
+GTTTCCCTCAGGATAGCTGGCGCTCTCGCAGAGACTCGAAACCCGCAGTTTTATCCGGTAAAGCGAATGATTAGAGGTCTTGGGGCCGAAACGATCTCAAC
++
+DBDBDDDDDEDDDDDDDDDDDDDDDDDDDDDDDDDDDDDD?FFHHHJJJJJJIHJJJJIJJJJIJJJJJJIIJIIJJJHJJIJJJJIJHHHHHFFFFFCCC
+@37/2
+GCGCAGTCCGCCCGGAGGATTCAACCCGGCGGCGCGCGTCCGGCCGTGCCGGTGGTCCCGGCGGATCTTTCCCGCTCCCCGTTCCTCCCGCCCCCTTCACC
++
+@@CFFFFFHHHHGGJHIJHIJGHHJJJAFEBEE?@BBDDBD6BDD5:B8B<88@@5@C?@B7<B87;CAAAADD<&&&8590<CCCDCBD&&0590<?CCC
+@43/1
+TGGTTCCCTCCGAAGTTTCCCTCAGGATAGCTGGCGCTCTCGCAGAGACTCGAAACCCGCAGTTTTATCCGGTAAAGCGAATGATTAGAGGTCTTGGGGCC
++
+BDBBDDDDDDDDDDDDDDDDDDDEDDDDDBBDDDDDDDDDDFFFFFHHGHHJIHGJJJIJJJJJJJJJJJJJJJJJJJJJJJJJJJJJHHHHHFFFFFCCC
+@36/2
+CCGAAGTTTCCCTCAGGATAGCTGGCGCTCTCGCAGAGACTCGAAACCCGCAGTTTTATCCGGTAAAGCGAATGATTAGAGGTCTTGGGGCCGAAACGATC
++
+BCCFFFFFHFFHFJJIJIJJIJJJJJIIGHGIJIIHIIDIJJHIIIJIIJBHDBDFFEEEEDDDBBDDDD@BDDDDEDDCCBACACCCDDDDDDBBDDBDD
+@54/1
+CTAGTAGCTGGTTCCCTCCGAAGTTTCCCTCAGGATAGCTGGCGCTCTCGCAGAGACTCGAAACCCGCAGTTTTATCCGGTAAAGCGAATGATTAGAGGTC
++
+DDDDDDDDDDDDBDBDDDDDDDDDDDDDDDDEDDDDDDDDDDDDFFHHJJJJJJJJJIHGIHGJJJJJJJJJJJJJJJJJJJJJJJJJHHHHGFFFFFCCC