Mercurial > repos > qfabrepo > metadegalaxy_reheader
comparison reheader.py @ 0:331fd79a9341 draft
"planemo upload for repository https://github.com/QFAB-Bioinformatics/metaDEGalaxy/tree/master/reheader commit 6783cd68521863b34f8e77cbb7ba404700c72313-dirty"
author | qfabrepo |
---|---|
date | Thu, 03 Sep 2020 01:32:31 +0000 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:331fd79a9341 |
---|---|
1 #!/usr/bin/env python | |
2 import sys | |
3 from Bio.Seq import Seq | |
4 from Bio import SeqIO | |
5 from Bio.SeqRecord import SeqRecord | |
6 from os.path import basename | |
7 import os | |
8 import re | |
9 import argparse | |
10 | |
# Command-line interface.  RawDescriptionHelpFormatter keeps the explicit
# line breaks in the description so the usage example stays on its own line.
parser = argparse.ArgumentParser(
    formatter_class=argparse.RawDescriptionHelpFormatter,
    description="reformat the read name (header) by appending the sample name to the read name\n" +
    "Example:\n python reheader.py -n F3D0_R1.fastq -i test-data/F3D0_R1.fastq -o test-data/test -l mylog -d test-data/")
parser.add_argument("-v", "--version", action="version", version="%(prog)s 1.0")
# Every option below is consumed unconditionally further down the script, so
# each one is marked required: a missing option now produces a usage error
# instead of a TypeError traceback from os.path functions receiving False.
parser.add_argument("-n", "--samplename", dest="samplename", required=True, help="input sample name")
parser.add_argument("-i", "--input", dest="inputfile", required=True, help="input filename in FASTQ format")
parser.add_argument("-l", "--log", dest="logfile", required=True, help="output log file")
parser.add_argument("-o", "--outfile", dest="outputfile", required=True, help="output filename")
parser.add_argument("-d", "--outdir", dest="outputdir", required=True, help="output directory")


# Called with no arguments at all: print the full help text (to stderr) and
# stop, rather than letting argparse emit its terse "required" error.
if len(sys.argv) == 1:
    parser.print_help(sys.stderr)
    sys.exit()

args = parser.parse_args()


filename = args.samplename
infile = args.inputfile
# Sample label: the sample name without its directory part or final extension.
str_to_add = os.path.splitext(basename(filename))[0]
outfile = args.outputfile
outdir = args.outputdir
logfile = args.logfile
35 | |
36 | |
# Read-direction tokens that may appear in a sample name, mapped to the
# "/1" or "/2" suffix that replaces them.
rdict = {
    '_R1': '/1',
    '_R2': '/2',
    '_1': '/1',
    '_2': '/2',
}

# The same tokens, mapped to the empty string so they are stripped entirely.
rdict_remove = {
    '_R1': '',
    '_R2': '',
    '_1': '',
    '_2': '',
}


def _apply_token_table(s, table):
    """Apply every token -> replacement pair of *table* to *s* in turn.

    For each token the regex first tries the token followed by an optional
    underscore and a run of alphanumerics reaching the end of the string
    (for example "_R1_001"); otherwise it falls back to the bare token.

    NOTE(review): the bare-token alternative is unanchored and re.sub
    replaces every match, so a name such as "sample_1_R1" has both "_1" and
    "_R1" rewritten.  Behaviour is kept as it was; confirm whether that is
    intended.
    """
    for token, repl in table.items():
        combined_pat = r'{0}_?[A-Za-z0-9]+$|{0}'.format(token)
        s = re.sub(combined_pat, repl, s)
    return s


def makesubs(s):
    """Return *s* with read-direction tokens rewritten per ``rdict``."""
    return _apply_token_table(s, rdict)


def makesubs_remove(s):
    """Return *s* with read-direction tokens removed per ``rdict_remove``."""
    return _apply_token_table(s, rdict_remove)
66 | |
def appendStringToSequenceHeader(inputfile,header_to_add):
    """Read a FASTQ file and return its records with a suffix added to each id.

    Each record parsed from *inputfile* is rebuilt as a new SeqRecord whose
    id is the original id, an underscore, then *header_to_add*.  The
    description is set to the empty string and the "phred_quality" letter
    annotations are carried over from the original record.

    Returns a list holding all rebuilt records, in file order.
    """
    renamed = []
    for original in SeqIO.parse(inputfile, "fastq"):
        new_id = "_".join(("{0}".format(original.id), header_to_add))
        tagged = SeqRecord(original.seq, id=new_id, description="")
        # Qualities must be attached explicitly; they are not part of the
        # SeqRecord constructor call above.
        qualities = original.letter_annotations["phred_quality"]
        tagged.letter_annotations["phred_quality"] = qualities
        renamed.append(tagged)
    return renamed
76 | |
# Sample label with the read-direction token stripped; this is what goes
# into the log file.
str_to_search = makesubs_remove(str_to_add)
# Sample label with the token rewritten to "/1" or "/2"; this is appended to
# every read id.
str_to_add = makesubs(str_to_add)

# The context manager guarantees the log file is closed even when parsing
# the input or writing the output raises.
with open(os.path.join(outdir, logfile), "w") as outlogfile:
    final_records = appendStringToSequenceHeader(infile, str_to_add)
    outlogfile.write(str_to_search)
    SeqIO.write(final_records, outfile, "fastq")