Mercurial > repos > bvalot > fasta_filter
annotate fastq_subsampling.py @ 0:c74e633b40e9 draft default tip
planemo upload for repository https://github.com/bvalot/galaxy commit d57c24d4b2c0c741d572af9ca0d09f8b82689640
author | bvalot |
---|---|
date | Tue, 14 Jun 2022 08:51:44 +0000 |
parents | |
children |
rev | line source |
---|---|
0
c74e633b40e9
planemo upload for repository https://github.com/bvalot/galaxy commit d57c24d4b2c0c741d572af9ca0d09f8b82689640
bvalot
parents:
diff
changeset
|
1 #!/usr/bin/env python3 |
c74e633b40e9
planemo upload for repository https://github.com/bvalot/galaxy commit d57c24d4b2c0c741d572af9ca0d09f8b82689640
bvalot
parents:
diff
changeset
|
2 # -*- coding: utf-8 -*- |
c74e633b40e9
planemo upload for repository https://github.com/bvalot/galaxy commit d57c24d4b2c0c741d572af9ca0d09f8b82689640
bvalot
parents:
diff
changeset
|
3 |
c74e633b40e9
planemo upload for repository https://github.com/bvalot/galaxy commit d57c24d4b2c0c741d572af9ca0d09f8b82689640
bvalot
parents:
diff
changeset
|
4 """Return a subsampling fastq file""" |
c74e633b40e9
planemo upload for repository https://github.com/bvalot/galaxy commit d57c24d4b2c0c741d572af9ca0d09f8b82689640
bvalot
parents:
diff
changeset
|
5 |
c74e633b40e9
planemo upload for repository https://github.com/bvalot/galaxy commit d57c24d4b2c0c741d572af9ca0d09f8b82689640
bvalot
parents:
diff
changeset
|
6 import argparse |
c74e633b40e9
planemo upload for repository https://github.com/bvalot/galaxy commit d57c24d4b2c0c741d572af9ca0d09f8b82689640
bvalot
parents:
diff
changeset
|
7 import gzip |
c74e633b40e9
planemo upload for repository https://github.com/bvalot/galaxy commit d57c24d4b2c0c741d572af9ca0d09f8b82689640
bvalot
parents:
diff
changeset
|
8 import io |
c74e633b40e9
planemo upload for repository https://github.com/bvalot/galaxy commit d57c24d4b2c0c741d572af9ca0d09f8b82689640
bvalot
parents:
diff
changeset
|
9 import os |
c74e633b40e9
planemo upload for repository https://github.com/bvalot/galaxy commit d57c24d4b2c0c741d572af9ca0d09f8b82689640
bvalot
parents:
diff
changeset
|
10 import random |
c74e633b40e9
planemo upload for repository https://github.com/bvalot/galaxy commit d57c24d4b2c0c741d572af9ca0d09f8b82689640
bvalot
parents:
diff
changeset
|
11 import sys |
c74e633b40e9
planemo upload for repository https://github.com/bvalot/galaxy commit d57c24d4b2c0c741d572af9ca0d09f8b82689640
bvalot
parents:
diff
changeset
|
12 |
c74e633b40e9
planemo upload for repository https://github.com/bvalot/galaxy commit d57c24d4b2c0c741d572af9ca0d09f8b82689640
bvalot
parents:
diff
changeset
|
13 import pysam |
c74e633b40e9
planemo upload for repository https://github.com/bvalot/galaxy commit d57c24d4b2c0c741d572af9ca0d09f8b82689640
bvalot
parents:
diff
changeset
|
14 |
c74e633b40e9
planemo upload for repository https://github.com/bvalot/galaxy commit d57c24d4b2c0c741d572af9ca0d09f8b82689640
bvalot
parents:
diff
changeset
|
15 desc = "Subsampling a single or a paired of fastq(.gz) file" |
c74e633b40e9
planemo upload for repository https://github.com/bvalot/galaxy commit d57c24d4b2c0c741d572af9ca0d09f8b82689640
bvalot
parents:
diff
changeset
|
16 command = argparse.ArgumentParser( |
c74e633b40e9
planemo upload for repository https://github.com/bvalot/galaxy commit d57c24d4b2c0c741d572af9ca0d09f8b82689640
bvalot
parents:
diff
changeset
|
17 prog="fastq_subsampling.py", description=desc, usage="%(prog)s [options] genomeSize" |
c74e633b40e9
planemo upload for repository https://github.com/bvalot/galaxy commit d57c24d4b2c0c741d572af9ca0d09f8b82689640
bvalot
parents:
diff
changeset
|
18 ) |
c74e633b40e9
planemo upload for repository https://github.com/bvalot/galaxy commit d57c24d4b2c0c741d572af9ca0d09f8b82689640
bvalot
parents:
diff
changeset
|
19 command.add_argument("--galaxy", action="store_true", help=argparse.SUPPRESS) |
c74e633b40e9
planemo upload for repository https://github.com/bvalot/galaxy commit d57c24d4b2c0c741d572af9ca0d09f8b82689640
bvalot
parents:
diff
changeset
|
20 command.add_argument( |
c74e633b40e9
planemo upload for repository https://github.com/bvalot/galaxy commit d57c24d4b2c0c741d572af9ca0d09f8b82689640
bvalot
parents:
diff
changeset
|
21 "-d", |
c74e633b40e9
planemo upload for repository https://github.com/bvalot/galaxy commit d57c24d4b2c0c741d572af9ca0d09f8b82689640
bvalot
parents:
diff
changeset
|
22 "--outdir", |
c74e633b40e9
planemo upload for repository https://github.com/bvalot/galaxy commit d57c24d4b2c0c741d572af9ca0d09f8b82689640
bvalot
parents:
diff
changeset
|
23 default="./", |
c74e633b40e9
planemo upload for repository https://github.com/bvalot/galaxy commit d57c24d4b2c0c741d572af9ca0d09f8b82689640
bvalot
parents:
diff
changeset
|
24 type=str, |
c74e633b40e9
planemo upload for repository https://github.com/bvalot/galaxy commit d57c24d4b2c0c741d572af9ca0d09f8b82689640
bvalot
parents:
diff
changeset
|
25 nargs="?", |
c74e633b40e9
planemo upload for repository https://github.com/bvalot/galaxy commit d57c24d4b2c0c741d572af9ca0d09f8b82689640
bvalot
parents:
diff
changeset
|
26 help="Directory to store subsampling fastq file, default: current directory", |
c74e633b40e9
planemo upload for repository https://github.com/bvalot/galaxy commit d57c24d4b2c0c741d572af9ca0d09f8b82689640
bvalot
parents:
diff
changeset
|
27 ) |
c74e633b40e9
planemo upload for repository https://github.com/bvalot/galaxy commit d57c24d4b2c0c741d572af9ca0d09f8b82689640
bvalot
parents:
diff
changeset
|
28 command.add_argument( |
c74e633b40e9
planemo upload for repository https://github.com/bvalot/galaxy commit d57c24d4b2c0c741d572af9ca0d09f8b82689640
bvalot
parents:
diff
changeset
|
29 "-p", |
c74e633b40e9
planemo upload for repository https://github.com/bvalot/galaxy commit d57c24d4b2c0c741d572af9ca0d09f8b82689640
bvalot
parents:
diff
changeset
|
30 "--prefix", |
c74e633b40e9
planemo upload for repository https://github.com/bvalot/galaxy commit d57c24d4b2c0c741d572af9ca0d09f8b82689640
bvalot
parents:
diff
changeset
|
31 default="_sub", |
c74e633b40e9
planemo upload for repository https://github.com/bvalot/galaxy commit d57c24d4b2c0c741d572af9ca0d09f8b82689640
bvalot
parents:
diff
changeset
|
32 type=str, |
c74e633b40e9
planemo upload for repository https://github.com/bvalot/galaxy commit d57c24d4b2c0c741d572af9ca0d09f8b82689640
bvalot
parents:
diff
changeset
|
33 nargs="?", |
c74e633b40e9
planemo upload for repository https://github.com/bvalot/galaxy commit d57c24d4b2c0c741d572af9ca0d09f8b82689640
bvalot
parents:
diff
changeset
|
34 help="Output fastq file with prefix, default:_sub", |
c74e633b40e9
planemo upload for repository https://github.com/bvalot/galaxy commit d57c24d4b2c0c741d572af9ca0d09f8b82689640
bvalot
parents:
diff
changeset
|
35 ) |
c74e633b40e9
planemo upload for repository https://github.com/bvalot/galaxy commit d57c24d4b2c0c741d572af9ca0d09f8b82689640
bvalot
parents:
diff
changeset
|
36 command.add_argument( |
c74e633b40e9
planemo upload for repository https://github.com/bvalot/galaxy commit d57c24d4b2c0c741d572af9ca0d09f8b82689640
bvalot
parents:
diff
changeset
|
37 "-c", |
c74e633b40e9
planemo upload for repository https://github.com/bvalot/galaxy commit d57c24d4b2c0c741d572af9ca0d09f8b82689640
bvalot
parents:
diff
changeset
|
38 "--coverage", |
c74e633b40e9
planemo upload for repository https://github.com/bvalot/galaxy commit d57c24d4b2c0c741d572af9ca0d09f8b82689640
bvalot
parents:
diff
changeset
|
39 type=int, |
c74e633b40e9
planemo upload for repository https://github.com/bvalot/galaxy commit d57c24d4b2c0c741d572af9ca0d09f8b82689640
bvalot
parents:
diff
changeset
|
40 nargs="?", |
c74e633b40e9
planemo upload for repository https://github.com/bvalot/galaxy commit d57c24d4b2c0c741d572af9ca0d09f8b82689640
bvalot
parents:
diff
changeset
|
41 default=60, |
c74e633b40e9
planemo upload for repository https://github.com/bvalot/galaxy commit d57c24d4b2c0c741d572af9ca0d09f8b82689640
bvalot
parents:
diff
changeset
|
42 help="Mean coverage to sampling (default=60)", |
c74e633b40e9
planemo upload for repository https://github.com/bvalot/galaxy commit d57c24d4b2c0c741d572af9ca0d09f8b82689640
bvalot
parents:
diff
changeset
|
43 ) |
c74e633b40e9
planemo upload for repository https://github.com/bvalot/galaxy commit d57c24d4b2c0c741d572af9ca0d09f8b82689640
bvalot
parents:
diff
changeset
|
44 command.add_argument( |
c74e633b40e9
planemo upload for repository https://github.com/bvalot/galaxy commit d57c24d4b2c0c741d572af9ca0d09f8b82689640
bvalot
parents:
diff
changeset
|
45 "--copy", |
c74e633b40e9
planemo upload for repository https://github.com/bvalot/galaxy commit d57c24d4b2c0c741d572af9ca0d09f8b82689640
bvalot
parents:
diff
changeset
|
46 action="store_true", |
c74e633b40e9
planemo upload for repository https://github.com/bvalot/galaxy commit d57c24d4b2c0c741d572af9ca0d09f8b82689640
bvalot
parents:
diff
changeset
|
47 help="Perform a copy of sample without enough coverage (default=no subsampling)", |
c74e633b40e9
planemo upload for repository https://github.com/bvalot/galaxy commit d57c24d4b2c0c741d572af9ca0d09f8b82689640
bvalot
parents:
diff
changeset
|
48 ) |
c74e633b40e9
planemo upload for repository https://github.com/bvalot/galaxy commit d57c24d4b2c0c741d572af9ca0d09f8b82689640
bvalot
parents:
diff
changeset
|
49 command.add_argument( |
c74e633b40e9
planemo upload for repository https://github.com/bvalot/galaxy commit d57c24d4b2c0c741d572af9ca0d09f8b82689640
bvalot
parents:
diff
changeset
|
50 "-s", |
c74e633b40e9
planemo upload for repository https://github.com/bvalot/galaxy commit d57c24d4b2c0c741d572af9ca0d09f8b82689640
bvalot
parents:
diff
changeset
|
51 "--sfastq", |
c74e633b40e9
planemo upload for repository https://github.com/bvalot/galaxy commit d57c24d4b2c0c741d572af9ca0d09f8b82689640
bvalot
parents:
diff
changeset
|
52 nargs="?", |
c74e633b40e9
planemo upload for repository https://github.com/bvalot/galaxy commit d57c24d4b2c0c741d572af9ca0d09f8b82689640
bvalot
parents:
diff
changeset
|
53 type=argparse.FileType("r"), |
c74e633b40e9
planemo upload for repository https://github.com/bvalot/galaxy commit d57c24d4b2c0c741d572af9ca0d09f8b82689640
bvalot
parents:
diff
changeset
|
54 help="Fastq(.gz) file to subsampling (single)", |
c74e633b40e9
planemo upload for repository https://github.com/bvalot/galaxy commit d57c24d4b2c0c741d572af9ca0d09f8b82689640
bvalot
parents:
diff
changeset
|
55 ) |
c74e633b40e9
planemo upload for repository https://github.com/bvalot/galaxy commit d57c24d4b2c0c741d572af9ca0d09f8b82689640
bvalot
parents:
diff
changeset
|
56 command.add_argument( |
c74e633b40e9
planemo upload for repository https://github.com/bvalot/galaxy commit d57c24d4b2c0c741d572af9ca0d09f8b82689640
bvalot
parents:
diff
changeset
|
57 "-r", |
c74e633b40e9
planemo upload for repository https://github.com/bvalot/galaxy commit d57c24d4b2c0c741d572af9ca0d09f8b82689640
bvalot
parents:
diff
changeset
|
58 "--rfastq", |
c74e633b40e9
planemo upload for repository https://github.com/bvalot/galaxy commit d57c24d4b2c0c741d572af9ca0d09f8b82689640
bvalot
parents:
diff
changeset
|
59 nargs="?", |
c74e633b40e9
planemo upload for repository https://github.com/bvalot/galaxy commit d57c24d4b2c0c741d572af9ca0d09f8b82689640
bvalot
parents:
diff
changeset
|
60 type=argparse.FileType("r"), |
c74e633b40e9
planemo upload for repository https://github.com/bvalot/galaxy commit d57c24d4b2c0c741d572af9ca0d09f8b82689640
bvalot
parents:
diff
changeset
|
61 help="Fastq(.gz) file to subsampling in paired (right)", |
c74e633b40e9
planemo upload for repository https://github.com/bvalot/galaxy commit d57c24d4b2c0c741d572af9ca0d09f8b82689640
bvalot
parents:
diff
changeset
|
62 ) |
c74e633b40e9
planemo upload for repository https://github.com/bvalot/galaxy commit d57c24d4b2c0c741d572af9ca0d09f8b82689640
bvalot
parents:
diff
changeset
|
63 command.add_argument( |
c74e633b40e9
planemo upload for repository https://github.com/bvalot/galaxy commit d57c24d4b2c0c741d572af9ca0d09f8b82689640
bvalot
parents:
diff
changeset
|
64 "-l", |
c74e633b40e9
planemo upload for repository https://github.com/bvalot/galaxy commit d57c24d4b2c0c741d572af9ca0d09f8b82689640
bvalot
parents:
diff
changeset
|
65 "--lfastq", |
c74e633b40e9
planemo upload for repository https://github.com/bvalot/galaxy commit d57c24d4b2c0c741d572af9ca0d09f8b82689640
bvalot
parents:
diff
changeset
|
66 nargs="?", |
c74e633b40e9
planemo upload for repository https://github.com/bvalot/galaxy commit d57c24d4b2c0c741d572af9ca0d09f8b82689640
bvalot
parents:
diff
changeset
|
67 type=argparse.FileType("r"), |
c74e633b40e9
planemo upload for repository https://github.com/bvalot/galaxy commit d57c24d4b2c0c741d572af9ca0d09f8b82689640
bvalot
parents:
diff
changeset
|
68 help="Fastq(.gz) file to subsampling in paired (left)", |
c74e633b40e9
planemo upload for repository https://github.com/bvalot/galaxy commit d57c24d4b2c0c741d572af9ca0d09f8b82689640
bvalot
parents:
diff
changeset
|
69 ) |
c74e633b40e9
planemo upload for repository https://github.com/bvalot/galaxy commit d57c24d4b2c0c741d572af9ca0d09f8b82689640
bvalot
parents:
diff
changeset
|
70 command.add_argument( |
c74e633b40e9
planemo upload for repository https://github.com/bvalot/galaxy commit d57c24d4b2c0c741d572af9ca0d09f8b82689640
bvalot
parents:
diff
changeset
|
71 "genomesize", |
c74e633b40e9
planemo upload for repository https://github.com/bvalot/galaxy commit d57c24d4b2c0c741d572af9ca0d09f8b82689640
bvalot
parents:
diff
changeset
|
72 type=int, |
c74e633b40e9
planemo upload for repository https://github.com/bvalot/galaxy commit d57c24d4b2c0c741d572af9ca0d09f8b82689640
bvalot
parents:
diff
changeset
|
73 help="Size of the genome (bp) for calculation of subsampling", |
c74e633b40e9
planemo upload for repository https://github.com/bvalot/galaxy commit d57c24d4b2c0c741d572af9ca0d09f8b82689640
bvalot
parents:
diff
changeset
|
74 ) |
c74e633b40e9
planemo upload for repository https://github.com/bvalot/galaxy commit d57c24d4b2c0c741d572af9ca0d09f8b82689640
bvalot
parents:
diff
changeset
|
75 command.add_argument("-v", "--version", action="version", version="%(prog)s 0.2.0") |
c74e633b40e9
planemo upload for repository https://github.com/bvalot/galaxy commit d57c24d4b2c0c741d572af9ca0d09f8b82689640
bvalot
parents:
diff
changeset
|
76 |
c74e633b40e9
planemo upload for repository https://github.com/bvalot/galaxy commit d57c24d4b2c0c741d572af9ca0d09f8b82689640
bvalot
parents:
diff
changeset
|
77 |
c74e633b40e9
planemo upload for repository https://github.com/bvalot/galaxy commit d57c24d4b2c0c741d572af9ca0d09f8b82689640
bvalot
parents:
diff
changeset
|
78 def outname(inname, outdir, append): |
c74e633b40e9
planemo upload for repository https://github.com/bvalot/galaxy commit d57c24d4b2c0c741d572af9ca0d09f8b82689640
bvalot
parents:
diff
changeset
|
79 inname = outdir + inname.split("/")[-1] |
c74e633b40e9
planemo upload for repository https://github.com/bvalot/galaxy commit d57c24d4b2c0c741d572af9ca0d09f8b82689640
bvalot
parents:
diff
changeset
|
80 if isgzip(inname): |
c74e633b40e9
planemo upload for repository https://github.com/bvalot/galaxy commit d57c24d4b2c0c741d572af9ca0d09f8b82689640
bvalot
parents:
diff
changeset
|
81 if inname[-5:] == "fq.gz": |
c74e633b40e9
planemo upload for repository https://github.com/bvalot/galaxy commit d57c24d4b2c0c741d572af9ca0d09f8b82689640
bvalot
parents:
diff
changeset
|
82 return inname.rstrip(".fq.gz") + append + ".fastq.gz" |
c74e633b40e9
planemo upload for repository https://github.com/bvalot/galaxy commit d57c24d4b2c0c741d572af9ca0d09f8b82689640
bvalot
parents:
diff
changeset
|
83 elif inname[-8:] == "fastq.gz": |
c74e633b40e9
planemo upload for repository https://github.com/bvalot/galaxy commit d57c24d4b2c0c741d572af9ca0d09f8b82689640
bvalot
parents:
diff
changeset
|
84 return inname.rstrip(".fastq.gz") + append + ".fastq.gz" |
c74e633b40e9
planemo upload for repository https://github.com/bvalot/galaxy commit d57c24d4b2c0c741d572af9ca0d09f8b82689640
bvalot
parents:
diff
changeset
|
85 else: |
c74e633b40e9
planemo upload for repository https://github.com/bvalot/galaxy commit d57c24d4b2c0c741d572af9ca0d09f8b82689640
bvalot
parents:
diff
changeset
|
86 return inname + append + ".fastq.gz" |
c74e633b40e9
planemo upload for repository https://github.com/bvalot/galaxy commit d57c24d4b2c0c741d572af9ca0d09f8b82689640
bvalot
parents:
diff
changeset
|
87 elif inname[-2:] == "fq": |
c74e633b40e9
planemo upload for repository https://github.com/bvalot/galaxy commit d57c24d4b2c0c741d572af9ca0d09f8b82689640
bvalot
parents:
diff
changeset
|
88 return inname.rstrip(".fq") + append + ".fastq.gz" |
c74e633b40e9
planemo upload for repository https://github.com/bvalot/galaxy commit d57c24d4b2c0c741d572af9ca0d09f8b82689640
bvalot
parents:
diff
changeset
|
89 elif inname[-5:] == "fastq": |
c74e633b40e9
planemo upload for repository https://github.com/bvalot/galaxy commit d57c24d4b2c0c741d572af9ca0d09f8b82689640
bvalot
parents:
diff
changeset
|
90 return inname.rstrip(".fastq") + append + ".fastq.gz" |
c74e633b40e9
planemo upload for repository https://github.com/bvalot/galaxy commit d57c24d4b2c0c741d572af9ca0d09f8b82689640
bvalot
parents:
diff
changeset
|
91 else: |
c74e633b40e9
planemo upload for repository https://github.com/bvalot/galaxy commit d57c24d4b2c0c741d572af9ca0d09f8b82689640
bvalot
parents:
diff
changeset
|
92 return inname + append + ".fastq.gz" |
c74e633b40e9
planemo upload for repository https://github.com/bvalot/galaxy commit d57c24d4b2c0c741d572af9ca0d09f8b82689640
bvalot
parents:
diff
changeset
|
93 |
c74e633b40e9
planemo upload for repository https://github.com/bvalot/galaxy commit d57c24d4b2c0c741d572af9ca0d09f8b82689640
bvalot
parents:
diff
changeset
|
94 |
c74e633b40e9
planemo upload for repository https://github.com/bvalot/galaxy commit d57c24d4b2c0c741d572af9ca0d09f8b82689640
bvalot
parents:
diff
changeset
|
95 def isgzip(filename): |
c74e633b40e9
planemo upload for repository https://github.com/bvalot/galaxy commit d57c24d4b2c0c741d572af9ca0d09f8b82689640
bvalot
parents:
diff
changeset
|
96 if filename[-2:] == "gz": |
c74e633b40e9
planemo upload for repository https://github.com/bvalot/galaxy commit d57c24d4b2c0c741d572af9ca0d09f8b82689640
bvalot
parents:
diff
changeset
|
97 return True |
c74e633b40e9
planemo upload for repository https://github.com/bvalot/galaxy commit d57c24d4b2c0c741d572af9ca0d09f8b82689640
bvalot
parents:
diff
changeset
|
98 else: |
c74e633b40e9
planemo upload for repository https://github.com/bvalot/galaxy commit d57c24d4b2c0c741d572af9ca0d09f8b82689640
bvalot
parents:
diff
changeset
|
99 return False |
c74e633b40e9
planemo upload for repository https://github.com/bvalot/galaxy commit d57c24d4b2c0c741d572af9ca0d09f8b82689640
bvalot
parents:
diff
changeset
|
100 |
c74e633b40e9
planemo upload for repository https://github.com/bvalot/galaxy commit d57c24d4b2c0c741d572af9ca0d09f8b82689640
bvalot
parents:
diff
changeset
|
101 |
c74e633b40e9
planemo upload for repository https://github.com/bvalot/galaxy commit d57c24d4b2c0c741d572af9ca0d09f8b82689640
bvalot
parents:
diff
changeset
|
102 def complet_count(fastq): |
c74e633b40e9
planemo upload for repository https://github.com/bvalot/galaxy commit d57c24d4b2c0c741d572af9ca0d09f8b82689640
bvalot
parents:
diff
changeset
|
103 count = 0 |
c74e633b40e9
planemo upload for repository https://github.com/bvalot/galaxy commit d57c24d4b2c0c741d572af9ca0d09f8b82689640
bvalot
parents:
diff
changeset
|
104 total = 0.0 |
c74e633b40e9
planemo upload for repository https://github.com/bvalot/galaxy commit d57c24d4b2c0c741d572af9ca0d09f8b82689640
bvalot
parents:
diff
changeset
|
105 for read in pysam.FastxFile(fastq): |
c74e633b40e9
planemo upload for repository https://github.com/bvalot/galaxy commit d57c24d4b2c0c741d572af9ca0d09f8b82689640
bvalot
parents:
diff
changeset
|
106 count += 1 |
c74e633b40e9
planemo upload for repository https://github.com/bvalot/galaxy commit d57c24d4b2c0c741d572af9ca0d09f8b82689640
bvalot
parents:
diff
changeset
|
107 total += len(read.sequence) |
c74e633b40e9
planemo upload for repository https://github.com/bvalot/galaxy commit d57c24d4b2c0c741d572af9ca0d09f8b82689640
bvalot
parents:
diff
changeset
|
108 return count, int(total / count) |
c74e633b40e9
planemo upload for repository https://github.com/bvalot/galaxy commit d57c24d4b2c0c741d572af9ca0d09f8b82689640
bvalot
parents:
diff
changeset
|
109 |
c74e633b40e9
planemo upload for repository https://github.com/bvalot/galaxy commit d57c24d4b2c0c741d572af9ca0d09f8b82689640
bvalot
parents:
diff
changeset
|
110 |
c74e633b40e9
planemo upload for repository https://github.com/bvalot/galaxy commit d57c24d4b2c0c741d572af9ca0d09f8b82689640
bvalot
parents:
diff
changeset
|
111 def make_copy(infile, outdir, prefix): |
c74e633b40e9
planemo upload for repository https://github.com/bvalot/galaxy commit d57c24d4b2c0c741d572af9ca0d09f8b82689640
bvalot
parents:
diff
changeset
|
112 output = outname(infile, outdir, prefix) |
c74e633b40e9
planemo upload for repository https://github.com/bvalot/galaxy commit d57c24d4b2c0c741d572af9ca0d09f8b82689640
bvalot
parents:
diff
changeset
|
113 os.popen(" ".join(["cp", infile, output])) |
c74e633b40e9
planemo upload for repository https://github.com/bvalot/galaxy commit d57c24d4b2c0c741d572af9ca0d09f8b82689640
bvalot
parents:
diff
changeset
|
114 |
c74e633b40e9
planemo upload for repository https://github.com/bvalot/galaxy commit d57c24d4b2c0c741d572af9ca0d09f8b82689640
bvalot
parents:
diff
changeset
|
115 |
c74e633b40e9
planemo upload for repository https://github.com/bvalot/galaxy commit d57c24d4b2c0c741d572af9ca0d09f8b82689640
bvalot
parents:
diff
changeset
|
116 def make_subsampling(infile, outdir, prefix, select): |
c74e633b40e9
planemo upload for repository https://github.com/bvalot/galaxy commit d57c24d4b2c0c741d572af9ca0d09f8b82689640
bvalot
parents:
diff
changeset
|
117 output = io.BufferedWriter(gzip.open(outname(infile, outdir, prefix), "wb", 5)) |
c74e633b40e9
planemo upload for repository https://github.com/bvalot/galaxy commit d57c24d4b2c0c741d572af9ca0d09f8b82689640
bvalot
parents:
diff
changeset
|
118 for i, read in enumerate(pysam.FastxFile(infile)): |
c74e633b40e9
planemo upload for repository https://github.com/bvalot/galaxy commit d57c24d4b2c0c741d572af9ca0d09f8b82689640
bvalot
parents:
diff
changeset
|
119 if i in select: |
c74e633b40e9
planemo upload for repository https://github.com/bvalot/galaxy commit d57c24d4b2c0c741d572af9ca0d09f8b82689640
bvalot
parents:
diff
changeset
|
120 output.write((str(read) + "\n").encode()) |
c74e633b40e9
planemo upload for repository https://github.com/bvalot/galaxy commit d57c24d4b2c0c741d572af9ca0d09f8b82689640
bvalot
parents:
diff
changeset
|
121 output.flush() |
c74e633b40e9
planemo upload for repository https://github.com/bvalot/galaxy commit d57c24d4b2c0c741d572af9ca0d09f8b82689640
bvalot
parents:
diff
changeset
|
122 output.close() |
c74e633b40e9
planemo upload for repository https://github.com/bvalot/galaxy commit d57c24d4b2c0c741d572af9ca0d09f8b82689640
bvalot
parents:
diff
changeset
|
123 |
c74e633b40e9
planemo upload for repository https://github.com/bvalot/galaxy commit d57c24d4b2c0c741d572af9ca0d09f8b82689640
bvalot
parents:
diff
changeset
|
124 |
c74e633b40e9
planemo upload for repository https://github.com/bvalot/galaxy commit d57c24d4b2c0c741d572af9ca0d09f8b82689640
bvalot
parents:
diff
changeset
|
125 def make_subsampling_galaxy(infile, number, select): |
c74e633b40e9
planemo upload for repository https://github.com/bvalot/galaxy commit d57c24d4b2c0c741d572af9ca0d09f8b82689640
bvalot
parents:
diff
changeset
|
126 output = io.BufferedWriter(gzip.open("./" + str(number) + ".fastq.gz", "wb", 5)) |
c74e633b40e9
planemo upload for repository https://github.com/bvalot/galaxy commit d57c24d4b2c0c741d572af9ca0d09f8b82689640
bvalot
parents:
diff
changeset
|
127 for i, read in enumerate(pysam.FastxFile(infile)): |
c74e633b40e9
planemo upload for repository https://github.com/bvalot/galaxy commit d57c24d4b2c0c741d572af9ca0d09f8b82689640
bvalot
parents:
diff
changeset
|
128 if i in select: |
c74e633b40e9
planemo upload for repository https://github.com/bvalot/galaxy commit d57c24d4b2c0c741d572af9ca0d09f8b82689640
bvalot
parents:
diff
changeset
|
129 output.write((str(read) + "\n").encode()) |
c74e633b40e9
planemo upload for repository https://github.com/bvalot/galaxy commit d57c24d4b2c0c741d572af9ca0d09f8b82689640
bvalot
parents:
diff
changeset
|
130 output.flush() |
c74e633b40e9
planemo upload for repository https://github.com/bvalot/galaxy commit d57c24d4b2c0c741d572af9ca0d09f8b82689640
bvalot
parents:
diff
changeset
|
131 output.close() |
c74e633b40e9
planemo upload for repository https://github.com/bvalot/galaxy commit d57c24d4b2c0c741d572af9ca0d09f8b82689640
bvalot
parents:
diff
changeset
|
132 |
c74e633b40e9
planemo upload for repository https://github.com/bvalot/galaxy commit d57c24d4b2c0c741d572af9ca0d09f8b82689640
bvalot
parents:
diff
changeset
|
133 |
c74e633b40e9
planemo upload for repository https://github.com/bvalot/galaxy commit d57c24d4b2c0c741d572af9ca0d09f8b82689640
bvalot
parents:
diff
changeset
|
134 if __name__ == "__main__": |
c74e633b40e9
planemo upload for repository https://github.com/bvalot/galaxy commit d57c24d4b2c0c741d572af9ca0d09f8b82689640
bvalot
parents:
diff
changeset
|
135 """Performed job on execution script""" |
c74e633b40e9
planemo upload for repository https://github.com/bvalot/galaxy commit d57c24d4b2c0c741d572af9ca0d09f8b82689640
bvalot
parents:
diff
changeset
|
136 args = command.parse_args() |
c74e633b40e9
planemo upload for repository https://github.com/bvalot/galaxy commit d57c24d4b2c0c741d572af9ca0d09f8b82689640
bvalot
parents:
diff
changeset
|
137 |
c74e633b40e9
planemo upload for repository https://github.com/bvalot/galaxy commit d57c24d4b2c0c741d572af9ca0d09f8b82689640
bvalot
parents:
diff
changeset
|
138 # verify input |
c74e633b40e9
planemo upload for repository https://github.com/bvalot/galaxy commit d57c24d4b2c0c741d572af9ca0d09f8b82689640
bvalot
parents:
diff
changeset
|
139 if args.sfastq is None and args.lfastq is None and args.rfastq is None: |
c74e633b40e9
planemo upload for repository https://github.com/bvalot/galaxy commit d57c24d4b2c0c741d572af9ca0d09f8b82689640
bvalot
parents:
diff
changeset
|
140 raise Exception("At least single or paired reads must be defined") |
c74e633b40e9
planemo upload for repository https://github.com/bvalot/galaxy commit d57c24d4b2c0c741d572af9ca0d09f8b82689640
bvalot
parents:
diff
changeset
|
141 outdir = args.outdir |
c74e633b40e9
planemo upload for repository https://github.com/bvalot/galaxy commit d57c24d4b2c0c741d572af9ca0d09f8b82689640
bvalot
parents:
diff
changeset
|
142 if not os.path.isdir(outdir): |
c74e633b40e9
planemo upload for repository https://github.com/bvalot/galaxy commit d57c24d4b2c0c741d572af9ca0d09f8b82689640
bvalot
parents:
diff
changeset
|
143 raise Exception(outdir + " is not a valid directory") |
c74e633b40e9
planemo upload for repository https://github.com/bvalot/galaxy commit d57c24d4b2c0c741d572af9ca0d09f8b82689640
bvalot
parents:
diff
changeset
|
144 else: |
c74e633b40e9
planemo upload for repository https://github.com/bvalot/galaxy commit d57c24d4b2c0c741d572af9ca0d09f8b82689640
bvalot
parents:
diff
changeset
|
145 outdir = outdir.rstrip("/") + "/" |
c74e633b40e9
planemo upload for repository https://github.com/bvalot/galaxy commit d57c24d4b2c0c741d572af9ca0d09f8b82689640
bvalot
parents:
diff
changeset
|
146 |
c74e633b40e9
planemo upload for repository https://github.com/bvalot/galaxy commit d57c24d4b2c0c741d572af9ca0d09f8b82689640
bvalot
parents:
diff
changeset
|
147 # compute count and length |
c74e633b40e9
planemo upload for repository https://github.com/bvalot/galaxy commit d57c24d4b2c0c741d572af9ca0d09f8b82689640
bvalot
parents:
diff
changeset
|
148 fastqname = None |
c74e633b40e9
planemo upload for repository https://github.com/bvalot/galaxy commit d57c24d4b2c0c741d572af9ca0d09f8b82689640
bvalot
parents:
diff
changeset
|
149 if args.sfastq is not None: |
c74e633b40e9
planemo upload for repository https://github.com/bvalot/galaxy commit d57c24d4b2c0c741d572af9ca0d09f8b82689640
bvalot
parents:
diff
changeset
|
150 fastqname = args.sfastq.name |
c74e633b40e9
planemo upload for repository https://github.com/bvalot/galaxy commit d57c24d4b2c0c741d572af9ca0d09f8b82689640
bvalot
parents:
diff
changeset
|
151 elif args.rfastq is not None and args.lfastq is not None: |
c74e633b40e9
planemo upload for repository https://github.com/bvalot/galaxy commit d57c24d4b2c0c741d572af9ca0d09f8b82689640
bvalot
parents:
diff
changeset
|
152 fastqname = args.rfastq.name |
c74e633b40e9
planemo upload for repository https://github.com/bvalot/galaxy commit d57c24d4b2c0c741d572af9ca0d09f8b82689640
bvalot
parents:
diff
changeset
|
153 else: |
c74e633b40e9
planemo upload for repository https://github.com/bvalot/galaxy commit d57c24d4b2c0c741d572af9ca0d09f8b82689640
bvalot
parents:
diff
changeset
|
154 raise Exception("You must defined right and left fastq file for paired data") |
c74e633b40e9
planemo upload for repository https://github.com/bvalot/galaxy commit d57c24d4b2c0c741d572af9ca0d09f8b82689640
bvalot
parents:
diff
changeset
|
155 |
c74e633b40e9
planemo upload for repository https://github.com/bvalot/galaxy commit d57c24d4b2c0c741d572af9ca0d09f8b82689640
bvalot
parents:
diff
changeset
|
156 count = None |
c74e633b40e9
planemo upload for repository https://github.com/bvalot/galaxy commit d57c24d4b2c0c741d572af9ca0d09f8b82689640
bvalot
parents:
diff
changeset
|
157 length = None |
c74e633b40e9
planemo upload for repository https://github.com/bvalot/galaxy commit d57c24d4b2c0c741d572af9ca0d09f8b82689640
bvalot
parents:
diff
changeset
|
158 count, length = complet_count(fastqname) |
c74e633b40e9
planemo upload for repository https://github.com/bvalot/galaxy commit d57c24d4b2c0c741d572af9ca0d09f8b82689640
bvalot
parents:
diff
changeset
|
159 |
c74e633b40e9
planemo upload for repository https://github.com/bvalot/galaxy commit d57c24d4b2c0c741d572af9ca0d09f8b82689640
bvalot
parents:
diff
changeset
|
160 # Calculate coverage and report |
c74e633b40e9
planemo upload for repository https://github.com/bvalot/galaxy commit d57c24d4b2c0c741d572af9ca0d09f8b82689640
bvalot
parents:
diff
changeset
|
161 coverage = 0 |
c74e633b40e9
planemo upload for repository https://github.com/bvalot/galaxy commit d57c24d4b2c0c741d572af9ca0d09f8b82689640
bvalot
parents:
diff
changeset
|
162 if args.sfastq is not None: |
c74e633b40e9
planemo upload for repository https://github.com/bvalot/galaxy commit d57c24d4b2c0c741d572af9ca0d09f8b82689640
bvalot
parents:
diff
changeset
|
163 coverage = int(float(count) * length / args.genomesize) |
c74e633b40e9
planemo upload for repository https://github.com/bvalot/galaxy commit d57c24d4b2c0c741d572af9ca0d09f8b82689640
bvalot
parents:
diff
changeset
|
164 print( |
c74e633b40e9
planemo upload for repository https://github.com/bvalot/galaxy commit d57c24d4b2c0c741d572af9ca0d09f8b82689640
bvalot
parents:
diff
changeset
|
165 " : ".join([fastqname, str(length) + "bp", str(count), str(coverage) + "X"]) |
c74e633b40e9
planemo upload for repository https://github.com/bvalot/galaxy commit d57c24d4b2c0c741d572af9ca0d09f8b82689640
bvalot
parents:
diff
changeset
|
166 ) |
c74e633b40e9
planemo upload for repository https://github.com/bvalot/galaxy commit d57c24d4b2c0c741d572af9ca0d09f8b82689640
bvalot
parents:
diff
changeset
|
167 else: |
c74e633b40e9
planemo upload for repository https://github.com/bvalot/galaxy commit d57c24d4b2c0c741d572af9ca0d09f8b82689640
bvalot
parents:
diff
changeset
|
168 coverage = int(float(count) * length * 2 / args.genomesize) |
c74e633b40e9
planemo upload for repository https://github.com/bvalot/galaxy commit d57c24d4b2c0c741d572af9ca0d09f8b82689640
bvalot
parents:
diff
changeset
|
169 print( |
c74e633b40e9
planemo upload for repository https://github.com/bvalot/galaxy commit d57c24d4b2c0c741d572af9ca0d09f8b82689640
bvalot
parents:
diff
changeset
|
170 " : ".join( |
c74e633b40e9
planemo upload for repository https://github.com/bvalot/galaxy commit d57c24d4b2c0c741d572af9ca0d09f8b82689640
bvalot
parents:
diff
changeset
|
171 [fastqname, str(length) + "bp*2", str(count), str(coverage) + "X"] |
c74e633b40e9
planemo upload for repository https://github.com/bvalot/galaxy commit d57c24d4b2c0c741d572af9ca0d09f8b82689640
bvalot
parents:
diff
changeset
|
172 ) |
c74e633b40e9
planemo upload for repository https://github.com/bvalot/galaxy commit d57c24d4b2c0c741d572af9ca0d09f8b82689640
bvalot
parents:
diff
changeset
|
173 ) |
c74e633b40e9
planemo upload for repository https://github.com/bvalot/galaxy commit d57c24d4b2c0c741d572af9ca0d09f8b82689640
bvalot
parents:
diff
changeset
|
174 |
c74e633b40e9
planemo upload for repository https://github.com/bvalot/galaxy commit d57c24d4b2c0c741d572af9ca0d09f8b82689640
bvalot
parents:
diff
changeset
|
175 # check coverage |
c74e633b40e9
planemo upload for repository https://github.com/bvalot/galaxy commit d57c24d4b2c0c741d572af9ca0d09f8b82689640
bvalot
parents:
diff
changeset
|
176 if coverage <= args.coverage: |
c74e633b40e9
planemo upload for repository https://github.com/bvalot/galaxy commit d57c24d4b2c0c741d572af9ca0d09f8b82689640
bvalot
parents:
diff
changeset
|
177 if args.copy and args.galaxy is False: |
c74e633b40e9
planemo upload for repository https://github.com/bvalot/galaxy commit d57c24d4b2c0c741d572af9ca0d09f8b82689640
bvalot
parents:
diff
changeset
|
178 if args.sfastq is not None: |
c74e633b40e9
planemo upload for repository https://github.com/bvalot/galaxy commit d57c24d4b2c0c741d572af9ca0d09f8b82689640
bvalot
parents:
diff
changeset
|
179 make_copy(args.sfastq.name, outdir, args.prefix) |
c74e633b40e9
planemo upload for repository https://github.com/bvalot/galaxy commit d57c24d4b2c0c741d572af9ca0d09f8b82689640
bvalot
parents:
diff
changeset
|
180 else: |
c74e633b40e9
planemo upload for repository https://github.com/bvalot/galaxy commit d57c24d4b2c0c741d572af9ca0d09f8b82689640
bvalot
parents:
diff
changeset
|
181 make_copy(args.rfastq.name, outdir, args.prefix) |
c74e633b40e9
planemo upload for repository https://github.com/bvalot/galaxy commit d57c24d4b2c0c741d572af9ca0d09f8b82689640
bvalot
parents:
diff
changeset
|
182 make_copy(args.lfastq.name, outdir, args.prefix) |
c74e633b40e9
planemo upload for repository https://github.com/bvalot/galaxy commit d57c24d4b2c0c741d572af9ca0d09f8b82689640
bvalot
parents:
diff
changeset
|
183 print("Coverage is less than threshold, no subsampling need") |
c74e633b40e9
planemo upload for repository https://github.com/bvalot/galaxy commit d57c24d4b2c0c741d572af9ca0d09f8b82689640
bvalot
parents:
diff
changeset
|
184 sys.exit(0) |
c74e633b40e9
planemo upload for repository https://github.com/bvalot/galaxy commit d57c24d4b2c0c741d572af9ca0d09f8b82689640
bvalot
parents:
diff
changeset
|
185 |
c74e633b40e9
planemo upload for repository https://github.com/bvalot/galaxy commit d57c24d4b2c0c741d572af9ca0d09f8b82689640
bvalot
parents:
diff
changeset
|
186 # performed subsampling |
c74e633b40e9
planemo upload for repository https://github.com/bvalot/galaxy commit d57c24d4b2c0c741d572af9ca0d09f8b82689640
bvalot
parents:
diff
changeset
|
187 if args.sfastq is not None: |
c74e633b40e9
planemo upload for repository https://github.com/bvalot/galaxy commit d57c24d4b2c0c741d572af9ca0d09f8b82689640
bvalot
parents:
diff
changeset
|
188 subread = int((float(args.genomesize) * args.coverage) / length) |
c74e633b40e9
planemo upload for repository https://github.com/bvalot/galaxy commit d57c24d4b2c0c741d572af9ca0d09f8b82689640
bvalot
parents:
diff
changeset
|
189 select = set(random.sample(range(0, count), subread)) |
c74e633b40e9
planemo upload for repository https://github.com/bvalot/galaxy commit d57c24d4b2c0c741d572af9ca0d09f8b82689640
bvalot
parents:
diff
changeset
|
190 if args.galaxy: |
c74e633b40e9
planemo upload for repository https://github.com/bvalot/galaxy commit d57c24d4b2c0c741d572af9ca0d09f8b82689640
bvalot
parents:
diff
changeset
|
191 make_subsampling_galaxy(args.sfastq.name, 1, select) |
c74e633b40e9
planemo upload for repository https://github.com/bvalot/galaxy commit d57c24d4b2c0c741d572af9ca0d09f8b82689640
bvalot
parents:
diff
changeset
|
192 else: |
c74e633b40e9
planemo upload for repository https://github.com/bvalot/galaxy commit d57c24d4b2c0c741d572af9ca0d09f8b82689640
bvalot
parents:
diff
changeset
|
193 make_subsampling(args.sfastq.name, outdir, args.prefix, select) |
c74e633b40e9
planemo upload for repository https://github.com/bvalot/galaxy commit d57c24d4b2c0c741d572af9ca0d09f8b82689640
bvalot
parents:
diff
changeset
|
194 print("Subsampling to " + str(subread)) |
c74e633b40e9
planemo upload for repository https://github.com/bvalot/galaxy commit d57c24d4b2c0c741d572af9ca0d09f8b82689640
bvalot
parents:
diff
changeset
|
195 else: |
c74e633b40e9
planemo upload for repository https://github.com/bvalot/galaxy commit d57c24d4b2c0c741d572af9ca0d09f8b82689640
bvalot
parents:
diff
changeset
|
196 subread = int((float(args.genomesize) * args.coverage) / (length * 2)) |
c74e633b40e9
planemo upload for repository https://github.com/bvalot/galaxy commit d57c24d4b2c0c741d572af9ca0d09f8b82689640
bvalot
parents:
diff
changeset
|
197 select = set(random.sample(range(0, count), subread)) |
c74e633b40e9
planemo upload for repository https://github.com/bvalot/galaxy commit d57c24d4b2c0c741d572af9ca0d09f8b82689640
bvalot
parents:
diff
changeset
|
198 if args.galaxy: |
c74e633b40e9
planemo upload for repository https://github.com/bvalot/galaxy commit d57c24d4b2c0c741d572af9ca0d09f8b82689640
bvalot
parents:
diff
changeset
|
199 make_subsampling_galaxy(args.rfastq.name, 1, select) |
c74e633b40e9
planemo upload for repository https://github.com/bvalot/galaxy commit d57c24d4b2c0c741d572af9ca0d09f8b82689640
bvalot
parents:
diff
changeset
|
200 make_subsampling_galaxy(args.lfastq.name, 2, select) |
c74e633b40e9
planemo upload for repository https://github.com/bvalot/galaxy commit d57c24d4b2c0c741d572af9ca0d09f8b82689640
bvalot
parents:
diff
changeset
|
201 else: |
c74e633b40e9
planemo upload for repository https://github.com/bvalot/galaxy commit d57c24d4b2c0c741d572af9ca0d09f8b82689640
bvalot
parents:
diff
changeset
|
202 make_subsampling(args.rfastq.name, outdir, args.prefix, select) |
c74e633b40e9
planemo upload for repository https://github.com/bvalot/galaxy commit d57c24d4b2c0c741d572af9ca0d09f8b82689640
bvalot
parents:
diff
changeset
|
203 make_subsampling(args.lfastq.name, outdir, args.prefix, select) |
c74e633b40e9
planemo upload for repository https://github.com/bvalot/galaxy commit d57c24d4b2c0c741d572af9ca0d09f8b82689640
bvalot
parents:
diff
changeset
|
204 print("Subsampling to " + str(subread)) |