Mercurial > repos > jackcurragh > ribogalaxy_get_chrom_sizes
comparison get_chrom_sizes/calculating_chrom.sizes.py @ 3:cfdf764b9226 draft
Uploaded
author | jackcurragh |
---|---|
date | Thu, 21 Apr 2022 10:39:18 +0000 |
parents | 27f3669eda60 |
children | c6a297d05c8e |
comparison
equal
deleted
inserted
replaced
2:a244b29ce89a | 3:cfdf764b9226 |
---|---|
1 # input a genome file and return a file genome.chrom.sizes to be associated with the custom build (or just have it as an output to be used later in the history. | 1 # input a genome file and return a file genome.chrom.sizes to be associated with the custom build (or just have it as an output to be used later in the history. |
2 # adapted from https://bioexpressblog.wordpress.com/2014/04/15/calculate-length-of-all-sequences-in-an-multi-fasta-file/ | 2 # adapted from https://bioexpressblog.wordpress.com/2014/04/15/calculate-length-of-all-sequences-in-an-multi-fasta-file/ |
3 from sys import argv | 3 from sys import argv |
4 # python calculating_chrom.sizes.py genome_input.fa output.chrom.sizes | 4 # python calculating_chrom.sizes.py genome_input.fa output.chrom.sizes |
5 genome = str(argv[1]) | 5 genome = str(argv[1]) |
6 output = str(argv[2]) | 6 prefix = str(argv[2]) |
| | 7 output = str(argv[3]) |
7 # genome = 'test-data/test.fasta' | 8 # genome = 'test-data/test.fasta' |
8 # output = "test-data/test_chrom.sizes" | 9 # output = "test-data/test_chrom.sizes" |
9 | 10 |
10 chromSizesoutput = open(output,"w") | 11 chromSizesoutput = open(output,"w") |
11 | 12 |
18 record = [line.strip("\n").split(' ')[0][1:], 0] | 19 record = [line.strip("\n").split(' ')[0][1:], 0] |
19 | 20 |
20 else: | 21 else: |
21 sequence = line.strip('\n') | 22 sequence = line.strip('\n') |
22 record[1] += len(sequence) | 23 record[1] += len(sequence) |
23 | 24 |
| | 25 if record not in records: |
| | 26 records.append(record) |
| | 27 |
24 for seq_record in records: | 28 for seq_record in records: |
25 output_line = '%s\t%i\n' % (seq_record[0], seq_record[1]) | 29 output_line = f"{prefix}{seq_record[0]}\t{seq_record[1]}\n" |
26 chromSizesoutput.write(output_line) | 30 chromSizesoutput.write(output_line) |
27 | 31 |
28 chromSizesoutput.close() | 32 chromSizesoutput.close() |