Mercurial > repos > iuc > microsatbed
comparison find_str.py @ 4:5f8efb080f49 draft default tip
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/microsatbed commit 891fc6321cd94c9a63c880d75989d79521f1a9b6
| author | iuc |
|---|---|
| date | Sat, 14 Sep 2024 12:17:02 +0000 |
| parents | 2b970db61912 |
| children | |
comparison
equal
deleted
inserted
replaced
| 3:8c8299e553ec | 4:5f8efb080f49 |
|---|---|
| 5 from pyfastx import Fastx # 0.5.2 | 5 from pyfastx import Fastx # 0.5.2 |
| 6 | 6 |
| 7 """ | 7 """ |
| 8 Allows all STR or those for a subset of motifs to be written to a bed file | 8 Allows all STR or those for a subset of motifs to be written to a bed file |
| 9 Designed to build some of the microsatellite tracks from https://github.com/arangrhie/T2T-Polish/tree/master/pattern for the VGP. | 9 Designed to build some of the microsatellite tracks from https://github.com/arangrhie/T2T-Polish/tree/master/pattern for the VGP. |
| 10 Note that there are only four possible types of dinucleotide repeat, because CA = AC = GT = TG, GA = AG = CT = TC, AT = TA, and GC = CG. | |
| 10 """ | 11 """ |
| 11 | 12 |
| 12 | 13 |
| 13 def getDensity(name, bed, chrlen, winwidth): | 14 def getDensity(name, bed, chrlen, winwidth): |
| 14 """ | 15 """ |
| 20 for b in bed: | 21 for b in bed: |
| 21 nt = b[5] | 22 nt = b[5] |
| 22 bin = int(b[1] / winwidth) | 23 bin = int(b[1] / winwidth) |
| 23 d[bin] += nt | 24 d[bin] += nt |
| 24 bedg = [ | 25 bedg = [ |
| 25 (name, (x * winwidth), ((x + 1) * winwidth) - 1, float(d[x])) | 26 (name, (x * winwidth), ((x + 1) * winwidth), float(d[x])) |
| 26 for x in range(nwin + 1) | 27 for x in range(nwin + 1) |
| 27 if (x + 1) * winwidth <= chrlen | 28 if (x + 1) * winwidth <= chrlen |
| 28 ] | 29 ] |
| 29 return bedg | 30 return bedg |
| 30 | 31 |
| 80 cbed.append(row) | 81 cbed.append(row) |
| 81 elif args.hexa and len(ssr.motif) == 6: | 82 elif args.hexa and len(ssr.motif) == 6: |
| 82 cbed.append(row) | 83 cbed.append(row) |
| 83 if args.bigwig: | 84 if args.bigwig: |
| 84 w = getDensity(name, cbed, chrlen, args.winwidth) | 85 w = getDensity(name, cbed, chrlen, args.winwidth) |
| 85 wig += w | 86 wig.extend(w) |
| 86 bed += cbed | 87 bed.extend(cbed) |
| 87 if args.bigwig: | 88 if args.bigwig: |
| 88 wig.sort() | 89 wig.sort() |
| 89 bedg = ["%s %d %d %.2f" % x for x in wig] | |
| 90 with open("temp.bedg", "w") as bw: | 90 with open("temp.bedg", "w") as bw: |
| 91 bw.write("\n".join(bedg)) | 91 for row in wig: |
| 92 bw.write("%s %d %d %.2f\n" % row) | |
| 92 chroms = ["%s\t%s" % (x, chrlens[x]) for x in chrlens.keys()] | 93 chroms = ["%s\t%s" % (x, chrlens[x]) for x in chrlens.keys()] |
| 93 with open("temp.chromlen", "w") as cl: | 94 with open("temp.chromlen", "w") as cl: |
| 94 cl.write("\n".join(chroms)) | 95 cl.write("\n".join(chroms)) |
| 95 cmd = ["bedGraphToBigWig", "temp.bedg", "temp.chromlen", args.bed] | 96 cmd = ["bedGraphToBigWig", "temp.bedg", "temp.chromlen", args.bed] |
| 96 subprocess.run(cmd) | 97 subprocess.run(cmd) |
| 97 else: | 98 else: |
| 98 bed.sort() | 99 bed.sort() |
| 99 obed = ["%s\t%d\t%d\t%s_%d\t%d" % x for x in bed] | |
| 100 with open(args.bed, "w") as outbed: | 100 with open(args.bed, "w") as outbed: |
| 101 outbed.write("\n".join(obed)) | 101 for row in bed: |
| 102 outbed.write("\n") | 102 outbed.write("%s\t%d\t%d\t%s_%d\t%d\n" % row) |
| 103 | 103 |
| 104 | 104 |
| 105 if __name__ == "__main__": | 105 if __name__ == "__main__": |
| 106 parser = argparse.ArgumentParser() | 106 parser = argparse.ArgumentParser() |
| 107 a = parser.add_argument | 107 a = parser.add_argument |
