Mercurial > repos > yufei-luo > s_mart
comparison commons/core/seq/test/Test_FastaUtils.py @ 18:94ab73e8a190
Uploaded
author | m-zytnicki |
---|---|
date | Mon, 29 Apr 2013 03:20:15 -0400 |
parents | 769e306b7933 |
children |
comparison
equal
deleted
inserted
replaced
17:b0e8584489e6 | 18:94ab73e8a190 |
---|---|
1500 | 1500 |
1501 os.remove(inFileName) | 1501 os.remove(inFileName) |
1502 os.remove(fileObs) | 1502 os.remove(fileObs) |
1503 os.remove(fileExp) | 1503 os.remove(fileExp) |
1504 | 1504 |
1505 def test_getNstreches_2(self): | |
1506 fileName = "dummy.fa" | |
1507 with open(fileName, "w") as f: | |
1508 f.write(">seq2\n") | |
1509 f.write("NNNNxxNNnNTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTT\n") | |
1510 f.write("AGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTT\n") | |
1511 f.write("AGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTT\n") | |
1512 f.write("AGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTT\n") | |
1513 f.write("AGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTT\n") | |
1514 f.write("AGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTT\n") | |
1515 f.write("AGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGGTTAGGGTTAGGGTTAGGGTTAGGGT\n") | |
1516 f.write("TAGGGCTAGGGTTAGGGGTTAGGGTTAGGGTTAGGCTTAGGGTTAGGGTTAGGGTTAGGG\n") | |
1517 f.write("\n") | |
1518 f.write("TTAGGGTTAGGGTTAGGGTTAGGAGTTAGGGTGTAGGGTTAGGGTTAGGGTTAGGGTTAG\n") | |
1519 f.write("GGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAG\n") | |
1520 f.write("GGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGCTAGGGTTAGGGTTAG\n") | |
1521 f.write("GGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAG\n") | |
1522 f.write("\n") | |
1523 f.write(">seq1\n") | |
1524 f.write("AAGTTGGACATTGAGGGCTTTCTTCGCCGTGTTTCGTTCTTTTCGACAAACAGCAGTGCT\n") | |
1525 f.write("TTGCGGATCATxxxxxxxxxxxxxxxTTTGTTTGAACAACCGACAATGCGACCAATTTCA\n") | |
1526 f.write("GCGTAGGTTTTACCTTCAGAGATCACGTTTTTAATCAAATTTCTTTTTTCGACGGTACAA\n") | |
1527 f.write("TGCTTTCCGCGACCCATGACTAGAGAATTTTTGGTCTTCGTTTGGAAAAAATTCAATTAA\n") | |
1528 f.write("AACCTTTAATACAACTCCTTNNTTTTCAAAATTTTTCGAAAAAAACCCAAAGCAATCACT\n") | |
1529 f.write("CCTATTAATTTTATTCAGCAAATACGTGTTCAGTGCTATTTTTGTNTACCGCCTCATTTC\n") | |
1530 f.write("\n") | |
1531 f.write("GCGCACTTTTGCAGCAAGTGCCCAAAAACAAAAAGAACCGTTACATTGAGAGACTAAAAA\n") | |
1532 f.write("TTTCTTGCTCAGAGAGCCAACATATGGTACTTATTATTCATGCAATCTGACTTAAAAAAA\n") | |
1533 f.write("TATAAACATTTAATAATTTTTTTTAGGAANNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN\n") | |
1534 f.write("NNATCAACTTTCCACCTGCAGTAGTGCTATTATTTTAACCGCAGCTGTATAxx\n") | |
1535 f.write("\n") | |
1536 f.write("") | |
1537 | |
1538 FastaUtils.writeNstreches(fileName, 2) | |
1539 obsFileName = "%s_Nstretches.map" % os.path.splitext(os.path.split(fileName)[1])[0] | |
1540 | |
1541 expFileName = "expNstretches.map" | |
1542 with open(expFileName, "w") as f: | |
1543 f.write("N_stretch\tseq1\t72\t86\n") | |
1544 f.write("N_stretch\tseq1\t261\t262\n") | |
1545 f.write("N_stretch\tseq1\t510\t542\n") | |
1546 f.write("N_stretch\tseq1\t592\t593\n") | |
1547 f.write("N_stretch\tseq2\t1\t10\n") | |
1548 | |
1549 self.assertTrue(FileUtils.are2FilesIdentical(obsFileName, expFileName)) | |
1550 | |
1551 os.remove(obsFileName) | |
1552 os.remove(expFileName) | |
1553 os.remove(fileName) | |
1554 | |
1555 def test_getNstreches_1(self): | |
1556 fileName = "dummy.fa" | |
1557 with open(fileName, "w") as f: | |
1558 f.write(">seq2\n") | |
1559 f.write("NNNNNNNNNNTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTT\n") | |
1560 f.write("AGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTT\n") | |
1561 f.write("AGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTT\n") | |
1562 f.write("AGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTT\n") | |
1563 f.write("AGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTT\n") | |
1564 f.write("AGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTT\n") | |
1565 f.write("AGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGGTTAGGGTTAGGGTTAGGGTTAGGGT\n") | |
1566 f.write("TAGGGCTAGGGTTAGGGGTTAGGGTTAGGGTTAGGCTTAGGGTTAGGGTTAGGGTTAGGG\n") | |
1567 f.write("TTAGGGTTAGGGTTAGGGTTAGGAGTTAGGGTGTAGGGTTAGGGTTAGGGTTAGGGTTAG\n") | |
1568 f.write("GGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAG\n") | |
1569 f.write("GGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGCTAGGGTTAGGGTTAG\n") | |
1570 f.write("GGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAG\n") | |
1571 f.write(">seq1\n") | |
1572 f.write("AAGTTGGACATTGAGGGCTTTCTTCGCCGTGTTTCGTTCTTTTCGACAAACAGCAGTGCT\n") | |
1573 f.write("TTGCGGATCATNNNNNNNNNNNNNNNTTTGTTTGAACAACCGACAATGCGACCAATTTCA\n") | |
1574 f.write("GCGTAGGTTTTACCTTCAGAGATCACGTTTTTAATCAAATTTCTTTTTTCGACGGTACAA\n") | |
1575 f.write("TGCTTTCCGCGACCCATGACTAGAGAATTTTTGGTCTTCGTTTGGAAAAAATTCAATTAA\n") | |
1576 f.write("AACCTTTAATACAACTCCTTNNTTTTCAAAATTTTTCGAAAAAAACCCAAAGCAATCACT\n") | |
1577 f.write("CCTATTAATTTTATTCAGCAAATACGTGTTCAGTGCTATTTTTGTNTACCGCCTCATTTC\n") | |
1578 f.write("GCGCACTTTTGCAGCAAGTGCCCAAAAACAAAAAGAACCGTTACATTGAGAGACTAAAAA\n") | |
1579 f.write("TTTCTTGCTCAGAGAGCCAACATATGGTACTTATTATTCATGCAATCTGACTTAAAAAAA\n") | |
1580 f.write("TATAAACATTTAATAATTTTTTTTAGGAANNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN\n") | |
1581 f.write("NNATCAACTTTCCACCTGCAGTAGTGCTATTATTTTAACCGCAGCTGTATAx\n") | |
1582 | |
1583 FastaUtils.writeNstreches(fileName, 1) | |
1584 obsFileName = "%s_Nstretches.map" % os.path.splitext(os.path.split(fileName)[1])[0] | |
1585 | |
1586 expFileName = "expNstretches.map" | |
1587 with open(expFileName, "w") as f: | |
1588 f.write("N_stretch\tseq1\t72\t86\n") | |
1589 f.write("N_stretch\tseq1\t261\t262\n") | |
1590 f.write("N_stretch\tseq1\t346\t346\n") | |
1591 f.write("N_stretch\tseq1\t510\t542\n") | |
1592 f.write("N_stretch\tseq1\t592\t592\n") | |
1593 f.write("N_stretch\tseq2\t1\t10\n") | |
1594 | |
1595 self.assertTrue(FileUtils.are2FilesIdentical(obsFileName, expFileName)) | |
1596 | |
1597 os.remove(obsFileName) | |
1598 os.remove(expFileName) | |
1599 os.remove(fileName) | |
1600 | |
1601 def test_getNstreches_0(self): | |
1602 fileName = "dummy.fa" | |
1603 with open(fileName, "w") as f: | |
1604 f.write(">seq2\n") | |
1605 f.write("NNNNNNNNNNTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTT\n") | |
1606 f.write("AGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTT\n") | |
1607 f.write("AGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTT\n") | |
1608 f.write("AGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTT\n") | |
1609 f.write("AGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTT\n") | |
1610 f.write("AGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTT\n") | |
1611 f.write("AGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGGTTAGGGTTAGGGTTAGGGTTAGGGT\n") | |
1612 f.write("TAGGGCTAGGGTTAGGGGTTAGGGTTAGGGTTAGGCTTAGGGTTAGGGTTAGGGTTAGGG\n") | |
1613 f.write("TTAGGGTTAGGGTTAGGGTTAGGAGTTAGGGTGTAGGGTTAGGGTTAGGGTTAGGGTTAG\n") | |
1614 f.write("GGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAG\n") | |
1615 f.write("GGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGCTAGGGTTAGGGTTAG\n") | |
1616 f.write("GGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAG\n") | |
1617 f.write(">seq1\n") | |
1618 f.write("AAGTTGGACATTGAGGGCTTTCTTCGCCGTGTTTCGTTCTTTTCGACAAACAGCAGTGCT\n") | |
1619 f.write("TTGCGGATCATNNNNNNNNNNNNNNNTTTGTTTGAACAACCGACAATGCGACCAATTTCA\n") | |
1620 f.write("GCGTAGGTTTTACCTTCAGAGATCACGTTTTTAATCAAATTTCTTTTTTCGACGGTACAA\n") | |
1621 f.write("TGCTTTCCGCGACCCATGACTAGAGAATTTTTGGTCTTCGTTTGGAAAAAATTCAATTAA\n") | |
1622 f.write("AACCTTTAATACAACTCCTTNNTTTTCAAAATTTTTCGAAAAAAACCCAAAGCAATCACT\n") | |
1623 f.write("CCTATTAATTTTATTCAGCAAATACGTGTTCAGTGCTATTTTTGTNTACCGCCTCATTTC\n") | |
1624 f.write("GCGCACTTTTGCAGCAAGTGCCCAAAAACAAAAAGAACCGTTACATTGAGAGACTAAAAA\n") | |
1625 f.write("TTTCTTGCTCAGAGAGCCAACATATGGTACTTATTATTCATGCAATCTGACTTAAAAAAA\n") | |
1626 f.write("TATAAACATTTAATAATTTTTTTTAGGAANNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN\n") | |
1627 f.write("NNATCAACTTTCCACCTGCAGTAGTGCTATTATTTTAACCGCAGCTGTATANN\n") | |
1628 | |
1629 FastaUtils.writeNstreches(fileName, 0) | |
1630 obsFileName = "%s_Nstretches.map" % os.path.splitext(os.path.split(fileName)[1])[0] | |
1631 | |
1632 expFileName = "expNstretches.map" | |
1633 with open(expFileName, "w") as f: | |
1634 pass | |
1635 | |
1636 self.assertTrue(FileUtils.are2FilesIdentical(obsFileName, expFileName)) | |
1637 | |
1638 os.remove(obsFileName) | |
1639 os.remove(expFileName) | |
1640 os.remove(fileName) | |
1641 | |
1642 def test_getNstreches_2_GFF(self): | |
1643 fileName = "dummy.fa" | |
1644 with open(fileName, "w") as f: | |
1645 f.write(">seq2\n") | |
1646 f.write("NNNNxxNNnNTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTT\n") | |
1647 f.write("AGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTT\n") | |
1648 f.write("AGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTT\n") | |
1649 f.write("AGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTT\n") | |
1650 f.write("AGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTT\n") | |
1651 f.write("AGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTT\n") | |
1652 f.write("AGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGGTTAGGGTTAGGGTTAGGGTTAGGGT\n") | |
1653 f.write("TAGGGCTAGGGTTAGGGGTTAGGGTTAGGGTTAGGCTTAGGGTTAGGGTTAGGGTTAGGG\n") | |
1654 f.write("\n") | |
1655 f.write("TTAGGGTTAGGGTTAGGGTTAGGAGTTAGGGTGTAGGGTTAGGGTTAGGGTTAGGGTTAG\n") | |
1656 f.write("GGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAG\n") | |
1657 f.write("GGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGCTAGGGTTAGGGTTAG\n") | |
1658 f.write("GGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAG\n") | |
1659 f.write("\n") | |
1660 f.write(">seq1\n") | |
1661 f.write("AAGTTGGACATTGAGGGCTTTCTTCGCCGTGTTTCGTTCTTTTCGACAAACAGCAGTGCT\n") | |
1662 f.write("TTGCGGATCATxxxxxxxxxxxxxxxTTTGTTTGAACAACCGACAATGCGACCAATTTCA\n") | |
1663 f.write("GCGTAGGTTTTACCTTCAGAGATCACGTTTTTAATCAAATTTCTTTTTTCGACGGTACAA\n") | |
1664 f.write("TGCTTTCCGCGACCCATGACTAGAGAATTTTTGGTCTTCGTTTGGAAAAAATTCAATTAA\n") | |
1665 f.write("AACCTTTAATACAACTCCTTNNTTTTCAAAATTTTTCGAAAAAAACCCAAAGCAATCACT\n") | |
1666 f.write("CCTATTAATTTTATTCAGCAAATACGTGTTCAGTGCTATTTTTGTNTACCGCCTCATTTC\n") | |
1667 f.write("\n") | |
1668 f.write("GCGCACTTTTGCAGCAAGTGCCCAAAAACAAAAAGAACCGTTACATTGAGAGACTAAAAA\n") | |
1669 f.write("TTTCTTGCTCAGAGAGCCAACATATGGTACTTATTATTCATGCAATCTGACTTAAAAAAA\n") | |
1670 f.write("TATAAACATTTAATAATTTTTTTTAGGAANNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN\n") | |
1671 f.write("NNATCAACTTTCCACCTGCAGTAGTGCTATTATTTTAACCGCAGCTGTATAxx\n") | |
1672 f.write("\n") | |
1673 f.write("") | |
1674 | |
1675 FastaUtils.writeNstreches(fileName, 2, outFormat = "gff") | |
1676 obsFileName = "%s_Nstretches.gff3" % os.path.splitext(os.path.split(fileName)[1])[0] | |
1677 | |
1678 expFileName = "expNstretches.gff3" | |
1679 with open(expFileName, "w") as f: | |
1680 f.write("##gff-version 3\n") | |
1681 f.write("seq1\tFastaUtils\tN_stretch\t72\t86\t.\t.\t.\tName=N_stretch_72-86\n") | |
1682 f.write("seq1\tFastaUtils\tN_stretch\t261\t262\t.\t.\t.\tName=N_stretch_261-262\n") | |
1683 f.write("seq1\tFastaUtils\tN_stretch\t510\t542\t.\t.\t.\tName=N_stretch_510-542\n") | |
1684 f.write("seq1\tFastaUtils\tN_stretch\t592\t593\t.\t.\t.\tName=N_stretch_592-593\n") | |
1685 f.write("seq2\tFastaUtils\tN_stretch\t1\t10\t.\t.\t.\tName=N_stretch_1-10\n") | |
1686 | |
1687 self.assertTrue(FileUtils.are2FilesIdentical(obsFileName, expFileName)) | |
1688 | |
1689 os.remove(obsFileName) | |
1690 os.remove(expFileName) | |
1691 os.remove(fileName) | |
1692 | |
1505 if __name__ == "__main__": | 1693 if __name__ == "__main__": |
1506 unittest.main() | 1694 unittest.main() |