comparison commons/core/seq/test/Test_FastaUtils.py @ 18:94ab73e8a190

Uploaded
author m-zytnicki
date Mon, 29 Apr 2013 03:20:15 -0400
parents 769e306b7933
children
comparison
equal deleted inserted replaced
17:b0e8584489e6 18:94ab73e8a190
1500 1500
1501 os.remove(inFileName) 1501 os.remove(inFileName)
1502 os.remove(fileObs) 1502 os.remove(fileObs)
1503 os.remove(fileExp) 1503 os.remove(fileExp)
1504 1504
def test_getNstreches_2(self):
    """Check the map output of FastaUtils.writeNstreches() called with 2.

    The fixture holds two sequences (seq2 first, then seq1) whose stretches
    are spelled with a mix of "N", "n" and "x", and whose sequence lines are
    interrupted by blank lines.  The expected map file lists seq1 before
    seq2, uses coordinates that ignore the blank lines, and does not contain
    the lone "N" at seq1:346.
    NOTE(review): the second argument is presumably the minimal stretch
    length -- confirm against FastaUtils.writeNstreches.
    """
    fileName = "dummy.fa"
    obsFileName = "%s_Nstretches.map" % os.path.splitext(os.path.split(fileName)[1])[0]
    expFileName = "expNstretches.map"

    fastaLines = [
        ">seq2\n",
        "NNNNxxNNnNTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTT\n",
        "AGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTT\n",
        "AGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTT\n",
        "AGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTT\n",
        "AGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTT\n",
        "AGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTT\n",
        "AGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGGTTAGGGTTAGGGTTAGGGTTAGGGT\n",
        "TAGGGCTAGGGTTAGGGGTTAGGGTTAGGGTTAGGCTTAGGGTTAGGGTTAGGGTTAGGG\n",
        "\n",
        "TTAGGGTTAGGGTTAGGGTTAGGAGTTAGGGTGTAGGGTTAGGGTTAGGGTTAGGGTTAG\n",
        "GGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAG\n",
        "GGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGCTAGGGTTAGGGTTAG\n",
        "GGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAG\n",
        "\n",
        ">seq1\n",
        "AAGTTGGACATTGAGGGCTTTCTTCGCCGTGTTTCGTTCTTTTCGACAAACAGCAGTGCT\n",
        "TTGCGGATCATxxxxxxxxxxxxxxxTTTGTTTGAACAACCGACAATGCGACCAATTTCA\n",
        "GCGTAGGTTTTACCTTCAGAGATCACGTTTTTAATCAAATTTCTTTTTTCGACGGTACAA\n",
        "TGCTTTCCGCGACCCATGACTAGAGAATTTTTGGTCTTCGTTTGGAAAAAATTCAATTAA\n",
        "AACCTTTAATACAACTCCTTNNTTTTCAAAATTTTTCGAAAAAAACCCAAAGCAATCACT\n",
        "CCTATTAATTTTATTCAGCAAATACGTGTTCAGTGCTATTTTTGTNTACCGCCTCATTTC\n",
        "\n",
        "GCGCACTTTTGCAGCAAGTGCCCAAAAACAAAAAGAACCGTTACATTGAGAGACTAAAAA\n",
        "TTTCTTGCTCAGAGAGCCAACATATGGTACTTATTATTCATGCAATCTGACTTAAAAAAA\n",
        "TATAAACATTTAATAATTTTTTTTAGGAANNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN\n",
        "NNATCAACTTTCCACCTGCAGTAGTGCTATTATTTTAACCGCAGCTGTATAxx\n",
        "\n",
    ]
    expLines = [
        "N_stretch\tseq1\t72\t86\n",
        "N_stretch\tseq1\t261\t262\n",
        "N_stretch\tseq1\t510\t542\n",
        "N_stretch\tseq1\t592\t593\n",
        "N_stretch\tseq2\t1\t10\n",
    ]

    try:
        with open(fileName, "w") as f:
            f.writelines(fastaLines)

        FastaUtils.writeNstreches(fileName, 2)

        with open(expFileName, "w") as f:
            f.writelines(expLines)

        self.assertTrue(FileUtils.are2FilesIdentical(obsFileName, expFileName))
    finally:
        # Clean up even when the assertion fails or writeNstreches() raises:
        # the sibling tests reuse these file names, so leftovers must not
        # survive this test.  The existence check keeps a missing file from
        # masking the original failure.
        for path in (obsFileName, expFileName, fileName):
            if os.path.exists(path):
                os.remove(path)
1554
def test_getNstreches_1(self):
    """Check the map output of FastaUtils.writeNstreches() called with 1.

    The fixture holds two sequences (seq2 first, then seq1) without blank
    lines; seq1 ends with a single "x".  The expected map file lists seq1
    before seq2 and includes the one-base stretches at seq1:346 and
    seq1:592.
    NOTE(review): the second argument is presumably the minimal stretch
    length -- confirm against FastaUtils.writeNstreches.
    """
    fileName = "dummy.fa"
    obsFileName = "%s_Nstretches.map" % os.path.splitext(os.path.split(fileName)[1])[0]
    expFileName = "expNstretches.map"

    fastaLines = [
        ">seq2\n",
        "NNNNNNNNNNTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTT\n",
        "AGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTT\n",
        "AGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTT\n",
        "AGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTT\n",
        "AGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTT\n",
        "AGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTT\n",
        "AGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGGTTAGGGTTAGGGTTAGGGTTAGGGT\n",
        "TAGGGCTAGGGTTAGGGGTTAGGGTTAGGGTTAGGCTTAGGGTTAGGGTTAGGGTTAGGG\n",
        "TTAGGGTTAGGGTTAGGGTTAGGAGTTAGGGTGTAGGGTTAGGGTTAGGGTTAGGGTTAG\n",
        "GGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAG\n",
        "GGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGCTAGGGTTAGGGTTAG\n",
        "GGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAG\n",
        ">seq1\n",
        "AAGTTGGACATTGAGGGCTTTCTTCGCCGTGTTTCGTTCTTTTCGACAAACAGCAGTGCT\n",
        "TTGCGGATCATNNNNNNNNNNNNNNNTTTGTTTGAACAACCGACAATGCGACCAATTTCA\n",
        "GCGTAGGTTTTACCTTCAGAGATCACGTTTTTAATCAAATTTCTTTTTTCGACGGTACAA\n",
        "TGCTTTCCGCGACCCATGACTAGAGAATTTTTGGTCTTCGTTTGGAAAAAATTCAATTAA\n",
        "AACCTTTAATACAACTCCTTNNTTTTCAAAATTTTTCGAAAAAAACCCAAAGCAATCACT\n",
        "CCTATTAATTTTATTCAGCAAATACGTGTTCAGTGCTATTTTTGTNTACCGCCTCATTTC\n",
        "GCGCACTTTTGCAGCAAGTGCCCAAAAACAAAAAGAACCGTTACATTGAGAGACTAAAAA\n",
        "TTTCTTGCTCAGAGAGCCAACATATGGTACTTATTATTCATGCAATCTGACTTAAAAAAA\n",
        "TATAAACATTTAATAATTTTTTTTAGGAANNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN\n",
        "NNATCAACTTTCCACCTGCAGTAGTGCTATTATTTTAACCGCAGCTGTATAx\n",
    ]
    expLines = [
        "N_stretch\tseq1\t72\t86\n",
        "N_stretch\tseq1\t261\t262\n",
        "N_stretch\tseq1\t346\t346\n",
        "N_stretch\tseq1\t510\t542\n",
        "N_stretch\tseq1\t592\t592\n",
        "N_stretch\tseq2\t1\t10\n",
    ]

    try:
        with open(fileName, "w") as f:
            f.writelines(fastaLines)

        FastaUtils.writeNstreches(fileName, 1)

        with open(expFileName, "w") as f:
            f.writelines(expLines)

        self.assertTrue(FileUtils.are2FilesIdentical(obsFileName, expFileName))
    finally:
        # Clean up even when the assertion fails or writeNstreches() raises:
        # the sibling tests reuse these file names, so leftovers must not
        # survive this test.  The existence check keeps a missing file from
        # masking the original failure.
        for path in (obsFileName, expFileName, fileName):
            if os.path.exists(path):
                os.remove(path)
1600
def test_getNstreches_0(self):
    """Check the map output of FastaUtils.writeNstreches() called with 0.

    The fixture holds two sequences (seq2 first, then seq1) that do contain
    runs of "N", yet the expected map file is empty.
    NOTE(review): why a value of 0 yields no stretch at all is not visible
    from this test -- confirm against FastaUtils.writeNstreches.
    """
    fileName = "dummy.fa"
    obsFileName = "%s_Nstretches.map" % os.path.splitext(os.path.split(fileName)[1])[0]
    expFileName = "expNstretches.map"

    fastaLines = [
        ">seq2\n",
        "NNNNNNNNNNTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTT\n",
        "AGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTT\n",
        "AGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTT\n",
        "AGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTT\n",
        "AGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTT\n",
        "AGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTT\n",
        "AGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGGTTAGGGTTAGGGTTAGGGTTAGGGT\n",
        "TAGGGCTAGGGTTAGGGGTTAGGGTTAGGGTTAGGCTTAGGGTTAGGGTTAGGGTTAGGG\n",
        "TTAGGGTTAGGGTTAGGGTTAGGAGTTAGGGTGTAGGGTTAGGGTTAGGGTTAGGGTTAG\n",
        "GGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAG\n",
        "GGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGCTAGGGTTAGGGTTAG\n",
        "GGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAG\n",
        ">seq1\n",
        "AAGTTGGACATTGAGGGCTTTCTTCGCCGTGTTTCGTTCTTTTCGACAAACAGCAGTGCT\n",
        "TTGCGGATCATNNNNNNNNNNNNNNNTTTGTTTGAACAACCGACAATGCGACCAATTTCA\n",
        "GCGTAGGTTTTACCTTCAGAGATCACGTTTTTAATCAAATTTCTTTTTTCGACGGTACAA\n",
        "TGCTTTCCGCGACCCATGACTAGAGAATTTTTGGTCTTCGTTTGGAAAAAATTCAATTAA\n",
        "AACCTTTAATACAACTCCTTNNTTTTCAAAATTTTTCGAAAAAAACCCAAAGCAATCACT\n",
        "CCTATTAATTTTATTCAGCAAATACGTGTTCAGTGCTATTTTTGTNTACCGCCTCATTTC\n",
        "GCGCACTTTTGCAGCAAGTGCCCAAAAACAAAAAGAACCGTTACATTGAGAGACTAAAAA\n",
        "TTTCTTGCTCAGAGAGCCAACATATGGTACTTATTATTCATGCAATCTGACTTAAAAAAA\n",
        "TATAAACATTTAATAATTTTTTTTAGGAANNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN\n",
        "NNATCAACTTTCCACCTGCAGTAGTGCTATTATTTTAACCGCAGCTGTATANN\n",
    ]

    try:
        with open(fileName, "w") as f:
            f.writelines(fastaLines)

        FastaUtils.writeNstreches(fileName, 0)

        # The expected result is an empty file: just create it.
        with open(expFileName, "w") as f:
            pass

        self.assertTrue(FileUtils.are2FilesIdentical(obsFileName, expFileName))
    finally:
        # Clean up even when the assertion fails or writeNstreches() raises:
        # the sibling tests reuse these file names, so leftovers must not
        # survive this test.  The existence check keeps a missing file from
        # masking the original failure.
        for path in (obsFileName, expFileName, fileName):
            if os.path.exists(path):
                os.remove(path)
1641
def test_getNstreches_2_GFF(self):
    """Check the GFF3 output of FastaUtils.writeNstreches() called with 2.

    The fixture holds two sequences (seq2 first, then seq1) whose stretches
    are spelled with a mix of "N", "n" and "x", and whose sequence lines are
    interrupted by blank lines.  With outFormat = "gff" the observed file is
    looked up under the ".gff3" extension; the expected file starts with the
    "##gff-version 3" header, lists seq1 before seq2, and does not contain
    the lone "N" at seq1:346.
    NOTE(review): the second argument is presumably the minimal stretch
    length -- confirm against FastaUtils.writeNstreches.
    """
    fileName = "dummy.fa"
    obsFileName = "%s_Nstretches.gff3" % os.path.splitext(os.path.split(fileName)[1])[0]
    expFileName = "expNstretches.gff3"

    fastaLines = [
        ">seq2\n",
        "NNNNxxNNnNTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTT\n",
        "AGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTT\n",
        "AGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTT\n",
        "AGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTT\n",
        "AGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTT\n",
        "AGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTT\n",
        "AGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGGTTAGGGTTAGGGTTAGGGTTAGGGT\n",
        "TAGGGCTAGGGTTAGGGGTTAGGGTTAGGGTTAGGCTTAGGGTTAGGGTTAGGGTTAGGG\n",
        "\n",
        "TTAGGGTTAGGGTTAGGGTTAGGAGTTAGGGTGTAGGGTTAGGGTTAGGGTTAGGGTTAG\n",
        "GGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAG\n",
        "GGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGCTAGGGTTAGGGTTAG\n",
        "GGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAG\n",
        "\n",
        ">seq1\n",
        "AAGTTGGACATTGAGGGCTTTCTTCGCCGTGTTTCGTTCTTTTCGACAAACAGCAGTGCT\n",
        "TTGCGGATCATxxxxxxxxxxxxxxxTTTGTTTGAACAACCGACAATGCGACCAATTTCA\n",
        "GCGTAGGTTTTACCTTCAGAGATCACGTTTTTAATCAAATTTCTTTTTTCGACGGTACAA\n",
        "TGCTTTCCGCGACCCATGACTAGAGAATTTTTGGTCTTCGTTTGGAAAAAATTCAATTAA\n",
        "AACCTTTAATACAACTCCTTNNTTTTCAAAATTTTTCGAAAAAAACCCAAAGCAATCACT\n",
        "CCTATTAATTTTATTCAGCAAATACGTGTTCAGTGCTATTTTTGTNTACCGCCTCATTTC\n",
        "\n",
        "GCGCACTTTTGCAGCAAGTGCCCAAAAACAAAAAGAACCGTTACATTGAGAGACTAAAAA\n",
        "TTTCTTGCTCAGAGAGCCAACATATGGTACTTATTATTCATGCAATCTGACTTAAAAAAA\n",
        "TATAAACATTTAATAATTTTTTTTAGGAANNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN\n",
        "NNATCAACTTTCCACCTGCAGTAGTGCTATTATTTTAACCGCAGCTGTATAxx\n",
        "\n",
    ]
    expLines = [
        "##gff-version 3\n",
        "seq1\tFastaUtils\tN_stretch\t72\t86\t.\t.\t.\tName=N_stretch_72-86\n",
        "seq1\tFastaUtils\tN_stretch\t261\t262\t.\t.\t.\tName=N_stretch_261-262\n",
        "seq1\tFastaUtils\tN_stretch\t510\t542\t.\t.\t.\tName=N_stretch_510-542\n",
        "seq1\tFastaUtils\tN_stretch\t592\t593\t.\t.\t.\tName=N_stretch_592-593\n",
        "seq2\tFastaUtils\tN_stretch\t1\t10\t.\t.\t.\tName=N_stretch_1-10\n",
    ]

    try:
        with open(fileName, "w") as f:
            f.writelines(fastaLines)

        FastaUtils.writeNstreches(fileName, 2, outFormat = "gff")

        with open(expFileName, "w") as f:
            f.writelines(expLines)

        self.assertTrue(FileUtils.are2FilesIdentical(obsFileName, expFileName))
    finally:
        # Clean up even when the assertion fails or writeNstreches() raises:
        # the sibling tests reuse "dummy.fa", so leftovers must not survive
        # this test.  The existence check keeps a missing file from masking
        # the original failure.
        for path in (obsFileName, expFileName, fileName):
            if os.path.exists(path):
                os.remove(path)
1692
# Script entry point: run every test case of this module through unittest.
if __name__ == "__main__":
    unittest.main()