# HG changeset patch # User Jim Johnson # Date 1347043923 18000 # Node ID 34a799d173f7e1ada9806c4abe32a781e4bd76b4 # Parent 23f5701549b1a03a52bf0701d1ca1cf88036f104 Add tool_dependencies and functional test diff -r 23f5701549b1 -r 34a799d173f7 cd_hit_est.xml --- a/cd_hit_est.xml Mon Sep 12 10:43:08 2011 -0400 +++ b/cd_hit_est.xml Fri Sep 07 13:52:03 2012 -0500 @@ -1,5 +1,8 @@ - + Cluster a nucleotide dataset into representative sequences + + cd-hit + cd-hit-est -i $fasta_in -o rep_seq -c $similarity -n $wordsize $strand @@ -24,10 +27,23 @@ - - cd-hit-est - + + + + + + + + + + + + + + + + **CD-HIT-EST** diff -r 23f5701549b1 -r 34a799d173f7 test-data/cd_hit_est_in.fa --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/cd_hit_est_in.fa Fri Sep 07 13:52:03 2012 -0500 @@ -0,0 +1,74 @@ +>F12Fcsw_481739 +ACCGGCGCACGGGTGAGTAACACGTATCCAACCTGCCTCATACTCGGGGATAGCCTTTCGAAAGAAAGATTAATATCCGATAGCATATATTTCCCGCATGGGTTTTATATTAAAGAAATTCGGTATGAGATGGGGACGCGTTCCATTAGTTTGTTGGCGGGGTAACGGCCCACCAAGACTACGATGGATAGGGGTTCTGAGAGGAAGGTCCCCCACATTGGAACTGAGACACGGTCCAA +>F14Fcsw_133982 +GGCGACCGGCGCACGGGTGAGTAACACGTATCCAACCTGCCTCATACTCGGGGATAGCCTTTCGAAAGAAAGATTAATATCCGATAGCATATATTTCCCGCATGGGTTTTATATTAAAGAAATTCGGTATGAGATGGGGATGCGTTCCATTAGTTTGTTGGCGGGGTAACGGCCCACCAAGACTACGATGGATAGGGGTTCTGAGAGGAAGGTCCCCCACATTGGAACTGAGACACGGCCCAA +>F14Fcsw_149685 +GGCGACCGGCGCACGGGTGAGTAACACGTATCCAACCTGCCTCATACTCGGGGATAGCCTTTCGAAAGAAAGATTAATATCCGATAGCATATATTTCCCGCATGGGTTTTATATTAAAGAAATTCGGTATGAGATGGGGATGCGTTCCATTAGTTTGTTGGGGGGTAACGGCCCACCAAGACTACGATGGATAGGGGTTCTGAGAGGAAGGTCCCCCACATTGGAACTGAGACACGGTCCAA +>F14Fcsw_175165 +CGGGTGAGTAACACGTATCCAACCTGCCTCATACTCGGGGATAGCCTTTCGAAAGAAAGATTAATATCCGATAGCATATATTTCCCGCATGGGTTTTATATTAAAGAAATTCGGTATGAGATGGGGATGCGTTCCATTAGATAGTAGGCGGGGTAACGGCCCACCTAGTCTTCGATGGATAGGGGTTCTGAGAGGAAGGTCCCCCACATTGGAACTGAGACACGGTCCAA +>F14Fcsw_176364 +ACCGGCGCACGGGTGAGTAACACGTATCCAACCTGCCTCATACTCGGGGATAGCCTTTCGAAAGAAAGATTAATATCCGATAGCATATATTTCCCGCATGGGTTTTATATTAAAGAAATTCGGTATGAGATGGGGATGCGTTCCATTAGGTTGTTGGCGGGGTAACGGCCCACCAAGCCTTCGATGGATAGGGGTTCTGAGAGGAAGGTCCCCCACATTGGAACTGAGACGCGGTCCAA +>F14Fcsw_224425 +ACCGGCGCACGGGTGAGTAACACGTATCCAACCTGCCTCATACTCGGGGATAGCCTTTCGAAAGAAAGATTAATATCCGATAGCATATATTTCCCGCATGGGTTTTATATTAAAGAAATTCGGTATGAGATGGGGATGCGTTCCATTAGTTTGTTGGCGGGGTAACGGCCCACCAAGACTACGATGGATAGGGGTTCTGAGAGGAAGGTCCCCCACATTGGAACTAAGACACGGTCCAA +>F14Fcsw_27361 +CGACCGGCGCACGGGTGAGTAACACGTATCCAACCTGCCTCATACTCGGGGATAGCCTTTCGAAAGAAAGATTAATATCCGATAGCATATATTTCCCGCATGGGTTTTATATTAAAGAAATTCGGTATGAGATGGGGATGCGTTCCATTAGTTTGTTGGGGGGTAACGGCCCACCAAGACTACGATGGATAGGGGTTCTGAGAGGAAGGTCCCCCACATTGGAACTGAGACACGGTCCAA +>F14Fcsw_2745 +GACCGGCGCACGGGTGAGTAACACGTATCCAACCTGCCTCATACTCGGGGATAGCCTTTCGAAAGAAAGATTAATATCCGATAGCATATATTTCCCGCATGGGTTTTATATTAAAGAAATTCGGTATGAGATGGGGATGCGTTCCATTAGTTTGTTGGGGGGTAACGGCCCACCAAGACTACGATGGATAGGGGTTCTGAGAGGAAGGTCCCCCACATTGGAACTGAGACACGGTCCAA +>F14Fcsw_37069 +ACCGGCGCACGGGTGAGTAACACGTATCCAACCTGCCTCATACTCGGGGATAGCCTTTCGAAAGAAAGATTAATATCCGATAGCATATATTTCCCGCATGGGTTTTATATTAAAGAAATTCGGTATGAGATGGGGATGCGTTCCATTAGTTTGTTGGGGGGTAACGGCCCACCAAGACTACGATGGATAGGGGTTCTGAGAGGAAGGTCCCCCACATTGGAACTGAGACACGGCCCAG +>F14Fcsw_38031 +ACCGGCGCACGGGTGAGTAACACGTATCCAACCTGCCTCATACTCGGGGATAGCCTTTCGAAAGAAAGATTAATATCCGATAGCATATATTTCCCGCATGGGTTTTATATTAAAGAAATTCGGTATGAGATGGGGATGCGTTCCATTAGTTTGTTGGCGGGGTAACGGCCCACCAAGACTACGATGGATAGGGGTTCTGAGAGGAAGGTCCCCCACATTGGAACTGAGACACGGTCAA +>F14Fcsw_49588 +ACCGGCGCACGGGTGAGTAACACGTATCCAACCTGCCTCATACTCGGGGATAGCCTTTCGAAAGAAAGATTAATATCCGATAGCATATATTTCCCGCATGGGTTTTATATTAAAGAAATTCGGTATGAGATGGGGATGCGTTCCATTAGTTTGTTGGCGGGGTAACGGCCCACCAAGACTACGATGGATAGGGGTTCTGAGAGGAAGGTCCCCCACATTGGAACTGAGACGCGGTCCAA +>F22Fcsw_400293 +CCGGCGCACGGGTGAGTAACACGTATCCAACCTTCCGTACACTCAGGGATAGCCTTTCGAAAGAAAGATTAATACCCGATGGCATAGTTCTTCCGCATGGTAGAACTATTAAAGGATTTCGGTCATCGATGGGGATGCGTTCCATTAGGTTGTTGGCGGGGTAACGGCCCACCAAGGCAACGATCAGTAGGGGTTCTGAGAGGAAGGTCCCCCACATTGGAACTGAGACACGGTCCAA +>F23Fcsw_133990 +GGCGACCGGCGCACGGGTGAGTAACACGTATCCAACCTGCCGACAACACTGGGATAGCCTTTCGAAAGAAAGATTAATACCGGATGGCATAGTTTTCCCGCATGGAAAAACTATTAAAGAATTTCGGTTATCGATGGGGATGCGTTCCATTAGGCAGTTGGCGGGGTAACGGCCCACCAAACCGACGATGGATAGGGGTTCTGAGAGGAAGGTCCCCCACATTGGAACTGAGACACGGTCCAA +>F23Fcsw_160873 +CGGGTGAGTAACGCGTATCCAACCTGCCTCATACTCGGGGATAGCCTTTCGAAAGAAAGATTAATATCCGATAGCATATATTTCCCGCATGGGTTTTATATTAAAGAAATTCGGTATGAGATGGGGATGCGTTCCATTAGGTAGTAGGCGGGGTAACGGCCCACCTAGCCAACGATGGATAGGGGTTCTGAGAGGAAGGTCCCCCACATTGGAACTGAGACACGGTCCAA +>F23Fcsw_86009 +GGCGACCGGCGCACGGGTGAGTAACGCGTATCCAACCTTCCGTACACTCAGGGATAGCCTTTCGAAAGAAAGATTAATACCCGATGGCATAGTTCTTCCGCATGGTAGAACTATTAAAGGATTTCGGTCATCGATGGGGATGCGTTCCATTAGGTTGTTGGCGGGGTAACGGCCCACCAAGGCAACGATCAGTAGGGGTTCTGAGAGGAAGGTCCCCCACATTGGAACTGAGACACGGTCCAA +>F23Fcsw_96640 +TCCGTACACTCAGGGATAGCCTTTCGAAAGAAAGATTAATACCCGATGGCATAGTTCTTCCGCATGGTAGAACTATTAAAGAATTTCGGTCATCGATGGGGATGCGTTCCATTAGGTTGTTGGCGGGGTAACGGCCCACCAAGCCTTCGATGGATAGGGGTTCTGAGAGGAAGGTCCCCCACATTGGAACTGAGACACGGTCCAA +>F31Fcsw_135439 +TTCGAAAGAAAGATTAATATCCGATAGCATATATTTCCCGCATGGGTTTTATATTAAAGAAATTCGGTATGAGATGGGGATGCGTTCCATTAGGTAGTAGGCGGGGTAACGGCCCACCTAGCCGGCGATGGATAGGGGTTCTGAGAGGAAGGTCCCCCACATTGGAACTGAGACACGGTCCAA +>F32Fcsw_322472 +AACCTTCCGTACACTCAGGGATAGCCTTTCGAAAGAAAGATTAATACCCGATGGCATAGTTCTTCCGCATGGTAGAACTATTAAAGAACTTCGGTCATCGATGGGGATGCGTTCCATTAGGTTGTTGGCGGGGTAACGGCCCACCAAGCCTACGATGGATAGGGGTTCTGAGAGGAAGGTCCCCCACATTGGAACTGAGACACGGCCCAG +>F33Fcsw_137774 +CTGAGTGGCGGACGGGTGAGTAACACGTATCCAACCTGCCGATGACTCGGGGATAGCCTTTCGAAAGAAAGATTAATACCCGATGGCATAGTTCTTCCGCATGGTAGAACTATTAAAGAACTTCGGTCATCGATGGGGATGCGTTCCATTAGGTTGTTGGCGGGGTAACGGCCCACCAAGCCTTCGATGGATAGGGGTTCTGAGAGGAAGGCCCCCCACATTGGAACTGAGACACGGTCCAA +>F34Fcsw_50866 +CGACCGGCGCACGGGTGAGTAACGCGTATCCAACCTTCCGTACACTCAGGGATAGCCTTTCGAAAGAAAGATTAATATCCGATAGTATATTAAAACCGCATGGTTTTACTATTAAAGAATTTCGGTTATCGATGGGGATGCGTTCCATTAGTTTGTTGGCGGGGTAACGGCCCACCAAGACTACGATGGATAGGGGTTCTGAGAGGAAGGTCCCCCACATTGGAACTGAGACACGGTCCAA +>M12Fcsw_69587 +CGGGTGAGTAACACGTGGGCAACCTGCCTCATAGAGGGGAATAGCCTTTCGAAAGAAAGATTAATATCCGATAGCATATATTTCCCGCATGGGTTTTATATTAAAGAAATTCGGTATGAGATGGGGATGCGTTCCATTAGGCAGTTGGTGAGGTAACGGCTCACCAAACCTTCGATGGATAGGGGTTCTGAGAGGAAGGTCCCCCACATTGGAACTGAGACACGGTCCAA +>M13Fcsw_127764 +GACCGGCGCACGGGTGAGTAACACGTATCCAACCTGCCTCATACTCGGGGATAGCCTTTCGAAAGAAAGATTAATATCCGATAGCATATATTTCCCGCATGGGTTTTATATTAAAGAAATTCGGTATGAGATGGGGATGCGTTCCATTAGTTTGTTGGCGGGGTAACGGCCCACCAAGACGACGATGCGTAGGGGTTCTGAGAGGAAGGTCCCCCACATTGGTACTGAGACACGGACCAA +>M13Fcsw_128004 +CGGACGGGTGAGTAACGCGTGAGTAACCTGCCGATAACTCAGGGATAGCCTTTCGAAAGAAAGATTAATACCCGATGGCATAGTTCTTCCGCATGGTAGAACTATTAAAGAATTTCGGTCATCGATGGGGATGCGTTCCATTAGGTTGTTGGCGGGGTAACGGCCCACCAAGGCGACGATGCGTAGGGGTTCTGAGAGGAAGGTCCCCCACATTGGTACTGAGACACGGACCAA +>M13Fcsw_198303 +TAACACGTATCCAACCTGCCTCATACTCGGGGATAACCTTTCGAAAGAAAGATTAATATCCGATAGCATATATTTCCCGCATGGGTTTTATATTAAAGAAATTCGGTATGAGATGGGGATGCGTTCCATTAGATAGTAGGCGGGGTAACGGCCCACCTAGTCAACGATGGATAGGGGTTCTGAGAGGAAGGTCCCCCACATTGGAACTGAGACACGGTCCAA +>M14Fcsw_117325 +GTGAGTAACACGTATCCAACCTGCCTCATACTCGGGGATAGCCTTTCGAAAGAAAGATTAATATCCGATAGCATATATTTCCCGCATGGGTTTTATATTAAAGAAATTCGGTATGAGATGGGGATGCGTTCCATTAGATAGTAGGCGGGGTAACGGCCCACCTAGTCTTCGATGGATAGGGGTTCTGAGAGGAAGGTCCCCCACATTGGAACTGAGACACGGTCCAA +>M14Fcsw_151062 +CGACCGGCGCACGGGTGAGTAACACGTATCCAACCTGCCTCATACTCGGGGATAGCCTTTCGAAAGAAAGATTAATATCCGATAGCATATATTTCCCGCATGGGTTTTATATTAAAGAAATTCGGTATGAGATGGGGATGCGTTCCATTAGTTTGTTGGCGGGGTAACGGCCCACCAAGACTACGATGGATAGGGGTTCTGAGAGGAAGGTCCCCCACATTGGAACTGAGACACGGTCCAA +>M14Fcsw_181677 +ACCGGCGCACGGGTGAGTAACACGTATCCAACCTGCCTCATACTCGGGGATAGCCTTTCGAAAGAAAGATTAATATCCGATAGCATATATTTCCCGCATGGGTTTTATATTAAAGAAATTCGGTATGAGATGGGGATGCGTTCCATTAGATAGTAGGCGGGGTAACGGCCCACCTAGTCTTCGATGGATAGGGGTTCTGAGAGGAAGGTCCCCCACATTGGAACTGAGACACGGTCCAA +>M14Fcsw_186607 +GACCGGCGCACGGGTGAGTAACACGTATCCAACCTGCCTCATACTCGGGGATAGCCTTTCGAAAGAAAGGTTAATATCCGATAGCATATATTTCCCGCATGGGTTTTATATTAAAGAAATTCGGTATGAGATGGGGATGCGTTCCATTAGTTTGTTGGCGGGGTAACGGCCCACCAAGACTACGATGGATAGGGGTTCTGAGAGGAAGGTCCCCCACATTGGAACTGAGACACGGTCCAA +>M24Fcsw_136217 +ACCGGCGCACGGGTGAGTAACACGTATCCAACCTGCCTCATACTCGGGGATAGCCTTTCGAAAGAAAGATTAATATCCGATAGCATATATTTCCCGCATGGGTTTTATATTAAAGAAATTCGGTATGAGATGGGGATGCGTCTGATTAGCTTGTTGGCGGGGTAACGGCCCACCAAGGCACCGATCAGTAGGGGTTCTGAGAGGAAGGTCCCCCACATAGGAACTGAGACACGGTCCTA +>M41Fcsw_259146 +ACCGGCGCACGGGTGAGTAACACGTATCCAACCTACCTCATACTCGGGGATAGCCTTTCGAAAGAAAGATTAATATCCGATAGCATATATTTCCCGCATGGGTTTTATATTAAAGAAATTCGGTATGAGATGGGGATGCGTTCCATTAGGTTGTTGGCGGGGTAACGGCCCACCAAGCCTTCGATGGATAGGGGTTCTGAGAGGAAGGTCCCCCACATTGGAACTGAGACACGGTCCAA +>M42Fcsw_137216 +CAACCTACCTCATACTCGGGGATAGCCTTTCGAAAGAAAGATTAATATCCGATAGCATATATTTCCCGCATGGGTTTTATATTAAAGAAATTCGGTATGAGATGGGGATGCGTTCCATTAGTTTGTTGGGGGGTAACGGCCCACCAAGACTACGATGGATAGGGGTTCTGAGAGGAAGGTCCCCCACATTGGAACTGAGACACGGTCCAA +>M42Fcsw_138199 +GACCGGCGCACGGGTGAGTAACACGTATCCAACCTACCTCATACTCGGGGATAGCCTTTCGAAAGAAAGATTAATATCCGATAGCATATATTTCCCGCATGGGTTTTATATTAAAGAAATTCGGTATGAGATGGGGATGCGTTCCATTAGTTTGTTGGGGGGTAACGGCCCACCAAGACTACGATGGATAGGGGTTCTGAGAGGAAGGTCCCCCACATTGGAACTGAGACACGGTCCAA +>M42Fcsw_225418 +CACGTATCCAACCTGCCGTCTACTCTTGGACAGCCTTCTGAAAGGAAGATTAATACCCGATGGCATAGTTCTTCCGCATGGTAGAACTATTAAAGGATTTCGGTCATCGATGGGGATGCGTTCCATTAGGTTGTTGGCGGGGTAACGGCCCACCAAGCCTTCGATGGATAGGGGTTCTGAGAGGAAGGTCCCCACATTGGAACTGAGACACGGTCCAA +>M42Fcsw_263016 +ACCTACCTCATACTCGGGGATAGCCTTTCGAAAGAAAGATTAATATCCGATAGCATATATTTCCCGCATGGGTTTTATATTAAAGAAATTCGGTATGAGATGGGGATGCGTTCCATTAGTTTGTTGGGGGGTAACGGCCCACCAAGACTACGATGGATAGGGGTTCTGAGAGGAAGGTCCCCCACATTGGAACTGAGACACGGTCCCA +>M42Fcsw_334979 +GGGATAGCCGCCCGAAAGGACGGGTAATACCCGATGGCATAGTTCTTCCGCATGGTAGAACTATTAAAGGATTTCGGTCATCGATGGGGATGCGTTCCATTAGGTTGTTGGCGGGGTAACGGCCCACCAAGCCTTCGATGGATAGGGGTTCTGAGAGGAAGGTCCCCCACATTGGAACTGAGACACGGTCCAA +>M43Fcsw_250770 +GTATCCAACCTGCCGTCTACTCTTGGACAGCCTTCTGAAAGAAAGATTAATACCCGATGGCATAGTTCTTCCGCATGGTAGAACTATTAAAGGATTTCGGTCATCGATGGGGATGCGTTCCATTAGGTTGTTGGCGGGGTAACGGCCCACCTAGTCTTCGATGGATAGGGGTTCTGAGAGGAAGGTCCCCCACATTGGAACTGAGACACGGTCCAA +>M44Fcsw_200453 +CTAGTGGCGGACGGGTGAGTAACGCGTATCCAACCTGCCGATGACTCGGGGATAGCCTTTCGAAAGAAAGATTAATACCCGATGGCATAGTTCTTCCGCATGGTAGAACTATTAAAGAACTTCGGTCATCGATGGGGATGCGTTCCATTAGATAGTAGGCGGGGTAACGGCCCACCTAGTCTTCGATGGATAGGGGTTCTGAGAGGAAGGTCCCCCACATTGGAACTGAGACACGGTCCAA diff -r 23f5701549b1 -r 34a799d173f7 tool_dependencies.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tool_dependencies.xml Fri Sep 07 13:52:03 2012 -0500 @@ -0,0 +1,25 @@ + + + + + + https://cdhit.googlecode.com/files/cd-hit-v4.6.1-2012-08-27.tgz + make openmp=yes + + $INSTALL_DIR + + + + +These links provide information for building the cdhit package: + +http://weizhong-lab.ucsd.edu/cd-hit/ + +https://code.google.com/p/cdhit/ + +https://code.google.com/p/cdhit/source/browse/README + + + + +