Trie
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83
| class Solution { private static class Trie { private final TrieNode root;
public Trie() { this.root = new TrieNode(); } }
private static class TrieNode { private boolean end; private final TrieNode[] children;
public TrieNode() { this.end = false; this.children = new TrieNode[4]; }
public TrieNode getChild(char c) { return children[charToIndex(c)]; }
private int charToIndex(char c) { switch (c) { case 'A': return 0; case 'C': return 1; case 'G': return 2; case 'T': return 3; default: throw new IllegalArgumentException(); } }
public boolean isEnd() { return end; }
public void setEnd(boolean end) { this.end = end; }
public TrieNode addChild(char c) { TrieNode child = new TrieNode(); children[charToIndex(c)] = child; return child; } }
public List<String> findRepeatedDnaSequences(String s) { Set<String> set = new HashSet<>();
Trie trie = new Trie(); for (int i = 0; i < s.length() - 9; i++) { addCharToTrie(s, i, trie, set); }
return new ArrayList<>(set); }
private void addCharToTrie(String s, int i, Trie trie, Set<String> set) { TrieNode node = trie.root;
for (int j = i; j < i + 10; j++) { char c = s.charAt(j); TrieNode childNode = node.getChild(c); if (childNode != null) { node = childNode; } else { node = node.addChild(c); } }
if (node.isEnd()) { set.add(s.substring(i, i + 10)); } else { node.setEnd(true); } } }
|
Hash
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43
| class Solution { private int hash(char c) { switch (c) { case 'A': return 0b00; case 'C': return 0b01; case 'G': return 0b10; case 'T': return 0b11; default: throw new IllegalArgumentException(); } }
public List<String> findRepeatedDnaSequences(String s) { List<String> list = new ArrayList<>(); if (s.length() <= 10) { return list; }
int hash = 0; for (int i = 0; i < 10; i++) { hash = (hash << 2) | hash(s.charAt(i)); }
Map<Integer, Integer> hashToCountMap = new HashMap<>(); hashToCountMap.put(hash, 1);
for (int i = 10; i < s.length(); i++) { hash = ((hash << 2) | hash(s.charAt(i))) & 0b11111111111111111111; int count = hashToCountMap.getOrDefault(hash, 0); if (count == 1) { list.add(s.substring(i - 9, i + 1)); }
hashToCountMap.put(hash, count + 1); }
return list; } }
|
Rolling Hash
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30
| class Solution { private static final int PRIME = Boolean.hashCode(true);
public List<String> findRepeatedDnaSequences(String s) { List<String> answerList = new ArrayList<>();
int[] prefixHashArray = new int[s.length() + 1]; int[] factorArray = new int[s.length() + 1]; factorArray[0] = 1;
for (int i = 0; i < s.length(); i++) { prefixHashArray[i + 1] = prefixHashArray[i] * PRIME + s.charAt(i); factorArray[i + 1] = factorArray[i] * PRIME; }
Map<Integer, Integer> hashToCountMap = new HashMap<>(); for (int j = 9; j < s.length(); j++) { int i = j - 9; int hash = prefixHashArray[j + 1] - prefixHashArray[i] * factorArray[10]; int count = hashToCountMap.getOrDefault(hash, 0); if (count == 1) { answerList.add(s.substring(i, j + 1)); } hashToCountMap.put(hash, count + 1); }
return answerList; } }
|
Rolling Hash
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33
| class Solution { private static final long PRIME = Boolean.hashCode(true);
public List<String> findRepeatedDnaSequences(String s) { if (s.length() <= 10) { return Collections.emptyList(); }
List<String> seqList = new ArrayList<>(); Map<Long, Integer> hashToFeqMap = new HashMap<>(); long hash = 0; long product = 1; for (int i = 0; i < 10; i++) { if (i != 0) { product *= PRIME; } hash = hash * PRIME + s.charAt(i); } hashToFeqMap.put(hash, 1);
for (int i = 10; i < s.length(); i++) { hash -= s.charAt(i - 10) * product; hash = hash * PRIME + s.charAt(i); int feq = hashToFeqMap.getOrDefault(hash, 0); if (feq == 1) { seqList.add(s.substring(i - 9, i + 1)); } hashToFeqMap.put(hash, feq + 1); }
return seqList; } }
|
Rolling Hash
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44
| class Solution { public List<String> findRepeatedDnaSequences(String s) { List<String> resultList = new ArrayList<>(); if (s.length() <= 10) { return resultList; }
int hashCode = 0; Map<Integer, Integer> codeToCountMap = new HashMap<>();
for (int i = 0; i < 10; i++) { hashCode = hashCode * 4 + toCode(s.charAt(i)); } codeToCountMap.put(hashCode, 1);
for (int j = 10; j < s.length(); j++) { hashCode = hashCode * 4 + toCode(s.charAt(j)); hashCode -= toCode(s.charAt(j - 10)) * (int) Math.pow(4, 10);
int count = codeToCountMap.getOrDefault(hashCode, 0); if (count == 1) { resultList.add(s.substring(j - 9, j + 1)); } codeToCountMap.put(hashCode, count + 1); }
return resultList; }
private int toCode(char c) { switch (c) { case 'A': return 0; case 'C': return 1; case 'G': return 2; case 'T': return 3; default: throw new IllegalArgumentException("Unsupported char: " + c); } } }
|
References
187. Repeated DNA Sequences