Need Faster Word Builder Algorithm
I have an app that assists studying for Scrabble. Most searches are much faster than by desktop version in C#, except the Word Builder. This search shows all the words that can be formed from a given set of letters A-Z, or blanks.
What can I do to get it to run faster?
I've considered using a Trie, but haven't found a way to support the use of blanks.
I am using a SimpleCursorAdapter to populate the ListView, which is why I am returning a cursor.
public Cursor getCursor_subanagrams(String term, String filters, String ordering) {
if (term.trim() == "")
return null;
// only difference between this and anagram is changing the length filter
char a = term.toCharArray(); // anagram
int first = new int[26]; // letter count of anagram
int c; // array position
int blankcount = 0;
// initialize word to anagram
for (c = 0; c < a.length; c++) {
if (a[c] == '?') {
blankcount++;
continue;
}
first[a[c] - 'A']++;
}
// gets pool of words to search through
String lenFilter = String.format("Length(Word) <= %1$s AND Length(Word) <= %2$s", LexData.getMaxLength(), term.length());
Cursor cursor = database.rawQuery("SELECT WordID as _id, Word, WordID, FrontHooks, BackHooks, " +
"InnerFront, InnerBack, Anagrams, ProbFactor, OPlayFactor, Score n" +
"FROM `" + LexData.getLexName() + "` n" +
"WHERE (" + lenFilter +
filters +
" ) " + ordering, null);
// creates new cursor to add valid words to
MatrixCursor matrixCursor = new MatrixCursor(new String{"_id", "Word", "WordID", "FrontHooks", "BackHooks", "InnerFront", "InnerBack",
"Anagrams", "ProbFactor", "OPlayFactor", "Score"});
// THIS NEEDS TO BE FASTER
while (cursor.moveToNext()) {
String word = cursor.getString(1);
char b = word.toCharArray();
if (isAnagram(first, b, blankcount)) {
matrixCursor.addRow(get_CursorRow(cursor));
}
}
cursor.close();
return matrixCursor;
}
private boolean isAnagram(int anagram, char word, int blankcount) {
int matchcount = blankcount;
int c; // each letter
int second = {0,0,0,0,0, 0,0,0,0,0, 0,0,0,0,0, 0,0,0,0,0, 0,0,0,0,0, 0};
for (c = 0; c < word.length; c++)
second[word[c] - 'A']++;
for (c = 0; c < 26; c++)
{
matchcount += (anagram[c]<second[c]) ? anagram[c]:second[c];
}
if (matchcount == word.length)
return true;
return false;
}
java android algorithm simplecursoradapter
add a comment |
I have an app that assists studying for Scrabble. Most searches are much faster than by desktop version in C#, except the Word Builder. This search shows all the words that can be formed from a given set of letters A-Z, or blanks.
What can I do to get it to run faster?
I've considered using a Trie, but haven't found a way to support the use of blanks.
I am using a SimpleCursorAdapter to populate the ListView, which is why I am returning a cursor.
public Cursor getCursor_subanagrams(String term, String filters, String ordering) {
if (term.trim() == "")
return null;
// only difference between this and anagram is changing the length filter
char a = term.toCharArray(); // anagram
int first = new int[26]; // letter count of anagram
int c; // array position
int blankcount = 0;
// initialize word to anagram
for (c = 0; c < a.length; c++) {
if (a[c] == '?') {
blankcount++;
continue;
}
first[a[c] - 'A']++;
}
// gets pool of words to search through
String lenFilter = String.format("Length(Word) <= %1$s AND Length(Word) <= %2$s", LexData.getMaxLength(), term.length());
Cursor cursor = database.rawQuery("SELECT WordID as _id, Word, WordID, FrontHooks, BackHooks, " +
"InnerFront, InnerBack, Anagrams, ProbFactor, OPlayFactor, Score n" +
"FROM `" + LexData.getLexName() + "` n" +
"WHERE (" + lenFilter +
filters +
" ) " + ordering, null);
// creates new cursor to add valid words to
MatrixCursor matrixCursor = new MatrixCursor(new String{"_id", "Word", "WordID", "FrontHooks", "BackHooks", "InnerFront", "InnerBack",
"Anagrams", "ProbFactor", "OPlayFactor", "Score"});
// THIS NEEDS TO BE FASTER
while (cursor.moveToNext()) {
String word = cursor.getString(1);
char b = word.toCharArray();
if (isAnagram(first, b, blankcount)) {
matrixCursor.addRow(get_CursorRow(cursor));
}
}
cursor.close();
return matrixCursor;
}
private boolean isAnagram(int anagram, char word, int blankcount) {
int matchcount = blankcount;
int c; // each letter
int second = {0,0,0,0,0, 0,0,0,0,0, 0,0,0,0,0, 0,0,0,0,0, 0,0,0,0,0, 0};
for (c = 0; c < word.length; c++)
second[word[c] - 'A']++;
for (c = 0; c < 26; c++)
{
matchcount += (anagram[c]<second[c]) ? anagram[c]:second[c];
}
if (matchcount == word.length)
return true;
return false;
}
java android algorithm simplecursoradapter
Try doing all the "non UI" work on a separate thread, if you're not doing it already.
– HB.
Nov 22 '18 at 18:11
This is running in a separate thread. I just need it to run faster. Similar apps do this in about a tenth of the time, though they generally only show the word without details.
– Dana Bell
Nov 22 '18 at 22:54
add a comment |
I have an app that assists studying for Scrabble. Most searches are much faster than by desktop version in C#, except the Word Builder. This search shows all the words that can be formed from a given set of letters A-Z, or blanks.
What can I do to get it to run faster?
I've considered using a Trie, but haven't found a way to support the use of blanks.
I am using a SimpleCursorAdapter to populate the ListView, which is why I am returning a cursor.
public Cursor getCursor_subanagrams(String term, String filters, String ordering) {
if (term.trim() == "")
return null;
// only difference between this and anagram is changing the length filter
char a = term.toCharArray(); // anagram
int first = new int[26]; // letter count of anagram
int c; // array position
int blankcount = 0;
// initialize word to anagram
for (c = 0; c < a.length; c++) {
if (a[c] == '?') {
blankcount++;
continue;
}
first[a[c] - 'A']++;
}
// gets pool of words to search through
String lenFilter = String.format("Length(Word) <= %1$s AND Length(Word) <= %2$s", LexData.getMaxLength(), term.length());
Cursor cursor = database.rawQuery("SELECT WordID as _id, Word, WordID, FrontHooks, BackHooks, " +
"InnerFront, InnerBack, Anagrams, ProbFactor, OPlayFactor, Score n" +
"FROM `" + LexData.getLexName() + "` n" +
"WHERE (" + lenFilter +
filters +
" ) " + ordering, null);
// creates new cursor to add valid words to
MatrixCursor matrixCursor = new MatrixCursor(new String{"_id", "Word", "WordID", "FrontHooks", "BackHooks", "InnerFront", "InnerBack",
"Anagrams", "ProbFactor", "OPlayFactor", "Score"});
// THIS NEEDS TO BE FASTER
while (cursor.moveToNext()) {
String word = cursor.getString(1);
char b = word.toCharArray();
if (isAnagram(first, b, blankcount)) {
matrixCursor.addRow(get_CursorRow(cursor));
}
}
cursor.close();
return matrixCursor;
}
private boolean isAnagram(int anagram, char word, int blankcount) {
int matchcount = blankcount;
int c; // each letter
int second = {0,0,0,0,0, 0,0,0,0,0, 0,0,0,0,0, 0,0,0,0,0, 0,0,0,0,0, 0};
for (c = 0; c < word.length; c++)
second[word[c] - 'A']++;
for (c = 0; c < 26; c++)
{
matchcount += (anagram[c]<second[c]) ? anagram[c]:second[c];
}
if (matchcount == word.length)
return true;
return false;
}
java android algorithm simplecursoradapter
I have an app that assists studying for Scrabble. Most searches are much faster than by desktop version in C#, except the Word Builder. This search shows all the words that can be formed from a given set of letters A-Z, or blanks.
What can I do to get it to run faster?
I've considered using a Trie, but haven't found a way to support the use of blanks.
I am using a SimpleCursorAdapter to populate the ListView, which is why I am returning a cursor.
public Cursor getCursor_subanagrams(String term, String filters, String ordering) {
if (term.trim() == "")
return null;
// only difference between this and anagram is changing the length filter
char a = term.toCharArray(); // anagram
int first = new int[26]; // letter count of anagram
int c; // array position
int blankcount = 0;
// initialize word to anagram
for (c = 0; c < a.length; c++) {
if (a[c] == '?') {
blankcount++;
continue;
}
first[a[c] - 'A']++;
}
// gets pool of words to search through
String lenFilter = String.format("Length(Word) <= %1$s AND Length(Word) <= %2$s", LexData.getMaxLength(), term.length());
Cursor cursor = database.rawQuery("SELECT WordID as _id, Word, WordID, FrontHooks, BackHooks, " +
"InnerFront, InnerBack, Anagrams, ProbFactor, OPlayFactor, Score n" +
"FROM `" + LexData.getLexName() + "` n" +
"WHERE (" + lenFilter +
filters +
" ) " + ordering, null);
// creates new cursor to add valid words to
MatrixCursor matrixCursor = new MatrixCursor(new String{"_id", "Word", "WordID", "FrontHooks", "BackHooks", "InnerFront", "InnerBack",
"Anagrams", "ProbFactor", "OPlayFactor", "Score"});
// THIS NEEDS TO BE FASTER
while (cursor.moveToNext()) {
String word = cursor.getString(1);
char b = word.toCharArray();
if (isAnagram(first, b, blankcount)) {
matrixCursor.addRow(get_CursorRow(cursor));
}
}
cursor.close();
return matrixCursor;
}
private boolean isAnagram(int anagram, char word, int blankcount) {
int matchcount = blankcount;
int c; // each letter
int second = {0,0,0,0,0, 0,0,0,0,0, 0,0,0,0,0, 0,0,0,0,0, 0,0,0,0,0, 0};
for (c = 0; c < word.length; c++)
second[word[c] - 'A']++;
for (c = 0; c < 26; c++)
{
matchcount += (anagram[c]<second[c]) ? anagram[c]:second[c];
}
if (matchcount == word.length)
return true;
return false;
}
java android algorithm simplecursoradapter
java android algorithm simplecursoradapter
asked Nov 22 '18 at 16:05
Dana Bell
246
246
Try doing all the "non UI" work on a separate thread, if you're not doing it already.
– HB.
Nov 22 '18 at 18:11
This is running in a separate thread. I just need it to run faster. Similar apps do this in about a tenth of the time, though they generally only show the word without details.
– Dana Bell
Nov 22 '18 at 22:54
add a comment |
Try doing all the "non UI" work on a separate thread, if you're not doing it already.
– HB.
Nov 22 '18 at 18:11
This is running in a separate thread. I just need it to run faster. Similar apps do this in about a tenth of the time, though they generally only show the word without details.
– Dana Bell
Nov 22 '18 at 22:54
Try doing all the "non UI" work on a separate thread, if you're not doing it already.
– HB.
Nov 22 '18 at 18:11
Try doing all the "non UI" work on a separate thread, if you're not doing it already.
– HB.
Nov 22 '18 at 18:11
This is running in a separate thread. I just need it to run faster. Similar apps do this in about a tenth of the time, though they generally only show the word without details.
– Dana Bell
Nov 22 '18 at 22:54
This is running in a separate thread. I just need it to run faster. Similar apps do this in about a tenth of the time, though they generally only show the word without details.
– Dana Bell
Nov 22 '18 at 22:54
add a comment |
3 Answers
3
active
oldest
votes
Focus on speeding up the most typical case, which is where the word is not a (sub)anagram, and you return false. If you can identify as quickly as possible when it is not possible to make word
out of anagram
then you can avoid the expensive test.
One way to do this is using a bitmask of the letters in the words. You don't need to store letter counts, because if the number of unique letters in word
that are not in anagram
is greater than the number of blanks, then there is no way you can make it and you can quickly return false. If not then you can proceed to the more expensive test taking letter counts into account.
You can precompute the bitmasks like this:
private int letterMask(char word)
{
int c, mask = 0;
for (c = 0; c < word.length; c++)
mask |= (1 << (word[c] - 'A'));
return mask;
}
Add an extra column to your database to store the letter bitmask for each word, add it to your cursor, and compute the letter bitmask for the letters in term
and store in termMask
. Then inside your cursor loop you can do a test like this:
// compute mask of bits in mask that are not in term:
int missingLettersMask = cursor.getInt(8) & ~termMask;
if(missingLettersMask != 0)
{
// check if we could possibly make up for these letters using blanks:
int remainingBlanks = blankcount;
while((remainingBlanks-- > 0) && (missingLettersMask != 0))
missingLettersMask &= (missingLettersMask - 1); // remove one bit
if(missingLettersMask != 0)
continue; // move onto the next word
}
// word can potentially be made from anagram, call isAnagram:
Well, I tried that. After correcting cursor.getInt(8) to use the right column cursor.getInt(11), I got it to work.
– Dana Bell
Nov 24 '18 at 16:22
Except it actually takes longer. To verify the proper execution of the code, I logged the "candidate" words that were passed to isAnagram() and the mask does filter out the words correctly.
– Dana Bell
Nov 24 '18 at 16:29
That's a bit disappointing. Perhaps it is that database cursor functions that account for a lot of the time taken, and not your isAnagram() function.
– samgak
Nov 25 '18 at 2:18
Do you get much of a speed up by removing the ordering clause?
– samgak
Nov 26 '18 at 1:32
The database cursor shouldn't be taking much time. Other searches with simpler filtering methods return the same number of results quickly. The ordering clause is also SQL so it takes practically no time.
– Dana Bell
Nov 26 '18 at 17:18
add a comment |
There are ways to speed up your anagram checking function. Samgak has pointed out one. Another obvious optimisation is to return false if the word is longer than the number of available letters plus blanks. In the end, these are all micro-optimisations and you will end up checking your whole dictionary.
You said you have considered using a trie. That's a good solution, in my opinion, because the structure of the trie will only make you check relevant words. Build it like this:
- Sort the letters of each word, so that "triangle" and "integral" will both become "aegilnrt".
- Insert the sorted word into the trie.
- Where you would place the end-marker in a normal trie, you place a list of possible words.
If you were looking for exact anagrams, you would sort the word to check, traverse the trie and print out the list of possible anagrams at the end. But here, you have to deal with partial anagrams and with blanks:
- Regular traversal means you take the next letter of the word and then descent the corresponding link in the tree,if it exists.
- Partial anagrams can be found by ignoring the next letter without descending in the trie.
- Blanks can be dealt with by descending all possible branches of a trie and decreasing the number of blanks.
When you have blanks, you will end up with duplicates. For example, if you have the letters A, B and C and a blank tile, you can make the word CAB, but you can get there on four different ways: CAB, _AB, C_B, CA_.
You could get around this by storing the result list in a data structure that eliminates duplicates, such as a set or ordered set, but you would still go down the same paths several times in order to create the duplicates.
A better solution is to keep track of which trie nodes you have visited with which parameters, i.e. with with remaining unused letters and blanks. You can then cut such paths short. Here's an implementation in pseudocode:
function find_r(t, str, blanks, visited)
{
// don't revisit explored paths
key = make_key(t, str, blanks);
if (key in visited) return ;
visited ~= key;
if (str.length == 0 and blanks == 0) {
// all resources have been used: return list of anagrams
return t.word;
} else {
res = ;
c = 0;
if (str.length > 0) {
c = str[0];
// regular traversal: use current letter and descend
if (c in t.next) {
res ~= find_r(t.next[c], str[1:], blanks, visited);
}
# partial anagrams: skip current letter and don't descend
l = 1
while (l < str.length and str[l] == c) l++;
res ~= find_r(t, str[l:], blanks, visited);
}
if (blanks > 0) {
// blanks: decrease blanks and descend
for (i in t.next) {
if (i < c) {
res ~= find_r(t.next[i], str, blanks - 1, visited);
}
}
}
return res;
}
}
(Here, ~
denotes list concatenation or set insertion; [beg=0:end=length]
denotes string slices; in
tests whether a dictionary or set contains a key.)
Once you've built the tree, this solution is fast when there are no blanks, but it gets exponentially worse with each blank and with larger letter pools. Testing with one blank is still reasonably fast, but with two blanks, it is on par with your existing solution.
Now there are at most two blanks in a Scrabble game and the rack can hold only up to seven tiles, so it may not be as bad in practice. Another question is whether the search should consider words obtained with two blanks. The results list will be very long and it will contain all two-letter words. The player may be more interested in high-scoring words that can be played with a single blank.
Thank you for the trie code. I'll try that when I have time. I'm already filtering length when doing the initial SQL query. While there are only two blanks, searching based on existing letters on the board would mean more blanks could be added to the search.
– Dana Bell
Nov 24 '18 at 17:15
add a comment |
I used samgak's recommendation to check for "mismatches".
Instead of going through the process of checking each letter before continuing, I exit during the process of checking each letter. I thought I could just check mismatches and ignore matchcounting, but couldn't get it to work accurately. More analysis must be needed.
This speeded things up about 20% according to initial tests with an emulator.
I may be able to speed things up even more by checking letters based on letter frequency; check most common letters(LNSTE) before others (ZQJ). Now to figure out how to sort an array by one field in the array, but that's another question.
private boolean isAnagram(int anagram, char word, int blankcount) {
// anagram is letters in term
int matchcount = blankcount;
int mismatchcount = 0;
int c;
int second = {0,0,0,0,0, 0,0,0,0,0, 0,0,0,0,0, 0,0,0,0,0, 0,0,0,0,0, 0};
for (c = 0; c < word.length; c++)
second[word[c] - 'A']++;
for (c = 0; c < 26; c++)
{
mismatchcount += (anagram[c]<second[c] ? second[c] - anagram[c]:0);
if (mismatchcount > blankcount)
return false;
matchcount += (anagram[c]<second[c]) ? anagram[c]:second[c];
}
if (matchcount == word.length)
return true;
return false;
}
EDIT:
Here's an even more compact (faster?) version that counts mismatches as it builds the alternate word character array.
private boolean isAnagram(int anagram, char word, int blankcount) {
int mismatchcount = 0;
int c;
int second = {0,0,0,0,0, 0,0,0,0,0, 0,0,0,0,0, 0,0,0,0,0, 0,0,0,0,0, 0};
int letter = 0;
for (c = 0; c < word.length; c++) {
letter = ++second[word[c] - 'A'];
mismatchcount += (letter > anagram[word[c]-'A'] ? 1 : 0);
if (mismatchcount > blankcount)
return false;
}
return true;
}
add a comment |
Your Answer
StackExchange.ifUsing("editor", function () {
StackExchange.using("externalEditor", function () {
StackExchange.using("snippets", function () {
StackExchange.snippets.init();
});
});
}, "code-snippets");
StackExchange.ready(function() {
var channelOptions = {
tags: "".split(" "),
id: "1"
};
initTagRenderer("".split(" "), "".split(" "), channelOptions);
StackExchange.using("externalEditor", function() {
// Have to fire editor after snippets, if snippets enabled
if (StackExchange.settings.snippets.snippetsEnabled) {
StackExchange.using("snippets", function() {
createEditor();
});
}
else {
createEditor();
}
});
function createEditor() {
StackExchange.prepareEditor({
heartbeatType: 'answer',
autoActivateHeartbeat: false,
convertImagesToLinks: true,
noModals: true,
showLowRepImageUploadWarning: true,
reputationToPostImages: 10,
bindNavPrevention: true,
postfix: "",
imageUploader: {
brandingHtml: "Powered by u003ca class="icon-imgur-white" href="https://imgur.com/"u003eu003c/au003e",
contentPolicyHtml: "User contributions licensed under u003ca href="https://creativecommons.org/licenses/by-sa/3.0/"u003ecc by-sa 3.0 with attribution requiredu003c/au003e u003ca href="https://stackoverflow.com/legal/content-policy"u003e(content policy)u003c/au003e",
allowUrls: true
},
onDemand: true,
discardSelector: ".discard-answer"
,immediatelyShowMarkdownHelp:true
});
}
});
Sign up or log in
StackExchange.ready(function () {
StackExchange.helpers.onClickDraftSave('#login-link');
});
Sign up using Google
Sign up using Facebook
Sign up using Email and Password
Post as a guest
Required, but never shown
StackExchange.ready(
function () {
StackExchange.openid.initPostLogin('.new-post-login', 'https%3a%2f%2fstackoverflow.com%2fquestions%2f53434695%2fneed-faster-word-builder-algorithm%23new-answer', 'question_page');
}
);
Post as a guest
Required, but never shown
3 Answers
3
active
oldest
votes
3 Answers
3
active
oldest
votes
active
oldest
votes
active
oldest
votes
Focus on speeding up the most typical case, which is where the word is not a (sub)anagram, and you return false. If you can identify as quickly as possible when it is not possible to make word
out of anagram
then you can avoid the expensive test.
One way to do this is using a bitmask of the letters in the words. You don't need to store letter counts, because if the number of unique letters in word
that are not in anagram
is greater than the number of blanks, then there is no way you can make it and you can quickly return false. If not then you can proceed to the more expensive test taking letter counts into account.
You can precompute the bitmasks like this:
private int letterMask(char word)
{
int c, mask = 0;
for (c = 0; c < word.length; c++)
mask |= (1 << (word[c] - 'A'));
return mask;
}
Add an extra column to your database to store the letter bitmask for each word, add it to your cursor, and compute the letter bitmask for the letters in term
and store in termMask
. Then inside your cursor loop you can do a test like this:
// compute mask of bits in mask that are not in term:
int missingLettersMask = cursor.getInt(8) & ~termMask;
if(missingLettersMask != 0)
{
// check if we could possibly make up for these letters using blanks:
int remainingBlanks = blankcount;
while((remainingBlanks-- > 0) && (missingLettersMask != 0))
missingLettersMask &= (missingLettersMask - 1); // remove one bit
if(missingLettersMask != 0)
continue; // move onto the next word
}
// word can potentially be made from anagram, call isAnagram:
Well, I tried that. After correcting cursor.getInt(8) to use the right column cursor.getInt(11), I got it to work.
– Dana Bell
Nov 24 '18 at 16:22
Except it actually takes longer. To verify the proper execution of the code, I logged the "candidate" words that were passed to isAnagram() and the mask does filter out the words correctly.
– Dana Bell
Nov 24 '18 at 16:29
That's a bit disappointing. Perhaps it is that database cursor functions that account for a lot of the time taken, and not your isAnagram() function.
– samgak
Nov 25 '18 at 2:18
Do you get much of a speed up by removing the ordering clause?
– samgak
Nov 26 '18 at 1:32
The database cursor shouldn't be taking much time. Other searches with simpler filtering methods return the same number of results quickly. The ordering clause is also SQL so it takes practically no time.
– Dana Bell
Nov 26 '18 at 17:18
add a comment |
Focus on speeding up the most typical case, which is where the word is not a (sub)anagram, and you return false. If you can identify as quickly as possible when it is not possible to make word
out of anagram
then you can avoid the expensive test.
One way to do this is using a bitmask of the letters in the words. You don't need to store letter counts, because if the number of unique letters in word
that are not in anagram
is greater than the number of blanks, then there is no way you can make it and you can quickly return false. If not then you can proceed to the more expensive test taking letter counts into account.
You can precompute the bitmasks like this:
private int letterMask(char word)
{
int c, mask = 0;
for (c = 0; c < word.length; c++)
mask |= (1 << (word[c] - 'A'));
return mask;
}
Add an extra column to your database to store the letter bitmask for each word, add it to your cursor, and compute the letter bitmask for the letters in term
and store in termMask
. Then inside your cursor loop you can do a test like this:
// compute mask of bits in mask that are not in term:
int missingLettersMask = cursor.getInt(8) & ~termMask;
if(missingLettersMask != 0)
{
// check if we could possibly make up for these letters using blanks:
int remainingBlanks = blankcount;
while((remainingBlanks-- > 0) && (missingLettersMask != 0))
missingLettersMask &= (missingLettersMask - 1); // remove one bit
if(missingLettersMask != 0)
continue; // move onto the next word
}
// word can potentially be made from anagram, call isAnagram:
Well, I tried that. After correcting cursor.getInt(8) to use the right column cursor.getInt(11), I got it to work.
– Dana Bell
Nov 24 '18 at 16:22
Except it actually takes longer. To verify the proper execution of the code, I logged the "candidate" words that were passed to isAnagram() and the mask does filter out the words correctly.
– Dana Bell
Nov 24 '18 at 16:29
That's a bit disappointing. Perhaps it is that database cursor functions that account for a lot of the time taken, and not your isAnagram() function.
– samgak
Nov 25 '18 at 2:18
Do you get much of a speed up by removing the ordering clause?
– samgak
Nov 26 '18 at 1:32
The database cursor shouldn't be taking much time. Other searches with simpler filtering methods return the same number of results quickly. The ordering clause is also SQL so it takes practically no time.
– Dana Bell
Nov 26 '18 at 17:18
add a comment |
Focus on speeding up the most typical case, which is where the word is not a (sub)anagram, and you return false. If you can identify as quickly as possible when it is not possible to make word
out of anagram
then you can avoid the expensive test.
One way to do this is using a bitmask of the letters in the words. You don't need to store letter counts, because if the number of unique letters in word
that are not in anagram
is greater than the number of blanks, then there is no way you can make it and you can quickly return false. If not then you can proceed to the more expensive test taking letter counts into account.
You can precompute the bitmasks like this:
private int letterMask(char word)
{
int c, mask = 0;
for (c = 0; c < word.length; c++)
mask |= (1 << (word[c] - 'A'));
return mask;
}
Add an extra column to your database to store the letter bitmask for each word, add it to your cursor, and compute the letter bitmask for the letters in term
and store in termMask
. Then inside your cursor loop you can do a test like this:
// compute mask of bits in mask that are not in term:
int missingLettersMask = cursor.getInt(8) & ~termMask;
if(missingLettersMask != 0)
{
// check if we could possibly make up for these letters using blanks:
int remainingBlanks = blankcount;
while((remainingBlanks-- > 0) && (missingLettersMask != 0))
missingLettersMask &= (missingLettersMask - 1); // remove one bit
if(missingLettersMask != 0)
continue; // move onto the next word
}
// word can potentially be made from anagram, call isAnagram:
Focus on speeding up the most typical case, which is where the word is not a (sub)anagram, and you return false. If you can identify as quickly as possible when it is not possible to make word
out of anagram
then you can avoid the expensive test.
One way to do this is using a bitmask of the letters in the words. You don't need to store letter counts, because if the number of unique letters in word
that are not in anagram
is greater than the number of blanks, then there is no way you can make it and you can quickly return false. If not then you can proceed to the more expensive test taking letter counts into account.
You can precompute the bitmasks like this:
private int letterMask(char word)
{
int c, mask = 0;
for (c = 0; c < word.length; c++)
mask |= (1 << (word[c] - 'A'));
return mask;
}
Add an extra column to your database to store the letter bitmask for each word, add it to your cursor, and compute the letter bitmask for the letters in term
and store in termMask
. Then inside your cursor loop you can do a test like this:
// compute mask of bits in mask that are not in term:
int missingLettersMask = cursor.getInt(8) & ~termMask;
if(missingLettersMask != 0)
{
// check if we could possibly make up for these letters using blanks:
int remainingBlanks = blankcount;
while((remainingBlanks-- > 0) && (missingLettersMask != 0))
missingLettersMask &= (missingLettersMask - 1); // remove one bit
if(missingLettersMask != 0)
continue; // move onto the next word
}
// word can potentially be made from anagram, call isAnagram:
edited Nov 23 '18 at 8:34
answered Nov 23 '18 at 0:29
samgak
18.9k33159
18.9k33159
Well, I tried that. After correcting cursor.getInt(8) to use the right column cursor.getInt(11), I got it to work.
– Dana Bell
Nov 24 '18 at 16:22
Except it actually takes longer. To verify the proper execution of the code, I logged the "candidate" words that were passed to isAnagram() and the mask does filter out the words correctly.
– Dana Bell
Nov 24 '18 at 16:29
That's a bit disappointing. Perhaps it is that database cursor functions that account for a lot of the time taken, and not your isAnagram() function.
– samgak
Nov 25 '18 at 2:18
Do you get much of a speed up by removing the ordering clause?
– samgak
Nov 26 '18 at 1:32
The database cursor shouldn't be taking much time. Other searches with simpler filtering methods return the same number of results quickly. The ordering clause is also SQL so it takes practically no time.
– Dana Bell
Nov 26 '18 at 17:18
add a comment |
Well, I tried that. After correcting cursor.getInt(8) to use the right column cursor.getInt(11), I got it to work.
– Dana Bell
Nov 24 '18 at 16:22
Except it actually takes longer. To verify the proper execution of the code, I logged the "candidate" words that were passed to isAnagram() and the mask does filter out the words correctly.
– Dana Bell
Nov 24 '18 at 16:29
That's a bit disappointing. Perhaps it is that database cursor functions that account for a lot of the time taken, and not your isAnagram() function.
– samgak
Nov 25 '18 at 2:18
Do you get much of a speed up by removing the ordering clause?
– samgak
Nov 26 '18 at 1:32
The database cursor shouldn't be taking much time. Other searches with simpler filtering methods return the same number of results quickly. The ordering clause is also SQL so it takes practically no time.
– Dana Bell
Nov 26 '18 at 17:18
Well, I tried that. After correcting cursor.getInt(8) to use the right column cursor.getInt(11), I got it to work.
– Dana Bell
Nov 24 '18 at 16:22
Well, I tried that. After correcting cursor.getInt(8) to use the right column cursor.getInt(11), I got it to work.
– Dana Bell
Nov 24 '18 at 16:22
Except it actually takes longer. To verify the proper execution of the code, I logged the "candidate" words that were passed to isAnagram() and the mask does filter out the words correctly.
– Dana Bell
Nov 24 '18 at 16:29
Except it actually takes longer. To verify the proper execution of the code, I logged the "candidate" words that were passed to isAnagram() and the mask does filter out the words correctly.
– Dana Bell
Nov 24 '18 at 16:29
That's a bit disappointing. Perhaps it is that database cursor functions that account for a lot of the time taken, and not your isAnagram() function.
– samgak
Nov 25 '18 at 2:18
That's a bit disappointing. Perhaps it is that database cursor functions that account for a lot of the time taken, and not your isAnagram() function.
– samgak
Nov 25 '18 at 2:18
Do you get much of a speed up by removing the ordering clause?
– samgak
Nov 26 '18 at 1:32
Do you get much of a speed up by removing the ordering clause?
– samgak
Nov 26 '18 at 1:32
The database cursor shouldn't be taking much time. Other searches with simpler filtering methods return the same number of results quickly. The ordering clause is also SQL so it takes practically no time.
– Dana Bell
Nov 26 '18 at 17:18
The database cursor shouldn't be taking much time. Other searches with simpler filtering methods return the same number of results quickly. The ordering clause is also SQL so it takes practically no time.
– Dana Bell
Nov 26 '18 at 17:18
add a comment |
There are ways to speed up your anagram checking function. Samgak has pointed out one. Another obvious optimisation is to return false if the word is longer than the number of available letters plus blanks. In the end, these are all micro-optimisations and you will end up checking your whole dictionary.
You said you have considered using a trie. That's a good solution, in my opinion, because the structure of the trie will only make you check relevant words. Build it like this:
- Sort the letters of each word, so that "triangle" and "integral" will both become "aegilnrt".
- Insert the sorted word into the trie.
- Where you would place the end-marker in a normal trie, you place a list of possible words.
If you were looking for exact anagrams, you would sort the word to check, traverse the trie and print out the list of possible anagrams at the end. But here, you have to deal with partial anagrams and with blanks:
- Regular traversal means you take the next letter of the word and then descent the corresponding link in the tree,if it exists.
- Partial anagrams can be found by ignoring the next letter without descending in the trie.
- Blanks can be dealt with by descending all possible branches of a trie and decreasing the number of blanks.
When you have blanks, you will end up with duplicates. For example, if you have the letters A, B and C and a blank tile, you can make the word CAB, but you can get there on four different ways: CAB, _AB, C_B, CA_.
You could get around this by storing the result list in a data structure that eliminates duplicates, such as a set or ordered set, but you would still go down the same paths several times in order to create the duplicates.
A better solution is to keep track of which trie nodes you have visited with which parameters, i.e. with with remaining unused letters and blanks. You can then cut such paths short. Here's an implementation in pseudocode:
function find_r(t, str, blanks, visited)
{
// don't revisit explored paths
key = make_key(t, str, blanks);
if (key in visited) return ;
visited ~= key;
if (str.length == 0 and blanks == 0) {
// all resources have been used: return list of anagrams
return t.word;
} else {
res = ;
c = 0;
if (str.length > 0) {
c = str[0];
// regular traversal: use current letter and descend
if (c in t.next) {
res ~= find_r(t.next[c], str[1:], blanks, visited);
}
# partial anagrams: skip current letter and don't descend
l = 1
while (l < str.length and str[l] == c) l++;
res ~= find_r(t, str[l:], blanks, visited);
}
if (blanks > 0) {
// blanks: decrease blanks and descend
for (i in t.next) {
if (i < c) {
res ~= find_r(t.next[i], str, blanks - 1, visited);
}
}
}
return res;
}
}
(Here, ~
denotes list concatenation or set insertion; [beg=0:end=length]
denotes string slices; in
tests whether a dictionary or set contains a key.)
Once you've built the tree, this solution is fast when there are no blanks, but it gets exponentially worse with each blank and with larger letter pools. Testing with one blank is still reasonably fast, but with two blanks, it is on par with your existing solution.
Now there are at most two blanks in a Scrabble game and the rack can hold only up to seven tiles, so it may not be as bad in practice. Another question is whether the search should consider words obtained with two blanks. The results list will be very long and it will contain all two-letter words. The player may be more interested in high-scoring words that can be played with a single blank.
Thank you for the trie code. I'll try that when I have time. I'm already filtering length when doing the initial SQL query. While there are only two blanks, searching based on existing letters on the board would mean more blanks could be added to the search.
– Dana Bell
Nov 24 '18 at 17:15
add a comment |
There are ways to speed up your anagram checking function. Samgak has pointed out one. Another obvious optimisation is to return false if the word is longer than the number of available letters plus blanks. In the end, these are all micro-optimisations and you will end up checking your whole dictionary.
You said you have considered using a trie. That's a good solution, in my opinion, because the structure of the trie will only make you check relevant words. Build it like this:
- Sort the letters of each word, so that "triangle" and "integral" will both become "aegilnrt".
- Insert the sorted word into the trie.
- Where you would place the end-marker in a normal trie, you place a list of possible words.
If you were looking for exact anagrams, you would sort the word to check, traverse the trie and print out the list of possible anagrams at the end. But here, you have to deal with partial anagrams and with blanks:
- Regular traversal means you take the next letter of the word and then descent the corresponding link in the tree,if it exists.
- Partial anagrams can be found by ignoring the next letter without descending in the trie.
- Blanks can be dealt with by descending all possible branches of a trie and decreasing the number of blanks.
When you have blanks, you will end up with duplicates. For example, if you have the letters A, B and C and a blank tile, you can make the word CAB, but you can get there on four different ways: CAB, _AB, C_B, CA_.
You could get around this by storing the result list in a data structure that eliminates duplicates, such as a set or ordered set, but you would still go down the same paths several times in order to create the duplicates.
A better solution is to keep track of which trie nodes you have visited with which parameters, i.e. with with remaining unused letters and blanks. You can then cut such paths short. Here's an implementation in pseudocode:
function find_r(t, str, blanks, visited)
{
// don't revisit explored paths
key = make_key(t, str, blanks);
if (key in visited) return ;
visited ~= key;
if (str.length == 0 and blanks == 0) {
// all resources have been used: return list of anagrams
return t.word;
} else {
res = ;
c = 0;
if (str.length > 0) {
c = str[0];
// regular traversal: use current letter and descend
if (c in t.next) {
res ~= find_r(t.next[c], str[1:], blanks, visited);
}
# partial anagrams: skip current letter and don't descend
l = 1
while (l < str.length and str[l] == c) l++;
res ~= find_r(t, str[l:], blanks, visited);
}
if (blanks > 0) {
// blanks: decrease blanks and descend
for (i in t.next) {
if (i < c) {
res ~= find_r(t.next[i], str, blanks - 1, visited);
}
}
}
return res;
}
}
(Here, ~
denotes list concatenation or set insertion; [beg=0:end=length]
denotes string slices; in
tests whether a dictionary or set contains a key.)
Once you've built the tree, this solution is fast when there are no blanks, but it gets exponentially worse with each blank and with larger letter pools. Testing with one blank is still reasonably fast, but with two blanks, it is on par with your existing solution.
Now there are at most two blanks in a Scrabble game and the rack can hold only up to seven tiles, so it may not be as bad in practice. Another question is whether the search should consider words obtained with two blanks. The results list will be very long and it will contain all two-letter words. The player may be more interested in high-scoring words that can be played with a single blank.
Thank you for the trie code. I'll try that when I have time. I'm already filtering length when doing the initial SQL query. While there are only two blanks, searching based on existing letters on the board would mean more blanks could be added to the search.
– Dana Bell
Nov 24 '18 at 17:15
add a comment |
There are ways to speed up your anagram checking function. Samgak has pointed out one. Another obvious optimisation is to return false if the word is longer than the number of available letters plus blanks. In the end, these are all micro-optimisations and you will end up checking your whole dictionary.
You said you have considered using a trie. That's a good solution, in my opinion, because the structure of the trie will only make you check relevant words. Build it like this:
- Sort the letters of each word, so that "triangle" and "integral" will both become "aegilnrt".
- Insert the sorted word into the trie.
- Where you would place the end-marker in a normal trie, you place a list of possible words.
If you were looking for exact anagrams, you would sort the word to check, traverse the trie and print out the list of possible anagrams at the end. But here, you have to deal with partial anagrams and with blanks:
- Regular traversal means you take the next letter of the word and then descent the corresponding link in the tree,if it exists.
- Partial anagrams can be found by ignoring the next letter without descending in the trie.
- Blanks can be dealt with by descending all possible branches of a trie and decreasing the number of blanks.
When you have blanks, you will end up with duplicates. For example, if you have the letters A, B and C and a blank tile, you can make the word CAB, but you can get there on four different ways: CAB, _AB, C_B, CA_.
You could get around this by storing the result list in a data structure that eliminates duplicates, such as a set or ordered set, but you would still go down the same paths several times in order to create the duplicates.
A better solution is to keep track of which trie nodes you have visited with which parameters, i.e. with with remaining unused letters and blanks. You can then cut such paths short. Here's an implementation in pseudocode:
function find_r(t, str, blanks, visited)
{
// don't revisit explored paths
key = make_key(t, str, blanks);
if (key in visited) return ;
visited ~= key;
if (str.length == 0 and blanks == 0) {
// all resources have been used: return list of anagrams
return t.word;
} else {
res = ;
c = 0;
if (str.length > 0) {
c = str[0];
// regular traversal: use current letter and descend
if (c in t.next) {
res ~= find_r(t.next[c], str[1:], blanks, visited);
}
# partial anagrams: skip current letter and don't descend
l = 1
while (l < str.length and str[l] == c) l++;
res ~= find_r(t, str[l:], blanks, visited);
}
if (blanks > 0) {
// blanks: decrease blanks and descend
for (i in t.next) {
if (i < c) {
res ~= find_r(t.next[i], str, blanks - 1, visited);
}
}
}
return res;
}
}
(Here, ~
denotes list concatenation or set insertion; [beg=0:end=length]
denotes string slices; in
tests whether a dictionary or set contains a key.)
Once you've built the tree, this solution is fast when there are no blanks, but it gets exponentially worse with each blank and with larger letter pools. Testing with one blank is still reasonably fast, but with two blanks, it is on par with your existing solution.
Now there are at most two blanks in a Scrabble game and the rack can hold only up to seven tiles, so it may not be as bad in practice. Another question is whether the search should consider words obtained with two blanks. The results list will be very long and it will contain all two-letter words. The player may be more interested in high-scoring words that can be played with a single blank.
There are ways to speed up your anagram checking function. Samgak has pointed out one. Another obvious optimisation is to return false if the word is longer than the number of available letters plus blanks. In the end, these are all micro-optimisations and you will end up checking your whole dictionary.
You said you have considered using a trie. That's a good solution, in my opinion, because the structure of the trie will only make you check relevant words. Build it like this:
- Sort the letters of each word, so that "triangle" and "integral" will both become "aegilnrt".
- Insert the sorted word into the trie.
- Where you would place the end-marker in a normal trie, you place a list of possible words.
If you were looking for exact anagrams, you would sort the word to check, traverse the trie and print out the list of possible anagrams at the end. But here, you have to deal with partial anagrams and with blanks:
- Regular traversal means you take the next letter of the word and then descent the corresponding link in the tree,if it exists.
- Partial anagrams can be found by ignoring the next letter without descending in the trie.
- Blanks can be dealt with by descending all possible branches of a trie and decreasing the number of blanks.
When you have blanks, you will end up with duplicates. For example, if you have the letters A, B and C and a blank tile, you can make the word CAB, but you can get there on four different ways: CAB, _AB, C_B, CA_.
You could get around this by storing the result list in a data structure that eliminates duplicates, such as a set or ordered set, but you would still go down the same paths several times in order to create the duplicates.
A better solution is to keep track of which trie nodes you have visited with which parameters, i.e. with with remaining unused letters and blanks. You can then cut such paths short. Here's an implementation in pseudocode:
function find_r(t, str, blanks, visited)
{
// don't revisit explored paths
key = make_key(t, str, blanks);
if (key in visited) return ;
visited ~= key;
if (str.length == 0 and blanks == 0) {
// all resources have been used: return list of anagrams
return t.word;
} else {
res = ;
c = 0;
if (str.length > 0) {
c = str[0];
// regular traversal: use current letter and descend
if (c in t.next) {
res ~= find_r(t.next[c], str[1:], blanks, visited);
}
# partial anagrams: skip current letter and don't descend
l = 1
while (l < str.length and str[l] == c) l++;
res ~= find_r(t, str[l:], blanks, visited);
}
if (blanks > 0) {
// blanks: decrease blanks and descend
for (i in t.next) {
if (i < c) {
res ~= find_r(t.next[i], str, blanks - 1, visited);
}
}
}
return res;
}
}
(Here, ~
denotes list concatenation or set insertion; [beg=0:end=length]
denotes string slices; in
tests whether a dictionary or set contains a key.)
Once you've built the tree, this solution is fast when there are no blanks, but it gets exponentially worse with each blank and with larger letter pools. Testing with one blank is still reasonably fast, but with two blanks, it is on par with your existing solution.
Now there are at most two blanks in a Scrabble game and the rack can hold only up to seven tiles, so it may not be as bad in practice. Another question is whether the search should consider words obtained with two blanks. The results list will be very long and it will contain all two-letter words. The player may be more interested in high-scoring words that can be played with a single blank.
edited Nov 24 '18 at 16:25
answered Nov 24 '18 at 8:53
M Oehm
21.3k31832
21.3k31832
Thank you for the trie code. I'll try that when I have time. I'm already filtering length when doing the initial SQL query. While there are only two blanks, searching based on existing letters on the board would mean more blanks could be added to the search.
– Dana Bell
Nov 24 '18 at 17:15
add a comment |
Thank you for the trie code. I'll try that when I have time. I'm already filtering length when doing the initial SQL query. While there are only two blanks, searching based on existing letters on the board would mean more blanks could be added to the search.
– Dana Bell
Nov 24 '18 at 17:15
Thank you for the trie code. I'll try that when I have time. I'm already filtering length when doing the initial SQL query. While there are only two blanks, searching based on existing letters on the board would mean more blanks could be added to the search.
– Dana Bell
Nov 24 '18 at 17:15
Thank you for the trie code. I'll try that when I have time. I'm already filtering length when doing the initial SQL query. While there are only two blanks, searching based on existing letters on the board would mean more blanks could be added to the search.
– Dana Bell
Nov 24 '18 at 17:15
add a comment |
I used samgak's recommendation to check for "mismatches".
Instead of going through the process of checking each letter before continuing, I exit during the process of checking each letter. I thought I could just check mismatches and ignore matchcounting, but couldn't get it to work accurately. More analysis must be needed.
This speeded things up about 20% according to initial tests with an emulator.
I may be able to speed things up even more by checking letters based on letter frequency; check most common letters(LNSTE) before others (ZQJ). Now to figure out how to sort an array by one field in the array, but that's another question.
private boolean isAnagram(int anagram, char word, int blankcount) {
// anagram is letters in term
int matchcount = blankcount;
int mismatchcount = 0;
int c;
int second = {0,0,0,0,0, 0,0,0,0,0, 0,0,0,0,0, 0,0,0,0,0, 0,0,0,0,0, 0};
for (c = 0; c < word.length; c++)
second[word[c] - 'A']++;
for (c = 0; c < 26; c++)
{
mismatchcount += (anagram[c]<second[c] ? second[c] - anagram[c]:0);
if (mismatchcount > blankcount)
return false;
matchcount += (anagram[c]<second[c]) ? anagram[c]:second[c];
}
if (matchcount == word.length)
return true;
return false;
}
EDIT:
Here's an even more compact (faster?) version that counts mismatches as it builds the alternate word character array.
private boolean isAnagram(int anagram, char word, int blankcount) {
int mismatchcount = 0;
int c;
int second = {0,0,0,0,0, 0,0,0,0,0, 0,0,0,0,0, 0,0,0,0,0, 0,0,0,0,0, 0};
int letter = 0;
for (c = 0; c < word.length; c++) {
letter = ++second[word[c] - 'A'];
mismatchcount += (letter > anagram[word[c]-'A'] ? 1 : 0);
if (mismatchcount > blankcount)
return false;
}
return true;
}
add a comment |
I used samgak's recommendation to check for "mismatches".
Instead of going through the process of checking each letter before continuing, I exit during the process of checking each letter. I thought I could just check mismatches and ignore matchcounting, but couldn't get it to work accurately. More analysis must be needed.
This speeded things up about 20% according to initial tests with an emulator.
I may be able to speed things up even more by checking letters based on letter frequency; check most common letters(LNSTE) before others (ZQJ). Now to figure out how to sort an array by one field in the array, but that's another question.
private boolean isAnagram(int anagram, char word, int blankcount) {
// anagram is letters in term
int matchcount = blankcount;
int mismatchcount = 0;
int c;
int second = {0,0,0,0,0, 0,0,0,0,0, 0,0,0,0,0, 0,0,0,0,0, 0,0,0,0,0, 0};
for (c = 0; c < word.length; c++)
second[word[c] - 'A']++;
for (c = 0; c < 26; c++)
{
mismatchcount += (anagram[c]<second[c] ? second[c] - anagram[c]:0);
if (mismatchcount > blankcount)
return false;
matchcount += (anagram[c]<second[c]) ? anagram[c]:second[c];
}
if (matchcount == word.length)
return true;
return false;
}
EDIT:
Here's an even more compact (faster?) version that counts mismatches as it builds the alternate word character array.
private boolean isAnagram(int anagram, char word, int blankcount) {
int mismatchcount = 0;
int c;
int second = {0,0,0,0,0, 0,0,0,0,0, 0,0,0,0,0, 0,0,0,0,0, 0,0,0,0,0, 0};
int letter = 0;
for (c = 0; c < word.length; c++) {
letter = ++second[word[c] - 'A'];
mismatchcount += (letter > anagram[word[c]-'A'] ? 1 : 0);
if (mismatchcount > blankcount)
return false;
}
return true;
}
add a comment |
I used samgak's recommendation to check for "mismatches".
Instead of going through the process of checking each letter before continuing, I exit during the process of checking each letter. I thought I could just check mismatches and ignore matchcounting, but couldn't get it to work accurately. More analysis must be needed.
This speeded things up about 20% according to initial tests with an emulator.
I may be able to speed things up even more by checking letters based on letter frequency; check most common letters(LNSTE) before others (ZQJ). Now to figure out how to sort an array by one field in the array, but that's another question.
private boolean isAnagram(int anagram, char word, int blankcount) {
// anagram is letters in term
int matchcount = blankcount;
int mismatchcount = 0;
int c;
int second = {0,0,0,0,0, 0,0,0,0,0, 0,0,0,0,0, 0,0,0,0,0, 0,0,0,0,0, 0};
for (c = 0; c < word.length; c++)
second[word[c] - 'A']++;
for (c = 0; c < 26; c++)
{
mismatchcount += (anagram[c]<second[c] ? second[c] - anagram[c]:0);
if (mismatchcount > blankcount)
return false;
matchcount += (anagram[c]<second[c]) ? anagram[c]:second[c];
}
if (matchcount == word.length)
return true;
return false;
}
EDIT:
Here's an even more compact (faster?) version that counts mismatches as it builds the alternate word character array.
private boolean isAnagram(int anagram, char word, int blankcount) {
int mismatchcount = 0;
int c;
int second = {0,0,0,0,0, 0,0,0,0,0, 0,0,0,0,0, 0,0,0,0,0, 0,0,0,0,0, 0};
int letter = 0;
for (c = 0; c < word.length; c++) {
letter = ++second[word[c] - 'A'];
mismatchcount += (letter > anagram[word[c]-'A'] ? 1 : 0);
if (mismatchcount > blankcount)
return false;
}
return true;
}
I used samgak's recommendation to check for "mismatches".
Instead of going through the process of checking each letter before continuing, I exit during the process of checking each letter. I thought I could just check mismatches and ignore matchcounting, but couldn't get it to work accurately. More analysis must be needed.
This speeded things up about 20% according to initial tests with an emulator.
I may be able to speed things up even more by checking letters based on letter frequency; check most common letters(LNSTE) before others (ZQJ). Now to figure out how to sort an array by one field in the array, but that's another question.
private boolean isAnagram(int anagram, char word, int blankcount) {
// anagram is letters in term
int matchcount = blankcount;
int mismatchcount = 0;
int c;
int second = {0,0,0,0,0, 0,0,0,0,0, 0,0,0,0,0, 0,0,0,0,0, 0,0,0,0,0, 0};
for (c = 0; c < word.length; c++)
second[word[c] - 'A']++;
for (c = 0; c < 26; c++)
{
mismatchcount += (anagram[c]<second[c] ? second[c] - anagram[c]:0);
if (mismatchcount > blankcount)
return false;
matchcount += (anagram[c]<second[c]) ? anagram[c]:second[c];
}
if (matchcount == word.length)
return true;
return false;
}
EDIT:
Here's an even more compact (faster?) version that counts mismatches as it builds the alternate word character array.
private boolean isAnagram(int anagram, char word, int blankcount) {
int mismatchcount = 0;
int c;
int second = {0,0,0,0,0, 0,0,0,0,0, 0,0,0,0,0, 0,0,0,0,0, 0,0,0,0,0, 0};
int letter = 0;
for (c = 0; c < word.length; c++) {
letter = ++second[word[c] - 'A'];
mismatchcount += (letter > anagram[word[c]-'A'] ? 1 : 0);
if (mismatchcount > blankcount)
return false;
}
return true;
}
edited Dec 8 '18 at 15:50
answered Dec 7 '18 at 17:17
Dana Bell
246
246
add a comment |
add a comment |
Thanks for contributing an answer to Stack Overflow!
- Please be sure to answer the question. Provide details and share your research!
But avoid …
- Asking for help, clarification, or responding to other answers.
- Making statements based on opinion; back them up with references or personal experience.
To learn more, see our tips on writing great answers.
Some of your past answers have not been well-received, and you're in danger of being blocked from answering.
Please pay close attention to the following guidance:
- Please be sure to answer the question. Provide details and share your research!
But avoid …
- Asking for help, clarification, or responding to other answers.
- Making statements based on opinion; back them up with references or personal experience.
To learn more, see our tips on writing great answers.
Sign up or log in
StackExchange.ready(function () {
StackExchange.helpers.onClickDraftSave('#login-link');
});
Sign up using Google
Sign up using Facebook
Sign up using Email and Password
Post as a guest
Required, but never shown
StackExchange.ready(
function () {
StackExchange.openid.initPostLogin('.new-post-login', 'https%3a%2f%2fstackoverflow.com%2fquestions%2f53434695%2fneed-faster-word-builder-algorithm%23new-answer', 'question_page');
}
);
Post as a guest
Required, but never shown
Sign up or log in
StackExchange.ready(function () {
StackExchange.helpers.onClickDraftSave('#login-link');
});
Sign up using Google
Sign up using Facebook
Sign up using Email and Password
Post as a guest
Required, but never shown
Sign up or log in
StackExchange.ready(function () {
StackExchange.helpers.onClickDraftSave('#login-link');
});
Sign up using Google
Sign up using Facebook
Sign up using Email and Password
Post as a guest
Required, but never shown
Sign up or log in
StackExchange.ready(function () {
StackExchange.helpers.onClickDraftSave('#login-link');
});
Sign up using Google
Sign up using Facebook
Sign up using Email and Password
Sign up using Google
Sign up using Facebook
Sign up using Email and Password
Post as a guest
Required, but never shown
Required, but never shown
Required, but never shown
Required, but never shown
Required, but never shown
Required, but never shown
Required, but never shown
Required, but never shown
Required, but never shown
Try doing all the "non UI" work on a separate thread, if you're not doing it already.
– HB.
Nov 22 '18 at 18:11
This is running in a separate thread. I just need it to run faster. Similar apps do this in about a tenth of the time, though they generally only show the word without details.
– Dana Bell
Nov 22 '18 at 22:54