Back to index

indicator-appmenu  12.10.0
Classes | Defines | Functions
hudtoken.c File Reference
#include "hudtoken.h"
#include "hudsettings.h"
#include <string.h>

Go to the source code of this file.

Classes

struct  _HudToken
struct  _HudTokenList

Defines

#define TOKEN_LENGTH_LIMIT   32
#define MIN3(a, b, c)   (MIN(MIN((a),(b)),(c)))
#define SEPARATORS   " .->"

Functions

guint hud_token_distance (const HudToken *haystack, const HudToken *needle)
HudToken * hud_token_new (const gchar *str, gssize length)
void hud_token_free (HudToken *token)
static const gchar * hud_token_get_original (HudToken *token)
static HudTokenList * hud_token_list_new_consume_array (GPtrArray *array)
static void hud_token_list_add_string_to_array (GPtrArray *array, const gchar *string)
static void hud_token_list_add_string_list_to_array (GPtrArray *array, HudStringList *list)
HudTokenList * hud_token_list_new_from_string (const gchar *string)
HudTokenList * hud_token_list_new_from_string_list (HudStringList *string_list)
void hud_token_list_free (HudTokenList *list)
guint hud_token_list_distance (HudTokenList *haystack, HudTokenList *needle, const gchar ***matches)
guint hud_token_list_get_length (HudTokenList *list)

Class Documentation

struct _HudToken

Definition at line 25 of file hudtoken.c.

Class Members
guint length
gchar * original
gunichar * str
struct _HudTokenList

Definition at line 209 of file hudtoken.c.

Class Members
gint length
HudToken ** tokens

Define Documentation

#define MIN3 (   a,
  b,
 
)    (MIN(MIN((a),(b)),(c)))

Definition at line 60 of file hudtoken.c.

#define SEPARATORS   " .->"

Definition at line 229 of file hudtoken.c.

#define TOKEN_LENGTH_LIMIT   32

Definition at line 35 of file hudtoken.c.


Function Documentation

guint hud_token_distance ( const HudToken *  haystack,
const HudToken *  needle 
)

Definition at line 74 of file hudtoken.c.

{
  static guint d[TOKEN_LENGTH_LIMIT][TOKEN_LENGTH_LIMIT];
  gunichar h, n;
  gint result;
  gint i, j;

  g_assert (haystack->length < TOKEN_LENGTH_LIMIT && needle->length < TOKEN_LENGTH_LIMIT);
  g_assert (haystack->str[haystack->length] == 0);
  g_assert (needle->str[needle->length] == 0);

  /* This function only performs memory writes to 'd' and calls no other
   * functions.  No pointer to 'd' is ever leaked.  Hopefully the
   * compiler is clever enough to therefore realise that no other memory
   * will be modified during the running of this function and optimise
   * aggressively.
   */

  /* By convention we refer to "add" and "drop" in terms of "mistakes
   * the user made".  The user's token is the one in "needle".  To give
   * examples against the menu item "preferences", if the user typed:
   *
   *  - "prefereces": this has a dropped character
   *
   *  - "prefferences": this has an added character
   *
   *  - "prefefences": this has a swap
   *
   * We organise the matrix with each column (j) corresponding to a
   * character in the menu item text ('haystack') and each row (i)
   * corresponding to a character in the search token ('needle').
   *
   * We modify the Levenshtein algorithm in the following ways:
   *
   *  - configurable penalties for various mistakes (add, drop,
   *    swap).  This is done by replacing additions of '1' with
   *    additions of these configurable values.
   *
   *  - a lower penalties for drops that occur at the end of the token
   *    that the user typed.  For example, "prefer" would be given a
   *    lower penalty for those 5 missing letters than would occur if
   *    they were not missing from the end.
   *
   *    This is done by special-casing the last row (i == nlen) in the
   *    matrix, corresponding to the last character in the search
   *    token.  In this case, we calculate drops at a lower penalty.
   *
   * Implementing the first of these two changes is a simple tweak:
   * instead of adding '1', add the configurable value.
   *
   * Implementing the second one is somewhat more difficult: we could
   * modify the core algorithm, but that would violate the 'pureness' of
   * the dynamic programming approach.
   *
   * Instead, we wait until the 'pure' algorithm is done then instead of
   * only considering the result for the full menu item text (ie: the
   * number in the bottom right corner) we consider all prefixes of the
   * menu item text by scanning the entire bottom row (and adding a
   * penalty according to the number of characters removed).
   */

  /* http://en.wikipedia.org/wiki/Levenshtein_distance#Computing_Levenshtein_distance */
  for (i = 0; i <= needle->length; i++)
    d[i][0] = i * hud_settings.add_penalty;

  for (j = 0; j <= haystack->length; j++)
    d[0][j] = j * hud_settings.drop_penalty;

  for (i = 1; (n = needle->str[i - 1]); i++)
    for (j = 1; (h = haystack->str[j - 1]); j++)
      if (n == h)
        d[i][j] = d[i - 1][j - 1];

      else
        d[i][j] = MIN3(d[i - 1][j - 1] + hud_settings.swap_penalty,
                       d[i    ][j - 1] + hud_settings.drop_penalty,
                       d[i - 1][j    ] + hud_settings.add_penalty);

  /* Noe we consider all prefixes of the menu item text to discover
   * which one gives us the best score.
   *
   * If we end up picking a result from a column other than the
   * rightmost, it will have had the correct multiple of the end-drop
   * penalty added to it by the time we're done.
   */
  result = d[--i][0];
  for (j = 1; j <= haystack->length; j++)
    result = MIN (d[i][j], result + hud_settings.end_drop_penalty);

  return result;
}

Here is the caller graph for this function:

void hud_token_free ( HudToken *  token)

Definition at line 196 of file hudtoken.c.

{
  g_free (token->original);
  g_free (token->str);
  g_slice_free (HudToken, token);
}

Here is the caller graph for this function:

static const gchar* hud_token_get_original ( HudToken *  token) [static]

Definition at line 204 of file hudtoken.c.

{
  return token->original;
}

Here is the caller graph for this function:

static void hud_token_list_add_string_list_to_array ( GPtrArray *  array,
HudStringList *  list 
) [static]

Definition at line 252 of file hudtoken.c.

Here is the call graph for this function:

Here is the caller graph for this function:

static void hud_token_list_add_string_to_array ( GPtrArray *  array,
const gchar *  string 
) [static]

Definition at line 231 of file hudtoken.c.

{
  while (*string)
    {
      /* strip separators */
      string += strspn (string, SEPARATORS);

      if (*string)
        {
          gint length;

          /* consume a token */
          length = strcspn (string, SEPARATORS);
          g_ptr_array_add (array, hud_token_new (string, length));
          string += length;
        }
    }
}

Here is the call graph for this function:

Here is the caller graph for this function:

guint hud_token_list_distance ( HudTokenList *  haystack,
HudTokenList *  needle,
const gchar ***  matches 
)

Definition at line 296 of file hudtoken.c.

{
  static guint d[32][32];
  gint i, j;

  if (needle->length > haystack->length)
    return G_MAXUINT;

  /* Simply refuse to deal with ridiculous situations.
   *
   * This only happens when the user has more than 32 words in their
   * search or the same happens in a menu item.
   */
  if (haystack->length > 32 || needle->length > 32)
    return G_MAXUINT;

  /* unroll the handling of the first needle term */
  {
    guint cost;

    /* unroll the handling of the first haystack term */
    cost = hud_token_distance (haystack->tokens[0], needle->tokens[0]);
    d[0][0] = cost;

    for (j = 0; j < haystack->length; j++)
      {
        guint take_cost;

        take_cost = hud_token_distance (haystack->tokens[j], needle->tokens[0]);
        cost = MIN (take_cost, cost + 1);
        d[0][j] = cost;
      }
  }

  /* if we have only one needle, this loop won't run at all */
  for (i = 1; i < needle->length; i++)
    {
      guint cost;

      /* unroll the handling of the first haystack term */
      cost = d[i - 1][i - 1] + hud_token_distance (haystack->tokens[i], needle->tokens[i]);
      d[i][i] = cost;

      for (j = i + 1; j < haystack->length; j++)
        {
          guint prev_cost;

          prev_cost = d[i - 1][j - 1];
          /* Only do checking of additional terms of it's possible that
           * we'll come in under the max distance AND beat the cost of
           * just dropping the term.
           *
           * hud_token_distance() could return zero so we need to try it
           * if:
           *
           *   - prev_cost is less than or equal to the max distance
           *     (because then our result could equal the max distance)
           *
           *   - prev_cost is less than or equal to the last cost
           *     Even if it's equal, skipping has a cost and a perfect
           *     match has no cost, so we need to try the match.
           */
          if (prev_cost <= hud_settings.max_distance && prev_cost <= cost)
            {
              guint take_cost;

              take_cost = prev_cost + hud_token_distance (haystack->tokens[j], needle->tokens[i]);
              cost = MIN (take_cost, cost + 1);
            }
          else
            cost = cost + 1;

          d[i][j] = cost;
        }
    }

  /* Discover which terms were matched */
  if (matches)
    {
      *matches = g_new (const gchar *, needle->length + 1);

      j = haystack->length - 1;

      for (i = needle->length - 1; i >= 0; i--)
        {
          while (j > i && d[i][j-1] == d[i][j] - 1)
            j--;

          (*matches)[i] = hud_token_get_original (haystack->tokens[j]);
          j--;
        }

      (*matches)[needle->length] = NULL;
    }

  return d[needle->length - 1][haystack->length - 1];
}

Here is the call graph for this function:

Here is the caller graph for this function:

void hud_token_list_free ( HudTokenList *  list)

Definition at line 283 of file hudtoken.c.

{
  gint i;

  for (i = 0; i < list->length; i++)
    hud_token_free (list->tokens[i]);

  g_free (list->tokens);

  g_slice_free (HudTokenList, list);
}

Here is the call graph for this function:

Here is the caller graph for this function:

guint hud_token_list_get_length ( HudTokenList *  list)

Definition at line 397 of file hudtoken.c.

{
  return list->length;
}

Here is the caller graph for this function:

static HudTokenList* hud_token_list_new_consume_array ( GPtrArray *  array) [static]

Definition at line 216 of file hudtoken.c.

{
  HudTokenList *list;

  list = g_slice_new (HudTokenList);
  list->tokens = (HudToken **) array->pdata;
  list->length = array->len;

  g_ptr_array_free (array, FALSE);

  return list;
}

Here is the caller graph for this function:

HudTokenList* hud_token_list_new_from_string ( const gchar *  string)

Definition at line 263 of file hudtoken.c.

{
  GPtrArray *array;

  array = g_ptr_array_new ();
  hud_token_list_add_string_to_array (array, string);
  return hud_token_list_new_consume_array (array);
}

Here is the call graph for this function:

Here is the caller graph for this function:

HudTokenList* hud_token_list_new_from_string_list ( HudStringList *  string_list)

Definition at line 273 of file hudtoken.c.

{
  GPtrArray *array;

  array = g_ptr_array_new ();
  hud_token_list_add_string_list_to_array (array, string_list);
  return hud_token_list_new_consume_array (array);
}

Here is the call graph for this function:

Here is the caller graph for this function:

HudToken* hud_token_new ( const gchar *  str,
gssize  length 
)

Definition at line 168 of file hudtoken.c.

{
  HudToken *token;
  gchar *normal;
  gchar *folded;
  glong items;

  token = g_slice_new (HudToken);

  token->original = g_strndup (str, length);
  normal = g_utf8_normalize (str, length, G_NORMALIZE_ALL);
  folded = g_utf8_casefold (normal, -1);
  token->str = g_utf8_to_ucs4_fast (folded, -1, &items);
  token->length = items;
  g_free (folded);
  g_free (normal);

  if (!(token->length < TOKEN_LENGTH_LIMIT))
    {
      token->length = 31;
      token->str[31] = 0;
    }

  return token;
}

Here is the caller graph for this function: