Ninja
edit_distance.cc
Go to the documentation of this file.
1 // Copyright 2011 Google Inc. All Rights Reserved.
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 // http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 
#include "edit_distance.h"

#include <algorithm>
#include <vector>
18 
19 int EditDistance(const StringPiece& s1,
20  const StringPiece& s2,
21  bool allow_replacements,
22  int max_edit_distance) {
23  // The algorithm implemented below is the "classic"
24  // dynamic-programming algorithm for computing the Levenshtein
25  // distance, which is described here:
26  //
27  // http://en.wikipedia.org/wiki/Levenshtein_distance
28  //
29  // Although the algorithm is typically described using an m x n
30  // array, only two rows are used at a time, so this implemenation
31  // just keeps two separate vectors for those two rows.
32  int m = s1.len_;
33  int n = s2.len_;
34 
35  vector<int> previous(n + 1);
36  vector<int> current(n + 1);
37 
38  for (int i = 0; i <= n; ++i)
39  previous[i] = i;
40 
41  for (int y = 1; y <= m; ++y) {
42  current[0] = y;
43  int best_this_row = current[0];
44 
45  for (int x = 1; x <= n; ++x) {
46  if (allow_replacements) {
47  current[x] = min(previous[x-1] + (s1.str_[y-1] == s2.str_[x-1] ? 0 : 1),
48  min(current[x-1], previous[x])+1);
49  }
50  else {
51  if (s1.str_[y-1] == s2.str_[x-1])
52  current[x] = previous[x-1];
53  else
54  current[x] = min(current[x-1], previous[x]) + 1;
55  }
56  best_this_row = min(best_this_row, current[x]);
57  }
58 
59  if (max_edit_distance && best_this_row > max_edit_distance)
60  return max_edit_distance + 1;
61 
62  current.swap(previous);
63  }
64 
65  return previous[n];
66 }
const char * str_
Definition: string_piece.h:49
StringPiece represents a slice of a string whose memory is managed externally.
Definition: string_piece.h:27
size_t len_
Definition: string_piece.h:50
int EditDistance(const StringPiece &s1, const StringPiece &s2, bool allow_replacements, int max_edit_distance)