MLPACK  1.0.11
lsh_search.hpp
Go to the documentation of this file.
1 
38 #ifndef __MLPACK_METHODS_NEIGHBOR_SEARCH_LSH_SEARCH_HPP
39 #define __MLPACK_METHODS_NEIGHBOR_SEARCH_LSH_SEARCH_HPP
40 
41 #include <mlpack/core.hpp>
42 #include <vector>
43 #include <string>
44 
47 
48 namespace mlpack {
49 namespace neighbor {
50 
58 template<typename SortPolicy = NearestNeighborSort>
59 class LSHSearch
60 {
61  public:
83  LSHSearch(const arma::mat& referenceSet,
84  const arma::mat& querySet,
85  const size_t numProj,
86  const size_t numTables,
87  const double hashWidth = 0.0,
88  const size_t secondHashSize = 99901,
89  const size_t bucketSize = 500);
90 
111  LSHSearch(const arma::mat& referenceSet,
112  const size_t numProj,
113  const size_t numTables,
114  const double hashWidth = 0.0,
115  const size_t secondHashSize = 99901,
116  const size_t bucketSize = 500);
117 
136  void Search(const size_t k,
137  arma::Mat<size_t>& resultingNeighbors,
138  arma::mat& distances,
139  const size_t numTablesToSearch = 0);
140 
141  // Returns a string representation of this object.
142  std::string ToString() const;
143 
144  private:
158  void BuildHash();
159 
171  void ReturnIndicesFromTable(const size_t queryIndex,
172  arma::uvec& referenceIndices,
173  size_t numTablesToSearch);
174 
182  double BaseCase(const size_t queryIndex, const size_t referenceIndex);
183 
196  void InsertNeighbor(const size_t queryIndex, const size_t pos,
197  const size_t neighbor, const double distance);
198 
200  const arma::mat& referenceSet;
201 
203  const arma::mat& querySet;
204 
206  const size_t numProj;
207 
209  const size_t numTables;
210 
212  std::vector<arma::mat> projections; // should be [numProj x dims] x numTables
213 
215  arma::mat offsets; // should be numProj x numTables
216 
218  double hashWidth;
219 
221  const size_t secondHashSize;
222 
224  arma::vec secondHashWeights;
225 
227  const size_t bucketSize;
228 
231 
233  arma::Mat<size_t> secondHashTable;
234 
237  arma::Col<size_t> bucketContentSize;
238 
241  arma::Col<size_t> bucketRowInHashTable;
242 
244  arma::mat* distancePtr;
245 
247  arma::Mat<size_t>* neighborPtr;
248 }; // class LSHSearch
249 
250 }; // namespace neighbor
251 }; // namespace mlpack
252 
253 // Include implementation.
254 #include "lsh_search_impl.hpp"
255 
256 #endif
const arma::mat & referenceSet
Reference dataset.
Definition: lsh_search.hpp:200
Linear algebra utility functions, generally performed on matrices or vectors.
Definition: load.hpp:31
const arma::mat & querySet
Query dataset (may not be given).
Definition: lsh_search.hpp:203
void BuildHash()
This function builds a hash table with two levels of hashing as presented in the paper.
const size_t secondHashSize
The big prime representing the size of the second hash.
Definition: lsh_search.hpp:221
metric::SquaredEuclideanDistance metric
Instantiation of the metric.
Definition: lsh_search.hpp:230
std::vector< arma::mat > projections
The std::vector containing the projection matrix of each table.
Definition: lsh_search.hpp:212
const size_t bucketSize
The bucket size of the second hash.
Definition: lsh_search.hpp:227
arma::Col< size_t > bucketContentSize
The number of elements present in each hash bucket; should be secondHashSize.
Definition: lsh_search.hpp:237
arma::Mat< size_t > * neighborPtr
The pointer to the nearest neighbor indices.
Definition: lsh_search.hpp:247
double BaseCase(const size_t queryIndex, const size_t referenceIndex)
This is a helper function that computes the distance of the query to the neighbor candidates and appr...
The LSHSearch class – This class builds a hash on the reference set and uses this hash to compute th...
Definition: lsh_search.hpp:59
const size_t numProj
The number of projections.
Definition: lsh_search.hpp:206
std::string ToString() const
arma::mat offsets
The list of the offset &#39;b&#39; for each of the projection for each table.
Definition: lsh_search.hpp:215
The L_p metric for arbitrary integer p, with an option to take the root.
Definition: lmetric.hpp:73
LSHSearch(const arma::mat &referenceSet, const arma::mat &querySet, const size_t numProj, const size_t numTables, const double hashWidth=0.0, const size_t secondHashSize=99901, const size_t bucketSize=500)
This function initializes the LSH class.
void InsertNeighbor(const size_t queryIndex, const size_t pos, const size_t neighbor, const double distance)
This is a helper function that efficiently inserts better neighbor candidates into an existing set of...
const size_t numTables
The number of hash tables.
Definition: lsh_search.hpp:209
arma::Mat< size_t > secondHashTable
The final hash table; should be (< secondHashSize) x bucketSize.
Definition: lsh_search.hpp:233
void ReturnIndicesFromTable(const size_t queryIndex, arma::uvec &referenceIndices, size_t numTablesToSearch)
This function takes a query and hashes it into each of the hash tables to get keys for the query and ...
arma::vec secondHashWeights
The weights of the second hash.
Definition: lsh_search.hpp:224
void Search(const size_t k, arma::Mat< size_t > &resultingNeighbors, arma::mat &distances, const size_t numTablesToSearch=0)
Compute the nearest neighbors and store the output in the given matrices.
double hashWidth
The hash width.
Definition: lsh_search.hpp:218
arma::Col< size_t > bucketRowInHashTable
For a particular hash value, points to the row in secondHashTable corresponding to this value...
Definition: lsh_search.hpp:241
arma::mat * distancePtr
The pointer to the nearest neighbor distances.
Definition: lsh_search.hpp:244