OpenTREP Logo  0.07.4
C++ Open Travel Request Parsing Library
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
XapianIndexManager.cpp
Go to the documentation of this file.
1 // //////////////////////////////////////////////////////////////////////
2 // Import section
3 // //////////////////////////////////////////////////////////////////////
4 // STL
5 #include <cassert>
6 #include <sstream>
7 #include <string>
8 #include <vector>
9 #include <exception>
10 // Boost
11 #include <boost/filesystem.hpp>
12 #include <boost/random/random_device.hpp>
13 #include <boost/random/uniform_int_distribution.hpp>
14 // Xapian
15 #include <xapian.h>
16 // OpenTrep
17 #include <opentrep/Location.hpp>
18 #include <opentrep/CityDetails.hpp>
19 #include <opentrep/bom/Result.hpp>
22 
23 namespace OPENTREP {
24 
25  // //////////////////////////////////////////////////////////////////////
26  boost::filesystem::path
27  checkTravelDBFilePath (const TravelDBFilePath_T& iTravelDBFilePath) {
28  boost::filesystem::path oTravelDBFilePath (iTravelDBFilePath.begin(),
29  iTravelDBFilePath.end());
30  if (!(boost::filesystem::exists (oTravelDBFilePath)
31  && boost::filesystem::is_directory (oTravelDBFilePath))) {
32  std::ostringstream oStr;
33  oStr << "The file-path to the Xapian database/index ('"
34  << iTravelDBFilePath << "') does not exist or is not a directory.";
35  OPENTREP_LOG_ERROR (oStr.str());
36  throw FileNotFoundException (oStr.str());
37  }
38 
39  return oTravelDBFilePath;
40  }
41 
42  // //////////////////////////////////////////////////////////////////////
43  NbOfDBEntries_T XapianIndexManager::
44  getSize (const TravelDBFilePath_T& iTravelDBFilePath) {
45  NbOfDBEntries_T oNbOfDBEntries = 0;
46 
47  // Check whether the file-path to the Xapian database/index exists
48  // and is a directory.
49  checkTravelDBFilePath (iTravelDBFilePath);
50 
51  // Open the Xapian database
52  Xapian::Database lXapianDatabase (iTravelDBFilePath);
53 
54  // Retrieve the actual number of documents indexed by the Xapian database
55  const Xapian::doccount& lDocCount = lXapianDatabase.get_doccount();
56 
57  //
58  oNbOfDBEntries = static_cast<const NbOfDBEntries_T> (lDocCount);
59 
60  return oNbOfDBEntries;
61  }
62 
63  // //////////////////////////////////////////////////////////////////////
64  NbOfMatches_T XapianIndexManager::
65  drawRandomLocations (const TravelDBFilePath_T& iTravelDBFilePath,
66  const NbOfMatches_T& iNbOfDraws,
67  LocationList_T& ioLocationList) {
68  NbOfMatches_T oNbOfMatches = 0;
69 
70  // Check whether the file-path to the Xapian database/index exists
71  // and is a directory.
72  checkTravelDBFilePath (iTravelDBFilePath);
73 
74  // Open the Xapian database
75  Xapian::Database lXapianDatabase (iTravelDBFilePath);
76 
77  // Retrieve the number of documents indexed by the database
78  const NbOfDBEntries_T& lTotalNbOfDocs = getSize (iTravelDBFilePath);
79 
80  // No need to go further when the Xapian database (index) is empty
81  if (lTotalNbOfDocs == 0) {
82  //
83  OPENTREP_LOG_NOTIFICATION ("The Xapian database is empty");
84  return oNbOfMatches;
85  }
86 
87  // random_device is used as a source of entropy, since the generated
88  // locations are expected not to be reproducible.
89  boost::random::random_device lRandomDevice;
90  boost::random::uniform_int_distribution<> uniformDistrib (1, lTotalNbOfDocs);
91 
92  // Randomly generate document IDs. If the corresponding documents
93  // do not exist in the Xapian database, generate another one.
94  for (NbOfMatches_T idx = 1; idx <= iNbOfDraws; ++idx) {
95  unsigned int lRandomNbInt = uniformDistrib (lRandomDevice);
96  Xapian::docid lDocID = static_cast<Xapian::docid> (lRandomNbInt);
97 
98  // Retrieve the document from the Xapian database/index
99  Xapian::termcount lDocLength = lXapianDatabase.get_doclength (lDocID);
100 
101  unsigned short currentNbOfIterations = 0;
102  while (lDocLength == 0 && currentNbOfIterations <= 100) {
103  // DEBUG
104  OPENTREP_LOG_DEBUG ("[" << idx << "] The " << lDocID
105  << " document ID does not exist in the Xapian "
106  << "database. Another ID will be generated.");
107 
108  // Re-draw another random document ID
109  lRandomNbInt = uniformDistrib (lRandomDevice);
110  lDocID = static_cast<Xapian::docid> (lRandomNbInt);
111 
112  // Retrieve the document from the Xapian database/index
113  lDocLength = lXapianDatabase.get_doclength (lDocID);
114  }
115 
116  // Bad luck: no document ID can be generated so that it corresponds to
117  // an actual document in the Xapian database/index
118  if (lDocLength == 0) {
119  //
120  OPENTREP_LOG_NOTIFICATION ("[" << idx << "] No document ID can be "
121  << "generated so that it corresponds to "
122  << "a document in the Xapian database.");
123 
124  } else {
125  // Retrieve the actual document.
126  const Xapian::Document lDoc = lXapianDatabase.get_document (lDocID);
127  const std::string& lDocDataStr = lDoc.get_data();
128  const RawDataString_T& lDocData = RawDataString_T (lDocDataStr);
129 
130  // Parse the POR details and create the corresponding Location structure
131  const Location& lLocation = Result::retrieveLocation (lDocData);
132 
133  // Add the Location structure to the dedicated list
134  ioLocationList.push_back (lLocation);
135  }
136  }
137 
138  // Consistency check
139  oNbOfMatches = ioLocationList.size();
140  if (oNbOfMatches != iNbOfDraws) {
141  //
142  OPENTREP_LOG_NOTIFICATION (iNbOfDraws << " random draws were expected, "
143  << "but " << oNbOfMatches
144  << " have been generated.");
145  }
146 
147  //
148  return oNbOfMatches;
149  }
150 
151 }
#define OPENTREP_LOG_ERROR(iToBeLogged)
Definition: Logger.hpp:24
#define OPENTREP_LOG_DEBUG(iToBeLogged)
Definition: Logger.hpp:33
#define OPENTREP_LOG_NOTIFICATION(iToBeLogged)
Definition: Logger.hpp:27
unsigned short NbOfMatches_T
static Location retrieveLocation(const Xapian::Document &)
Definition: Result.cpp:266
unsigned int NbOfDBEntries_T
std::list< Location > LocationList_T
boost::filesystem::path checkTravelDBFilePath(const TravelDBFilePath_T &iTravelDBFilePath)