bes  Updated for version 3.17.0
BESDapResponseCache.cc
1 // -*- mode: c++; c-basic-offset:4 -*-
2 
3 // This file is part of libdap, A C++ implementation of the OPeNDAP Data
4 // Access Protocol.
5 
6 // Copyright (c) 2011 OPeNDAP, Inc.
7 // Author: James Gallagher <jgallagher@opendap.org>
8 //
9 // This library is free software; you can redistribute it and/or
10 // modify it under the terms of the GNU Lesser General Public
11 // License as published by the Free Software Foundation; either
12 // version 2.1 of the License, or (at your option) any later version.
13 //
14 // This library is distributed in the hope that it will be useful,
15 // but WITHOUT ANY WARRANTY; without even the implied warranty of
16 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 // Lesser General Public License for more details.
18 //
19 // You should have received a copy of the GNU Lesser General Public
20 // License along with this library; if not, write to the Free Software
21 // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
22 //
23 // You can contact OPeNDAP, Inc. at PO Box 112, Saunderstown, RI. 02874-0112.
24 
25 #include "config.h"
26 
27 //#define DODS_DEBUG
28 
29 #include <sys/stat.h>
30 
31 #include <iostream>
32 #include <string>
33 #include <fstream>
34 #include <sstream>
35 
36 #include <DDS.h>
37 #include <ConstraintEvaluator.h>
38 #include <DDXParserSAX2.h>
39 #include <XDRStreamMarshaller.h>
40 #include <XDRStreamUnMarshaller.h>
41 //<XDRFileUnMarshaller.h>
42 #include <debug.h>
43 #include <mime_util.h> // for last_modified_time() and rfc_822_date()
44 #include <util.h>
45 
46 #include "BESDapResponseCache.h"
47 #include "BESDapResponseBuilder.h"
48 #include "BESInternalError.h"
49 
50 #include "BESUtil.h"
51 #include "TheBESKeys.h"
52 #include "BESLog.h"
53 #include "BESDebug.h"
54 
55 #ifdef NDEBUG
56 #undef BESDEBUG
57 #define BESDEBUG(stream, stuff)
58 #endif
59 
60 #define CRLF "\r\n"
61 
62 using namespace std;
63 using namespace libdap;
64 
65 BESDapResponseCache *BESDapResponseCache::d_instance = 0;
66 const string BESDapResponseCache::PATH_KEY = "DAP.ResponseCache.path";
67 const string BESDapResponseCache::PREFIX_KEY = "DAP.ResponseCache.prefix";
68 const string BESDapResponseCache::SIZE_KEY = "DAP.ResponseCache.size";
69 
70 unsigned long BESDapResponseCache::getCacheSizeFromConfig()
71 {
72 
73  bool found;
74  string size;
75  unsigned long size_in_megabytes = 0;
76  TheBESKeys::TheKeys()->get_value(SIZE_KEY, size, found);
77  if (found) {
78  BESDEBUG("dap_response_cache",
79  "BESDapResponseCache::getCacheSizeFromConfig(): Located BES key " << SIZE_KEY<< "=" << size << endl);
80  istringstream iss(size);
81  iss >> size_in_megabytes;
82  }
83  else {
84  // FIXME This should not throw an exception. jhrg 10/20/15
85  string msg = "[ERROR] BESDapResponseCache::getCacheSizeFromConfig() - The BES Key " + SIZE_KEY
86  + " is not set! It MUST be set to utilize the DAP response cache. ";
87  BESDEBUG("dap_response_cache", msg);
88  throw BESInternalError(msg, __FILE__, __LINE__);
89  }
90  return size_in_megabytes;
91 }
92 
93 string BESDapResponseCache::getCachePrefixFromConfig()
94 {
95  bool found;
96  string prefix = "";
97  TheBESKeys::TheKeys()->get_value(PREFIX_KEY, prefix, found);
98  if (found) {
99  BESDEBUG("dap_response_cache",
100  "BESDapResponseCache::getCachePrefixFromConfig(): Located BES key " << PREFIX_KEY<< "=" << prefix << endl);
101  prefix = BESUtil::lowercase(prefix);
102  }
103  else {
104  string msg = "[ERROR] BESDapResponseCache::getCachePrefixFromConfig() - The BES Key " + PREFIX_KEY
105  + " is not set! It MUST be set to utilize the DAP response cache. ";
106  BESDEBUG("dap_response_cache", msg);
107  throw BESInternalError(msg, __FILE__, __LINE__);
108  }
109 
110  return prefix;
111 }
112 
113 string BESDapResponseCache::getCacheDirFromConfig()
114 {
115  bool found;
116 
117  string cacheDir = "";
118  TheBESKeys::TheKeys()->get_value(PATH_KEY, cacheDir, found);
119  if (found) {
120  BESDEBUG("dap_response_cache",
121  "BESDapResponseCache::getCacheDirFromConfig(): Located BES key " << PATH_KEY<< "=" << cacheDir << endl);
122  }
123  else {
124  string msg = "[ERROR] BESDapResponseCache::getCacheDirFromConfig() - The BES Key " + PATH_KEY
125  + " is not set! It MUST be set to utilize the DAP response cache. ";
126  BESDEBUG("dap_response_cache", msg);
127  throw BESInternalError(msg, __FILE__, __LINE__);
128  }
129  return cacheDir;
130 }
131 
132 BESDapResponseCache::BESDapResponseCache()
133 {
134  BESDEBUG("dap_response_cache", "BESDapResponseCache::BESDapResponseCache() - BEGIN" << endl);
135 
136  string cacheDir = getCacheDirFromConfig();
137  string prefix = getCachePrefixFromConfig();
138  unsigned long size_in_megabytes = getCacheSizeFromConfig();
139 
140  BESDEBUG("dap_response_cache",
141  "BESDapResponseCache::BESDapResponseCache() - Cache config params: " << cacheDir << ", " << prefix << ", " << size_in_megabytes << endl);
142 
143  // The required params must be present. If initialize() is not called,
144  // then d_cache will stay null and is_available() will return false.
145  // Also, the directory 'path' must exist, or d_cache will be null.
146  if (!cacheDir.empty() && size_in_megabytes > 0)
147  initialize(cacheDir, prefix, size_in_megabytes);
148 
149  BESDEBUG("dap_response_cache", "BESDapResponseCache::BESDapResponseCache() - END" << endl);
150 }
151 
164 BESDapResponseCache::get_instance(const string &cache_dir, const string &prefix, unsigned long long size)
165 {
166  if (d_instance == 0) {
167  if (dir_exists(cache_dir)) {
168  try {
169  d_instance = new BESDapResponseCache(cache_dir, prefix, size);
170 #ifdef HAVE_ATEXIT
171  atexit(delete_instance);
172 #endif
173  }
174  catch (BESInternalError &bie) {
175  BESDEBUG("dap_response_cache",
176  "BESDapResponseCache::get_instance(): Failed to obtain cache! msg: " << bie.get_message() << endl);
177  }
178  }
179  }
180  BESDEBUG("dap_response_cache", "BESDapResponseCache::get_instance(dir,prefix,size) - d_instance: " << d_instance << endl);
181 
182  return d_instance;
183 }
184 
190 {
191  if (d_instance == 0) {
192  if (dir_exists(getCacheDirFromConfig())) {
193  try {
194  d_instance = new BESDapResponseCache();
195 #ifdef HAVE_ATEXIT
196  atexit(delete_instance);
197 #endif
198  }
199  catch (BESInternalError &bie) {
200  BESDEBUG("dap_response_cache",
201  "BESDapResponseCache::get_instance(): Failed to obtain cache! msg: " << bie.get_message() << endl);
202  }
203  }
204  }
205  BESDEBUG("dap_response_cache", "BESDapResponseCache::get_instance() - d_instance: " << d_instance << endl);
206 
207  return d_instance;
208 }
209 
210 
211 
221 bool BESDapResponseCache::is_valid(const string &cache_file_name, const string &dataset)
222 {
223  // If the cached response is zero bytes in size, it's not valid. This is true
224  // because a DAP data object, even if it has no data still has a metadata part.
225  // jhrg 10/20/15
226 
227  off_t entry_size = 0;
228  time_t entry_time = 0;
229  struct stat buf;
230  if (stat(cache_file_name.c_str(), &buf) == 0) {
231  entry_size = buf.st_size;
232  entry_time = buf.st_mtime;
233  }
234  else {
235  return false;
236  }
237 
238  if (entry_size == 0) return false;
239 
240  time_t dataset_time = entry_time;
241  if (stat(dataset.c_str(), &buf) == 0) {
242  dataset_time = buf.st_mtime;
243  }
244 
245  // Trick: if the d_dataset is not a file, stat() returns error and
246  // the times stay equal and the code uses the cache entry.
247 
248  // TODO Fix this so that the code can get a LMT from the correct
249  // handler.
250  if (dataset_time > entry_time) return false;
251 
252  return true;
253 }
254 
266 void BESDapResponseCache::read_data_from_cache(const string &cache_file_name, DDS *fdds)
267 {
268  BESDEBUG("dap_response_cache", __PRETTY_FUNCTION__ << " Opening cache file: " << cache_file_name << endl);
269 
270  ifstream data(cache_file_name.c_str());
271 
272  // Rip off the MIME headers from the response if they are present
273  string mime = get_next_mime_header(data);
274  while (!mime.empty()) {
275  mime = get_next_mime_header(data);
276  }
277 
278  // Parse the DDX; throw an exception on error.
279  DDXParser ddx_parser(fdds->get_factory());
280 #if 1
281  // Read the MPM boundary and then read the subsequent headers
282  string boundary = read_multipart_boundary(data);
283 
284  read_multipart_headers(data, "text/xml", dods_ddx);
285 #endif
286  // Parse the DDX, reading up to and including the next boundary.
287  // Return the CID for the matching data part
288  string data_cid;
289  try {
290  ddx_parser.intern_stream(data, fdds, data_cid, boundary);
291  }
292  catch (Error &e) {
293  BESDEBUG("dap_response_cache", "BESDapResponseCache::read_data_from_cache() - [ERROR] DDX Parser Error: " << e.get_error_message() << endl);
294  throw;
295  }
296 #if 1
297  // Munge the CID into something we can work with
298  data_cid = cid_to_header_value(data_cid);
299 
300  // Read the data part's MPM part headers (boundary was read by
301  // DDXParse::intern)
302  read_multipart_headers(data, "application/octet-stream", dods_data_ddx /* old value? dap4_data */, data_cid);
303 #endif
304  // Now read the data
305 
306  // XDRFileUnMarshaller um(data);
307  XDRStreamUnMarshaller um(data);
308  for (DDS::Vars_iter i = fdds->var_begin(); i != fdds->var_end(); i++) {
309  (*i)->deserialize(um, fdds);
310  }
311 }
312 
317 DDS *
318 BESDapResponseCache::get_cached_data_ddx(const string &cache_file_name, BaseTypeFactory *factory,
319  const string &filename)
320 {
321  BESDEBUG("dap_response_cache", __PRETTY_FUNCTION__ << " Reading cache for " << cache_file_name << endl);
322 
323  DDS *fdds = new DDS(factory);
324 
325  fdds->filename(filename);
326 
327  read_data_from_cache(cache_file_name, fdds);
328 
329  fdds->set_factory(0);
330 
331  // mark everything as read. And 'to send.' That is, make sure that when a response
332  // is retrieved from the cache, all of the variables are marked as 'to be sent.'
333  DDS::Vars_iter i = fdds->var_begin();
334  while (i != fdds->var_end()) {
335  (*i)->set_read_p(true);
336  (*i++)->set_send_p(true);
337  }
338 
339  return fdds;
340 }
341 
342 DDS *
343 BESDapResponseCache::cache_dataset(DDS &dds, const string &constraint, BESDapResponseBuilder *rb,
344  ConstraintEvaluator *eval, string &cache_token)
345 {
346  // These are used for the cached or newly created DDS object
347  BaseTypeFactory factory;
348  DDS *fdds;
349 
350  // Build the response_id. Since the response conent is a function of both the dataset AND the constraint,
351  // glue them together to get a unique id for the response.
352  string response_id = dds.filename() + "#" + constraint;
353 
354  // Get the cache filename for this thing.
355  string cache_file_name = get_cache_file_name(response_id, /*mangle*/true);
356 
357  BESDEBUG("dap_response_cache", __PRETTY_FUNCTION__ << " cache_file_name: " << cache_file_name << endl);
358  int fd;
359  try {
360  // If the object in the cache is not valid, remove it. The read_lock will
361  // then fail and the code will drop down to the create_and_lock() call.
362  // is_valid() tests for a non-zero object and for d_dateset newer than
363  // the cached object.
364  if (!is_valid(cache_file_name, dds.filename()))
365  purge_file(cache_file_name);
366 
367  if (get_read_lock(cache_file_name, fd)) {
368  BESDEBUG("dap_response_cache", __PRETTY_FUNCTION__ << " Cache hit (1) for: " << cache_file_name << endl);
369  fdds = get_cached_data_ddx(cache_file_name, &factory, dds.filename());
370  }
371  else if (create_and_lock(cache_file_name, fd)) {
372  // If here, the cache_file_name could not be locked for read access;
373  // try to build it. First make an empty file and get an exclusive lock on it.
374  BESDEBUG("dap_response_cache", __PRETTY_FUNCTION__ << " Caching " << cache_file_name << ", constraint: " << constraint << endl);
375 
376  fdds = new DDS(dds);
377  eval->parse_constraint(constraint, *fdds);
378 
379  if (eval->function_clauses()) {
380  DDS *temp_fdds = eval->eval_function_clauses(*fdds);
381  delete fdds;
382  fdds = temp_fdds;
383  }
384 
385  ofstream data_stream(cache_file_name.c_str());
386  if (!data_stream) {
387  throw BESInternalError("Could not open '" + cache_file_name + "' to write cached response.", __FILE__, __LINE__);
388  }
389 
390 #if 1
391  string start = "dataddx_cache_start", boundary = "dataddx_cache_boundary";
392 
393  // Use a ConstraintEvaluator that has not parsed a CE so the code can use
394  // the send method(s)
395  ConstraintEvaluator eval;
396 
397  // Setting the version to 3.2 causes send_data_ddx to write the MIME headers that
398  // the cache expects.
399  fdds->set_dap_version("3.2");
400 
401  // This is a bit of a hack, but it effectively uses ResponseBuilder to write the
402  // cached object/response without calling the machinery in one of the send_*()
403  // methods. Those methods assume they need to evaluate the BESDapResponseBuilder's
404  // CE, which is not necessary and will alter the values of the send_p property
405  // of the DDS's variables.
406  set_mime_multipart(data_stream, boundary, start, dods_data_ddx, x_plain,
407  last_modified_time(rb->get_dataset_name()));
408  //data_stream << flush;
409  rb->serialize_dap2_data_ddx(data_stream, *fdds, eval, boundary, start);
410  //data_stream << flush;
411 
412  data_stream << CRLF << "--" << boundary << "--" << CRLF;
413 #endif
414 #if 0
415  // FIXME: Remove Lame hack
416  ConstraintEvaluator eval;
417 
418  // Setting the version to 3.2 causes send_data_ddx to write the MIME headers that
419  // the cache expects. FIXME Wrong, but we still need to set the value. Used by the
420  // code that prints the 'DDX'
421  fdds->set_dap_version("3.2");
422 
423  rb->serialize_dap2_data_ddx(data_stream, *fdds, eval, /*boundary*/"unused", /*start*/"unused", /*ce_eval*/true);
424 #endif
425  data_stream.close();
426 
427  // Change the exclusive lock on the new file to a shared lock. This keeps
428  // other processes from purging the new file and ensures that the reading
429  // process can use it.
430  exclusive_to_shared_lock(fd);
431 
432  // Now update the total cache size info and purge if needed. The new file's
433  // name is passed into the purge method because this process cannot detect its
434  // own lock on the file.
435  unsigned long long size = update_cache_info(cache_file_name);
436  if (cache_too_big(size)) update_and_purge(cache_file_name);
437  }
438  // get_read_lock() returns immediately if the file does not exist,
439  // but blocks waiting to get a shared lock if the file does exist.
440  else if (get_read_lock(cache_file_name, fd)) {
441  BESDEBUG("dap_response_cache", __PRETTY_FUNCTION__ << " cache hit (2) for: " << cache_file_name << endl);
442  fdds = get_cached_data_ddx(cache_file_name, &factory, dds.get_dataset_name());
443  }
444  else {
445  throw BESInternalError("Cache error! Unable to acquire DAP Response cache.", __FILE__, __LINE__);
446  }
447  }
448  catch (...) {
449  BESDEBUG("dap_response_cache", __PRETTY_FUNCTION__ << " Caught exception, unlocking cache and re-throw." << endl);
450  // I think this call is not needed. jhrg 10/23/12
451  unlock_cache();
452  throw;
453  }
454 
455  cache_token = cache_file_name; // Set this value-result parameter
456  return fdds;
457 }
458 
exception thrown if internal error encountered
static string lowercase(const string &s)
Definition: BESUtil.cc:184
virtual std::string get_message()
get the error message for this exception
Definition: BESError.h:97
static BESDapResponseCache * get_instance()
STL namespace.
void get_value(const string &s, string &val, bool &found)
Retrieve the value of a given key, if set.
Definition: BESKeys.cc:481
virtual void serialize_dap2_data_ddx(std::ostream &out, libdap::DDS &dds, libdap::ConstraintEvaluator &eval, const std::string &boundary, const std::string &start, bool ce_eval=true)
static BESKeys * TheKeys()
Definition: TheBESKeys.cc:43
virtual std::string get_dataset_name() const
Get the dataset name.