HPCloud-PHP  1.2.0
PHP bindings for HPCloud and OpenStack services.
 All Classes Namespaces Files Functions Variables Pages
RemoteObject.php
Go to the documentation of this file.
1 <?php
2 /* ============================================================================
3 (c) Copyright 2012 Hewlett-Packard Development Company, L.P.
4 Permission is hereby granted, free of charge, to any person obtaining a copy
5 of this software and associated documentation files (the "Software"), to deal
6 in the Software without restriction, including without limitation the rights to
7 use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
8 the Software, and to permit persons to whom the Software is furnished to do so,
9 subject to the following conditions:
10 
11 The above copyright notice and this permission notice shall be included in all
12 copies or substantial portions of the Software.
13 
14 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
17 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
19 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
20 SOFTWARE.
21 ============================================================================ */
22 /**
23  * @file
24  *
25  * Contains the RemoteObject class.
26  */
27 
29 
30 /**
31  * A representation of an object stored in remote Object Storage.
32  *
33  * A remote object is one whose canonical copy is stored in a remote
34  * object storage. It represents a local (and possibly partial) copy of
35  * an object. (Contrast this with HPCloud::Storage::ObjectStorage::Object)
36  *
37  * Depending on how the object was constructed, it may or may not have a
38  * local copy of the entire contents of the file. It may only have the
39  * object's "metadata" (information such as name, type, modification
40  * date, and length of the object). Or it may have all of that in
41  * addition to the entire content of the file.
42  *
43  * Remote objects can be modified locally. Simply modifying an object
44  * will not result in those modifications being stored on the remote
45  * server. The object must be saved (see
46  * HPCloud::Storage::ObjectStorage::Container::save()). When an
47  * object is modified so that its local contents differ from the remote
48  * stored copy, it is marked dirty (see isDirty()).
49  */
50 class RemoteObject extends Object {
51 
/**
 * Size of the remote copy in bytes, taken from the "bytes" JSON field
 * or the Content-Length header.
 */
protected $contentLength = 0;

/**
 * Checksum of the remote copy, taken from the "hash" JSON field or the
 * Etag header.
 */
protected $etag = '';

/**
 * Result of strtotime() on the server-reported modification date.
 */
protected $lastModified = 0;

/**
 * Whether content() compares fetched content against the ETag.
 */
protected $contentVerification = TRUE;

/**
 * Whether content() keeps a local copy of what it fetched.
 */
protected $caching = FALSE;

/**
 * All headers received from a remote are stored in this array.
 * Implementing subclasses can access this array for complete access
 * to the HTTP headers.
 *
 * This will be empty if the object was constructed from JSON, and may
 * serve as a good indicator that the object does not have all
 * attributes set.
 */
protected $allHeaders = array();

/**
 * Plain and SSL URLs of this object in a CDN; an empty $cdnUrl means
 * requests and url() use the regular object URL.
 */
protected $cdnUrl;
protected $cdnSslUrl;
72 
73  /**
74  * Create a new RemoteObject from JSON data.
75  *
76  * @param array $data
77  * The JSON data as an array.
78  * @param string $token
79  * The authentication token.
80  * @param $url
81  * The URL to the object on the remote server
82  */
public static function newFromJSON($data, $token, $url) {
  // Name and content type go through the regular object API.
  $remote = new RemoteObject($data['name']);
  $remote->setContentType($data['content_type']);

  // Connection details used by any later request for this object.
  $remote->url = $url;
  $remote->token = $token;

  // Listing fields: modification time, checksum and size.
  $remote->lastModified = strtotime($data['last_modified']);
  $remote->etag = (string) $data['hash'];
  $remote->contentLength = (int) $data['bytes'];

  // FIXME: What do we do about HTTP header data that doesn't come
  // back in JSON?

  return $remote;
}
100 
101  /**
102  * Create a new RemoteObject from HTTP headers.
103  *
104  * This is used to create objects from GET and HEAD requests, which
105  * return all of the metadata inside of the headers.
106  *
107  * @param string $name
108  * The name of the object.
109  * @param array $headers
110  * An associative array of HTTP headers in the exact format
111  * documented by OpenStack's API docs.
112  * @param string $token
113  * The current auth token (used for issuing subsequent requests).
114  * @param string $url
115  * The URL to the object in the object storage. Used for issuing
116  * subsequent requests.
117  * @param string $cdnUrl
118  * The URL to the CDN version of the object. Used for issuing
119  * subsequent requests. If this is set, this object may use
120  * CDN to make subsequent requests. It may also return the
121  * CDN URL when requested.
122  * @param string $cdnSslUrl
123  * The URL to the SSL-protected CDN version of the object.
124  *
125  * @retval HPCloud::Storage::ObjectStorage::RemoteObject
126  * @return \HPCloud\Storage\ObjectStorage\RemoteObject
127  * A new RemoteObject.
128  */
public static function newFromHeaders($name, $headers, $token, $url, $cdnUrl = NULL, $cdnSslUrl = NULL) {
  $object = new RemoteObject($name);

  // Keep the non-metadata headers available through headers().
  $object->setHeaders($headers);

  // Fix inconsistent header: the checksum may arrive as "ETag" or
  // "Etag". Normalize to "Etag" in the local copy of the array.
  if (isset($headers['ETag'])) {
    $headers['Etag'] = $headers['ETag'];
  }

  // Each header is read only if present, so an incomplete header set
  // does not raise undefined-index notices. A missing Content-Type
  // leaves the type assigned at construction untouched.
  if (isset($headers['Content-Type'])) {
    $object->setContentType($headers['Content-Type']);
  }
  $object->contentLength = empty($headers['Content-Length']) ? 0 : (int) $headers['Content-Length'];
  $object->etag = isset($headers['Etag']) ? (string) $headers['Etag'] : '';
  $object->lastModified = isset($headers['Last-Modified']) ? strtotime($headers['Last-Modified']) : 0;

  // Set the metadata, too.
  $object->setMetadata(Container::extractHeaderAttributes($headers));

  // If content encoding and disposition exist, set them on the
  // object.
  if (!empty($headers['Content-Disposition'])) {
    $object->setDisposition($headers['Content-Disposition']);
  }
  if (!empty($headers['Content-Encoding'])) {
    $object->setEncoding($headers['Content-Encoding']);
  }

  // Connection details used for subsequent requests.
  $object->token = $token;
  $object->url = $url;
  $object->cdnUrl = $cdnUrl;
  $object->cdnSslUrl = $cdnSslUrl;

  return $object;
}
168 
169  /**
170  * Set the URL to this object in a CDN service.
171  *
172  * A CDN may be used to expedite *reading* the object. Write
173  * operations are never performed on a CDN. Since a RemoteObject
174  * can be partially loaded, it is possible that part of the object
175  * is read from a CDN, and part from Swift. However, to accomplish
176  * this would require one to set CDN services in one place, and
177  * not in the other.
178  *
179  * Note that if CDN was set using ObjectStorage::useCDN() or
180  * Container::useCDN(), you needn't call this method. CDN will
181  * be automatically enabled during object construction.
182  *
183  * Setting this to NULL has the effect of turning off CDN for
184  * this object.
185  *
186  * @param string $url
187  * The URL to this object in CDN.
188  * @param string $sslUrl
189  * The SSL URL to this object in CDN.
190  *
191  * @retval HPCloud::Storage::ObjectStorage::RemoteObject
192  * @return \HPCloud\Storage\ObjectStorage\RemoteObject
193  * $this for the current object so it can be used in chaining methods.
194  */
public function useCDN($url, $sslUrl) {
  // Both values are stored exactly as given; nothing is validated here.
  $this->cdnSslUrl = $sslUrl;
  $this->cdnUrl = $url;

  return $this;
}
201 
202  /**
203  * Get the URL to this object.
204  *
205  * If this object has been stored remotely, it will have
206  * a valid URL.
207  *
208  * @param boolean $cached
209  * If this value is set to TRUE, this call *may* return the
210  * URL to a cached (CDN) URL. Reading from a cached URL should
211  * be substantially faster than reading from a normal URL. Note,
212  * however, that a container must have CDN enabled on it before
213  * caching can be used, and a CDN must be passed into this
214  * object. See ObjectStorage::useCDN(), Container::useCDN() and
215  * RemoteObject::useCDN(). (Generally, using ObjectStorage::useCDN()
216  * is all you need to do.)
217  * @param boolean $useSSL
218  * FOR CACHED URLS ONLY, there is an option for either SSL or non-SSL
219  * URLs. By default, we use SSL URLs because (a) it's safer, and
220  * (b) it mirrors non-CDN behavior. This can be turned off by setting
221  * $useSSL to FALSE.
222  * @retval string
223  * @return string
224  * A URL to the object. The following considerations apply:
225  * - If the container is public, this URL can be loaded without
226  * authentication. You can, for example, pass the URL to a browser
227  * user agent.
228  * - If a CDN URL has been provided to useCDN() and $cached is TRUE...
229  * - If the container is CDN enabled, a URL to the cache will be returned.
230  * - Otherwise, the Swift URL will be returned.
231  * - If this object has never been saved remotely, then there will be
232  * no URL, and this will return NULL.
233  */
public function url($cached = FALSE, $useSSL = TRUE) {
  // An empty $cdnUrl means CDN is off for this object, regardless of
  // whether an SSL CDN URL happens to be set.
  if ($cached && !empty($this->cdnUrl)) {
    if (!$useSSL) {
      return $this->cdnUrl;
    }
    // Hand out the SSL CDN URL only if one was actually provided.
    // Otherwise fall through to the regular URL instead of returning
    // an empty value for an object that does have a URL.
    if (!empty($this->cdnSslUrl)) {
      return $this->cdnSslUrl;
    }
  }

  return $this->url;
}
241 
242 
/**
 * Get the length of the object's content.
 *
 * @return
 *   The parent implementation's value when non-empty local content is
 *   present; otherwise the length stored from the remote copy.
 */
public function contentLength() {
  // No local content: report the stored remote length.
  if (empty($this->content)) {
    return $this->contentLength;
  }

  // Local content takes precedence; defer to the parent implementation.
  return parent::contentLength();
}
249 
/**
 * Get the ETag of the object.
 *
 * @return
 *   The parent implementation's value when non-empty local content is
 *   present; otherwise the ETag stored from the remote copy.
 */
public function eTag() {
  // No local content: report the stored remote ETag.
  if (empty($this->content)) {
    return $this->etag;
  }

  // Local content takes precedence; defer to the parent implementation.
  return parent::eTag();
}
258 
259  /**
260  * Get the modification time, as reported by the server.
261  *
262  * This returns an integer timestamp indicating when the server's
263  * copy of this file was last modified.
264  */
public function lastModified() {
  // Stored by the factories and by extractFromHeaders(); never
  // computed from local content.
  return $this->lastModified;
}
268 
/**
 * Get the metadata currently held by this object.
 *
 * @return
 *   The value of the metadata property, returned as-is.
 */
public function metadata() {
  // NOTE(review): presumably filled through setMetadata(), which is
  // defined outside this class -- confirm.
  return $this->metadata;
}
273 
274  /**
275  * Set the headers
276  *
277  * @retval HPCloud::Storage::ObjectStorage::RemoteObject
278  * @return \HPCloud\Storage\ObjectStorage\RemoteObject
279  * $this for the current object so it can be used in chaining methods.
280  */
public function setHeaders($headers) {
  $kept = array();

  foreach ($headers as $name => $value) {
    // Names that begin with the metadata prefix are left out.
    if (strpos($name, Container::METADATA_HEADER_PREFIX) === 0) {
      continue;
    }
    $kept[$name] = $value;
  }

  // Any previously stored headers are replaced wholesale.
  $this->allHeaders = $kept;

  return $this;
}
292 
293  /**
294  * Get the HTTP headers sent by the server.
295  *
296  * @attention EXPERT.
297  *
298  * This returns the array of minimally processed HTTP headers that
299  * were sent from the server.
300  *
301  * @retval array
302  * @return array
303  * An associative array of header names and values.
304  */
public function headers() {
  // The array built by setHeaders(), returned as-is.
  return $this->allHeaders;
}
308 
/**
 * Get the additional headers for this object.
 *
 * @param boolean $mergeAll
 *   If TRUE, the headers stored in allHeaders are merged in as well.
 *   Entries from the parent take precedence on key collisions.
 *
 * @return array
 *   An associative array of header names and values.
 */
public function additionalHeaders($mergeAll = FALSE) {
  $merged = parent::additionalHeaders();

  // Note that $this->allHeaders will contain some headers that are NOT
  // additional. But we do not know which headers are additional and
  // which are from Swift because Swift does not commit to using a
  // specific set of headers.
  if ($mergeAll) {
    $merged += $this->allHeaders;
  }

  return $merged;
}
324 
/**
 * Lower-cased header names that filterHeaders() removes.
 *
 * Only the keys matter; lookups are done with isset().
 */
protected $reservedHeaders = array(
  'etag' => TRUE,
  'content-length' => TRUE,
  'x-auth-token' => TRUE,
  'transfer-encoding' => TRUE,
  'x-trans-id' => TRUE,
);
331 
332  /**
333  * Filter the headers.
334  *
335  * @retval HPCloud::Storage::ObjectStorage::RemoteObject
336  * @return \HPCloud\Storage\ObjectStorage\RemoteObject
337  * $this for the current object so it can be used in chaining methods.
338  */
public function filterHeaders(&$headers) {
  // Walk a by-value snapshot so entries can be removed from the
  // caller's array while iterating.
  $snapshot = $headers;

  foreach ($snapshot as $name => $unused) {
    // Reserved names are matched case-insensitively.
    if (isset($this->reservedHeaders[strtolower($name)])) {
      unset($headers[$name]);
    }
  }

  return $this;
}
353 
354  /**
355  * Given an array of header names.
356  *
357  * This will remove the given headers from the existing headers.
358  * Both additional headers and the original headers from the
359  * server are affected here.
360  *
361  * Note that you cannot remove metadata through this mechanism,
362  * as it is managed using the metadata() methods.
363  *
364  * @attention
365  * Many headers are generated automatically, such as
366  * Content-Type and Content-Length. Removing these
367  * will simply result in their being regenerated.
368  *
369  * @param array $keys
370  * The header names to be removed.
371  *
372  * @retval HPCloud::Storage::ObjectStorage::RemoteObject
373  * @return \HPCloud\Storage\ObjectStorage\RemoteObject
374  * $this for the current object so it can be used in chaining methods.
375  */
public function removeHeaders($keys) {
  foreach ($keys as $name) {
    // Drop the name from both the server-sent and the additional set.
    unset($this->allHeaders[$name], $this->additionalHeaders[$name]);
  }

  return $this;
}
384 
385  /**
386  * Get the content of this object.
387  *
388  * Since this is a proxy object, calling content() will cause the
389  * object to be fetched from the remote data storage. The result will
390  * be delivered as one large string.
391  *
392  * The file size, content type, etag, and modification date of the
393  * object are all updated during this command, too. This accounts for
394  * the possibility that the content was modified externally between
395  * the time this object was constructed and the time this method was
396  * executed.
397  *
398  * Be wary of using this method with large files.
399  *
400  * @retval string
401  * @return string
402  * The contents of the file as a string.
403  * @throws \HPCloud\Transport\FileNotFoundException
404  * when the requested content cannot be located on the remote
405  * server.
406  * @throws \HPCloud\Exception
407  * when an unknown exception (usually an abnormal network condition)
408  * occurs.
409  */
public function content() {

  // XXX: This allows local overwrites. Is this a good idea?
  if (!empty($this->content)) {
    return $this->content;
  }

  // Get the object, content included.
  $response = $this->fetchObject(TRUE);

  $content = $response->content();

  // Checksum the content, but only when verification is enabled, so
  // the hashing cost is not paid when the result would be ignored.
  if ($this->isVerifyingContent()) {
    $check = md5($content);
    $etag = (string) $this->etag();

    // Compare strictly: with a loose comparison two hex digests of the
    // form "0e<digits>" are treated as numbers and compare as equal.
    if ($check !== $etag) {
      throw new ContentVerificationException("Checksum $check does not match Etag " . $etag);
    }
  }

  // If we are caching, set the content locally when we retrieve
  // remotely.
  if ($this->isCaching()) {
    $this->setContent($content);
  }

  return $content;
}
438 
439  /**
440  * Get the content of this object as a file stream.
441  *
442  * This is useful for large objects. Such objects should not be read
443  * into memory all at once (as content() does), but should instead be
444  * made available as an input stream.
445  *
446  * PHP offers low-level stream support in the form of PHP stream
447  * wrappers, and this mechanism is used internally whenever available.
448  *
449  * If there is a local copy of the content, the stream will be read
450  * out of the content as if it were a temp-file backed in-memory
451  * resource. To ignore the local version, pass in TRUE for the
452  * $refresh parameter.
453  *
454  * If the content is coming from a remote copy, the stream will be
455  * read directly from the underlying IO stream.
456  *
457  * Each time stream() is called, a new stream is created. In most
458  * cases, this results in a new HTTP transaction (unless $refresh is
459  * FALSE and the content is already stored locally).
460  *
461  * The stream is read-only.
462  *
463  * @param boolean $refresh
464  * If this is set to TRUE, any existing local modifications will be ignored
465  * and the content will be refreshed from the server. Any
466  * local changes to the object will be discarded.
467  * @retval resource
468  * @return resource
469  * A handle to the stream, which is already opened and positioned at
470  * the beginning of the stream.
471  */
public function stream($refresh = FALSE) {

  // A forced refresh, or the absence of local content, means fetching
  // a fresh version from the remote server and returning its stream
  // handle.
  if ($refresh || !isset($this->content)) {
    $response = $this->fetchObject(TRUE);

    return $response->file();
  }

  // Otherwise wrap the local content in a temporary stream.
  return $this->localFileStream();
}
486 
487  /**
488  * Transform a local copy of content into a file stream.
489  *
490  * This buffers the content into a stream resource and then returns
491  * the stream resource. The resource is not used internally, and its
492  * data is never written back to the remote object storage.
493  */
protected function localFileStream() {

  $handle = fopen('php://temp', 'rw');

  // Content is read before the length, as in a single fwrite() call.
  $body = $this->content();
  $length = $this->contentLength();
  fwrite($handle, $body, $length);

  // Hand the stream back positioned at its beginning.
  rewind($handle);

  return $handle;
}
502 
503  /**
504  * Enable or disable content caching.
505  *
506  * If a RemoteObject is set to cache then the first time content() is
507  * called, its results will be cached locally. This is very useful for
508  * small files whose content is accessed repeatedly, but can be a
509  * cause of memory consumption for larger files.
510  *
511  * If caching settings are changed after content is retrieved, the
512  * already retrieved content will not be affected, though any
513  * subsequent requests will use the new caching settings. That is,
514  * existing cached content will not be removed if caching is turned
515  * off.
516  *
517  * @param boolean $enabled
518  * If this is TRUE, caching will be enabled. If this is FALSE,
519  * caching will be disabled.
520  *
521  * @retval HPCloud::Storage::ObjectStorage::RemoteObject
522  * @return \HPCloud\Storage\ObjectStorage\RemoteObject
523  * $this so the method can be used in chaining.
524  */
public function setCaching($enabled) {
  // Only the flag changes; content that is already stored is untouched.
  $this->caching = $enabled;

  return $this;
}
529 
530  /**
531  * Indicates whether this object caches content.
532  *
533  * Importantly, this indicates whether the object <i>will</i> cache
534  * its contents, not whether anything is actually cached.
535  *
536  * @retval boolean
537  * @return boolean
538  * TRUE if caching is enabled, FALSE otherwise.
539  */
public function isCaching() {
  // The configured flag, not an indication that anything is cached.
  return $this->caching;
}
543 
544  /**
545  * Enable or disable content verification (checksum/md5).
546  *
547  * The default behavior of a RemoteObject is to verify that the MD5
548  * provided by the server matches the locally generated MD5 of the
549  * file contents.
550  *
551  * If content verification is enabled, then whenever the content is
552  * fetched from the remote server, its checksum is calculated and
553  * tested against the ETag value. This provides a layer of assurance
554  * that the payload of the HTTP request was not altered during
555  * transmission.
556  *
557  * This feature can be turned off, which is sometimes necessary on
558  * systems that do not correctly produce MD5s. Turning this off might
559  * also provide a small performance improvement on large files, but at
560  * the expense of security.
561  *
562  * @param boolean $enabled
563  * If this is TRUE, content verification is performed. The content
564  * is hashed and checked against a server-supplied MD5 hashcode. If
565  * this is FALSE, no checking is done.
566  *
567  * @retval HPCloud::Storage::ObjectStorage::RemoteObject
568  * @return \HPCloud\Storage\ObjectStorage\RemoteObject
569  * $this so the method can be used in chaining.
570  */
public function setContentVerification($enabled) {
  // Consulted by content() through isVerifyingContent().
  $this->contentVerification = $enabled;

  return $this;
}
575 
576  /**
577  * Indicate whether this object verifies content (checksum).
578  *
579  * When content verification is on, RemoteObject attempts to perform a
580  * checksum on the object, calculating the MD5 hash of the content
581  * returned by the remote server, and comparing that to the server's
582  * supplied ETag hash.
583  *
584  * @retval boolean
585  * @return boolean
586  * TRUE if this is verifying, FALSE otherwise.
587  */
public function isVerifyingContent() {
  // The flag set through setContentVerification(); TRUE by default.
  return $this->contentVerification;
}
591 
592  /**
593  * Check whether there are unsaved changes.
594  *
595  * An object is marked "dirty" if it has been altered
596  * locally in such a way that it no longer matches the
597  * remote version.
598  *
599  * The practical definition of dirtiness, for us, is this: An object
600  * is dirty if and only if (a) it has locally buffered content AND (b)
601  * the checksum of the local content does not match the checksum of
602  * the remote content.
603  *
604  * Note that minor differences, such as altered character encoding, may
605  * change the checksum value, and thus (correctly) mark the object as
606  * dirty.
607  *
608  * The RemoteObject implementation does not internally check dirty
609  * markers. It is left to implementors to ensure that dirty content is
610  * written to the remote server when desired.
611  *
612  * To replace dirty content with a clean copy, see refresh().
613  */
public function isDirty() {

  // If there is no content, the object can't be dirty.
  if (!isset($this->content)) {
    return FALSE;
  }

  // Content is dirty iff content is set, and it is different from the
  // original content. Note that we are using the etag from the
  // original headers. The comparison is strict: with a loose
  // comparison two hex digests of the form "0e<digits>" are treated as
  // numbers and compare as equal, which would report changed content
  // as clean.
  return (string) $this->etag !== md5($this->content);
}
630 
631  /**
632  * Rebuild the local object from the remote.
633  *
634  * This refetches the object from the object store and then
635  * reconstructs the present object based on the refreshed data.
636  *
637  * WARNING: This will destroy any unsaved local changes. You can use
638  * isDirty() to determine whether or not a local change has been made.
639  *
640  * @param boolean $fetchContent
641  * If this is TRUE, the content will be downloaded as well.
642  *
643  * @retval HPCloud::Storage::ObjectStorage::RemoteObject
644  * @return \HPCloud\Storage\ObjectStorage\RemoteObject
645  * $this for the current object so it can be used in chaining methods.
646  */
public function refresh($fetchContent = FALSE) {

  // Discard the local content before going to the server.
  unset($this->content);

  // fetchObject() also re-reads the object's properties from the
  // response headers.
  $response = $this->fetchObject($fetchContent);

  if (!$fetchContent) {
    return $this;
  }

  // A body was requested, so store it locally.
  $this->setContent($response->content());

  return $this;
}
661 
662  /**
663  * Helper function for fetching an object.
664  *
665  * @param boolean $fetchContent
666  * If this is set to TRUE, a GET request will be issued, which will
667  * cause the remote host to return the object in the response body.
668  * The response body is not handled, though. If this is set to
669  * FALSE, a HEAD request is sent, and no body is returned.
670  * @retval HPCloud::Transport::Response
671  * @return \HPCloud\Transport\Response
672  * containing the object metadata and (depending on the
673  * $fetchContent flag) optionally the data.
674  */
protected function fetchObject($fetchContent = FALSE) {
  // GET returns the body as well; HEAD returns headers only.
  $verb = $fetchContent ? 'GET' : 'HEAD';

  $transport = \HPCloud\Transport::instance();

  // NOTE(review): the auth token is sent to the CDN URL as well as to
  // the regular URL -- confirm that this is intended.
  $requestHeaders = array(
    'X-Auth-Token' => $this->token,
  );

  // A configured CDN URL takes precedence over the regular URL.
  $target = empty($this->cdnUrl) ? $this->url : $this->cdnUrl;
  $response = $transport->doRequest($target, $verb, $requestHeaders);

  // Anything other than a 200 status is reported as a failure.
  if ($response->status() != 200) {
    throw new \HPCloud\Exception('An unknown exception occurred during transmission.');
  }

  // Bring this object's properties in line with the response.
  $this->extractFromHeaders($response);

  return $response;
}
698 
699  /**
700  * Extract information from HTTP headers.
701  *
702  * This is used internally to set object properties from headers.
703  *
704  * @retval HPCloud::Storage::ObjectStorage::RemoteObject
705  * @return \HPCloud\Storage\ObjectStorage\RemoteObject
706  * $this for the current object so it can be used in chaining methods.
707  */
protected function extractFromHeaders($response) {
  // Content type and ETag keep their current value when the response
  // does not carry the header.
  $this->setContentType($response->header('Content-Type', $this->contentType()));
  $this->etag = $response->header('Etag', $this->etag);

  // strtotime() parses date strings, so it is called only when the
  // header is present. A missing header leaves the timestamp at 0
  // instead of at whatever strtotime() makes of a 0 default.
  $lastModified = $response->header('Last-Modified', NULL);
  $this->lastModified = empty($lastModified) ? 0 : strtotime($lastModified);

  $this->contentLength = (int) $response->header('Content-Length', 0);

  // Disposition and encoding are reset to NULL when absent.
  $this->setDisposition($response->header('Content-Disposition', NULL));
  $this->setEncoding($response->header('Content-Encoding', NULL));

  // Reset the metadata, too:
  $this->setMetadata(Container::extractHeaderAttributes($response->headers()));

  return $this;
}
723 }