array('activity', 'sport'),
        'business' => array('bar', 'company', 'cafe', 'hotel', 'restaurant'),
        'group' => array('cause', 'sports_league', 'sports_team'),
        'organization' => array('band', 'government', 'non_profit', 'school', 'university'),
        'person' => array('actor', 'athlete', 'author', 'director', 'musician', 'politician', 'public_figure'),
        'place' => array('city', 'country', 'landmark', 'state_province'),
        'product' => array('album', 'book', 'drink', 'food', 'game', 'movie', 'product', 'song', 'tv_show'),
        'website' => array('blog', 'website'),
    );

    /**
     * Holds all the Open Graph values we've parsed from a page
     *
     */
    private $_values = array();

    /**
     * Fetches a URI over cURL and parses the response body for Open Graph
     * data.
     *
     * Redirects are followed, HTTP error statuses are treated as failures
     * (CURLOPT_FAILONERROR) and the transfer is limited to 15 seconds. The
     * User-Agent of the current request is forwarded when one is available.
     *
     * @param $URI URI to page to parse for Open Graph data
     * @return OpenGraph|false The parsed page, or false when the handle could
     *                         not be created, the response was empty, or the
     *                         parser found nothing
     */
    static public function fetch($URI)
    {
        $curl = curl_init($URI);
        if ($curl === false) {
            // No handle means nothing can be configured or executed.
            return false;
        }

        curl_setopt($curl, CURLOPT_FAILONERROR, true);
        curl_setopt($curl, CURLOPT_FOLLOWLOCATION, true);
        curl_setopt($curl, CURLOPT_RETURNTRANSFER, true);
        curl_setopt($curl, CURLOPT_TIMEOUT, 15);

        // NOTE(review): TLS host and peer verification are switched off, so
        // HTTPS responses are not authenticated. Left unchanged because
        // callers may rely on fetching pages with invalid certificates;
        // confirm whether this is still wanted.
        curl_setopt($curl, CURLOPT_SSL_VERIFYHOST, false);
        curl_setopt($curl, CURLOPT_SSL_VERIFYPEER, false);

        // The header is absent on the CLI and when the client sends no
        // User-Agent; only forward it when it is actually there.
        if (isset($_SERVER['HTTP_USER_AGENT'])) {
            curl_setopt($curl, CURLOPT_USERAGENT, $_SERVER['HTTP_USER_AGENT']);
        }

        $response = curl_exec($curl);
        curl_close($curl);

        // curl_exec() yields false on failure; an empty body is equally useless.
        if (empty($response)) {
            return false;
        }

        return self::_parse($response);
    }

    /**
     * Parses HTML and extracts Open Graph data, this assumes
     * the document is at least well formed.
*
     * Every meta element whose "property" starts with "og:" is stored under
     * the remainder of the property name, with dashes turned into
     * underscores. When Open Graph data is missing, the document title, the
     * plain "description" meta tag and the "image_src" link are used as
     * fallbacks for title, description and image respectively.
     *
     * @param $HTML HTML to parse
     * @return OpenGraph|false The populated object, or false when the document
     *                         has no meta elements or nothing was collected
     */
    static private function _parse($HTML)
    {
        // Collect libxml parse errors internally instead of emitting warnings
        // for sloppy real-world markup, then restore the previous setting.
        $old_libxml_error = libxml_use_internal_errors(true);

        $doc = new DOMDocument();
        $doc->loadHTML($HTML);

        libxml_use_internal_errors($old_libxml_error);

        $tags = $doc->getElementsByTagName('meta');
        if (!$tags || $tags->length === 0) {
            return false;
        }

        $page = new self();
        $nonOgDescription = null;

        foreach ($tags as $tag) {
            // getAttribute() returns '' for a missing attribute, which can
            // never start with "og:", so no separate hasAttribute() check is
            // needed here.
            $property = $tag->getAttribute('property');
            if (strpos($property, 'og:') === 0) {
                $key = strtr(substr($property, 3), '-', '_');

                // Some sites (e.g. the New York Times) put the data in a
                // "value" attribute instead of "content"; when present it
                // takes precedence.
                $page->_values[$key] = $tag->hasAttribute('value')
                    ? $tag->getAttribute('value')
                    : $tag->getAttribute('content');
            }

            // Based on modifications at https://github.com/bashofmann/opengraph/blob/master/src/OpenGraph/OpenGraph.php
            if ($tag->getAttribute('name') === 'description') {
                $nonOgDescription = $tag->getAttribute('content');
            }
        }

        // Based on modifications at https://github.com/bashofmann/opengraph/blob/master/src/OpenGraph/OpenGraph.php
        if (!isset($page->_values['title'])) {
            $titles = $doc->getElementsByTagName('title');
            if ($titles->length > 0) {
                $page->_values['title'] = $titles->item(0)->textContent;
            }
        }

        if (!isset($page->_values['description']) && $nonOgDescription) {
            $page->_values['description'] = $nonOgDescription;
        }

        // Record <link rel="image_src"> under "image_src", and use it as the
        // image only when og:image isn't set. The previous guard tested a
        // non-existent "values" property, so the link always replaced a real
        // og:image.
        $domxpath = new DOMXPath($doc);
        $elements = $domxpath->query("//link[@rel='image_src']");
        if ($elements && $elements->length > 0) {
            $domattr = $elements->item(0)->attributes->getNamedItem('href');
            if ($domattr) {
                if (!isset($page->_values['image'])) {
                    $page->_values['image'] = $domattr->value;
                }
                $page->_values['image_src'] = $domattr->value;
            }
        }

        if (empty($page->_values)) {
            return false;
        }

        return $page;
    }

    /**
     * Helper method to access attributes directly
     * Example:
     * $graph->title
     *
     * The special key "schema" (when not itself a parsed value) is resolved
     * by looking up the parsed "type" in self::$TYPES and returning the name
     * of the group that lists it.
     *
     * @param $key Key to fetch from the lookup
     * @return mixed The stored value, the schema name, or null when nothing
     *               matches
     */
    public function __get($key)
    {
        if (array_key_exists($key, $this->_values)) {
            return $this->_values[$key];
        }

        if ($key === 'schema' && isset($this->_values['type'])) {
            foreach (self::$TYPES as $schema => $types) {
                // in_array() rather than array_search(): a match at index 0
                // is a valid hit, but array_search() would report it as the
                // falsy value 0.
                if (in_array($this->_values['type'], $types, true)) {
                    return $schema;
                }
            }
        }

        return null;
    }

    /**
     * Return all the keys found on the page
     *
     * @return array
     */
    public function keys()
    {
        return array_keys($this->_values);
    }

    /**
     * Helper method to check an attribute exists
     *
     * @param $key Key to look for among the parsed values
     * @return boolean True when the key was parsed from the page
     */
    public function __isset($key)
    {
        return array_key_exists($key, $this->_values);
    }

    /**
     * Will return true if the page has location data embedded, i.e. either
     * both latitude and longitude, or every one of the address fields.
     *
     * @return boolean Check if the page has location data
     */
    public function hasLocation()
    {
        if (array_key_exists('latitude', $this->_values) && array_key_exists('longitude', $this->_values)) {
            return true;
        }

        $address_keys = array('street_address', 'locality', 'region', 'postal_code', 'country_name');
        foreach ($address_keys as $key) {
            // A single missing field makes the address incomplete.
            if (!array_key_exists($key, $this->_values)) {
                return false;
            }
        }

        return true;
    }

    /**
     * Iterator code: walks the parsed values using the array's internal
     * pointer, with $_position counting how many steps have been taken.
     */
    private $_position = 0;

    /** Moves back to the first parsed value. */
    public function rewind()
    {
        reset($this->_values);
        $this->_position = 0;
    }

    /** Returns the value at the current position. */
    public function current()
    {
        return current($this->_values);
    }

    /** Returns the key at the current position. */
    public function key()
    {
        return key($this->_values);
    }

    /** Advances to the next parsed value. */
    public function next()
    {
        next($this->_values);
        ++$this->_position;
    }

    /** Returns true while the position is still inside the parsed values. */
    public function valid()
    {
        return $this->_position < sizeof($this->_values);
    }
}