2018-01-07 15:23:41 +00:00
|
|
|
<?php
|
|
|
|
/**
|
|
|
|
* URL analyzer
|
|
|
|
*
|
|
|
|
* Retrieves the source code of a webpage to extract
|
|
|
|
* its open graph tags
|
|
|
|
*
|
|
|
|
* @author Pierre HUBERT
|
|
|
|
*/
|
|
|
|
|
|
|
|
class URLAnalyzer {

    /**
     * Analyze a given URL to extract OpenGraph content
     *
     * The page is downloaded with cURL, then every meta tag carrying a
     * quoted property attribute starting with "og:" is collected together
     * with the value of its content attribute.
     *
     * @param string $url The URL to analyze
     * @param int $timeout The maximum number of seconds to wait while
     * connecting to the server (given to CURLOPT_CONNECTTIMEOUT)
     * @return array Open graph informations (property name without its
     * "og:" prefix => content) in case of success or
     * empty array in case of failure
     */
    public static function analyze(string $url, int $timeout = 15) : array {

        $source = self::fetchSource($url, $timeout);

        //Nothing to analyze if the download failed or returned nothing
        if($source === "")
            return array();

        return self::extractOpenGraphTags($source);
    }

    /**
     * Download the source code of a page
     *
     * @param string $url The URL to download
     * @param int $timeout Connection timeout, in seconds
     * @return string The body of the response or an empty string in
     * case of failure
     */
    private static function fetchSource(string $url, int $timeout) : string {

        //Initialize curl
        $ch = curl_init($url);
        if($ch === false)
            return "";

        //Set timeout
        curl_setopt($ch, CURLOPT_CONNECTTIMEOUT, $timeout);

        //Get the response as a string instead of printing it
        curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);

        //Perform the request
        $source = curl_exec($ch);

        return is_string($source) ? $source : "";
    }

    /**
     * Extract the open graph tags of an HTML document
     *
     * @param string $source The HTML source code to process
     * @return array Open graph informations (property name without its
     * "og:" prefix => content). When a property appears several times,
     * the last occurrence wins.
     */
    private static function extractOpenGraphTags(string $source) : array {

        //Find all the meta tags (any whitespace may follow the tag name)
        if(!preg_match_all('#<meta\s+(.*?)>#is', $source, $results, PREG_PATTERN_ORDER))
            return array();

        //Process results
        $list = array();
        foreach($results[1] as $entry){

            //Search for the property attribute. Optional whitespace around
            //"=" is handled by the pattern itself, so that attribute
            //values are never altered. The back-reference makes sure the
            //closing quote is the same as the opening one (" or ').
            if(!preg_match('#property\s*=\s*(["\'])og:(.*?)\1#is', $entry, $property))
                continue; //Not an open graph tag (or name not readable)

            //Search for the content attribute (optional)
            $value = "";
            if(preg_match('#content\s*=\s*(["\'])(.*?)\1#is', $entry, $content))
                $value = $content[2];

            $list[$property[2]] = $value;
        }

        return $list;
    }

}
|