<?php
/**
 * URL analyzer
 *
 * Retrieves the source code of a web page in order to extract
 * its Open Graph tags
 *
 * @author Pierre HUBERT
 */

class URLAnalyzer {

    /**
     * Analyze a given URL to extract OpenGraph content
     *
     * The document is downloaded with cURL and every <meta> tag carrying a
     * quoted property="og:..." attribute is collected.
     *
     * NOTE(review): no CURLOPT_PROTOCOLS restriction is set here, so the URL
     * scheme is not limited by this method. If $url can come from users,
     * consider validating it before calling this method.
     *
     * @param string $url The URL to analyze
     * @param int $timeout Maximum time, in seconds, allowed to establish the
     * connection (passed to CURLOPT_CONNECTTIMEOUT; the duration of the
     * transfer itself is not limited by this value)
     * @return array Open Graph information in case of success (property name
     * without its "og:" prefix => content value) or empty array in case of
     * failure
     */
    public static function analyze(string $url, int $timeout = 15) : array {

        //Initialize curl
        $ch = curl_init($url);
        if ($ch === false)
            return [];

        //Set connection timeout
        curl_setopt($ch, CURLOPT_CONNECTTIMEOUT, $timeout);

        //Get the response as a string instead of printing it
        curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);

        //Perform the request
        $source = curl_exec($ch);

        //A failed request (false) or an empty document can not contain tags
        if (!is_string($source) || !$source)
            return [];

        return self::extractOpenGraphTags($source);
    }

    /**
     * Extract the Open Graph tags contained in a HTML document
     *
     * @param string $source The source code of the document
     * @return array Property name (without "og:" prefix) => content value.
     * When a property appears several times, the last occurrence wins.
     */
    private static function extractOpenGraphTags(string $source) : array {

        //Collect the attribute string of every <meta ...> tag
        if (!preg_match_all('#<meta\s+(.*?)>#is', $source, $metaTags))
            return [];

        //Matches name="value" or name='value', tolerating spaces around "=".
        //The branch reset group (?|...) stores the value in group 2 whatever
        //the kind of quote. As attributes are consumed one after the other,
        //text located inside a quoted value is never mistaken for an attribute.
        $attributePattern = '#([^\s=/"\'<>]+)\s*=\s*(?|"([^"]*)"|\'([^\']*)\')#s';

        $list = [];

        foreach ($metaTags[1] as $entry) {

            if (!preg_match_all($attributePattern, $entry, $attributes, PREG_SET_ORDER))
                continue;

            $name = null;
            $value = "";
            $hasContent = false;

            foreach ($attributes as $attribute) {

                $attributeName = strtolower($attribute[1]);

                if ($attributeName === "property") {

                    //Only the first Open Graph property of the tag is kept
                    if ($name === null && strncasecmp($attribute[2], "og:", 3) === 0)
                        $name = (string) substr($attribute[2], 3);

                }
                elseif ($attributeName === "content" && !$hasContent) {

                    //Only the first content attribute of the tag is kept
                    $value = $attribute[2];
                    $hasContent = true;

                }
            }

            //Skip meta tags that do not describe an Open Graph property
            if ($name === null)
                continue;

            $list[$name] = $value;
        }

        return $list;
    }
}