New version 6.x-2.9 of Link Checker module

This commit is contained in:
Manuel Cillero 2017-08-05 01:49:58 +02:00
parent 28b61ec2d4
commit cd51ed4595
6 changed files with 52 additions and 11 deletions

View file

@ -390,14 +390,14 @@ function _linkchecker_check_links() {
// run and guess that 2 links can be checked per second with 1 thread, which is
// nevertheless uncommon. The max_execution_time can be used to calculate
// a useful value that is higher, but not totally out of scope and limits the
// query resultset to a resonable size.
// query result set to a reasonable size.
$linkchecker_check_connections_max = variable_get('linkchecker_check_connections_max', 8);
$check_links_max_per_cron_run = ($has_httprl) ? ($linkchecker_check_connections_max * $max_execution_time) : $max_execution_time;
$linkchecker_check_links_interval = variable_get('linkchecker_check_links_interval', 2419200);
$linkchecker_check_useragent = variable_get('linkchecker_check_useragent', 'Drupal (+http://drupal.org/)');
// Connection limit can be overriden via settings.php. Two connections is the
// Connection limit can be overridden via settings.php. Two connections is the
// limit defined in RFC http://www.ietf.org/rfc/rfc2616.txt. Modern browsers
// are typically using 6-8 connections and no more. Never use more and keep
// in mind that you can overload other people's servers.
@ -405,7 +405,10 @@ function _linkchecker_check_links() {
// Get URLs for checking.
$links = db_query_range("SELECT * FROM {linkchecker_links} WHERE last_checked < %d AND status = %d ORDER BY last_checked, lid ASC", time() - $linkchecker_check_links_interval, 1, 0, $check_links_max_per_cron_run);
$links_remaining = $links->num_rows;
// D6 database API does not provide a generic way to return the number of rows
// in a result set and $links->num_rows only works with 'mysqli'. The only
// workaround is to run the statement again with a COUNT query.
$links_remaining = db_result(db_query_range("SELECT COUNT(lid) AS num_rows FROM {linkchecker_links} WHERE last_checked < %d AND status = %d ORDER BY last_checked, lid ASC", time() - $linkchecker_check_links_interval, 1, 0, $check_links_max_per_cron_run));
while ($link = db_fetch_object($links)) {
$headers = array();
@ -518,10 +521,14 @@ function _linkchecker_status_handling(&$response, $link) {
// element (naming it with the name attribute), or by any other element
// (naming with the id attribute).
// See http://www.w3.org/TR/html401/struct/links.html
//
// Notes:
// - '#top' is a reserved fragment that need not exist in a page.
if ($response->code == 200
&& !empty($response->data)
&& !empty($response->headers['content-type'])
&& !empty($response->uri['fragment'])
&& !in_array($response->uri['fragment'], array('#top'))
&& in_array($response->headers['content-type'], array('text/html', 'application/xhtml+xml', 'application/xml'))
&& !preg_match('/(\s[^>]*(name|id)(\s+)?=(\s+)?["\'])(' . preg_quote($response->uri['fragment'], '/') . ')(["\'][^>]*>)/i', $response->data)
) {