- <?php 
- /* 
-  * Copyright (C) 2017 Karmabunny Pty Ltd. 
-  * 
-  * This file is a part of SproutCMS. 
-  * 
-  * SproutCMS is free software: you can redistribute it and/or modify it under the terms 
-  * of the GNU General Public License as published by the Free Software Foundation, either 
-  * version 2 of the License, or (at your option) any later version. 
-  * 
-  * For more information, visit <http://getsproutcms.com>. 
-  */ 
-   
- namespace Sprout\Helpers; 
-   
- use DOMDocument; 
-   
- use Kohana; 
-   
-   
- class WorkerLinkChecker extends WorkerBase 
- { // Worker job: scans the rich text of live pages for bad links and emails a report (see run()) 
-     protected $job_name = 'Link Checker'; 
-   
-   
-     protected $metric_names = array( // keys match the first argument of the Worker::metric() calls in run() 
-         1 => 'Total pages', 
-         2 => 'Pages processed', 
-         3 => 'Bad links found', 
-     ); 
-   
-   
-     /** 
-     * Check the rich text of every active page that has a live, standard revision for bad links, 
-     * then build an HTML and a CSV report of the failures and send them by email. 
-     * 
-     * Progress is reported through Worker::message() and Worker::metric(); the method ends with 
-     * Worker::success(). 
-     * 
-     * NOTE(review): this method does not parse as shown. There are closing braces with no visible 
-     * opener, the $email_address branch is a bare array fragment, and $settings / $errs are used 
-     * without a visible assignment. Several lines appear to be missing from this copy; recover 
-     * them from version control before changing behaviour. 
-     * 
-     * @param string|null $email_address When truthy, the report appears to go to this address only; 
-     *        otherwise recipients come from AdminPerms::getOperatorsWithAccess('access_reportemail') 
-     * @return void 
-     **/ 
-     public function run($email_address = null) 
-     { 
-         $q = "SELECT page.id, page.subsite_id, page.name, MAX(rev.id) AS rev_id 
-             FROM ~pages AS page 
-             INNER JOIN ~page_revisions AS rev ON rev.page_id = page.id 
-                 AND rev.status = 'live' AND rev.type = 'standard' 
-             WHERE page.active = 1 
-             GROUP BY page.id 
-             ORDER BY page.id"; 
-         $res = Pdb::query($q, [], 'map-arr'); 
-   
-         // Fetch and collate rich text widgets to produce page text 
-             $rev_ids = []; // NOTE(review): the extra indent and the unmatched '}' below suggest a guard line is missing here 
-             foreach ($res as &$row) { 
-                 $row['text'] = ''; 
-                 $rev_ids[] = (int) $row['rev_id']; 
-             } 
-             $rev_ids = implode(', ', $rev_ids); // interpolated into the SQL below; every id was cast to int above 
-   
-             $q = "SELECT rev.id AS rev_id, widget.settings 
-                 FROM ~page_revisions AS rev 
-                 INNER JOIN ~page_widgets AS widget ON rev.id = widget.page_revision_id 
-                     AND widget.area_id = 1 AND widget.type = 'RichText' 
-                 WHERE rev.id IN ({$rev_ids}) 
-                 ORDER BY widget.record_order"; 
-             $widgets = Pdb::q($q, [], 'pdo'); 
-             foreach ($widgets as $widget) { 
-                 foreach ($res as &$row) { // linear scan of the pages for each widget row; stops at the first matching revision 
-                     if ($row['rev_id'] == $widget['rev_id']) { 
-                         if ($row['text']) $row['text'] .= "\n"; 
-                         $row['text'] .= $settings['text']; // NOTE(review): $settings is never assigned in the visible code; presumably decoded from $widget['settings'] -- confirm 
-                         break; 
-                     } 
-                 } 
-             } 
-             $widgets->closeCursor(); 
-         } // NOTE(review): no visible opener for this brace 
-   
-         Worker ::message('Found ' . count($res) . ' page(s) total');
-         Worker ::metric(1, count($res));
-         Worker::metric(2, 0); 
-         Worker::metric(3, 0); 
-   
-         $processed = 0; 
-         $found = 0; 
-         foreach ($res as $row) { // NOTE(review): $row may still be a reference into $res (never unset after the by-reference loops above), so assigning to it here writes into $res 
-             Worker::message("Checking page # {$row['id']}; '{$row['name']}'"); 
-             $processed++; 
-             $found += $this->checkPage($row, $errs); // $errs is filled by reference; NOTE(review): no visible initialisation of $errs 
-             Worker::metric(2, $processed); 
-             Worker::metric(3, $found); 
-         } 
-   
-         Worker::message(''); 
-         Worker ::message(count($errs) . ' pages have bad link(s)');
-         Worker::message($found . ' bad link(s) total'); 
-         Worker::message(''); 
-   
-             Worker::message("Preparing HTML report"); // NOTE(review): this indented section ends with a '}' but no opener is visible (presumably a check that $errs is non-empty -- confirm) 
-   
-             $view = new View('sprout/email/link_checker'); 
-             $view->errs = $errs; 
-             $view = $view->render(); // $view now holds the render() result rather than the View object 
-   
-   
-             Worker::message("Preparing CSV report"); 
-   
-             $csv = $this->buildCsv($errs); 
-   
-   
-             Worker::message(''); 
-             Worker::message("Sending reports via email"); 
-   
-             if ($email_address) { 
-                     'name' => 'Unknown user', // NOTE(review): array fragment; the start of this statement (presumably an $ops assignment) is not visible 
-                     'email' => $email_address, 
-                 )); 
-             } else { 
-                 $ops = AdminPerms::getOperatorsWithAccess('access_reportemail'); 
-             } 
-   
-             $sent = 0; 
-             foreach ($ops as $row) { 
-                 if ($row['email'] == '') continue; // skip recipients that have no email address 
-   
-                 $mail = new Email(); 
-                 $mail->AddAddress($row['email']); 
-                 $mail->Subject = 'Link checker report for site ' . Kohana::config('sprout.site_title'); 
-                 $mail->SkinnedHTML($view); 
-                 $mail->AddStringAttachment($csv, 'link_checker_report_' . date('Y_m_d') . '.csv', 'base64', 'text/csv'); 
-                 $result = $mail->Send(); 
-   
-                 if ($result) { 
-                     Worker::message("Sent report to {$row['name']} ({$row['email']})"); 
-                     $sent++; 
-                 } else { 
-                     Worker::message("Sending of report to {$row['name']} ({$row['email']}) failed!"); 
-                 } 
-             } 
-   
-             Worker::message("{$sent} email(s) sent successfully."); 
-         } 
-   
-         Worker::message(''); 
-         Worker::success(); 
-     } 
-   
-   
-     /** 
-     * Checks a single page 
-     * 
-     * Parses the page's collated text as HTML and runs checkUrl() on the href of every <a> element. 
-     * 
-     * @param array $row Page row; reads the 'text', 'id', 'subsite_id' and 'name' keys 
-     * @param array $errs Receives one entry per bad link, grouped under an "id:subsite_id:name" key 
-     * @return int|null Number of bad links found; null (bare return) when the text fails to load as HTML 
-     **/ 
-     private function checkPage(&$row, &$errs) 
-     { 
-         $dom = new DOMDocument(); 
-         if (! @$dom->loadHTML($row['text'])) return; // '@' silences parser warnings; the bare return hands null back to run() 
-   
-         $resultname = $row['id'] . ':' . $row['subsite_id'] . ':' . $row['name']; 
-   
-         $as = $dom->getElementsByTagName('a'); 
-         $numfound = 0; 
-         foreach ($as as $elem) { 
-             $href = $elem->getAttribute('href'); 
-   
-             $found = $this->checkUrl($href, $row['subsite_id']); 
-   
-             if ($found !== true) { // checkUrl() returns true on success, otherwise an error string 
-                 $errs[$resultname][] = array('page_id' => $row['id'], 'page_name' => $row['name'], 'link_href' => $href, 'link_text' => $elem->textContent, 'err' => $found); 
-                 $numfound++; 
-             } 
-         } 
-   
-   
-         return $numfound; 
-     } 
-   
-   
-     /** 
-     * Returns TRUE if the given URL is found, a string of the error message if the URL was not found. 
-     * 
-     * Hrefs that start with one of the non-HTTP schemes listed in the first pattern are reported as 
-     * found without being requested. Other hrefs are opened with fopen(); when that fails, the last 
-     * response header line starting with "HTTP" is used to build the error string. 
-     * 
-     * NOTE(review): parts of this method are missing or garbled in this copy -- the conditions around 
-     * the getAbsRoot() line and the '599 Not a URL' return, and the statement that builds $context 
-     * from the option fragments. The method does not parse as shown. 
-     * 
-     * @param string $href Link target to check 
-     * @param int $subsite_id Passed to Subsites::getAbsRoot() 
-     * @return true|string TRUE, or an error string that starts with a three-digit code 
-     **/ 
-     public function checkUrl($href, $subsite_id = 1) 
-     { 
-   
-         if (preg_match('/^(javascript|mailto|news|irc|file|data|sms|tel|callto|skype|chrome|about|ftp):/i', $href)) { 
-             return true; 
-         } 
-   
-             $href =-  Subsites ::getAbsRoot($subsite_id) . trim($href, '/'); // NOTE(review): garbled line ('=-', stray space before '::'); its enclosing condition is not visible
 
-         } 
-   
-             return '599 Not a URL'; // NOTE(review): the condition guarding this return is not visible 
-         } 
-   
-         // If behind a proxy, we cannot see ourselves properly. 
-         // TODO: Try to think up a way this could be made to work. 
-         if (preg_match('!://localhost/!', $href)-  and  $_SERVER['SERVER_PORT'] != 80) { // NOTE(review): the stray '-' before 'and' looks like corruption
 
-             return true; 
-         } 
-   
-   
-         // TODO This whole things should be using HttpReq but I'm in a rush rn. 
-   
-             'method' => 'HEAD', // NOTE(review): option fragments; the statement that opens these arrays and assigns $context is not visible 
-             'follow_location' => true, 
-             'ignore_errors' => false, 
-             'user_agent' => 'SproutLinkChecker/' .-  Sprout ::getVersion() . ' (PHP/' . phpversion() . ')',
 
-         )); 
-             'cafile' => APPPATH . 'cacert.pem', 
-         ); 
-   
-   
-         $h = @fopen($href, 'r', false, $context); // NOTE(review): $h is never passed to fclose() in the visible code 
-   
-         if ($h === false) { 
-             if (empty($http_response_header)) { // set in local scope by PHP's HTTP stream wrapper; empty means no response headers were received 
-                 return '599 Not a URL'; 
-             } 
-   
-             foreach ($http_response_header as $hdr) { // keeps the last header line that starts with "HTTP" 
-                 if (strpos($hdr, 'HTTP') === 0) { 
-                     $status_line = $hdr; 
-                 } 
-             } 
-   
-             if (! preg_match('/([0-9][0-9][0-9]).*$/', $status_line, $matches)) { // NOTE(review): $status_line is unset if no header began with "HTTP" 
-                 return '599 Invalid response'; 
-             } 
-   
-             list($message, $code) = $matches; // $matches[0]: text from the status code to end of line; $matches[1]: the three-digit code 
-   
-             if ($code >= 400 and $code <= 599) { 
-                 return $message; 
-             } 
-   
-             return '599 Network Error'; 
-         } 
-   
-   
-         return true; 
-     } 
-   
-   
-     /** 
-     * Return a CSV for the specified errors 
-     * 
-     * Flattens the per-page groups in $errs into a single list of rows and passes it to QueryTo::csv(). 
-     * 
-     * @param array $errs Error entries grouped per page, as filled in by checkPage() 
-     * @return mixed Whatever QueryTo::csv() returns; run() attaches it to the report email 
-     **/ 
-     private function buildCsv(&$errs) 
-     { 
-   
-         foreach ($errs as $ee) { 
-             foreach ($ee as $eee) { 
-                 $csv[] = $eee; // NOTE(review): $csv is not initialised in the visible code; it stays undefined when $errs is empty 
-             } 
-         } 
-   
-         return QueryTo::csv($csv); 
-     } 
-   
- } 
-