SproutCMS

This is the code documentation for the SproutCMS project

source of /sprout/Helpers/Search.php

  1. <?php
  2. /*
  3.  * Copyright (C) 2017 Karmabunny Pty Ltd.
  4.  *
  5.  * This file is a part of SproutCMS.
  6.  *
  7.  * SproutCMS is free software: you can redistribute it and/or modify it under the terms
  8.  * of the GNU General Public License as published by the Free Software Foundation, either
  9.  * version 2 of the License, or (at your option) any later version.
  10.  *
  11.  * For more information, visit <http://getsproutcms.com>.
  12.  */
  13.  
  14. namespace Sprout\Helpers;
  15.  
  16. use Exception;
  17.  
  18. use Kohana;
  19.  
  20.  
  21. /**
  22. * Functions for front-end search indexing
  23. **/
  24. class Search
  25. {
  26. const WORD_LENGTH = 16;
  27.  
  28. private static $table;
  29. private static $record_id;
  30.  
  31. private static $query_and = false;
  32.  
  33.  
  34.  
  35. /**
  36.   * Selects the currently active index
  37.   * @param string $table Table name, without prefix
  38.   * @param int $record_id Record ID
  39.   * @return void
  40.   */
  41. public static function selectIndex($table, $record_id)
  42. {
  43. Pdb::validateIdentifier($table);
  44.  
  45. self::$table = $table;
  46. self::$record_id = (int) $record_id;
  47. }
  48.  
  49.  
  50. /**
  51.   * Clears the index chosen with selectIndex()
  52.   **/
  53. public static function clearIndex()
  54. {
  55. Pdb::delete(self::$table, ['record_id' => self::$record_id]);
  56. return true;
  57. }
  58.  
  59.  
  60. /**
  61.   * Splits up text, and adds the keywords which are found into the index chosen with selectIndex()
  62.   * Usually should be run from within a transaction
  63.   *
  64.   * @param string $text The text to process.
  65.   * @param float $relevancy_multiplier A multiplier to apply to the relavancy of all keywords found.
  66.   **/
  67. public static function indexText($text, $relevancy_multiplier = 1)
  68. {
  69. $text = iconv('UTF-8', 'ASCII//TRANSLIT', $text);
  70. $text = strtolower($text);
  71. $text = preg_replace('/[^-@a-zA-Z0-9 ]/', '', $text);
  72. $words = explode(' ', $text);
  73.  
  74. $cur_relevancy = 2;
  75.  
  76. $unique_words = array();
  77. foreach ($words as $word) {
  78. if (strlen($word) < 2) continue;
  79. $word = substr($word, 0, self::WORD_LENGTH);
  80.  
  81. if (!isset($unique_words[$word])) $unique_words[$word] = 0.0;
  82. $unique_words[$word] += $cur_relevancy;
  83.  
  84. $single = Inflector::singular($word);
  85. if ($single != $word) {
  86. if (!isset($unique_words[$single])) {
  87. $unique_words[$single] = 0.0;
  88. }
  89. $unique_words[$single] += $cur_relevancy * 0.7;
  90. }
  91.  
  92. if ($cur_relevancy > 1) $cur_relevancy -= 0.01;
  93. }
  94.  
  95. return self::addWordsIndex($unique_words, $relevancy_multiplier);
  96. }
  97.  
  98.  
  99. /**
  100.   * Splits up html, and adds the keywords which are found into the index chosen with selectIndex()
  101.   * Usually should be run from within a transaction
  102.   *
  103.   * @param string $text The text to process.
  104.   * @param float $relevancy_multiplier A multiplier to apply to the relavancy of all keywords found.
  105.   **/
  106. public static function indexHtml($text, $relevancy_multiplier = 1)
  107. {
  108. $text = Text::plain($text, 0);
  109. return self::indexText($text, $relevancy_multiplier);
  110. }
  111.  
  112.  
  113. /**
  114.   * Accepts an array of relevant words and adds it into the index chosen with selectIndex()
  115.   * Usually should be run from within a transaction
  116.   * @param array $words word => relevancy mapping
  117.   * @param int $relevancy_multiplier
  118.   */
  119. private static function addWordsIndex(array $words, $relevancy_multiplier = 1)
  120. {
  121.  
  122. $table = self::$table;
  123.  
  124. if (count($words) == 0) return true;
  125.  
  126.  
  127. // Build a reverse index of name => id
  128. $vals = [];
  129. $where = Pdb::buildClause([['name', 'IN', array_keys($words)]], $vals);
  130. $q = "SELECT name, id FROM ~search_keywords WHERE " . $where;
  131. $res = Pdb::q($q, $vals, 'map');
  132.  
  133. // Iterate through the words, doing an insert...update of the records
  134. // If the search_keywords records do not exist, they will be added
  135. foreach ($words as $word => $relevancy) {
  136. $relevancy = $relevancy * $relevancy_multiplier;
  137.  
  138. if (empty($word_ids[$word])) {
  139. $word_ids[$word] = Pdb::insert('search_keywords', ['name' => $word]);
  140. }
  141.  
  142. $q = "INSERT INTO ~{$table}
  143. (keyword_id, record_id, relevancy)
  144. VALUES
  145. (:word_id, :rec_id, :rel)
  146. ON DUPLICATE KEY UPDATE
  147. relevancy = relevancy + :rel";
  148. $params = [
  149. 'word_id' => $word_ids[$word],
  150. 'rec_id' => self::$record_id,
  151. 'rel' => $relevancy
  152. ];
  153. Pdb::q($q, $params, 'count');
  154. }
  155.  
  156.  
  157. return true;
  158. }
  159.  
  160.  
  161. /**
  162.   * Looks for syncronisation errors in the indexes and fixes them
  163.   **/
  164. public static function cleanup($reference_table)
  165. {
  166. Pdb::validateIdentifier($reference_table);
  167.  
  168. $table = self::$table;
  169.  
  170. // Find keyword records which don't have a matching reference record
  171. $q = "SELECT DISTINCT keywords.record_id
  172. FROM ~{$table} AS keywords
  173. LEFT JOIN ~{$reference_table} AS ref ON keywords.record_id = ref.id
  174. WHERE ref.id IS NULL";
  175. $ids = Pdb::q($q, [], 'col');
  176. if (count($ids) == 0) return true;
  177.  
  178. // And delete them
  179. Pdb::delete($table, [['record_id', 'IN', $ids]]);
  180.  
  181. return true;
  182. }
  183.  
  184.  
  185. /**
  186.   * Set the 'query_and' parameter, which defines the handling of search terms
  187.   **/
  188. public static function queryAnd($val)
  189. {
  190. self::$query_and = $val;
  191. }
  192.  
  193.  
  194. /**
  195.   * Does a query for a term, against a set of search handlers
  196.   *
  197.   * @param string $q The keyword(s) to search for.
  198.   * @param array $search_handlers Search handlers to use. Expects an array of SearchHandler objects.
  199.   * @param int $page The page number to show. Should be 0-based (first page is num 0)
  200.   * @param int $num_per_page The number of records to show per page. Defaults to a the sprout config option
  201.   * @return array The array contains 4 keys:
  202.   * [0] PDOStatement, with three columns: record_id; controller_class; relevancy.
  203.   * [1] array The list of keywords used for the search
  204.   * [2] int The total number of results available; results are
  205.   * paginated, so this is likely to be more than the number of
  206.   * results returned.
  207.   * [3] int The total number of pages of results
  208.   **/
  209. public static function query($query, $search_handlers, $page = 0, $num_per_page = null)
  210. {
  211.  
  212. $page = (int) $page;
  213. $num_per_page = (int) $num_per_page;
  214. $conn = Pdb::getConnection();
  215.  
  216. if ($num_per_page == 0) $num_per_page = Kohana::config('sprout.search_per_page');
  217. if ($num_per_page == 0) $num_per_page = 10;
  218.  
  219. if (count($search_handlers) == 0) throw new Exception("No search handlers provided");
  220. if ($page < 0) throw new Exception("Invalid page specified");
  221.  
  222. $offset = $page * $num_per_page;
  223.  
  224. // Get keywords
  225. if (is_array($query)) {
  226. $keywords = $query;
  227. } else {
  228. $keywords = explode(' ', trim($query));
  229. }
  230.  
  231. // Pre-process keywords
  232. foreach ($keywords as $idx => $word) {
  233. if (strlen($word) <= 2) unset ($keywords[$idx]);
  234. }
  235.  
  236. $sql_keywords = array();
  237. foreach ($keywords as $word) {
  238. $word = strtolower($word);
  239. $word = $conn->quote($word);
  240. $sql_keywords[] = $word;
  241. }
  242.  
  243. // Build the SELECT queries which will make up the UNION
  244. $queries = array();
  245. $i = 0;
  246. foreach ($search_handlers as $handler) {
  247. $i++;
  248.  
  249. $where = $handler->getWhere();
  250. $joins = $handler->getJoins();
  251. $having = $handler->getHaving();
  252.  
  253. if ($sql_keywords) {
  254. $joins[] = "INNER JOIN ~{$handler->getTable()} AS records ON records.record_id = main.id";
  255.  
  256. $joins[] = "INNER JOIN ~search_keywords AS keywords ON keywords.id = records.keyword_id";
  257.  
  258. $where[] = 'keywords.name IN (' . implode(', ', $sql_keywords) . ')';
  259.  
  260. if (self::$query_and) {
  261. $having[] = 'COUNT(keywords.name) = ' . count($sql_keywords);
  262. }
  263.  
  264. $having[] = 'relevancy > 0';
  265. }
  266.  
  267. if (count($where) == 0) continue;
  268.  
  269. $joins = implode("\n ", $joins);
  270. $where = implode(' AND ', $where);
  271. $having = implode(' AND ', $having);
  272.  
  273. if ($having == '') $having = '1';
  274.  
  275.  
  276. $q = '(SELECT ' . ($i == 1 ? 'SQL_CALC_FOUND_ROWS' : '') . " main.id AS record_id, " . $conn->quote($handler->getCtlrName()) . " AS controller_class, ";
  277. $q .= ($sql_keywords ? 'SUM(records.relevancy)' : '1') . ' AS relevancy';
  278. $q .= "
  279. FROM ~{$handler->getMainTable()} AS main
  280. {$joins}
  281. WHERE {$where}
  282. GROUP BY main.id
  283. HAVING {$having})";
  284.  
  285. $queries[] = $q;
  286. }
  287.  
  288. if (count($queries) == 0) {
  289. return false;
  290. }
  291.  
  292. // Combine UNIONs
  293. $q = implode("\nUNION DISTINCT\n", $queries);
  294. $q .= "\nORDER BY relevancy DESC";
  295. $q .= "\nLIMIT {$num_per_page} OFFSET {$offset}";
  296.  
  297. // Run the query
  298. $res = Pdb::query($q, [], 'pdo');
  299.  
  300. $q = "SELECT FOUND_ROWS()";
  301. $num_results = Pdb::query($q, [], 'val');
  302. $num_pages = ceil($num_results / $num_per_page);
  303.  
  304. return array($res, $keywords, $num_results, $num_pages);
  305. }
  306.  
  307.  
  308. /**
  309.   * Return HTML to perform pagination.
  310.   *
  311.   * $curr_page The current page number (1-based).
  312.   * $num_page The total number of pages.
  313.   * $div_class If provided, a wrapper div will be generated, with this class name.
  314.   **/
  315. public static function paginate($curr_page, $num_pages, $div_class = null)
  316. {
  317. $out = '';
  318. $url_base = Url::withoutArgs('page');
  319.  
  320. if ($num_pages == 1) return null;
  321.  
  322. // Start and end to show
  323. $start = $curr_page - 2;
  324. $end = $curr_page + 4;
  325. if ($start < 1) $start = 1;
  326. if ($end > $num_pages) $end = $num_pages;
  327.  
  328. if ($div_class) $out .= '<div class="' . $div_class . '">';
  329.  
  330. if ($curr_page > 1) {
  331. $out .= "<a href=\"{$url_base}page=" . ($curr_page - 1) . "\" class=\"page-prev\">&laquo; Prev</a> ";
  332. }
  333.  
  334. for ($i = $start; $i <= $end; $i++) {
  335. $class = ($i == $curr_page ? 'page on' : 'page');
  336. $out .= " <a href=\"{$url_base}page={$i}\" class=\"{$class}\">{$i}</a> ";
  337. }
  338.  
  339. if ($curr_page < $end) {
  340. $out .= " <a href=\"{$url_base}page=" . ($curr_page + 1) . "\" class=\"page-next\">Next &raquo;</a>";
  341. }
  342.  
  343. if ($div_class) $out .= '</div>';
  344.  
  345. return $out;
  346. }
  347.  
  348. }
  349.  
  350.  
  351.