SproutCMS

This is the code documentation for the SproutCMS project

source of /sprout/Helpers/DocImport/DocImport.php

  1. <?php
  2. /*
  3.  * Copyright (C) 2017 Karmabunny Pty Ltd.
  4.  *
  5.  * This file is a part of SproutCMS.
  6.  *
  7.  * SproutCMS is free software: you can redistribute it and/or modify it under the terms
  8.  * of the GNU General Public License as published by the Free Software Foundation, either
  9.  * version 2 of the License, or (at your option) any later version.
  10.  *
  11.  * For more information, visit <http://getsproutcms.com>.
  12.  */
  13.  
  14. namespace Sprout\Helpers\DocImport;
  15.  
  16. use DOMDocument;
  17. use Exception;
  18.  
  19. use Sprout\Helpers\Enc;
  20. use Sprout\Helpers\File;
  21. use Sprout\Helpers\Register;
  22. use Sprout\Helpers\Sprout;
  23. use Sprout\Helpers\Treenode;
  24.  
  25.  
  26. /**
  27. * Abstract class with additional stuff for the document importer system
  28. *
  29. * The document importer loads docuemnts and returns XML files
  30. * These files can then be processed by other parts of the CMS.
  31. * The term "xmldoc" is referring to the intermediate XML file produced by the
  32. * import libraries
  33. **/
  34. abstract class DocImport {
  35.  
  36. /**
  37.   * The main load function for a document.
  38.   * Throw an exception on error.
  39.   *
  40.   * @param string $filename The file. The file will exist, but may not be valid
  41.   * @return string|DOMDocument $data Resultant XML data as a string or DOMDocument element
  42.   **/
  43. abstract public function load($filename);
  44.  
  45.  
  46. /**
  47.   * Return a `DocImport` object instance for converting a file with a given original name
  48.   * Throws an exception on error
  49.   **/
  50. public static function instance($orig_filename)
  51. {
  52. $ext = strtolower(File::getExt($orig_filename));
  53. if (! $ext) {
  54. throw new Exception('Filename not valid');
  55. }
  56.  
  57. $doc_imports = Register::getDocImports();
  58. if (! $doc_imports[$ext]) {
  59. throw new Exception("Unsupported file extension: {$ext}");
  60. }
  61.  
  62. return Sprout::instance($doc_imports[$ext][0]);
  63. }
  64.  
  65.  
  66. /**
  67.   * For a given XML doc file, return the HTML version.
  68.   *
  69.   * @param string $filename The file to load; this can also be passed as a DOMDocument object
  70.   * @param array $images Mapping of rel => filename for images
  71.   * @param array $headings Mapping of old to new level for headings (e.g. 3 => 2 for H3 -> H2)
  72.   * @return string The HTML, or NULL on error
  73.   **/
  74. public static function getHtml($filename, $images = array(), $headings = array()) {
  75. if ($filename instanceof DOMDocument) {
  76. $xml = @simplexml_import_dom($filename);
  77. } else {
  78. }
  79.  
  80. if (! $xml) return null;
  81.  
  82. // Re-map images, or remove if no mapping exists
  83. $img_tags = $xml->xpath('//img');
  84. foreach ($img_tags as $tag) {
  85. if ((string)$tag['width'] == 0) unset($tag['width']);
  86. if ((string)$tag['height'] == 0) unset($tag['height']);
  87.  
  88. if ($tag['error']) {
  89. $width = (string)$tag['width'];
  90. $height = (string)$tag['height'];
  91.  
  92. if (!$width or !$height) {
  93. $width = 300;
  94. $height = 50;
  95. }
  96.  
  97. $tag->addAttribute('src', 'http://placehold.it/' . $width . 'x' . $height . '&text=' . Enc::url((string)$tag['error']));
  98. unset($tag['error']);
  99. unset($tag['rel']);
  100.  
  101. } else {
  102. if (isset($images[(string)$tag['rel']])) {
  103. $tag->addAttribute('src', $images[(string)$tag['rel']]);
  104. unset($tag['rel']);
  105. } else {
  106. unset($tag[0]);
  107. }
  108. }
  109. }
  110.  
  111. // Get as XML and do some XML -> HTML mods
  112. $html = $xml->body->asXML();
  113. $html = str_replace(array('<body>', '</body>', '<body/>'), '', $html);
  114. $html = str_replace(array('<br/>', '<br />'), '<br>', $html);
  115. $html = str_replace('/>', '>', $html);
  116.  
  117. // Re-map headings, or remove if no mapping exists
  118. arsort($headings);
  119. foreach ($headings as $old => $new) {
  120. $html = preg_replace("!<h{$old}>([^<]+)</h{$old}>!", "<h{$new}>\$1</h{$new}>", $html);
  121. }
  122.  
  123. return $html;
  124. }
  125.  
  126.  
  127. /**
  128.   * For a given XML doc file, return an array of resources, in name => data format.
  129.   *
  130.   * @param string $filename The file to load
  131.   * @return array The resources
  132.   **/
  133. public static function getResources($filename)
  134. {
  135. if (! $xml) return null;
  136.  
  137. $res = array();
  138. foreach ($xml->res as $row) {
  139. $res[(string)$row['name']] = base64_decode((string)$row);
  140. }
  141.  
  142. return $res;
  143. }
  144.  
  145.  
  146.  
  147. /**
  148.   * For a given XML document, return a tree of headings
  149.   *
  150.   * Any heading level past the max_depth option is treated like body text.
  151.   *
  152.   * @param string $filename The XML file to load; this can also be passed as a DOMDocument object
  153.   * @param int $max_depth The maximum depth of headings to return
  154.   * @param bool $include_body True to include the body XML as a parameter on the node. Default false
  155.   * @return Treenode The tree of headings
  156.   **/
  157. public static function getHeadingsTree($filename, $max_depth, $include_body = false)
  158. {
  159. if ($filename instanceof DOMDocument) {
  160. $dom = $filename;
  161. } else {
  162. $dom = new DOMDocument();
  163. $dom->loadXML(file_get_contents($filename));
  164. }
  165.  
  166. $body = $dom->getElementsByTagName('body');
  167. $body = $body->item(0);
  168.  
  169. $root = new Treenode();
  170. $curr = array($root);
  171. $node = $root;
  172.  
  173. foreach ($body->childNodes as $elem) {
  174. if (preg_match('/^[hH][1-6]$/', $elem->tagName) and ($level = (int)($elem->tagName[1])) and $level <= $max_depth) {
  175. $node = new Treenode();
  176. $node['name'] = $elem->textContent;
  177. $node['level'] = $level;
  178. $node['body'] = '';
  179.  
  180. $parent = null;
  181. for ($i = $level-1; !$parent; $i--) {
  182. $parent = @$curr[$i];
  183. }
  184.  
  185. $parent->children[] = $node;
  186. $node->parent = $parent;
  187.  
  188. $curr[$level] = $node;
  189.  
  190. } else if ($include_body) {
  191. $node['body'] .= $dom->saveXML($elem);
  192. }
  193. }
  194.  
  195. return $root;
  196. }
  197. }
  198.