Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Replace macros with multiple elements by splitting existing text runs/paragraphs instead of replacing them #2607

Draft
wants to merge 2 commits into
base: master
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
102 changes: 102 additions & 0 deletions src/PhpWord/StyleMerger.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,102 @@
<?php

/**
* This file is part of PHPWord - A pure PHP library for reading and writing
* word processing documents.
*
* PHPWord is free software distributed under the terms of the GNU Lesser
* General Public License version 3 as published by the Free Software Foundation.
*
* For the full copyright and license information, please read the LICENSE
* file that was distributed with this source code. For the full list of
* contributors, visit https://github.com/PHPOffice/PHPWord/contributors.
*
* @see https://github.com/PHPOffice/PHPWord
*
* @license http://www.gnu.org/licenses/lgpl.txt LGPL version 3
*/

declare(strict_types=1);

namespace PhpOffice\PhpWord;

/**
 * Merges WordprocessingML style fragments (e.g. <w:pPr>…</w:pPr> or
 * <w:rPr>…</w:rPr>) on the level of their direct child elements.
 *
 * The style passed to the constructor is the base. Child elements of merged
 * styles are only added if the base does not yet contain an element with the
 * same tag name, i.e. settings that already exist always win.
 */
final class StyleMerger
{
    /**
     * Root element of the style that is being built.
     *
     * @var \DOMElement
     */
    private $styleElement;

    /**
     * Direct child elements of the style element, keyed by tag name.
     *
     * @var array<string, \DOMElement>
     */
    private $elements = [];

    /**
     * @param string $style XML of the base style; must contain exactly one usable root element
     *
     * @throws \RuntimeException if no style element can be created from $style
     */
    public function __construct(string $style)
    {
        $this->styleElement = $this->createStyleElement($style);
        foreach ($this->styleElement->childNodes as $node) {
            if ($node instanceof \DOMElement) {
                $this->elements[$node->tagName] = $node;
            }
        }
    }

    /**
     * Merges any number of styles into $style and returns the resulting XML.
     *
     * @param string $style base style; its settings take precedence
     * @param string ...$styles styles to merge into the base, in the given order
     *
     * @throws \RuntimeException if a style cannot be parsed or serialized
     */
    public static function mergeStyles(string $style, string ...$styles): string
    {
        $styleMerger = new self($style);
        foreach ($styles as $styleToMerge) {
            $styleMerger->merge($styleToMerge);
        }

        return $styleMerger->getStyleString();
    }

    /**
     * Adds those child elements of $style that the current style does not contain yet.
     *
     * An empty (or whitespace-only) $style carries nothing to merge and is ignored
     * instead of being reported as a parse error.
     *
     * @throws \RuntimeException if $style cannot be parsed or a node cannot be imported
     */
    public function merge(string $style): self
    {
        if ('' === trim($style)) {
            return $this;
        }

        $document = $this->getDocument();
        $styleElement = $this->createStyleElement($style);
        foreach ($styleElement->childNodes as $node) {
            // @todo Do we need recursive merging for some elements?
            if (!$node instanceof \DOMElement || isset($this->elements[$node->tagName])) {
                continue;
            }

            // The node belongs to another document, so it has to be imported (deep copy) first.
            $importedNode = $document->importNode($node, true);
            if (!$importedNode instanceof \DOMElement) {
                throw new \RuntimeException('Importing node failed');
            }

            $this->styleElement->appendChild($importedNode);
            $this->elements[$node->tagName] = $importedNode;
        }

        return $this;
    }

    /**
     * Returns the merged style serialized as XML.
     *
     * @throws \RuntimeException if the style element cannot be serialized
     */
    public function getStyleString(): string
    {
        // saveXML() returns false on failure, which must not leak through the string return type.
        $xml = $this->getDocument()->saveXML($this->styleElement);
        if (false === $xml) {
            throw new \RuntimeException('Could not serialize style element');
        }

        return $xml;
    }

    /**
     * Parses $style and returns its first element.
     *
     * The style is wrapped in a root element that declares the "w" namespace
     * prefix so that the fragment can be parsed on its own.
     *
     * @throws \RuntimeException if $style cannot be processed or contains no element
     */
    private function createStyleElement(string $style): \DOMElement
    {
        // Drop whitespace between tags so that it does not end up as text nodes.
        $style = preg_replace('/>\s+</', '><', $style);
        if (null === $style) {
            throw new \RuntimeException('Error processing style');
        }

        $doc = new \DOMDocument();
        $loaded = $doc->loadXML(
            '<root xmlns:w="http://schemas.openxmlformats.org/wordprocessingml/2006/main">' . $style . '</root>'
        );
        if (false === $loaded || null === $doc->documentElement) {
            throw new \RuntimeException('Could not create style element');
        }

        foreach ($doc->documentElement->childNodes as $node) {
            if ($node instanceof \DOMElement) {
                return $node;
            }
        }

        throw new \RuntimeException('Could not create style element');
    }

    /**
     * Returns the document that owns the style element.
     *
     * @throws \RuntimeException if the style element is not attached to a document
     */
    private function getDocument(): \DOMDocument
    {
        $document = $this->styleElement->ownerDocument;
        if (null === $document) {
            throw new \RuntimeException('Style element has no owner document');
        }

        return $document;
    }
}
153 changes: 142 additions & 11 deletions src/PhpWord/TemplateProcessor.php
Original file line number Diff line number Diff line change
Expand Up @@ -315,6 +315,64 @@ public function setComplexBlock($search, Element\AbstractElement $complexType):
$this->replaceXmlBlock($search, $xmlWriter->getData(), 'w:p');
}

/**
 * Replaces a search string (macro) with a set of rendered elements, splitting
 * surrounding texts, text runs or paragraphs before and after the macro,
 * depending on the types of elements to insert.
 *
 * Every element is rendered with the Word2007 element writer that has the same
 * short class name as the element. If all elements are rendered by the Text
 * writer, the macro's text run (w:r) is split; otherwise its paragraph (w:p)
 * is split. Nothing is changed if no containing block is found for the macro.
 *
 * @param string $search
 * @param \PhpOffice\PhpWord\Element\AbstractElement[] $elements
 * @param bool $inheritStyle
 *   If TRUE the style will be inherited from the paragraph/text run the macro
 *   is inside. If the element already contains styles, they will be merged.
 *
 * @throws \PhpOffice\PhpWord\Exception\Exception
 */
public function setElementsValue(string $search, array $elements, bool $inheritStyle = false): void
{
    $search = static::ensureMacroCompleted($search);
    $elementsDataList = [];
    $hasParagraphs = false;
    foreach ($elements as $element) {
        $elementClass = get_class($element);
        $elementName = substr($elementClass, (int) strrpos($elementClass, '\\') + 1);
        $objectClass = 'PhpOffice\\PhpWord\\Writer\\Word2007\\Element\\' . $elementName;

        // For inline elements, do not create a new paragraph.
        $withParagraph = \PhpOffice\PhpWord\Writer\Word2007\Element\Text::class !== $objectClass;
        $hasParagraphs = $hasParagraphs || $withParagraph;

        $xmlWriter = new XMLWriter();
        /** @var \PhpOffice\PhpWord\Writer\Word2007\Element\AbstractElement $elementWriter */
        $elementWriter = new $objectClass($xmlWriter, $element, !$withParagraph);
        $elementWriter->write();

        // Strip whitespace between tags so that the style patterns below match.
        $elementData = preg_replace('/>\s+</', '><', $xmlWriter->getData());
        if (null === $elementData) {
            throw new Exception('Error processing PhpWord document.');
        }
        $elementsDataList[] = $elementData;
    }

    $blockType = $hasParagraphs ? 'w:p' : 'w:r';
    $where = $this->findContainingXmlBlockForMacro($search, $blockType);
    if (!is_array($where)) {
        return;
    }

    /** @phpstan-var array{start: int, end: int} $where */
    $block = $this->getSlice($where['start'], $where['end']);
    $paragraphStyle = '';
    $textRunStyle = '';
    $parts = $hasParagraphs
        ? $this->splitParagraphIntoParagraphs($block, $paragraphStyle, $textRunStyle)
        : $this->splitTextIntoTexts($block, $textRunStyle);

    if ($inheritStyle) {
        // Without an extracted style there is nothing to merge; the element's
        // own style is kept as is (merging an empty style is not possible).
        $mergeParagraphStyle = static fn (array $matches): string => '' === $paragraphStyle
            ? $matches[0]
            : StyleMerger::mergeStyles($matches[0], $paragraphStyle);

        $mergeTextRunStyle = static function (array $matches) use ($textRunStyle): string {
            if ('' === $textRunStyle) {
                return $matches[0];
            }

            $mergedStyle = StyleMerger::mergeStyles($matches[1], $textRunStyle);
            // A callback is used so that "$1" or "\1" inside the merged style
            // is not interpreted as a back-reference.
            $merged = preg_replace_callback(
                '#<w:rPr.*</w:rPr>#',
                static fn (): string => $mergedStyle,
                $matches[0]
            );
            if (null === $merged) {
                throw new Exception('Error processing PhpWord document.');
            }

            return $merged;
        };

        // NOTE(review): the patterns are greedy; this assumes each rendered
        // element contains at most one w:pPr / styled w:r - confirm.
        $elementsDataList = preg_replace_callback_array([
            '#<w:pPr/>#' => static fn (): string => $paragraphStyle,
            '#<w:pPr.*</w:pPr>#' => $mergeParagraphStyle,
            // <w:pPr> may contain <w:rPr> itself so we have to match for <w:rPr> inside of <w:r>
            '#<w:r><w:rPr/>.*</w:r>#' => static fn (array $matches): string => str_replace(
                '<w:rPr/>',
                $textRunStyle,
                $matches[0]
            ),
            '#<w:r>.*(<w:rPr.*</w:rPr>).*</w:r>#' => $mergeTextRunStyle,
        ], $elementsDataList);
        if (null === $elementsDataList) {
            throw new Exception('Error processing PhpWord document.');
        }
    }

    // First put the split parts in place of the original block, so that the
    // macro ends up in a block of its own; then replace that block with the
    // rendered elements.
    $this->replaceXmlBlock($search, $parts, $blockType);
    $this->replaceXmlBlock($search, implode('', $elementsDataList), $blockType);
}

/**
* @param mixed $search
* @param mixed $replace
Expand Down Expand Up @@ -1440,28 +1498,101 @@ protected function findXmlBlockEnd($offset, $blockType)
}

/**
* Splits a w:r/w:t into a list of w:r where each ${macro} is in a separate w:r.
* Adds output parameter for extracted style.
*
* @param string $text
* @param string $extractedStyle
* Is set to the extracted text run style (w:rPr).
*
* @return string
* @throws \PhpOffice\PhpWord\Exception\Exception
*/
protected function splitTextIntoTexts($text)
{
protected function splitTextIntoTexts($text, string &$extractedStyle = '') {
if (NULL === $unformattedText = preg_replace('/>\s+</', '><', $text)) {
throw new Exception('Error processing PhpWord document.');
}

$matches = [];
preg_match('/<w:rPr.*<\/w:rPr>/i', $unformattedText, $matches);
$extractedStyle = $matches[0] ?? '';

if (!$this->textNeedsSplitting($text)) {
return $text;
}
$matches = [];
if (preg_match('/(<w:rPr.*<\/w:rPr>)/i', $text, $matches)) {
$extractedStyle = $matches[0];
} else {
$extractedStyle = '';

$result = str_replace(
['<w:t>', '${', '}'],
[
'<w:t xml:space="preserve">',
'</w:t></w:r><w:r>' . $extractedStyle . '<w:t xml:space="preserve">${',
'}</w:t></w:r><w:r>' . $extractedStyle . '<w:t xml:space="preserve">',
],
$unformattedText
);

$emptyTextRun = '<w:r>' . $extractedStyle . '<w:t xml:space="preserve"></w:t></w:r>';

return str_replace($emptyTextRun, '', $result);
}

/**
* Splits a w:p into a list of w:p where each ${macro} is in a separate w:p.
*
* @param string $extractedParagraphStyle
* Is set to the extracted paragraph style (w:pPr).
* @param string $extractedTextRunStyle
* Is set to the extracted text run style (w:rPr).
*
* @throws \PhpOffice\PhpWord\Exception\Exception
*/
public function splitParagraphIntoParagraphs(
string $paragraph,
string &$extractedParagraphStyle = '',
string &$extractedTextRunStyle = ''
): string {
if (NULL === $paragraph = preg_replace('/>\s+</', '><', $paragraph)) {
throw new Exception('Error processing PhpWord document.');
}

$unformattedText = preg_replace('/>\s+</', '><', $text);
$result = str_replace([self::$macroOpeningChars, self::$macroClosingChars], ['</w:t></w:r><w:r>' . $extractedStyle . '<w:t xml:space="preserve">' . self::$macroOpeningChars, self::$macroClosingChars . '</w:t></w:r><w:r>' . $extractedStyle . '<w:t xml:space="preserve">'], $unformattedText);
$matches = [];
preg_match('#<w:pPr.*</w:pPr>#i', $paragraph, $matches);
$extractedParagraphStyle = $matches[0] ?? '';

// <w:pPr> may contain <w:rPr> itself so we have to match for <w:rPr> inside of <w:r>
preg_match('#<w:r>.*(<w:rPr.*</w:rPr>).*</w:r>#i', $paragraph, $matches);
$extractedTextRunStyle = $matches[1] ?? '';

$result = str_replace(
[
'<w:t>',
'${',
'}',
],
[
'<w:t xml:space="preserve">',
sprintf(
'</w:t></w:r></w:p><w:p>%s<w:r><w:t xml:space="preserve">%s${',
$extractedParagraphStyle,
$extractedTextRunStyle
),
sprintf(
'}</w:t></w:r></w:p><w:p>%s<w:r>%s<w:t xml:space="preserve">',
$extractedParagraphStyle,
$extractedTextRunStyle
),
],
$paragraph
);

// Remove empty paragraphs that might have been created before/after the
// macro.
$emptyParagraph = sprintf(
'<w:p>%s<w:r>%s<w:t xml:space="preserve"></w:t></w:r></w:p>',
$extractedParagraphStyle,
$extractedTextRunStyle
);

return str_replace(['<w:r>' . $extractedStyle . '<w:t xml:space="preserve"></w:t></w:r>', '<w:r><w:t xml:space="preserve"></w:t></w:r>', '<w:t>'], ['', '', '<w:t xml:space="preserve">'], $result);
return str_replace($emptyParagraph, '', $result);
}

/**
Expand Down
Loading