Jos*_*iro 5 c# ms-word openxml openxml-sdk
这个想法很简单,但答案可能会变得复杂:
事实上,我可以检查字体大小的运行属性。
如果不存在,我需要检查应用于段落的样式以找到为字体大小定义的运行属性,然后是该样式的段落运行属性。
如果没有找到,我需要再次检查有关此样式所基于的样式的所有内容。
如果没有找到,我应该检查样式层次结构中的以下样式,然后继续直到达到默认样式。
我还需要检查上一段是否应用了样式。在这种情况下,应用的样式可以定义影响我正在处理的文本的下一段的样式。
如果没有样式影响我的段落,那么我需要查看样式部分的默认运行属性。之后,我应该查看同一部分中的默认段落属性。
如果什么都不适用,那么大小定义的责任就交给处理文档的应用程序。
我对吗?
OpenXML SDK 和/或 OpenXmlPowerTools 没有为此提供任何帮助吗?
一个重要的方面是,除了文本字体大小之外,这个问题几乎扩展到任何段落或运行属性。
我的最终目标是根据格式确定一段文本是否是节标题(如heading1、heading2 等),但看起来很难得到像“一段文本的当前格式”这样简单的东西。为了让事情变得更难,我还需要处理(部分)编号,很多时候没有应用于段落的编号格式。
谢谢,
所以,我按照承诺回答我自己的问题。
我开发了一种方法,可以从 Word 文档段落返回特定运行的“有效”运行属性。它考虑了默认文档属性、应用样式(包括相关样式层次结构)和根据标准 - ISO/IEC29500-1 的直接运行属性。
有趣的是,Word 在以下两个方面似乎并没有完全遵循标准: 1 - 如果段落没有应用样式,Word 将应用默认的段落样式。按照我对标准的理解,此时不应该应用任何样式。对于运行来说,不会发生这种情况:当运行没有运行样式时,不会应用默认的运行样式。2 - 为了获得有效的运行属性,有必要“汇总样式”。段落样式和运行样式遵循样式层次结构。为了获取特定的属性值,有必要在应用的样式中查找它,如果不存在,则在父样式中查找它,依此类推。如果某个属性已在父样式中以特定值定义,而子样式中的值与之相同,则不应在子样式中重复添加该属性。Word 对字符样式不遵循此规则。事实上,从运行样式应用的所有运行属性都可以直接从该运行样式获取,而无需遵循样式层次结构。这不符合标准。
现在,让我详细介绍一下我的解决方案:
首先,我的代码使用 OpenXml PowerTools: http://powertools.codeplex.com/
接下来,为了按照样式继承关系汇总样式,我改编并实现了 Eric White 提供的解决方案: http://blogs.msdn.com/b/ericwhite/archive/2009/12/13/implementing-inheritance-in-xml.aspx 和 http://blogs.msdn.com/b/ericwhite/archive/2009/10/29/open-xml-wordprocessingml-style-inheritance.aspx
获取运行属性的完整算法可以在标准中找到,Eric White 也提供了该算法,网址为: http://blogs.msdn.com/b/ericwhite/archive/2009/11/12/assemble-paragraph-and-run-properties-for-cells-in-a-table.aspx 在那篇文章的例子中,它涉及从表内的单元格中提取属性。我的方法不适用于表格内的段落(我只是不需要它:-)),但它可以扩展以处理这些情况(所有信息都在 Eric 的文章中)
请注意,我正确处理了切换(toggle)属性,并且遵循的是 Word 的实际工作方式(即上文提到的 Word 与标准之间的差异)。
最后是代码:
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Xml.Linq;
using DocumentFormat.OpenXml.Packaging;
using DocumentFormat.OpenXml;
using DocumentFormat.OpenXml.Wordprocessing;
using OpenXmlPowerTools;
namespace MyNameSpace
{
class OpenXmlPowerToolsUtilities
{
/// <summary>
/// Computes the "effective" run properties (<c>w:rPr</c>) of a run by layering,
/// from lowest to highest priority: the document default run properties, the run
/// properties of the paragraph style, the run properties of the run (character)
/// style, the resolved toggle properties, and finally the direct run properties.
/// </summary>
/// <param name="wordDoc">The document that owns the run; its styles part is read.</param>
/// <param name="run">The <c>w:r</c> element to inspect. Its parent must be a <c>w:p</c>.</param>
/// <returns>
/// A new <c>w:rPr</c> element holding the merged properties, or <c>null</c> when the
/// run is not a direct child of a paragraph or the document has no styles part.
/// </returns>
public static XElement GetEffectiveRunProperties(WordprocessingDocument wordDoc, XElement run)
{
    // Only runs that sit directly inside a paragraph are supported
    // (a detached run has no parent at all).
    XElement paragraph = run.Parent;
    if (paragraph == null || paragraph.Name != W.p)
        return null;

    MainDocumentPart mainPart = wordDoc.MainDocumentPart;
    StyleDefinitionsPart styleDefinitionsPart =
        mainPart == null ? null : mainPart.StyleDefinitionsPart;
    if (styleDefinitionsPart == null)
        return null;

    XElement styles = styleDefinitionsPart.GetXDocument().Root;
    List<XElement> layers = new List<XElement>();

    // 1 - Get run default
    XElement runDefault = styles.Elements(W.docDefaults)
        .Elements(W.rPrDefault)
        .Elements(W.rPr)
        .FirstOrDefault();
    if (runDefault != null)
        layers.Add(runDefault);

    // 2 - get paragraph style run properties
    string pStyle = (string)paragraph.Elements(W.pPr)
        .Elements(W.pStyle)
        .Attributes(W.val)
        .FirstOrDefault();
    XElement pStyleRunProperties = null;
    if (pStyle != null && StyleExists(styles, pStyle))
    {
        pStyleRunProperties = AssembleStyleInformation(styles, pStyle)
            .Elements(W.rPr)
            .FirstOrDefault();
    }
    else
    {
        // No (resolvable) paragraph style: fall back to the default paragraph
        // style. A styles part is not required to declare one, so guard it.
        XElement defaultParagraphStyle = styles
            .Elements(W.style)
            .FirstOrDefault(IsDefaultParagraphStyle);
        if (defaultParagraphStyle != null)
            pStyleRunProperties = defaultParagraphStyle.Elements(W.rPr).FirstOrDefault();
    }
    if (pStyleRunProperties != null)
        layers.Add(pStyleRunProperties);

    // 3 - get run style run properties
    string rStyle = (string)run.Elements(W.rPr)
        .Elements(W.rStyle)
        .Attributes(W.val)
        .FirstOrDefault();
    XElement rStyleRunProperties = null;
    if (rStyle != null && StyleExists(styles, rStyle))
    {
        rStyleRunProperties = AssembleStyleInformation(styles, rStyle)
            .Elements(W.rPr)
            .FirstOrDefault();
    }
    if (rStyleRunProperties != null)
        layers.Add(rStyleRunProperties);

    // Toggle properties are resolved across the three layers above. An empty
    // rPr stands in for missing document defaults so the helper never sees null.
    XElement toggleRunProperties = AssembleToggleProperties(
        runDefault ?? new XElement(W.rPr),
        pStyleRunProperties,
        rStyleRunProperties);
    if (toggleRunProperties != null)
        layers.Add(toggleRunProperties);

    // 4 - direct run properties
    XElement directRunProperties = run.Elements(W.rPr).FirstOrDefault();
    if (directRunProperties != null)
        layers.Add(directRunProperties);

    return AssembleRunProperties(layers);
}

// True when the styles root contains a w:style element with the given w:styleId.
private static bool StyleExists(XElement styles, string styleId)
{
    return styles.Elements(W.style)
        .Any(s => (string)s.Attribute(W.styleId) == styleId);
}

// True for a paragraph-type style flagged as default (w:default is an on/off value).
private static bool IsDefaultParagraphStyle(XElement style)
{
    if ((string)style.Attribute(W.type) != "paragraph")
        return false;
    string isDefault = (string)style.Attribute(W._default);
    return isDefault == "1" || isDefault == "true" || isDefault == "on";
}
/// <summary>
/// Folds a list of <c>w:rPr</c> layers into a single <c>w:rPr</c>, applying them in
/// list order via <see cref="MergeChildElements"/>; later entries are merged over
/// earlier ones.
/// </summary>
/// <param name="runPropertiesList">The run-property layers, lowest priority first.</param>
/// <returns>A new <c>w:rPr</c> element containing the merged result.</returns>
private static XElement AssembleRunProperties(List<XElement> runPropertiesList)
{
    // Start from an empty rPr that only declares the "w" namespace prefix.
    XElement emptyRunProperties = new XElement(
        W.rPr,
        new XAttribute(XNamespace.Xmlns + "w", W.w));

    return runPropertiesList.Aggregate(
        emptyRunProperties,
        (accumulated, layer) => MergeChildElements(accumulated, layer));
}
/// <summary>
/// Resolves the toggle run properties (bold, italic, caps, ...) across the document
/// defaults, the paragraph style and the run style, and returns them as a new
/// <c>w:rPr</c> element.
/// </summary>
/// <remarks>
/// For each toggle property: if the document default turns it on, that element wins.
/// Otherwise, when only one of the two styles defines it, that definition is used;
/// when both define it, an "off" value in one side defers to the other side, and two
/// "on" values cancel out to an explicit <c>w:val="0"</c>.
/// </remarks>
/// <param name="runDefault">Document default run properties; may be <c>null</c>.</param>
/// <param name="pStyleRunProperties">Run properties of the paragraph style; may be <c>null</c>.</param>
/// <param name="rStyleRunProperties">Run properties of the run style; may be <c>null</c>.</param>
/// <returns>A new <c>w:rPr</c> element holding the resolved toggle properties (possibly empty).</returns>
static XElement AssembleToggleProperties(XElement runDefault, XElement pStyleRunProperties, XElement rStyleRunProperties)
{
    XElement runToggleProperties = new XElement(W.rPr,
        new XAttribute(XNamespace.Xmlns + "w", W.w));

    foreach (XName toggleProperty in toggleProperties)
    {
        // A document without w:docDefaults/w:rPrDefault supplies no default layer.
        XElement runDefaultToggleProperty = runDefault == null
            ? null
            : runDefault.Elements(toggleProperty).FirstOrDefault();
        if (runDefaultToggleProperty != null && !IsToggleOff(runDefaultToggleProperty))
        {
            runToggleProperties.Add(runDefaultToggleProperty);
            continue;
        }

        XElement pStyleToggleProperty = pStyleRunProperties == null
            ? null
            : pStyleRunProperties.Elements(toggleProperty).FirstOrDefault();
        XElement rStyleToggleProperty = rStyleRunProperties == null
            ? null
            : rStyleRunProperties.Elements(toggleProperty).FirstOrDefault();

        if (pStyleToggleProperty == null && rStyleToggleProperty == null)
            continue;

        if (pStyleToggleProperty == null)
            runToggleProperties.Add(rStyleToggleProperty);
        else if (rStyleToggleProperty == null)
            runToggleProperties.Add(pStyleToggleProperty);
        else if (IsToggleOff(rStyleToggleProperty))
            runToggleProperties.Add(pStyleToggleProperty);
        else if (IsToggleOff(pStyleToggleProperty))
            runToggleProperties.Add(rStyleToggleProperty);
        else
            // Both styles turn the property on: the two toggles cancel each other.
            runToggleProperties.Add(new XElement(toggleProperty, new XAttribute(W.val, "0")));
    }

    return runToggleProperties;
}

// True when the toggle element explicitly carries an "off" value. The on/off
// attribute may be spelled "0", "false" or "off"; a missing w:val means "on".
private static bool IsToggleOff(XElement toggleElement)
{
    string val = (string)toggleElement.Attributes(W.val).FirstOrDefault();
    return val == "0" || val == "false" || val == "off";
}
/// <summary>
/// Enumerates a style and its ancestors, starting at <paramref name="styleId"/> and
/// following each style's <c>w:basedOn</c> reference towards the root style.
/// </summary>
/// <param name="styles">The root element of the styles part (parent of the <c>w:style</c> elements).</param>
/// <param name="styleId">The <c>w:styleId</c> of the most-derived style.</param>
/// <returns>
/// The <c>w:style</c> elements from most derived to least derived. The sequence ends
/// early (without yielding <c>null</c>) when a referenced style does not exist, and
/// stops instead of looping forever when the <c>w:basedOn</c> chain is circular.
/// </returns>
public static IEnumerable<XElement> StyleChainReverseOrder(XElement styles, string styleId)
{
    // Remember visited ids so a malformed, circular basedOn chain terminates.
    HashSet<string> visited = new HashSet<string>();
    string current = styleId;

    while (current != null && visited.Add(current))
    {
        string lookupId = current;
        XElement style = styles.Elements(W.style)
            .FirstOrDefault(s => (string)s.Attribute(W.styleId) == lookupId);

        // Dangling reference: there is nothing further to inherit from.
        if (style == null)
            yield break;

        yield return style;
        current = (string)style.Elements(W.basedOn).Attributes(W.val).FirstOrDefault();
    }
}
/// <summary>
/// Returns the inheritance chain of a style ordered from the least-derived ancestor
/// down to the style identified by <paramref name="styleId"/>, i.e. the reverse of
/// <see cref="StyleChainReverseOrder"/>.
/// </summary>
/// <param name="styles">The root element of the styles part.</param>
/// <param name="styleId">The <c>w:styleId</c> of the most-derived style.</param>
/// <returns>The chain of <c>w:style</c> elements, base style first.</returns>
public static IEnumerable<XElement> StyleChain(XElement styles, string styleId)
{
    IEnumerable<XElement> derivedToBase = StyleChainReverseOrder(styles, styleId);
    return derivedToBase.Reverse();
}
/// <summary>
/// Rolls up a style with all of its ancestors into a single <c>w:style</c> element by
/// merging the chain returned by <see cref="StyleChain"/> from base style to derived
/// style with <see cref="MergeChildElements"/>.
/// </summary>
/// <param name="styles">The root element of the styles part.</param>
/// <param name="styleId">The <c>w:styleId</c> of the style to roll up.</param>
/// <returns>A new <c>w:style</c> element containing the merged definition.</returns>
private static XElement AssembleStyleInformation(XElement styles, string styleId)
{
    // Seed: an empty style element that only declares the "w" namespace prefix.
    XElement emptyStyle = new XElement(
        W.style,
        new XAttribute(XNamespace.Xmlns + "w", W.w));

    IEnumerable<XElement> baseToDerived = StyleChain(styles, styleId);
    return baseToDerived.Aggregate(
        emptyStyle,
        (accumulated, next) => MergeChildElements(accumulated, next));
}
// Style-reference elements (w:pStyle and w:rStyle).
// NOTE(review): this array is not referenced by any other code visible in this
// file; it is not consulted by IsValidMergeElement, so MergeChildElements drops
// these elements from its output.
public static XName[] Others =
{
W.pStyle,
W.rStyle
};
// Container elements whose children are merged individually.
// Consulted by IsValidMergeElement and HasMergeElementsSemantics. Note that
// MergeChildElements itself recurses into w:rPr and w:pPr by comparing names
// directly rather than by looking them up in this array.
public static XName[] ElementsWithMergeElementsSemantics =
{
W.style,
W.rPr,
W.pPr
};
// Elements that MergeChildElements merges attribute by attribute: every attribute
// of the overriding element is kept, and attributes that exist only on the
// previously merged element are carried over.
// W.spacing is listed here, but MergeChildElements treats a spacing element found
// inside w:rPr as a whole-element replacement instead.
public static XName[] ElementsWithMergeAttributesSemantics =
{
W.ind,
W.spacing,
W.lang
};
// Elements that MergeChildElements copies wholesale from the overriding element,
// replacing any element of the same name in the previously merged element.
// W.shd and W.snapToGrid each appear twice in this list; that is harmless because
// the array is only used for Contains() membership checks in this file.
public static XName[] ElementsWithReplaceElementsSemantics =
{
W.name, // The style Name element
W.adjustRightInd,
W.autoSpaceDE,
W.autoSpaceDN,
W.bidi,
W.cnfStyle, // within a table
W.contextualSpacing,
W.divId,
W.framePr,
W.jc,
W.keepLines,
W.keepNext,
W.kinsoku,
W.mirrorIndents,
W.numPr,
W.outlineLvl,
W.overflowPunct,
W.pageBreakBefore,
W.pBdr,
W.shd,
W.snapToGrid,
W.suppressAutoHyphens,
W.suppressLineNumbers,
W.suppressOverlap,
W.tabs,
W.textAlignment,
W.textboxTightWrap, // within a textbox
W.textDirection,
W.topLinePunct,
W.widowControl,
W.wordWrap,
W.b,
W.bCs,
W.bdr,
W.caps,
W.color,
W.cs,
W.dstrike,
W.eastAsianLayout,
W.effect,
W.em,
W.emboss,
W.fitText,
W.highlight,
W.i,
W.iCs,
W.imprint,
W.kern,
W.noProof,
W.oMath,
W.outline,
W.position,
W.rFonts,
W.rtl,
W.shadow,
W.shd,
W.smallCaps,
W.snapToGrid,
//W.spacing, // different from paragraph spacing
W.specVanish,
W.strike,
W.sz,
W.szCs,
W.u,
W.vanish,
W.vertAlign,
W._w,
W.webHidden
};
// Run properties that are resolved with toggle semantics: AssembleToggleProperties
// iterates over this array, and IsToggleProperty tests membership in it.
public static XName[] toggleProperties =
{
W.b,
W.bCs,
W.caps,
W.emboss,
W.i,
W.iCs,
W.imprint,
W.outline,
W.shadow,
W.smallCaps,
W.strike,
W.vanish
};
/// <summary>
/// Tells whether an element name takes part in merging at all, i.e. whether it is
/// listed in any of the merge-attributes, merge-elements or replace-elements arrays.
/// </summary>
/// <param name="name">The element name to look up.</param>
/// <returns><c>true</c> when the name appears in one of the three arrays.</returns>
public static bool IsValidMergeElement(XName name)
{
    return ElementsWithMergeAttributesSemantics.Contains(name)
        || ElementsWithMergeElementsSemantics.Contains(name)
        || ElementsWithReplaceElementsSemantics.Contains(name);
}
/// <summary>
/// Tells whether an element name is listed in <see cref="toggleProperties"/>.
/// </summary>
/// <param name="name">The element name to look up.</param>
/// <returns><c>true</c> when the name is one of the toggle properties.</returns>
public static bool IsToggleProperty(XName name)
{
    bool listed = toggleProperties.Contains(name);
    return listed;
}
/// <summary>
/// Tells whether an element name is listed in
/// <see cref="ElementsWithReplaceElementsSemantics"/>.
/// </summary>
/// <param name="name">The element name to look up.</param>
/// <returns><c>true</c> when the element is replaced as a whole during merging.</returns>
public static bool HasReplaceSemantics(XName name)
{
    bool listed = ElementsWithReplaceElementsSemantics.Contains(name);
    return listed;
}
/// <summary>
/// Tells whether an element name is listed in
/// <see cref="ElementsWithMergeElementsSemantics"/>.
/// </summary>
/// <param name="name">The element name to look up.</param>
/// <returns><c>true</c> when the name is one of the listed container elements.</returns>
public static bool HasMergeElementsSemantics(XName name)
{
    bool listed = ElementsWithMergeElementsSemantics.Contains(name);
    return listed;
}
/// <summary>
/// Tells whether an element name is listed in
/// <see cref="ElementsWithMergeAttributesSemantics"/>.
/// </summary>
/// <param name="name">The element name to look up.</param>
/// <returns><c>true</c> when the element is merged attribute by attribute.</returns>
public static bool HasMergeAttributesSemantics(XName name)
{
    bool listed = ElementsWithMergeAttributesSemantics.Contains(name);
    return listed;
}
/// <summary>
/// Merges <paramref name="element"/> over <paramref name="mergedElement"/> and returns
/// the result as a new element; neither input is modified.
/// </summary>
/// <remarks>
/// When only one argument is non-null, that element is copied with its children
/// filtered: <c>w:rPr</c>/<c>w:pPr</c> children are copied recursively, children
/// accepted by <see cref="IsValidMergeElement"/> are kept, everything else is dropped.
/// When both are present, each child of <paramref name="element"/> is handled by its
/// semantics (replace, merge attributes, or recursive merge for <c>w:rPr</c>/<c>w:pPr</c>),
/// and children that exist only in <paramref name="mergedElement"/> are appended.
/// On a <c>w:style</c> element only the <c>w:type</c> and <c>w:styleId</c> attributes
/// are retained.
/// </remarks>
/// <param name="mergedElement">The lower-priority (already merged) element; may be <c>null</c>.</param>
/// <param name="element">The higher-priority element merged on top; may be <c>null</c>.</param>
/// <returns>The merged element, or <c>null</c> when both arguments are <c>null</c>.</returns>
public static XElement MergeChildElements(XElement mergedElement, XElement element)
{
    // Nothing to merge at all; previously this dereferenced null.
    if (mergedElement == null && element == null)
        return null;

    if (mergedElement == null || element == null)
    {
        // Only one side exists: produce a filtered copy of it.
        XElement source = element ?? mergedElement;
        return new XElement(source.Name,
            new XAttribute(XNamespace.Xmlns + "w", W.w),
            GetMergeableAttributes(source),
            source.Elements().Select(e =>
            {
                if (e.Name == W.rPr || e.Name == W.pPr)
                    return MergeChildElements(null, e);
                if (IsValidMergeElement(e.Name))
                    return e;
                return null;
            }));
    }

    return new XElement(element.Name,
        new XAttribute(XNamespace.Xmlns + "w", W.w),
        GetMergeableAttributes(element),
        element.Elements().Select(e =>
        {
            if (HasReplaceSemantics(e.Name))
                return e;
            // spacing within run properties has replace semantics
            if (element.Name == W.rPr && e.Name == W.spacing)
                return e;
            if (HasMergeAttributesSemantics(e.Name))
            {
                // Overriding attributes win; attributes present only on the
                // previously merged element are carried over.
                return new XElement(e.Name,
                    e.Attributes(),
                    mergedElement.Elements(e.Name).Attributes()
                        .Where(a => !e.Attributes().Any(z => z.Name == a.Name)));
            }
            if (e.Name == W.rPr || e.Name == W.pPr)
                return MergeChildElements(mergedElement.Element(e.Name), e);
            return null;
        }),
        // Children defined only by the lower-priority element are inherited as-is.
        mergedElement.Elements()
            .Where(m => !element.Elements(m.Name).Any()));
}

// Attributes copied onto a merged element: namespace declarations are skipped, and
// on a w:style element only w:type and w:styleId are kept.
private static IEnumerable<XAttribute> GetMergeableAttributes(XElement element)
{
    bool isStyle = element.Name == W.style;
    return element.Attributes().Where(a =>
        !a.IsNamespaceDeclaration &&
        (!isStyle || a.Name == W.type || a.Name == W.styleId));
}
}
}
Run Code Online (Sandbox Code Playgroud)