[转] – save pictures from MS Word(从Word中保存图片对象)

作为word文档解析的一部分,图片的提取非常的重要。在google上搜索word图片提取找到的基本是VBA的方式,写一段脚本嵌入word中然后运行这样子,一来不熟悉,二来也不符合我对word文档处理的要求。我这里是需要从word文档外部来解析,也就是用一个程序打开word文件,读取其中的内容,是一种从外部处理的方式,与VBA这种嵌入的方式有很大差别。其实到.net framework2.0之后,VSTO已经提供了很多方便操作office文件的方法,当然也包括word。这里就介绍一下借助VSTO和剪贴板来提取word图片的方法。

业务需求

有一个word文档,里面包含了一些构件描述信息和一些图片,要求找出图片并另存到一个目录下,然后将该图片替换成一个指示出了图片位置的标签,比如

img/PORT.doc/picture_2.Jpeg

目的是为了在解析构件信息并存入数据库之后,网站的表示层可以直接根据该标签找到图片并显示。

可行的解决方案


既然是要提取图片,那么首先就得在word中找到图片。图片(Picture)在word中会以两种形式存在——Shape和InlineShape——如果要取出所有的图片,一定记住不要漏掉了任何一个。但是并不是所有的Shape和InlineShape都是picture,我们需要先做判断:

Shape中有两种类型的picture:
MsoShapeType.msoPicture
MsoShapeType.msoLinkedPicture

InlineShape中有两种类型的picture:
WdInlineShapeType.wdInlineShapePicture
WdInlineShapeType.wdInlineShapeLinkedPicture

找到了所有的图片之后将它复制到剪贴板,然后就可以保存了。基本步骤如下:

  1. 首先打开文档。因为要替换图片,那么要求打开文档的时候是可以编辑的——ReadOnly设为false。
  2. 读取所有的shape,包括Shape和InlineShape。
  3. 取一个shape判断是否为Picture,如果是则将其选中,并拷贝到剪贴板。
  4. 将剪贴板的图片保存到指定目录下。
  5. 找到图片在word中的位置,在其前面插入标记
  6. 将图片从word中删除
  7. 继续读取下一个shape

打开文档

// Start Word through COM automation and open the document for editing.
oWordApp = new ApplicationClass();
// The pictures are going to be replaced by text markers, so the document must be
// opened writable: ReadOnly has to be false (the C# literal is lower-case).
object readOnly = false;

// Documents.Open takes every argument by reference, so the path is boxed first.
object o_fileName = fileName;
Document wordDoc;
// Positional arguments: 1st = FileName, 3rd = ReadOnly, 12th = Visible;
// all other optional arguments are left unspecified via `missing`.
// NOTE(review): `missing` and `isVisible` are declared outside this snippet.
wordDoc = oWordApp.Documents.Open(ref o_fileName,
    ref missing, ref readOnly,
    ref missing, ref missing, ref missing,
    ref missing, ref missing, ref missing,
    ref missing, ref missing, ref isVisible,
    ref missing, ref missing, ref missing, ref missing);
wordDoc.Activate();

 

读取所有的shape

// The document exposes two separate collections (Shapes and InlineShapes);
// both are gathered into a single untyped list so they can be walked together.
IList shapes = new ArrayList();

foreach (Shape floatingShape in doc.Shapes)
{
    shapes.Add(floatingShape);
}

foreach (InlineShape embeddedShape in doc.InlineShapes)
{
    shapes.Add(embeddedShape);
}

 

判断是否为Picture

 


// Decide whether the current item is a picture. The two shape kinds use
// different enums, so each one is checked against its own pair of
// "picture" / "linked picture" values.
if (isCommonShape)
{
    commonShape = (Shape) shape;
    MsoShapeType shapeKind = commonShape.Type;
    isPicture = shapeKind == MsoShapeType.msoPicture
        || shapeKind == MsoShapeType.msoLinkedPicture;
}
else if (isInlineShape)
{
    inlineShpae = (InlineShape) shape;
    WdInlineShapeType inlineKind = inlineShpae.Type;
    isPicture = inlineKind == WdInlineShapeType.wdInlineShapePicture
        || inlineKind == WdInlineShapeType.wdInlineShapeLinkedPicture;
}

选中,并拷贝到剪贴板

// Make the picture the current selection. Note the two Select signatures differ:
// InlineShape.Select() takes no argument, Shape.Select(ref object) takes one.
if (!isCommonShape)
{
    inlineShpae.Select();
}
else
{
    commonShape.Select(ref missing);
}

// Copy whatever is now selected to the clipboard as a picture.
wordApp.Selection.CopyAsPicture();

 

将图片保存到指定目录

// Clipboard.GetImage() returns a new Image that the caller owns (or null when the
// clipboard holds no image), so it is null-checked and disposed after saving.
using (System.Drawing.Image picture = System.Windows.Forms.Clipboard.GetImage())
{
    if (picture == null)
    {
        throw new InvalidOperationException("no image on the clipboard");
    }

    picture.Save(fileNameOfPict, ImageFormat.Jpeg);
}

 

插入标记

 

// The picture is still selected, so the selection start is the picture's position.
object start = wordApp.Selection.Start;
// A zero-length range at that position: setting Text inserts the marker there.
// NOTE(review): the "{0}" template emits the bare file path; if the marker should
// carry surrounding tag text, extend the template here.
doc.Range(ref start, ref start).Text = string.Format("{0}", fileNameOfPict);

将图片从word中删除

// Remove the picture from the document. Only one of the two references is set
// for any given item, so the delete must go through the matching one.
if (isCommonShape)
{
    commonShape.Delete();
}
else
{
    inlineShpae.Delete();
}

 

 

完整的代码

/// <summary>
/// Collects every floating and inline shape of <paramref name="doc"/> into one
/// list and passes it to <c>ExtractShape</c> together with the <c>oWordApp</c> instance.
/// </summary>
/// <param name="doc">The Word document whose shapes are processed.</param>
public void ProcessAllPicturesOfDoc(Document doc)
{
    IList shapes = new ArrayList();

    // Floating shapes first ...
    foreach (Shape floatingShape in doc.Shapes)
    {
        shapes.Add(floatingShape);
    }

    // ... then the shapes that sit inline with the text.
    foreach (InlineShape embeddedShape in doc.InlineShapes)
    {
        shapes.Add(embeddedShape);
    }

    ExtractShape(shapes, doc, oWordApp);
}
/// <summary>
/// Saves every picture found in <paramref name="shapes"/> as a JPEG file under
/// <c>img/&lt;document name&gt;/</c>, inserts the saved file's path into the document at
/// the picture's position, and then deletes the picture from the document.
/// </summary>
/// <param name="shapes">Items that are each a <c>Shape</c> or an <c>InlineShape</c>; items that are not pictures are skipped.</param>
/// <param name="doc">Document that receives the text markers; its <c>Name</c> is used as the output sub-directory.</param>
/// <param name="wordApp">Word application whose current selection is used to copy each picture.</param>
/// <exception cref="ArgumentException">An item is neither a <c>Shape</c> nor an <c>InlineShape</c>.</exception>
/// <exception cref="InvalidOperationException">The clipboard holds no image after the copy.</exception>
/// <remarks>
/// The pictures travel through the system clipboard, so its previous content is overwritten.
/// NOTE(review): System.Windows.Forms.Clipboard is documented to require an STA thread — confirm the caller.
/// </remarks>
public void ExtractShape(IList shapes,Document doc,ApplicationClass wordApp)
{
    object missing = Missing.Value;
    // Relative output directory; it is also the prefix of the marker written into the document.
    string pictDirect = "img/" + doc.Name + "/";
    // Running picture number, used in the file name (picture_1, picture_2, ...).
    int i = 0;

    foreach (object shape in shapes)
    {
        bool isPicture;
        bool isCommonShape = shape is Shape;
        bool isInlineShape = shape is InlineShape;

        Shape commonShape = null;
        InlineShape inlineShape = null;

        // Check if the shape is a picture; each shape kind has its own type enum.
        if (isCommonShape)
        {
            commonShape = (Shape) shape;
            isPicture = (commonShape.Type == MsoShapeType.msoPicture ||
                         commonShape.Type == MsoShapeType.msoLinkedPicture);
        }
        else if (isInlineShape)
        {
            inlineShape = (InlineShape) shape;
            isPicture = (inlineShape.Type == WdInlineShapeType.wdInlineShapePicture ||
                         inlineShape.Type == WdInlineShapeType.wdInlineShapeLinkedPicture);
        }
        else
        {
            throw new ArgumentException("unknown Shape");
        }

        if (!isPicture)
        {
            continue;
        }

        i++;

        // Select the shape. Note the two Select signatures differ between the kinds.
        if (isCommonShape)
        {
            commonShape.Select(ref missing);
        }
        else
        {
            inlineShape.Select();
        }

        // Copy the picture to the clipboard.
        wordApp.Selection.CopyAsPicture();
        if (!System.Windows.Forms.Clipboard.ContainsImage())
        {
            throw new InvalidOperationException("error occures when copying picture");
        }

        string fileNameOfPict = pictDirect + "picture_" + i.ToString() + ".Jpeg";

        // GetImage() returns a new Image owned by the caller (or null), so dispose it after saving.
        using (System.Drawing.Image picture = System.Windows.Forms.Clipboard.GetImage())
        {
            if (picture == null)
            {
                throw new InvalidOperationException("error occures when copying picture");
            }

            // CreateDirectory does nothing when the directory already exists.
            Directory.CreateDirectory(pictDirect);
            picture.Save(fileNameOfPict, ImageFormat.Jpeg);
        }

        // Insert the marker at the start position of the still-selected shape.
        // Use the wordApp parameter (not a field) so the selection queried here is
        // the one that was just used for the copy.
        object start = wordApp.Selection.Start;
        doc.Range(ref start, ref start).Text = string.Format("{0}", fileNameOfPict);

        // Delete the picture through whichever reference is set for this item.
        if (isCommonShape)
        {
            commonShape.Delete();
        }
        else
        {
            inlineShape.Delete();
        }
    }
}

[此文引自:http://blog.ccidnet.com/home.php?mod=space&uid=1406&do=blog&id=144058]