Saturday, January 18, 2014

How to (parse the text content)/ read from Microsoft Word Document doc

using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Runtime.InteropServices.ComTypes;
 
namespace readDOC
{
    class Program
    {
        static void Main(string[] args)
        {
            Microsoft.Office.Interop.Word.Application word = new Microsoft.Office.Interop.Word.Application();
            object miss = System.Reflection.Missing.Value;
            object path = @"C:\DOC\myDocument.docx";
            object readOnly = true;
            Microsoft.Office.Interop.Word.Document docs = word.Documents.Open(ref path, ref miss, ref readOnly, ref miss, ref miss, ref miss, ref miss, ref miss, ref miss, ref miss, ref miss, ref miss, ref miss, ref miss, ref miss, ref miss);
            string totaltext = "";
            for (int i = 0; i < docs.Paragraphs.Count; i++)
            {
                   totaltext += " \r\n "+ docs.Paragraphs[i+1].Range.Text.ToString();
            }
            Console.WriteLine(totaltext);
            docs.Close();
            word.Quit();
        }
    }
}
Reference:
http://mantascode.com/c-how-to-parse-the-text-content-from-microsoft-word-document/

No comments:

Post a Comment

Image noise comparison methods

 1. using reference image technique     - peak_signal_noise_ratio (PSNR)     - SSI 2. non-reference image technique     - BRISQUE python pac...