Migration notes Leave feedback

Why To Migrate?

Here are the key reasons to use the updated API that GroupDocs.Parser for .NET has provided since version 19.8:

The Parser class was introduced as a single entry point for extracting data from a document.
Data extraction was unified for all data types.
The overall document-related classes were unified into a common set.
The product architecture was redesigned from scratch to simplify how options are passed and how the classes that manipulate data are used.
Document information and preview generation procedures were simplified.

How To Migrate?

Here is a brief comparison of how to extract data using the old and the new API.

Text

Old coding style

// The legacy API hands out extractors through a factory
ExtractorFactory extractorFactory = new ExtractorFactory();
// Open a text extractor for the document; it is disposed when the block ends
using (TextExtractor textExtractor = extractorFactory.CreateTextExtractor(filePath))
{
    // Print the document line by line, stopping at the first null returned by ExtractLine()
    string line;
    while ((line = textExtractor.ExtractLine()) != null)
    {
        Console.WriteLine(line);
    }
}

New coding style

// Open the document with Parser, the entry point of the new API
using (Parser parser = new Parser(filePath))
{
    // Request the document text as a TextReader
    using (TextReader textReader = parser.GetText())
    {
        // A null reader means text extraction isn't supported for this document
        if (textReader == null)
        {
            Console.WriteLine("Text extraction isn't supported.");
            return;
        }
        // Print every line; ReadLine() returns null at the end of the text
        for (string line = textReader.ReadLine(); line != null; line = textReader.ReadLine())
        {
            Console.WriteLine(line);
        }
    }
}

Text Page

Old coding style

// Obtain a text extractor for the document from a new extractor factory
using (TextExtractor textExtractor = new ExtractorFactory().CreateTextExtractor(filePath))
{
    // Page-by-page extraction is available only when the extractor implements IPageTextExtractor
    IPageTextExtractor pageExtractor = textExtractor as IPageTextExtractor;
    if (pageExtractor != null)
    {
        // Index 0 addresses the first page
        Console.WriteLine(pageExtractor.ExtractPage(0));
    }
}

New coding style

// Open the document with Parser
using (Parser parser = new Parser(filePath))
{
    // Request the text of the first page (index 0) as a TextReader
    using (TextReader pageReader = parser.GetText(0))
    {
        // A null reader means text extraction isn't supported; nothing is printed in that case
        if (pageReader != null)
        {
            // Read the whole page at once and print it
            string pageText = pageReader.ReadToEnd();
            Console.WriteLine(pageText);
        }
    }
}

Search

Old coding style

// The legacy API hands out extractors through a factory
ExtractorFactory extractorFactory = new ExtractorFactory();
// Open a text extractor for the document
using (TextExtractor textExtractor = extractorFactory.CreateTextExtractor(filePath))
{
    // Searching is available only when the extractor implements ISearchable
    ISearchable searchable = textExtractor as ISearchable;
    if (searchable != null)
    {
        // The handler accumulates the search results in its List property
        ListSearchHandler resultCollector = new ListSearchHandler();
        SearchOptions searchOptions = new SearchOptions(null);
        string[] keywords = new string[] { "keyword" };
        // Look for "keyword" in the document
        searchable.Search(searchOptions, resultCollector, keywords);
        // Report where each match was found and the text that matched
        foreach (SearchResult match in resultCollector.List)
        {
            string message = string.Format("at {0}: {1}", match.Index, match.FoundText);
            Console.WriteLine(message);
        }
    }
}

New coding style

// Open the document with Parser
using (Parser parser = new Parser(filePath))
{
    // Look for "keyword" in the document
    IEnumerable<SearchResult> matches = parser.Search("keyword");
    // A null result means search isn't supported for this document
    if (matches == null)
    {
        Console.WriteLine("Search isn't supported.");
        return;
    }
    // Report the position and the text of every match
    foreach (SearchResult match in matches)
    {
        string message = string.Format("at {0}: {1}", match.Position, match.Text);
        Console.WriteLine(message);
    }
}

File Type Detection

Old coding style

// Ask the default composite detector for the file type of the document
var detectedType = CompositeMediaTypeDetector.Default.Detect(filePath);
// Print the detected type
Console.WriteLine(detectedType);

New coding style

// Open the document with Parser
using (Parser parser = new Parser(filePath))
{
    // Fetch the document information, then print the file type it reports
    var documentInfo = parser.GetDocumentInfo();
    Console.WriteLine(documentInfo.FileType);
}

Metadata

Old coding style

// The legacy API hands out extractors through a factory
ExtractorFactory extractorFactory = new ExtractorFactory();
// Obtain the metadata extractor for the document
MetadataExtractor metadataExtractor = extractorFactory.CreateMetadataExtractor(filePath);
// Read the metadata as a collection of key/value pairs
MetadataCollection entries = metadataExtractor.ExtractMetadata(filePath);
// Print each entry as "key = value"
foreach (KeyValuePair<string, string> entry in entries)
{
    string message = string.Format("{0} = {1}", entry.Key, entry.Value);
    Console.WriteLine(message);
}

New coding style

// Open the document with Parser
using (Parser parser = new Parser(filePath))
{
    // Request the metadata items of the document
    IEnumerable<MetadataItem> items = parser.GetMetadata();
    // A null result means metadata extraction isn't supported for this document
    if (items == null)
    {
        Console.WriteLine("Metadata extraction isn't supported.");
        return;
    }
    // Print each item as "name = value"
    foreach (MetadataItem entry in items)
    {
        string message = string.Format("{0} = {1}", entry.Name, entry.Value);
        Console.WriteLine(message);
    }
}

Structure

Old coding style

// The legacy API hands out extractors through a factory
ExtractorFactory extractorFactory = new ExtractorFactory();
// Open a text extractor for the document
using (TextExtractor textExtractor = extractorFactory.CreateTextExtractor(filePath))
{
    // Structure extraction is available only when the extractor implements IStructuredExtractor
    IStructuredExtractor structured = textExtractor as IStructuredExtractor;
    if (structured != null)
    {
        // The handler records hyperlinks while the structure is being extracted
        Handler linkCollector = new Handler();
        structured.ExtractStructured(linkCollector);
        // Print every hyperlink the handler recorded
        foreach (string hyperlink in linkCollector.Links)
        {
            Console.WriteLine(hyperlink);
        }
    }
}

// Structured-extraction handler that records the link of every hyperlink it is notified about
private class Handler : StructuredHandler
{
    // Backing storage for the recorded links, in the order they were reported
    private readonly List<string> links;

    public Handler()
    {
        links = new List<string>();
    }

    // Links recorded so far
    public List<string> Links
    {
        get { return links; }
    }

    // Invoked when a hyperlink starts; stores the link carried by the hyperlink properties
    protected override void OnStartHyperlink(HyperlinkProperties properties)
    {
        links.Add(properties.Link);
    }
}

New coding style

// Open the document with Parser
using (Parser parser = new Parser(filePath))
{
    // Request the text structure of the document as an XML reader
    using (XmlReader xml = parser.GetStructure())
    {
        // A null reader means text structure extraction isn't supported for this document
        if (xml == null)
        {
            Console.WriteLine("Text structure extraction isn't supported.");
            return;
        }
        // Walk the XML node by node, looking for hyperlinks
        while (xml.Read())
        {
            // Only start elements named "hyperlink" (compared case-insensitively) are of interest
            bool isHyperlinkStart = xml.NodeType == XmlNodeType.Element
                && xml.IsStartElement()
                && xml.Name.ToLowerInvariant() == "hyperlink";
            if (!isHyperlinkStart)
            {
                continue;
            }
            // Print the "link" attribute when the element has one
            string link = xml.GetAttribute("link");
            if (link != null)
            {
                Console.WriteLine(link);
            }
        }
    }
}

We value your opinion. Your feedback will help us improve our documentation.

Migration notes Leave feedback

Why To Migrate?

How To Migrate?

Text

Text Page

Search

File Type Detection

Metadata

Structure

Was this page helpful?

Any additional feedback you'd like to share with us?

Please tell us how we can improve this page.

Thank you for your feedback!