Migration notes
Why To Migrate?
Here are the key reasons to use the updated API that GroupDocs.Parser for .NET has provided since version 19.8:
- The Parser class was introduced as a single entry point for extracting data from a document.
- Data extraction was unified for all data types.
- The document-related classes were unified into a common set of classes.
- Product architecture was redesigned from scratch in order to simplify passing options and classes to manipulate data.
- Document information and preview generation procedures were simplified.
How To Migrate?
Here is a brief comparison of how to extract data using the old and the new API.
Text
Old coding style
// Obtain the factory that creates extractors
ExtractorFactory factory = new ExtractorFactory();
// Open a text extractor for the file; it is disposed when the using block ends
using (TextExtractor extractor = factory.CreateTextExtractor(filePath))
{
    // Print the text line by line, stopping at the first null returned by ExtractLine
    string line;
    while ((line = extractor.ExtractLine()) != null)
    {
        Console.WriteLine(line);
    }
}
New coding style
// Open the document with the Parser class
using (Parser parser = new Parser(filePath))
{
    // Request the document text as a TextReader
    using (TextReader reader = parser.GetText())
    {
        // A null reader signals that text extraction isn't supported
        if (reader == null)
        {
            Console.WriteLine("Text extraction isn't supported.");
            return;
        }
        // Print the text line by line, stopping at the first null returned by ReadLine
        string line;
        while ((line = reader.ReadLine()) != null)
        {
            Console.WriteLine(line);
        }
    }
}
Text Page
Old coding style
// Obtain the factory that creates extractors
ExtractorFactory factory = new ExtractorFactory();
// Open a text extractor for the file
using (TextExtractor extractor = factory.CreateTextExtractor(filePath))
{
    // Page-by-page extraction is only available when the extractor implements IPageTextExtractor
    IPageTextExtractor pageExtractor = extractor as IPageTextExtractor;
    if (pageExtractor != null)
    {
        // Fetch the first page (index 0) and print it
        var firstPageText = pageExtractor.ExtractPage(0);
        Console.WriteLine(firstPageText);
    }
}
New coding style
// Open the document, then request the text of the first page (index 0) as a TextReader
using (Parser parser = new Parser(filePath))
using (TextReader reader = parser.GetText(0))
{
    // A null reader signals that text extraction isn't supported; nothing is printed then
    if (reader != null)
    {
        // Read the whole page text and print it
        string pageText = reader.ReadToEnd();
        Console.WriteLine(pageText);
    }
}
Search
Old coding style
// Obtain the factory that creates extractors
ExtractorFactory factory = new ExtractorFactory();
// Open a text extractor for the file
using (TextExtractor extractor = factory.CreateTextExtractor(filePath))
{
    // Searching is only available when the extractor implements ISearchable
    ISearchable searchable = extractor as ISearchable;
    if (searchable != null)
    {
        // The handler collects the search results into a list
        ListSearchHandler resultCollector = new ListSearchHandler();
        SearchOptions options = new SearchOptions(null);
        string[] keywords = new string[] { "keyword" };
        // Search "keyword" in the document
        searchable.Search(options, resultCollector, keywords);
        // Print the index and the found text of every result
        foreach (SearchResult hit in resultCollector.List)
        {
            string line = string.Format("at {0}: {1}", hit.Index, hit.FoundText);
            Console.WriteLine(line);
        }
    }
}
New coding style
// Open the document with the Parser class
using (Parser parser = new Parser(filePath))
{
    // Search "keyword" in the document
    IEnumerable<SearchResult> matches = parser.Search("keyword");
    if (matches != null)
    {
        // Print the position and the text of every match
        foreach (SearchResult match in matches)
        {
            string line = string.Format("at {0}: {1}", match.Position, match.Text);
            Console.WriteLine(line);
        }
    }
    else
    {
        // A null result signals that search isn't supported
        Console.WriteLine("Search isn't supported.");
        return;
    }
}
File Type Detection
Old coding style
// Detect the file type with the default composite detector, then print it
var detectedType = CompositeMediaTypeDetector.Default.Detect(filePath);
Console.WriteLine(detectedType);
New coding style
// Open the document with the Parser class
using (Parser parser = new Parser(filePath))
{
    // Read the document info and print its file type
    var documentInfo = parser.GetDocumentInfo();
    Console.WriteLine(documentInfo.FileType);
}
Metadata
Old coding style
// Obtain the factory that creates extractors
ExtractorFactory factory = new ExtractorFactory();
// Create a metadata extractor for the file
MetadataExtractor extractor = factory.CreateMetadataExtractor(filePath);
// Extract the metadata as key/value pairs
MetadataCollection metadata = extractor.ExtractMetadata(filePath);
// Print every pair as "key = value"
foreach (KeyValuePair<string, string> entry in metadata)
{
    string line = string.Format("{0} = {1}", entry.Key, entry.Value);
    Console.WriteLine(line);
}
New coding style
// Open the document with the Parser class
using (Parser parser = new Parser(filePath))
{
    // Extract the metadata items
    IEnumerable<MetadataItem> items = parser.GetMetadata();
    // A null result signals that metadata extraction isn't supported
    if (items == null)
    {
        Console.WriteLine("Metadata extraction isn't supported.");
        return;
    }
    // Print every item as "name = value"
    foreach (MetadataItem entry in items)
    {
        string line = string.Format("{0} = {1}", entry.Name, entry.Value);
        Console.WriteLine(line);
    }
}
Structure
Old coding style
// Obtain the factory that creates extractors
ExtractorFactory factory = new ExtractorFactory();
// Open a text extractor for the file
using (TextExtractor extractor = factory.CreateTextExtractor(filePath))
{
    // Structure extraction is only available when the extractor implements IStructuredExtractor
    IStructuredExtractor structured = extractor as IStructuredExtractor;
    if (structured != null)
    {
        // The handler records every hyperlink reported during extraction
        Handler linkCollector = new Handler();
        // Walk the text structure, feeding events to the handler
        structured.ExtractStructured(linkCollector);
        // Print the collected hyperlinks
        foreach (string url in linkCollector.Links)
        {
            Console.WriteLine(url);
        }
    }
}
/// <summary>
/// Structured-extraction handler that collects the links of the hyperlinks it is notified about.
/// </summary>
private class Handler : StructuredHandler
{
    /// <summary>
    /// Links gathered so far, in the order they were reported.
    /// </summary>
    public List<string> Links { get; private set; }

    /// <summary>
    /// Creates a handler with an empty link list.
    /// </summary>
    public Handler()
    {
        Links = new List<string>();
    }

    /// <summary>
    /// Records the link of a hyperlink when its start is reported.
    /// </summary>
    /// <param name="properties">Properties of the hyperlink; its Link value is stored.</param>
    protected override void OnStartHyperlink(HyperlinkProperties properties)
    {
        Links.Add(properties.Link);
    }
}
New coding style
// Create an instance of Parser class
using (Parser parser = new Parser(filePath))
{
    // Extract the text structure as an XML reader
    using (XmlReader reader = parser.GetStructure())
    {
        // A null reader signals that text structure extraction isn't supported
        if (reader == null)
        {
            Console.WriteLine("Text structure extraction isn't supported.");
            return;
        }
        // Scan the XML document node by node, looking for hyperlinks
        while (reader.Read())
        {
            // Skip everything except elements named "hyperlink".
            // The name is compared case-insensitively without allocating a lowercased copy per node;
            // checking NodeType is sufficient, so no extra IsStartElement() call is needed.
            if (reader.NodeType != XmlNodeType.Element
                || !string.Equals(reader.Name, "hyperlink", StringComparison.OrdinalIgnoreCase))
            {
                continue;
            }
            // Print the "link" attribute when the element has one
            string link = reader.GetAttribute("link");
            if (link != null)
            {
                Console.WriteLine(link);
            }
        }
    }
}