Here are the key reasons to use the updated API that GroupDocs.Parser for .NET has provided since version 19.8:
The Parser class was introduced as a single entry point for extracting data from a document.
Data extraction was unified for all data types.
The document-related classes were unified into a common set.
Product architecture was redesigned from scratch in order to simplify passing options and classes to manipulate data.
Document information and preview generation procedures were simplified.
How To Migrate?
Here is a brief comparison of how to extract data using the old and the new API.
Text
Old coding style
// Old API: extract the document text line by line with a TextExtractor.

// Create an extractor factory
ExtractorFactory factory = new ExtractorFactory();

// Create a text extractor; the using block disposes it once extraction is done
using (TextExtractor extractor = factory.CreateTextExtractor(filePath))
{
    // Extract a text from the text extractor.
    // The loop ends when ExtractLine() returns null.
    string textLine;
    while ((textLine = extractor.ExtractLine()) != null)
    {
        Console.WriteLine(textLine);
    }
}
New coding style
// New API: extract the document text line by line through a TextReader.

// Create an instance of Parser class
using (Parser parser = new Parser(filePath))
{
    // Extract a text to the reader
    using (TextReader reader = parser.GetText())
    {
        // Check if text extraction is supported
        if (reader == null)
        {
            Console.WriteLine("Text extraction isn't supported.");
            return;
        }

        // Extract a text from the reader.
        // The loop ends when ReadLine() returns null.
        string textLine;
        while ((textLine = reader.ReadLine()) != null)
        {
            Console.WriteLine(textLine);
        }
    }
}
Text Page
Old coding style
// Old API: extract the text of the first page when the extractor supports pagination.

// Create an extractor factory
ExtractorFactory factory = new ExtractorFactory();

// Create a text extractor
using (TextExtractor extractor = factory.CreateTextExtractor(filePath))
{
    // Check if the extractor supports pagination
    IPageTextExtractor pte = extractor as IPageTextExtractor;
    if (pte != null)
    {
        // Extract the first page (index 0)
        Console.WriteLine(pte.ExtractPage(0));
    }
}
New coding style
// New API: extract the text of the first page through a TextReader.

// Create an instance of Parser class
using (Parser parser = new Parser(filePath))
{
    // Extract the first page text (index 0) to the reader
    using (TextReader reader = parser.GetText(0))
    {
        // Check if text extraction is supported
        if (reader != null)
        {
            // Extract a text from the reader
            Console.WriteLine(reader.ReadToEnd());
        }
    }
}
Search
Old coding style
// Old API: search a keyword through the ISearchable interface and a list handler.

// Create an extractor factory
ExtractorFactory factory = new ExtractorFactory();

// Create a text extractor
using (TextExtractor extractor = factory.CreateTextExtractor(filePath))
{
    // Check if the extractor supports search
    ISearchable se = extractor as ISearchable;
    if (se != null)
    {
        // Create a handler
        ListSearchHandler handler = new ListSearchHandler();

        // Search "keyword" in the document
        se.Search(new SearchOptions(null), handler, new string[] { "keyword" });

        // Print search results
        foreach (SearchResult result in handler.List)
        {
            Console.WriteLine("at {0}: {1}", result.Index, result.FoundText);
        }
    }
}
New coding style
// New API: search a keyword directly on the Parser instance.

// Create an instance of Parser class
using (Parser parser = new Parser(filePath))
{
    // Search "keyword" in the document
    IEnumerable<SearchResult> list = parser.Search("keyword");

    // Check if search is supported
    if (list == null)
    {
        Console.WriteLine("Search isn't supported.");
        return;
    }

    // Print search results
    foreach (SearchResult result in list)
    {
        Console.WriteLine("at {0}: {1}", result.Position, result.Text);
    }
}
File Type Detection
Old coding style
// Old API: detect the file type with the default composite media type detector.

// Detect and print file type
Console.WriteLine(CompositeMediaTypeDetector.Default.Detect(filePath));
New coding style
// New API: read the file type from the document info returned by the Parser.

// Create an instance of Parser class
using (Parser parser = new Parser(filePath))
{
    // Detect and print file type
    Console.WriteLine(parser.GetDocumentInfo().FileType);
}
Metadata
Old coding style
// Old API: extract metadata with a MetadataExtractor and print it as key/value pairs.

// Create an extractor factory
ExtractorFactory factory = new ExtractorFactory();

// Create a metadata extractor
MetadataExtractor extractor = factory.CreateMetadataExtractor(filePath);

// Extract metadata
MetadataCollection metadata = extractor.ExtractMetadata(filePath);

// Print metadata
foreach (KeyValuePair<string, string> item in metadata)
{
    Console.WriteLine("{0} = {1}", item.Key, item.Value);
}
New coding style
// New API: extract metadata items directly from the Parser instance.

// Create an instance of Parser class
using (Parser parser = new Parser(filePath))
{
    // Extract metadata
    IEnumerable<MetadataItem> metadata = parser.GetMetadata();

    // Check if metadata extraction is supported
    if (metadata == null)
    {
        Console.WriteLine("Metadata extraction isn't supported.");
        return;
    }

    // Print metadata
    foreach (MetadataItem item in metadata)
    {
        Console.WriteLine("{0} = {1}", item.Name, item.Value);
    }
}
Structure
Old coding style
// Old API: collect hyperlinks through a custom StructuredHandler subclass.

// Create an extractor factory
ExtractorFactory factory = new ExtractorFactory();

// Create a text extractor
using (TextExtractor extractor = factory.CreateTextExtractor(filePath))
{
    // Check if the extractor supports text structure extraction
    IStructuredExtractor se = extractor as IStructuredExtractor;
    if (se != null)
    {
        // Create a handler
        Handler handler = new Handler();

        // Extract text structure
        se.ExtractStructured(handler);

        // Print hyperlinks
        foreach (string link in handler.Links)
        {
            Console.WriteLine(link);
        }
    }
}

// Handler for the hyperlink extraction
private class Handler : StructuredHandler
{
    public Handler()
    {
        Links = new List<string>();
    }

    // Links collected by OnStartHyperlink, in the order they were added
    public List<string> Links { get; private set; }

    // Override the method to catch hyperlinks
    protected override void OnStartHyperlink(HyperlinkProperties properties)
    {
        Links.Add(properties.Link);
    }
}
New coding style
// New API: read the text structure as XML and print the "link" attribute of each hyperlink element.

// Create an instance of Parser class
using (Parser parser = new Parser(filePath))
{
    // Extract text structure to the XML reader
    using (XmlReader reader = parser.GetStructure())
    {
        // Check if text structure extraction is supported
        if (reader == null)
        {
            Console.WriteLine("Text structure extraction isn't supported.");
            return;
        }

        // Read the XML document to search hyperlinks
        while (reader.Read())
        {
            // Check if this is a start element with "hyperlink" name.
            // The name is compared case-insensitively without allocating a lowered copy.
            if (reader.NodeType == XmlNodeType.Element
                && reader.IsStartElement()
                && string.Equals(reader.Name, "hyperlink", StringComparison.OrdinalIgnoreCase))
            {
                // Extract "link" attribute
                string value = reader.GetAttribute("link");
                if (value != null)
                {
                    Console.WriteLine(value);
                }
            }
        }
    }
}
Was this page helpful?
Any additional feedback you'd like to share with us?
Please tell us how we can improve this page.
Thank you for your feedback!
We value your opinion. Your feedback will help us improve our documentation.