GroupDocs.Search allows indexing documents from various sources:
From files in the file system.
From a stream.
From a data structure as an array of fields.
The library also supports lazy initialization of documents from any of the sources listed above.
Please note that the update operation automatically generates a list of changed files only when indexing from the local file system. When indexing from streams or structures, documents cannot be updated with the update operation. To update documents from these sources, you must re-index the modified documents by passing their keys and updated data to the Add method.
Indexing from a file
Keep in mind that the Add method overload that takes a parameter of type Document[] indexes only individual documents, not entire folders. The advantage of using this method overload is that you can add attributes and additional fields to the indexed document before calling the Add method. The following example demonstrates how to index a document from a file.
C#
// Locations of the index and of the document to be indexed
string indexFolder = @"c:\MyIndex";
string documentFilePath = @"c:\MyDocuments\ExampleDocument.pdf";

// Creating an index in the specified folder
Index index = new Index(indexFolder);

// Creating a document object from the file and placing it in a one-element array,
// which is the form accepted by this overload of the Add method
Document[] documents = { Document.CreateFromFile(documentFilePath) };

// Indexing the document from the file with default indexing options
index.Add(documents, new IndexingOptions());
Indexing from a stream
The following example demonstrates how to index a document from a stream.
C#
string indexFolder = @"c:\MyIndex";
string documentFilePath = @"c:\MyDocuments\ExampleDocument.pdf";

// Creating an index
Index index = new Index(indexFolder);

// Opening a stream. The using block closes the stream after indexing is complete
// and also when document creation or indexing throws an exception.
using (Stream stream = File.OpenRead(documentFilePath))
{
    // Creating a document object; the file path is passed as the document key
    Document document = Document.CreateFromStream(documentFilePath, DateTime.Now, ".pdf", stream);
    Document[] documents = new Document[]
    {
        document,
    };

    // Indexing document from the stream
    IndexingOptions options = new IndexingOptions();
    index.Add(documents, options);
}
Indexing from a structure
The following example demonstrates how to index a document from a structure.
C#
// Locations of the index and of the text file whose content is indexed
string indexFolder = @"c:\MyIndex";
string documentFilePath = @"c:\MyDocuments\ExampleDocument.txt";

// Creating an index
Index index = new Index(indexFolder);

// Reading the whole file and exposing its text as the content field
string text = File.ReadAllText(documentFilePath);
DocumentField contentField = new DocumentField(CommonFieldNames.Content, text);
DocumentField[] fields = { contentField };

// Creating a document object from the array of fields
Document document = Document.CreateFromStructure("ExampleDocument", DateTime.Now, fields);
Document[] documents = { document };

// Indexing document from the structure
index.Add(documents, new IndexingOptions());
Indexing from URL
The following example demonstrates how to index a document from a URL using lazy initialization.
C#
/// <summary>
/// Document loader that downloads a document from a URL and exposes it
/// as a stream-based <c>Document</c>. Implements <c>IDocumentLoader</c>
/// so that it can be passed to <c>Document.CreateLazy</c>.
/// </summary>
private class DocumentLoaderFromUrl : IDocumentLoader
{
    private readonly string documentKey;
    private readonly string url;
    private readonly string extension;

    /// <summary>
    /// Initializes the loader.
    /// </summary>
    /// <param name="documentKey">Key passed to <c>Document.CreateFromStream</c> for the loaded document.</param>
    /// <param name="url">Address from which the document is downloaded.</param>
    /// <param name="extension">Document extension passed to <c>Document.CreateFromStream</c>, for example ".pdf".</param>
    public DocumentLoaderFromUrl(string documentKey, string url, string extension)
    {
        this.documentKey = documentKey;
        this.url = url;
        this.extension = extension;
    }

    /// <summary>
    /// Downloads the document into memory and creates a <c>Document</c> from the in-memory copy.
    /// </summary>
    /// <returns>The document created from the downloaded content.</returns>
    public Document LoadDocument()
    {
        ServicePointManager.Expect100Continue = true;

        // SSL 3.0 is intentionally not enabled: the protocol is obsolete and insecure,
        // and modern .NET runtimes reject it when assigned to SecurityProtocol.
        ServicePointManager.SecurityProtocol =
            SecurityProtocolType.Tls |
            SecurityProtocolType.Tls11 |
            SecurityProtocolType.Tls12;

        WebRequest request = WebRequest.Create(url);
        using (WebResponse response = request.GetResponse())
        using (Stream stream = response.GetResponseStream())
        {
            // The content is copied to memory because the response stream
            // is disposed when this method returns.
            MemoryStream memoryStream = new MemoryStream();
            stream.CopyTo(memoryStream);
            memoryStream.Position = 0; // Rewinding so the content is read from the beginning

            Document document = Document.CreateFromStream(documentKey, DateTime.Now, extension, memoryStream);
            return document;
        }
    }

    /// <summary>
    /// Performs no action: the response and its stream are already disposed in <see cref="LoadDocument"/>.
    /// </summary>
    public void CloseDocument()
    {
    }
}

string indexFolder = @"c:\MyIndex";
string url = "http://example.com/ExampleDocument.pdf";

// Creating an index
Index index = new Index(indexFolder);

// Creating a document object; the URL is also used as the document key
string documentKey = url;
IDocumentLoader documentLoader = new DocumentLoaderFromUrl(documentKey, url, ".pdf");
Document document = Document.CreateLazy(DocumentSourceKind.Stream, documentKey, documentLoader);
Document[] documents = new Document[]
{
    document,
};

// Indexing the lazy-loaded document
IndexingOptions options = new IndexingOptions();
index.Add(documents, options);
Indexing from FTP
The following example demonstrates how to index a document from FTP using lazy initialization.
C#
/// <summary>
/// Document loader that fetches a document from an FTP address and wraps
/// the downloaded bytes in a stream-based <c>Document</c>.
/// </summary>
private class DocumentLoaderFromUrl : IDocumentLoader
{
    private readonly string documentKey;
    private readonly string url;
    private readonly string extension;

    /// <summary>
    /// Stores the values used later by <see cref="LoadDocument"/>.
    /// </summary>
    /// <param name="documentKey">Key passed to <c>Document.CreateFromStream</c>.</param>
    /// <param name="url">FTP address of the document.</param>
    /// <param name="extension">Document extension passed to <c>Document.CreateFromStream</c>.</param>
    public DocumentLoaderFromUrl(string documentKey, string url, string extension)
    {
        this.documentKey = documentKey;
        this.url = url;
        this.extension = extension;
    }

    /// <summary>
    /// Downloads the document over FTP into memory and creates a <c>Document</c> from it.
    /// </summary>
    /// <returns>The document created from the downloaded content.</returns>
    public Document LoadDocument()
    {
        FtpWebRequest ftpRequest = (FtpWebRequest)WebRequest.Create(url);

        using (WebResponse ftpResponse = ftpRequest.GetResponse())
        {
            using (Stream source = ftpResponse.GetResponseStream())
            {
                // Buffering the content in memory, since the network stream
                // is disposed on leaving this block
                MemoryStream buffer = new MemoryStream();
                source.CopyTo(buffer);
                buffer.Position = 0;

                return Document.CreateFromStream(documentKey, DateTime.Now, extension, buffer);
            }
        }
    }

    /// <summary>
    /// Performs no action.
    /// </summary>
    public void CloseDocument()
    {
    }
}

string indexFolder = @"c:\MyIndex";
string url = "ftp://example.com/ExampleDocument.pdf";

// Creating an index
Index index = new Index(indexFolder);

// Creating a lazily loaded document object that uses the address as its key
string documentKey = url;
IDocumentLoader documentLoader = new DocumentLoaderFromUrl(documentKey, url, ".pdf");
Document[] documents =
{
    Document.CreateLazy(DocumentSourceKind.Stream, documentKey, documentLoader),
};

// Indexing the lazy-loaded document
index.Add(documents, new IndexingOptions());
Indexing from Amazon S3 Storage
The following example demonstrates how to index a document from Amazon S3 Storage using lazy initialization.
C#
/// <summary>
/// Document loader that downloads an object from Amazon S3 and exposes it
/// as a stream-based <c>Document</c>. Implements <c>IDocumentLoader</c>
/// so that it can be passed to <c>Document.CreateLazy</c>.
/// </summary>
private class DocumentLoaderFromAmazon : IDocumentLoader
{
    private readonly string documentKey;
    private readonly string extension;
    private readonly string storageKey;

    /// <summary>
    /// Initializes the loader.
    /// </summary>
    /// <param name="documentKey">Key passed to <c>Document.CreateFromStream</c> for the loaded document.</param>
    /// <param name="extension">Document extension passed to <c>Document.CreateFromStream</c>, for example ".pdf".</param>
    /// <param name="storageKey">Key of the object in the S3 bucket.</param>
    public DocumentLoaderFromAmazon(string documentKey, string extension, string storageKey)
    {
        this.documentKey = documentKey;
        this.extension = extension;
        this.storageKey = storageKey;
    }

    /// <summary>
    /// Downloads the object into memory and creates a <c>Document</c> from the in-memory copy.
    /// </summary>
    /// <returns>The document created from the downloaded content.</returns>
    public Document LoadDocument()
    {
        string bucketName = "my-bucket";
        GetObjectRequest request = new GetObjectRequest
        {
            Key = storageKey,
            BucketName = bucketName,
        };

        // Both the client and the response are disposable; the using statements
        // release them even when the download fails.
        using (AmazonS3Client client = new AmazonS3Client())
        using (GetObjectResponse response = client.GetObject(request))
        {
            // The content is copied to memory because the response stream
            // is disposed when this method returns.
            MemoryStream memoryStream = new MemoryStream();
            response.ResponseStream.CopyTo(memoryStream);
            memoryStream.Position = 0; // Rewinding so the content is read from the beginning

            Document document = Document.CreateFromStream(documentKey, DateTime.Now, extension, memoryStream);
            return document;
        }
    }

    /// <summary>
    /// Performs no action: the client and the response are already disposed in <see cref="LoadDocument"/>.
    /// </summary>
    public void CloseDocument()
    {
    }
}

string indexFolder = @"c:\MyIndex";
string key = "example.pdf";

// Creating an index
Index index = new Index(indexFolder);

// Creating a document object
IDocumentLoader documentLoader = new DocumentLoaderFromAmazon("documentKey", ".pdf", key);
Document document = Document.CreateLazy(DocumentSourceKind.Stream, "documentKey", documentLoader);
Document[] documents = new Document[]
{
    document,
};

// Indexing the lazy-loaded document
IndexingOptions options = new IndexingOptions();
index.Add(documents, options);
Indexing from Azure Blob Storage
The following example demonstrates how to index a document from Azure Blob Storage using lazy initialization.
C#
/// <summary>
/// Document loader that downloads a blob from Azure Blob Storage and exposes it
/// as a stream-based <c>Document</c>. Implements <c>IDocumentLoader</c>
/// so that it can be passed to <c>Document.CreateLazy</c>.
/// </summary>
private class DocumentLoaderFromAzure : IDocumentLoader
{
    private readonly string documentKey;
    private readonly string extension;
    private readonly string blobName;

    /// <summary>
    /// Initializes the loader.
    /// </summary>
    /// <param name="documentKey">Key passed to <c>Document.CreateFromStream</c> for the loaded document.</param>
    /// <param name="extension">Document extension passed to <c>Document.CreateFromStream</c>, for example ".pdf".</param>
    /// <param name="blobName">Name of the blob to download from the container.</param>
    public DocumentLoaderFromAzure(string documentKey, string extension, string blobName)
    {
        this.documentKey = documentKey;
        this.extension = extension;
        this.blobName = blobName;
    }

    /// <summary>
    /// Downloads the blob into memory and creates a <c>Document</c> from the in-memory copy.
    /// </summary>
    /// <returns>The document created from the downloaded content.</returns>
    public Document LoadDocument()
    {
        // Placeholders: substitute the real account name, account key and container name
        string accountName = "***";
        string accountKey = "***";
        string endpoint = $"https://{accountName}.blob.core.windows.net/";
        string containerName = "***";

        StorageCredentials storageCredentials = new StorageCredentials(accountName, accountKey);
        CloudStorageAccount cloudStorageAccount = new CloudStorageAccount(storageCredentials, new Uri(endpoint), null, null, null);
        CloudBlobClient cloudBlobClient = cloudStorageAccount.CreateCloudBlobClient();

        // The container is only referenced, never created: loading a document is a read
        // operation, so it must not create an empty container as a side effect.
        CloudBlobContainer container = cloudBlobClient.GetContainerReference(containerName);
        CloudBlob blob = container.GetBlobReference(blobName);

        MemoryStream memoryStream = new MemoryStream();
        blob.DownloadToStream(memoryStream);
        memoryStream.Position = 0; // Rewinding so the content is read from the beginning

        Document document = Document.CreateFromStream(documentKey, DateTime.Now, extension, memoryStream);
        return document;
    }

    /// <summary>
    /// Performs no action.
    /// </summary>
    public void CloseDocument()
    {
    }
}

string indexFolder = @"c:\MyIndex";
string blobName = "example.pdf";

// Creating an index
Index index = new Index(indexFolder);

// Creating a document object
IDocumentLoader documentLoader = new DocumentLoaderFromAzure("documentKey", ".pdf", blobName);
Document document = Document.CreateLazy(DocumentSourceKind.Stream, "documentKey", documentLoader);
Document[] documents = new Document[]
{
    document,
};

// Indexing the lazy-loaded document
IndexingOptions options = new IndexingOptions();
index.Add(documents, options);
More resources
GitHub examples
You may easily run the code from documentation articles and see the features in action in our GitHub examples: