This implementation is based on the Microsoft Azure Computer Vision API. The service is paid, but you can create a free subscription. Once you’ve set up the subscription, create a Computer Vision resource using the free pricing tier (F0) to try the service; you can upgrade to a paid tier later for production. As a result, you will get a Computer Vision endpoint and a subscription key (let’s suppose they are stored in the environment variables COMPUTER_VISION_ENDPOINT and COMPUTER_VISION_SUBSCRIPTION_KEY respectively).
Java
importjava.net.URI;importjava.io.InputStream;importjava.util.ArrayList;importjava.awt.Rectangle;importjava.security.SecureRandom;importjava.security.cert.CertificateException;importjava.security.cert.X509Certificate;importjavax.net.ssl.HostnameVerifier;importjavax.net.ssl.SSLContext;importjavax.net.ssl.SSLSession;importjavax.net.ssl.TrustManager;importjavax.net.ssl.X509TrustManager;importorg.apache.http.HttpEntity;importorg.apache.http.HttpResponse;importorg.apache.http.client.methods.HttpPost;importorg.apache.http.entity.ContentType;importorg.apache.http.client.utils.URIBuilder;importorg.apache.http.impl.client.CloseableHttpClient;importorg.apache.http.impl.client.HttpClientBuilder;importorg.apache.http.util.EntityUtils;importorg.apache.http.entity.InputStreamEntity;importorg.apache.http.conn.ssl.SSLConnectionSocketFactory;importorg.bouncycastle.util.Strings;importorg.json.JSONArray;importorg.json.JSONObject;importcom.groupdocs.redaction.integration.IOcrConnector;importcom.groupdocs.redaction.integration.RecognizedImage;importcom.groupdocs.redaction.integration.TextFragment;importcom.groupdocs.redaction.integration.TextLine;publicclassMicrosoftAzureOcrConnectorimplementsIOcrConnector{privatestaticfinalStringOcrUri="vision/v3.1/ocr";privatefinalStringgetSubscriptionKey(){returnSystem.getenv("COMPUTER_VISION_SUBSCRIPTION_KEY");}privatefinalStringgetEndpoint(){returnSystem.getenv("COMPUTER_VISION_ENDPOINT");}privatefinalStringgetUriBase(){returngetEndpoint()+OcrUri;}publicMicrosoftAzureOcrConnector(){}publicfinalRecognizedImagerecognize(InputStreamimageStream){try{SSLConnectionSocketFactorysslSocketFactory=createUnsecureSocketFactory();try(CloseableHttpClienthttpClient=HttpClientBuilder.create().setSSLSocketFactory(sslSocketFactory).build()){URIBuilderuriBuilder=newURIBuilder(getUriBase());uriBuilder.setParameter("language","unk");uriBuilder.setParameter("detectOrientation","true");// Request parameters.
URIuri=uriBuilder.build();HttpPostrequest=newHttpPost(uri);// Request headers.
request.setHeader("Content-Type","application/octet-stream");request.setHeader("Ocp-Apim-Subscription-Key",getSubscriptionKey());request.setHeader("Accept","application/json");// Request body.
InputStreamEntityrequestEntity=newInputStreamEntity(imageStream,ContentType.create("application/octet-stream"));request.setEntity(requestEntity);StringstringResponse=null;try{// Call the REST API method and get the response entity.
HttpResponseresponse=httpClient.execute(request);HttpEntityentity=response.getEntity();if(entity!=null){// Format and display the JSON response.
stringResponse=EntityUtils.toString(entity);System.out.println("REST Response:\n");System.out.println(stringResponse);}}catch(java.lang.Exceptionex){// MS Azure Cognintive services reports 400 Bad requests and other exceptions on small pictures and pictures with no text
System.out.println("Microsoft Azure Cognitive Services consider this image as wrong: "+ex.toString());}if(stringResponse!=null){returncreateDtoFromResponse(newJSONObject(stringResponse));}}}catch(java.lang.Exceptionex){System.out.println("Microsoft Azure Cognitive Services Text Recognition failed: "+ex.toString());}returnnewRecognizedImage(newTextLine[0]);}privateSSLConnectionSocketFactorycreateUnsecureSocketFactory()throwsException{TrustManager[]trustAllCerts=newTrustManager[]{newX509TrustManager(){publicX509Certificate[]getAcceptedIssuers(){returnnull;}publicvoidcheckServerTrusted(X509Certificate[]arg0,Stringarg1)throwsCertificateException{}publicvoidcheckClientTrusted(X509Certificate[]arg0,Stringarg1)throwsCertificateException{}}};SSLContextcontext=SSLContext.getInstance("TLS");context.init(null,trustAllCerts,newSecureRandom());SSLConnectionSocketFactorysslSocketFactory=newSSLConnectionSocketFactory(context,newString[]{"TLSv1.2"},null,newHostnameVerifier(){@Overridepublicbooleanverify(Stringarg0,SSLSessionarg1){returntrue;}});returnsslSocketFactory;}privateRecognizedImagecreateDtoFromResponse(JSONObjectjToken){// Parse json response to extract lines and words with the bounding rectangles.
}...}
The service returns a JSON-serialized response containing text regions and recognized words, each word with its bounding rectangle.
Along with a full-featured Java library, we provide simple but powerful free apps.
You are welcome to perform redactions for various document formats like PDF, DOC, DOCX, PPT, PPTX, XLS, XLSX, Emails and more with our Free Online Document Redaction App.
Was this page helpful?
Any additional feedback you'd like to share with us?
Please tell us how we can improve this page.
Thank you for your feedback!
We value your opinion. Your feedback will help us improve our documentation.