Versions Compared

Key

  • This line was added.
  • This line was removed.
  • Formatting was changed.

...

To extract images from a page, use the getImageAreas methods:

Code Block
titleJava
languagejava
// API summary only: method bodies are omitted.
class DocumentContent {
  // Returns the image areas of the page at pageIndex (the examples pass 0 for the first page).
  public java.util.List<ImageArea> getImageAreas(int pageIndex);
  // Same as above, with the search narrowed by searchOptions (e.g. a bounding rectangle).
  public java.util.List<ImageArea> getImageAreas(int pageIndex, ImageAreaSearchOptions searchOptions);
}

...

To extract images from a page, use the getImageAreas methods:

Code Block
titleJava
languagejava
/**
 * Extracts the images located in a rectangle on the first page of "cv.pdf"
 * and saves each one to a file named "&lt;index&gt;.jpg".
 *
 * @throws java.io.IOException if an image stream cannot be read or an output file cannot be written
 */
private static void extractImages() throws java.io.IOException {
    // Create a text extractor
    PdfTextExtractor extractor = new PdfTextExtractor("cv.pdf");

    // Create search options
    ImageAreaSearchOptions searchOptions = new ImageAreaSearchOptions();
    // Limit the search with the rectangle: position (0; 0), size (300; 300)
    searchOptions.setRectangle(new Rectangle(0, 0, 300, 300));

    // Get images from the first page
    java.util.List<ImageArea> imageAreas = extractor.getDocumentContent().getImageAreas(0, searchOptions);

    // Iterate over the images
    for (int i = 0; i < imageAreas.size(); i++) {
        // Both streams are declared as resources so that each one is closed
        // even when the copy fails part-way through.
        // NOTE(review): assumes the caller owns the stream returned by getRawStream() - confirm.
        try (java.io.InputStream rawStream = imageAreas.get(i).getRawStream();
             java.io.OutputStream fs = new java.io.FileOutputStream(String.format("%d.jpg", i))) {
            // Save the image to the file
            copyStream(rawStream, fs);
        }
    }
}
 
/**
 * Copies the remaining bytes of {@code source} to {@code dest}.
 * Neither stream is closed or flushed by this method.
 *
 * @param source stream to read from until it reports no more data
 * @param dest   stream that receives the bytes
 * @throws java.io.IOException if reading or writing fails
 */
private static void copyStream(java.io.InputStream source, java.io.OutputStream dest) throws java.io.IOException {
    byte[] buffer = new byte[4096];

    int r;
    // read() returns the number of bytes placed in the buffer, or -1 at end of stream
    while ((r = source.read(buffer)) > 0) {
        // Write only the r bytes just read: the rest of the buffer holds
        // stale data from an earlier iteration (or zeros) and would corrupt the output.
        dest.write(buffer, 0, r);
    }
}

...

To extract images from a sheet, use the getImageAreas methods:

Code Block
titleJava
languagejava
/**
 * Extracts the images located in a rectangle on the first sheet of "catalog.xlsx"
 * and saves each one to a file named "&lt;index&gt;.jpg".
 *
 * @throws java.io.IOException if an image stream cannot be read or an output file cannot be written
 */
private static void extractImages() throws java.io.IOException {
    // Create a text extractor
    CellsTextExtractor extractor = new CellsTextExtractor("catalog.xlsx");

    // Create search options
    ImageAreaSearchOptions searchOptions = new ImageAreaSearchOptions();
    // Limit the search with the rectangle: position (0; 0), size (300; 300)
    searchOptions.setRectangle(new Rectangle(0, 0, 300, 300));

    // Get images from the first sheet
    java.util.List<ImageArea> imageAreas = extractor.getDocumentContent().getImageAreas(0, searchOptions);

    // Iterate over the images
    for (int i = 0; i < imageAreas.size(); i++) {
        // Both streams are declared as resources so that each one is closed
        // even when the copy fails part-way through.
        // NOTE(review): assumes the caller owns the stream returned by getRawStream() - confirm.
        try (java.io.InputStream rawStream = imageAreas.get(i).getRawStream();
             java.io.OutputStream fs = new java.io.FileOutputStream(String.format("%d.jpg", i))) {
            // Save the image to the file
            copyStream(rawStream, fs);
        }
    }
}
 
/**
 * Copies the remaining bytes of {@code source} to {@code dest}.
 * Neither stream is closed or flushed by this method.
 *
 * @param source stream to read from until it reports no more data
 * @param dest   stream that receives the bytes
 * @throws java.io.IOException if reading or writing fails
 */
private static void copyStream(java.io.InputStream source, java.io.OutputStream dest) throws java.io.IOException {
    byte[] buffer = new byte[4096];

    int r;
    // read() returns the number of bytes placed in the buffer, or -1 at end of stream
    while ((r = source.read(buffer)) > 0) {
        // Write only the r bytes just read: the rest of the buffer holds
        // stale data from an earlier iteration (or zeros) and would corrupt the output.
        dest.write(buffer, 0, r);
    }
}

...

To extract images from a page, use the getImageAreas methods:

Code Block
titleJava
languagejava
/**
 * Extracts the images located in a rectangle on the first page of "cv.docx"
 * and saves each one to a file named "&lt;index&gt;.jpg".
 *
 * @throws java.io.IOException if an image stream cannot be read or an output file cannot be written
 */
private static void extractImages() throws java.io.IOException {
    // Create a text extractor
    WordsTextExtractor extractor = new WordsTextExtractor("cv.docx");

    // Create search options
    ImageAreaSearchOptions searchOptions = new ImageAreaSearchOptions();
    // Limit the search with the rectangle: position (0; 0), size (300; 300)
    searchOptions.setRectangle(new Rectangle(0, 0, 300, 300));

    // Get images from the first page
    java.util.List<ImageArea> imageAreas = extractor.getDocumentContent().getImageAreas(0, searchOptions);

    // Iterate over the images
    for (int i = 0; i < imageAreas.size(); i++) {
        // Both streams are declared as resources so that each one is closed
        // even when the copy fails part-way through.
        // NOTE(review): assumes the caller owns the stream returned by getRawStream() - confirm.
        try (java.io.InputStream rawStream = imageAreas.get(i).getRawStream();
             java.io.OutputStream fs = new java.io.FileOutputStream(String.format("%d.jpg", i))) {
            // Save the image to the file
            copyStream(rawStream, fs);
        }
    }
}
 
/**
 * Copies the remaining bytes of {@code source} to {@code dest}.
 * Neither stream is closed or flushed by this method.
 *
 * @param source stream to read from until it reports no more data
 * @param dest   stream that receives the bytes
 * @throws java.io.IOException if reading or writing fails
 */
private static void copyStream(java.io.InputStream source, java.io.OutputStream dest) throws java.io.IOException {
    byte[] buffer = new byte[4096];

    int r;
    // read() returns the number of bytes placed in the buffer, or -1 at end of stream
    while ((r = source.read(buffer)) > 0) {
        // Write only the r bytes just read: the rest of the buffer holds
        // stale data from an earlier iteration (or zeros) and would corrupt the output.
        dest.write(buffer, 0, r);
    }
}

...

To extract images from a slide, use the getImageAreas methods:

Code Block
titleJava
languagejava
/**
 * Extracts the images located in a rectangle on the first slide of "presentation.pptx"
 * and saves each one to a file named "&lt;index&gt;.jpg".
 *
 * @throws java.io.IOException if an image stream cannot be read or an output file cannot be written
 */
private static void extractImages() throws java.io.IOException {
    // Create a text extractor
    SlidesTextExtractor extractor = new SlidesTextExtractor("presentation.pptx");

    // Create search options
    ImageAreaSearchOptions searchOptions = new ImageAreaSearchOptions();
    // Limit the search with the rectangle: position (0; 0), size (300; 300)
    searchOptions.setRectangle(new Rectangle(0, 0, 300, 300));

    // Get images from the first slide
    java.util.List<ImageArea> imageAreas = extractor.getDocumentContent().getImageAreas(0, searchOptions);

    // Iterate over the images
    for (int i = 0; i < imageAreas.size(); i++) {
        // Both streams are declared as resources so that each one is closed
        // even when the copy fails part-way through.
        // NOTE(review): assumes the caller owns the stream returned by getRawStream() - confirm.
        try (java.io.InputStream rawStream = imageAreas.get(i).getRawStream();
             java.io.OutputStream fs = new java.io.FileOutputStream(String.format("%d.jpg", i))) {
            // Save the image to the file
            copyStream(rawStream, fs);
        }
    }
}
 
/**
 * Copies the remaining bytes of {@code source} to {@code dest}.
 * Neither stream is closed or flushed by this method.
 *
 * @param source stream to read from until it reports no more data
 * @param dest   stream that receives the bytes
 * @throws java.io.IOException if reading or writing fails
 */
private static void copyStream(java.io.InputStream source, java.io.OutputStream dest) throws java.io.IOException {
    byte[] buffer = new byte[4096];

    int r;
    // read() returns the number of bytes placed in the buffer, or -1 at end of stream
    while ((r = source.read(buffer)) > 0) {
        // Write only the r bytes just read: the rest of the buffer holds
        // stale data from an earlier iteration (or zeros) and would corrupt the output.
        dest.write(buffer, 0, r);
    }
}