Content is user-generated and unverified.
import org.apache.pdfbox.pdmodel.PDDocument; import org.apache.pdfbox.pdmodel.PDPage; import org.apache.pdfbox.pdmodel.PDResources; import org.apache.pdfbox.pdmodel.graphics.image.PDImageXObject; import org.apache.pdfbox.cos.COSName; import javax.imageio.ImageIO; import java.awt.image.BufferedImage; import java.io.File; import java.io.IOException; import java.util.HashSet; import java.util.Set; public class PDFImageExtractor { private static final int MIN_DIMENSION = 150; private int imageCounter = 0; private Set<String> processedImages = new HashSet<>(); public static void main(String[] args) { if (args.length < 1) { System.out.println("Usage: java PDFImageExtractor <path-to-pdf-file>"); System.exit(1); } String pdfPath = args[0]; File pdfFile = new File(pdfPath); if (!pdfFile.exists()) { System.err.println("Error: PDF file not found: " + pdfPath); System.exit(1); } PDFImageExtractor extractor = new PDFImageExtractor(); extractor.extractImages(pdfFile); } public void extractImages(File pdfFile) { try (PDDocument document = PDDocument.load(pdfFile)) { System.out.println("Processing PDF: " + pdfFile.getName()); System.out.println("Total pages: " + document.getNumberOfPages()); String outputDir = pdfFile.getParent(); if (outputDir == null) { outputDir = "."; } String baseName = pdfFile.getName().replaceFirst("[.][^.]+$", ""); int totalPages = document.getNumberOfPages(); int pagesToProcess = Math.max(0, totalPages - 4); for (int pageNum = 0; pageNum < pagesToProcess; pageNum++) { PDPage page = document.getPage(pageNum); processPage(page, outputDir, baseName, pageNum + 1); } System.out.println("\nExtraction complete!"); System.out.println("Total images extracted: " + imageCounter); } catch (IOException e) { System.err.println("Error processing PDF: " + e.getMessage()); e.printStackTrace(); } } private void processPage(PDPage page, String outputDir, String baseName, int pageNum) { try { PDResources resources = page.getResources(); if (resources == null) { return; } processResources(resources, outputDir, baseName, pageNum); } catch (IOException e) { System.err.println("Error processing page " + pageNum + ": " + e.getMessage()); } } private void processResources(PDResources resources, String outputDir, String baseName, int pageNum) throws IOException { for (COSName name : resources.getXObjectNames()) { if (resources.isImageXObject(name)) { PDImageXObject image = (PDImageXObject) resources.getXObject(name); processImage(image, outputDir, baseName, pageNum, name.getName()); } } } private void processImage(PDImageXObject image, String outputDir, String baseName, int pageNum, String imageName) { try { int width = image.getWidth(); int height = image.getHeight(); // Filter: both dimensions must be >= 150px if (width < MIN_DIMENSION || height < MIN_DIMENSION) { return; } // Create a unique identifier for this image to avoid duplicates String imageId = imageName + "_" + width + "x" + height; if (processedImages.contains(imageId)) { return; } processedImages.add(imageId); BufferedImage bImage = image.getImage(); if (bImage == null) { return; } imageCounter++; String fileName = String.format("%s_page%d_img%d_%dx%d.png", baseName, pageNum, imageCounter, width, height); String outputPath = outputDir + File.separator + fileName; File outputFile = new File(outputPath); ImageIO.write(bImage, "PNG", outputFile); System.out.println("Extracted: " + fileName + " (" + width + "x" + height + "px)"); } catch (IOException e) { System.err.println("Error extracting image: " + e.getMessage()); } } }
Content is user-generated and unverified.
    PDF Image Extractor: Java Tool to Extract Images | Claude