feat(compliance): implement compliance verification for get info on PDF (#5435)

# Description of Changes


This PR replaces the legacy, fragile PDF/A validation logic (which
relied on manual regex parsing and PDFBox Preflight) with the robust
**VeraPDF** integration. Additionally, it introduces a new check for
**SEC/EDGAR compliance** and refactors the frontend report view to
display detailed verification results.

### Key Changes

**Backend (`GetInfoOnPDF.java`):**

* **VeraPDF Integration:** Replaced the custom `checkForStandard` and
`validatePdfAWithPreflight` methods with `VeraPDFService.validatePDF()`.
This aligns validation with industry standards.
* **Code Cleanup:** Removed approximately 200 lines of technical debt,
including raw XML regex parsing and file-locking-prone `PreflightParser`
logic.
* **SEC Compliance:** Added `isSECCompliant()` logic to validate
documents against typical EDGAR requirements:
* No Encryption.
* No Active Content (JavaScript).
* No External Links.
* No Embedded Files.
* No AcroForms.



**Frontend (`GetPdfInfo`):**

* **New Component:** Created `ComplianceSection.tsx` to handle the
complexity of compliance reporting.
* **Enhanced UI:**
* Added visual badges (Passed/Failed/Not Detected) using
`CheckIcon`/`CloseIcon`.
* Added support for displaying the detailed VeraPDF summary alongside
legacy boolean flags.
* Improved handling of "Not PDF/A" states.


### Motivation

The previous validation logic was prone to false positives/negatives and
memory issues with large files (due to Preflight). Moving to VeraPDF
provides accurate, profile-based validation (e.g., PDF/A-1b, 2b, etc.).
The SEC check satisfies a growing need for users validating documents
for financial filings.



<!--
Please provide a summary of the changes, including:

- What was changed
- Why the change was made
- Any challenges encountered

Closes #(issue_number)
-->

---

## Checklist

### General

- [X] I have read the [Contribution
Guidelines](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/CONTRIBUTING.md)
- [X] I have read the [Stirling-PDF Developer
Guide](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/devGuide/DeveloperGuide.md)
(if applicable)
- [ ] I have read the [How to add new languages to
Stirling-PDF](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/devGuide/HowToAddNewLanguage.md)
(if applicable)
- [X] I have performed a self-review of my own code
- [X] My changes generate no new warnings

### Documentation

- [ ] I have updated relevant docs on [Stirling-PDF's doc
repo](https://github.com/Stirling-Tools/Stirling-Tools.github.io/blob/main/docs/)
(if functionality has heavily changed)
- [ ] I have read the section [Add New Translation
Tags](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/devGuide/HowToAddNewLanguage.md#add-new-translation-tags)
(for new translation tags only)

### Translations (if applicable)

- [ ] I ran
[`scripts/counter_translation.py`](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/docs/counter_translation.md)

### UI Changes (if applicable)

- [X] Screenshots or videos demonstrating the UI changes are attached
(e.g., as comments or direct attachments in the PR)

### Testing (if applicable)

- [X] I have tested my changes locally. Refer to the [Testing
Guide](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/devGuide/DeveloperGuide.md#6-testing)
for more details.

---------

Signed-off-by: Balázs Szücs <bszucs1209@gmail.com>
Signed-off-by: brios <brios@heim-041-30.jkh.uni-linz.ac.at>
This commit is contained in:
Balázs Szücs
2026-01-23 22:36:35 +01:00
committed by GitHub
parent 0b86dd79d3
commit 188408fc1e
7 changed files with 637 additions and 329 deletions

View File

@@ -3190,6 +3190,18 @@ other = "Other"
perPageInfo = "Per Page Info"
tableOfContents = "Table of Contents"
[getPdfInfo.compliance]
notDetected = "Not Detected"
passed = "Passed"
failed = "Failed"
compliant = "Compliant"
nonCompliant = "Non-Compliant"
none = "No standards detected"
passedCount = "passed"
failedCount = "failed"
noVerification = "No Verification Performed"
noVerificationDesc = "PDF standards compliance was not verified for this document."
[getPdfInfo.other]
attachments = "Attachments"
embeddedFiles = "Embedded Files"
@@ -3221,6 +3233,8 @@ permsRestricted = "{{count}} restrictions"
permsMixed = "Some permissions restricted"
hasCompliance = "Has compliance standards"
noCompliance = "No Compliance Standards"
compliancePassed = "{{standards}} compliant"
complianceChecked = "Standards verified ({{failed}} failed)"
basic = "Basic Information"
documentInfo = "Document Information"
securityTitle = "Security Status"

View File

@@ -13,6 +13,7 @@ import KeyValueSection from '@app/components/tools/getPdfInfo/sections/KeyValueS
import TableOfContentsSection from '@app/components/tools/getPdfInfo/sections/TableOfContentsSection';
import OtherSection from '@app/components/tools/getPdfInfo/sections/OtherSection';
import PerPageSection from '@app/components/tools/getPdfInfo/sections/PerPageSection';
import ComplianceSection from '@app/components/tools/getPdfInfo/sections/ComplianceSection';
/** Valid section anchor IDs for navigation */
@@ -105,7 +106,11 @@ const GetPdfInfoReportView: React.FC<GetPdfInfoReportViewProps> = ({ data }) =>
<KeyValueSection title={t('getPdfInfo.sections.documentInfo', 'Document Info')} anchorId="documentInfo" obj={sections.documentInfo} />
<KeyValueSection title={t('getPdfInfo.sections.compliance', 'Compliance')} anchorId="compliance" obj={sections.compliance} />
<ComplianceSection
anchorId="compliance"
complianceSummary={sections.summaryData?.Compliance}
legacyCompliance={sections.compliance}
/>
<KeyValueSection title={t('getPdfInfo.sections.encryption', 'Encryption')} anchorId="encryption" obj={sections.encryption} />

View File

@@ -0,0 +1,299 @@
import React, { useMemo } from 'react';
import { Badge, Group, Stack, Text, ThemeIcon, Paper, Tooltip, Divider } from '@mantine/core';
import CheckIcon from '@mui/icons-material/Check';
import CloseIcon from '@mui/icons-material/Close';
import InfoIcon from '@mui/icons-material/InfoOutlined';
import SectionBlock from '@app/components/tools/getPdfInfo/shared/SectionBlock';
import type { PdfCompliance, PdfComplianceSummary } from '@app/types/getPdfInfo';
import { useTranslation } from 'react-i18next';
/** Props accepted by the ComplianceSection component. */
interface ComplianceSectionProps {
/** Anchor ID forwarded to the enclosing SectionBlock. */
anchorId: string;
/** Per-standard verification entries; each non-marker entry is rendered as one row. */
complianceSummary?: PdfComplianceSummary[] | null;
/** Legacy compliance flags; this component only reads 'IsPDF/SECCompliant' from it. */
legacyCompliance?: PdfCompliance | null;
}
/** Normalized outcome of a single compliance check, ready to be rendered as one row. */
interface ComplianceCheckResult {
/** Display name for the standard (e.g., "PDF/A-3B", "PDF/UA-1", "SEC (EDGAR)") */
displayName: string;
/** Category for grouping (e.g., "PDF/A", "PDF/UA", "SEC") */
category: string;
/** Whether the PDF is compliant with this standard */
isCompliant: boolean;
/** Human-readable summary from the verification */
summary: string;
/** Original standard identifier from backend */
standardId: string;
/** Sort order for display */
sortOrder: number;
}
/**
 * Translates a backend standard identifier into display metadata.
 *
 * Matching is case-insensitive and ignores surrounding whitespace. Recognized
 * families are PDF/A, PDF/UA, PDF/X, PDF/E, PDF/VT, SEC (EDGAR) and the
 * "not-pdfa" detection marker; anything else is upper-cased with `-`/`_`
 * replaced by `/` and placed in the "Other" category.
 *
 * @param standardId Identifier as delivered by the backend (e.g. "pdfa-3b").
 * @returns The label to show, the grouping category, and a numeric sort key
 *          (lower values are listed first).
 */
const parseStandardDisplayName = (standardId: string): { displayName: string; category: string; sortOrder: number } => {
  const id = standardId.toLowerCase().trim();

  // PDF/A variants: pdfa-1a, pdfa-1b, pdfa-2u, pdfa-3b, pdfa-4, pdfa-4e, pdfa-4f, etc.
  // The level class includes "e" so that PDF/A-4e is not mistaken for an unknown standard.
  const pdfaMatch = id.match(/^pdf[_-]?a[_-]?(\d+)([abefu])?$/);
  if (pdfaMatch) {
    const version = pdfaMatch[1];
    const level = pdfaMatch[2]?.toUpperCase() ?? '';
    // Only A/B/U carry an ordering offset; every other level sorts with the bare version.
    const levelRank: Record<string, number> = { A: 1, B: 2, U: 3 };
    return {
      displayName: `PDF/A-${version}${level}`,
      category: 'PDF/A',
      sortOrder: 10 + parseInt(version, 10) * 10 + (levelRank[level] ?? 0),
    };
  }

  // PDF/UA variants: pdfua-1, pdfua-2, etc. A missing version defaults to "1".
  const pdfuaMatch = id.match(/^pdf[_-]?ua[_-]?(\d+)?$/);
  if (pdfuaMatch) {
    const version = pdfuaMatch[1] ?? '1';
    return {
      displayName: `PDF/UA-${version}`,
      category: 'PDF/UA',
      sortOrder: 200 + parseInt(version, 10),
    };
  }

  // PDF/X variants
  const pdfxMatch = id.match(/^pdf[_-]?x[_-]?(.+)?$/);
  if (pdfxMatch) {
    const version = pdfxMatch[1]?.toUpperCase() ?? '';
    return {
      displayName: `PDF/X${version ? `-${version}` : ''}`,
      category: 'PDF/X',
      sortOrder: 300,
    };
  }

  // PDF/E variants
  const pdfeMatch = id.match(/^pdf[_-]?e[_-]?(.+)?$/);
  if (pdfeMatch) {
    const version = pdfeMatch[1]?.toUpperCase() ?? '';
    return {
      displayName: `PDF/E${version ? `-${version}` : ''}`,
      category: 'PDF/E',
      sortOrder: 400,
    };
  }

  // PDF/VT
  if (id.includes('pdfvt') || id.includes('pdf-vt') || id.includes('pdf_vt')) {
    return { displayName: 'PDF/VT', category: 'PDF/VT', sortOrder: 500 };
  }

  // SEC (EDGAR) compliance
  if (id.includes('sec') || id.includes('edgar')) {
    return { displayName: 'SEC (EDGAR)', category: 'SEC', sortOrder: 600 };
  }

  // Not PDF/A indicator
  if (id === 'not-pdfa' || id === 'not_pdfa') {
    return { displayName: 'PDF/A Detection', category: 'Detection', sortOrder: 1 };
  }

  // Fallback: upper-case the trimmed identifier and turn separators into slashes.
  return {
    displayName: standardId.trim().toUpperCase().replace(/[-_]/g, '/'),
    category: 'Other',
    sortOrder: 999,
  };
};
/**
 * Builds the ordered list of compliance rows shown in the section.
 *
 * Entries from `complianceSummary` are converted first; the informational
 * "not-pdfa" / "not_pdfa" marker is dropped because it is not a compliance
 * check. If no summary entry landed in the "SEC" category and the legacy data
 * carries a boolean `IsPDF/SECCompliant` flag, a synthetic SEC (EDGAR) row is
 * appended. The result is sorted ascending by `sortOrder`.
 *
 * @param complianceSummary Per-standard verification entries, if any.
 * @param legacyCompliance Legacy flag object; only `IsPDF/SECCompliant` is read.
 * @returns Rows ready for rendering; empty when nothing was verified.
 */
const buildComplianceResults = (
  complianceSummary?: PdfComplianceSummary[] | null,
  legacyCompliance?: PdfCompliance | null
): ComplianceCheckResult[] => {
  const results: ComplianceCheckResult[] = [];
  const processedCategories = new Set<string>();

  for (const item of complianceSummary ?? []) {
    // Skip the detection marker in both spellings the display-name parser recognizes,
    // so it can never surface as a pass/fail row.
    const normalizedId = item.Standard.toLowerCase().trim();
    if (normalizedId === 'not-pdfa' || normalizedId === 'not_pdfa') {
      continue;
    }

    const { displayName, category, sortOrder } = parseStandardDisplayName(item.Standard);
    processedCategories.add(category);
    results.push({
      displayName,
      category,
      isCompliant: item.Compliant,
      summary: item.Summary,
      standardId: item.Standard,
      sortOrder,
    });
  }

  // Add SEC compliance from legacy data if the summary did not already supply it.
  // Only a real boolean counts: a present-but-null/undefined flag means "unknown"
  // and must not be reported as a failed check.
  const secFlag = legacyCompliance?.['IsPDF/SECCompliant'];
  if (typeof secFlag === 'boolean' && !processedCategories.has('SEC')) {
    results.push({
      displayName: 'SEC (EDGAR)',
      category: 'SEC',
      isCompliant: secFlag,
      summary: secFlag
        ? 'Document meets SEC EDGAR filing requirements'
        : 'Document does not meet SEC EDGAR filing requirements',
      standardId: 'sec-edgar',
      sortOrder: 600,
    });
  }

  // Sort by sortOrder for consistent display
  results.sort((a, b) => a.sortOrder - b.sortOrder);
  return results;
};
/**
 * Picks the headline PDF/A conformance label from a list of results.
 *
 * Only compliant entries in the "PDF/A" category are considered; the one with
 * the largest `sortOrder` wins, and on a tie the entry that appears first in
 * `results` is kept.
 *
 * @param results Compliance rows to inspect.
 * @returns The winning entry's display name, or `null` when no PDF/A check passed.
 */
const getConformanceLevel = (results: ComplianceCheckResult[]): string | null => {
  let best: ComplianceCheckResult | null = null;
  for (const candidate of results) {
    if (candidate.category !== 'PDF/A' || !candidate.isCompliant) {
      continue;
    }
    // Strictly-greater keeps the earliest entry among equal sort orders.
    if (best === null || candidate.sortOrder > best.sortOrder) {
      best = candidate;
    }
  }
  return best === null ? null : best.displayName;
};
/**
 * Bordered card for a single compliance check: a status icon, the standard's
 * display name, its summary clamped to one line (full text in a tooltip), and
 * a Passed/Failed badge. Teal is used for compliant results, red otherwise.
 */
const ComplianceRow: React.FC<{
  result: ComplianceCheckResult;
}> = ({ result }) => {
  const { t } = useTranslation();

  // Resolve icon, colour and label together so the two outcomes stay in sync.
  const visuals = result.isCompliant
    ? { Glyph: CheckIcon, tone: 'teal', label: t('getPdfInfo.compliance.passed', 'Passed') }
    : { Glyph: CloseIcon, tone: 'red', label: t('getPdfInfo.compliance.failed', 'Failed') };
  const { Glyph, tone, label } = visuals;

  const frameStyle = { borderColor: `var(--mantine-color-${tone}-6)` };
  const badgeGlyph = <Glyph style={{ width: 12, height: 12 }} />;

  const summaryLine = (
    <Tooltip label={result.summary} multiline maw={400} withArrow>
      <Text size="xs" c="dimmed" lineClamp={1} style={{ cursor: 'help' }}>
        {result.summary}
      </Text>
    </Tooltip>
  );

  return (
    <Paper p="sm" radius="sm" withBorder style={frameStyle}>
      <Group justify="space-between" wrap="nowrap">
        <Group gap="sm" wrap="nowrap" style={{ minWidth: 0 }}>
          <ThemeIcon color={tone} variant="light" size="lg" radius="xl">
            <Glyph style={{ fontSize: '1.2rem' }} />
          </ThemeIcon>
          <Stack gap={2} style={{ minWidth: 0 }}>
            <Text size="sm" fw={600} truncate>
              {result.displayName}
            </Text>
            {summaryLine}
          </Stack>
        </Group>
        <Badge color={tone} variant="light" size="md" leftSection={badgeGlyph}>
          {label}
        </Badge>
      </Group>
    </Paper>
  );
};
/**
 * Placeholder card shown when there are no compliance rows to list: a gray
 * info icon next to a translated heading and explanatory line.
 */
const EmptyComplianceState: React.FC = () => {
  const { t } = useTranslation();

  const heading = t('getPdfInfo.compliance.noVerification', 'No Verification Performed');
  const explanation = t(
    'getPdfInfo.compliance.noVerificationDesc',
    'PDF standards compliance was not verified for this document.'
  );

  return (
    <Paper p="md" radius="sm" withBorder>
      <Group gap="sm">
        <ThemeIcon color="gray" variant="light" size="lg" radius="xl">
          <InfoIcon style={{ fontSize: '1.2rem' }} />
        </ThemeIcon>
        <Stack gap={2}>
          <Text size="sm" fw={500}>
            {heading}
          </Text>
          <Text size="xs" c="dimmed">
            {explanation}
          </Text>
        </Stack>
      </Group>
    </Paper>
  );
};
/**
 * "Compliance" section of the PDF info report.
 *
 * Builds the compliance rows from the summary and legacy data, then renders a
 * header (headline PDF/A level plus passed/failed counters) followed by one
 * row per check. When there are no rows, only the empty-state card is shown.
 */
const ComplianceSection: React.FC<ComplianceSectionProps> = ({
  anchorId,
  complianceSummary,
  legacyCompliance,
}) => {
  const { t } = useTranslation();

  const complianceResults = useMemo(
    () => buildComplianceResults(complianceSummary, legacyCompliance),
    [complianceSummary, legacyCompliance]
  );
  const conformanceLevel = useMemo(
    () => getConformanceLevel(complianceResults),
    [complianceResults]
  );

  // Every row is either passed or failed, so the failures are the remainder.
  const totalChecks = complianceResults.length;
  const passedCount = complianceResults.reduce(
    (tally, entry) => (entry.isCompliant ? tally + 1 : tally),
    0
  );
  const failedCount = totalChecks - passedCount;
  const hasResults = totalChecks > 0;

  // Summary header, rendered only when there is at least one row.
  const summaryHeader = hasResults ? (
    <>
      <Group justify="space-between" wrap="wrap" gap="xs">
        <Group gap="xs">
          {conformanceLevel && (
            <Badge color="cyan" variant="light" size="lg">
              {conformanceLevel}
            </Badge>
          )}
          {passedCount > 0 && (
            <Badge color="teal" variant="outline" size="sm">
              {passedCount} {t('getPdfInfo.compliance.passedCount', 'passed')}
            </Badge>
          )}
          {failedCount > 0 && (
            <Badge color="red" variant="outline" size="sm">
              {failedCount} {t('getPdfInfo.compliance.failedCount', 'failed')}
            </Badge>
          )}
        </Group>
      </Group>
      <Divider />
    </>
  ) : null;

  // Row list, or the empty-state card when nothing was verified.
  const resultsBody = hasResults ? (
    <Stack gap="xs">
      {complianceResults.map((result, index) => (
        <ComplianceRow key={`${result.standardId}-${index}`} result={result} />
      ))}
    </Stack>
  ) : (
    <EmptyComplianceState />
  );

  return (
    <SectionBlock title={t('getPdfInfo.sections.compliance', 'Compliance')} anchorId={anchorId}>
      <Stack gap="md">
        {summaryHeader}
        {resultsBody}
      </Stack>
    </SectionBlock>
  );
};
export default ComplianceSection;

View File

@@ -1,7 +1,7 @@
import React, { useMemo } from 'react';
import { Stack, Text } from '@mantine/core';
import { useTranslation } from 'react-i18next';
import type { ParsedPdfSections, PdfFontInfo } from '@app/types/getPdfInfo';
import type { ParsedPdfSections, PdfFontInfo, PdfComplianceSummary } from '@app/types/getPdfInfo';
import SectionBlock from '@app/components/tools/getPdfInfo/shared/SectionBlock';
import KeyValueList from '@app/components/tools/getPdfInfo/shared/KeyValueList';
@@ -10,6 +10,52 @@ interface SummarySectionProps {
hideSectionTitle?: boolean;
}
/**
 * Condenses the compliance summary entries into the figures used by the
 * one-line summary text.
 *
 * The informational "not-pdfa" / "not_pdfa" marker is ignored (compared
 * case-insensitively, surrounding whitespace stripped). PDF/A and PDF/UA
 * identifiers are formatted as "PDF/A-<version><LEVEL>" and "PDF/UA-<version>";
 * any other identifier is trimmed and upper-cased.
 *
 * @param complianceSummary Per-standard verification entries, if any.
 * @returns `hasCompliance` is true when at least one real check is present;
 *          `passedStandards` lists display names of compliant standards;
 *          `failedCount` is the number of non-compliant checks.
 */
const getComplianceSummaryInfo = (
  complianceSummary?: PdfComplianceSummary[] | null
): { hasCompliance: boolean; passedStandards: string[]; failedCount: number } => {
  if (!complianceSummary || complianceSummary.length === 0) {
    return { hasCompliance: false, passedStandards: [], failedCount: 0 };
  }

  // Filter out the informational marker in both of its spellings.
  const actualChecks = complianceSummary.filter(item => {
    const id = item.Standard.toLowerCase().trim();
    return id !== 'not-pdfa' && id !== 'not_pdfa';
  });
  if (actualChecks.length === 0) {
    return { hasCompliance: false, passedStandards: [], failedCount: 0 };
  }

  const passedStandards = actualChecks
    .filter(item => item.Compliant)
    .map(item => {
      // Format standard ID to display name (e.g., "pdfa-3b" -> "PDF/A-3B").
      const id = item.Standard.toLowerCase().trim();
      // The level class includes "e" so PDF/A-4e is formatted like its siblings.
      const pdfaMatch = id.match(/^pdf[_-]?a[_-]?(\d+)([abefu])?$/);
      if (pdfaMatch) {
        return `PDF/A-${pdfaMatch[1]}${pdfaMatch[2]?.toUpperCase() ?? ''}`;
      }
      const pdfuaMatch = id.match(/^pdf[_-]?ua[_-]?(\d+)?$/);
      if (pdfuaMatch) {
        return `PDF/UA-${pdfuaMatch[1] ?? '1'}`;
      }
      return item.Standard.trim().toUpperCase();
    });

  const failedCount = actualChecks.filter(item => !item.Compliant).length;
  return { hasCompliance: true, passedStandards, failedCount };
};
interface SummarySectionProps {
sections: ParsedPdfSections;
hideSectionTitle?: boolean;
}
const SummarySection: React.FC<SummarySectionProps> = ({ sections, hideSectionTitle = false }) => {
const { t } = useTranslation();
@@ -54,8 +100,16 @@ const SummarySection: React.FC<SummarySectionProps> = ({ sections, hideSectionTi
? t('getPdfInfo.summary.permsRestricted', '{{count}} restrictions', { count: restrictedCount })
: t('getPdfInfo.summary.permsMixed', 'Some permissions restricted');
const complianceText = sections.compliance && Object.values(sections.compliance).some(Boolean)
? t('getPdfInfo.summary.hasCompliance', 'Has compliance standards')
// Use authoritative VeraPDF results for compliance summary
const complianceInfo = getComplianceSummaryInfo(summary.Compliance);
const complianceText = complianceInfo.hasCompliance
? complianceInfo.passedStandards.length > 0
? t('getPdfInfo.summary.compliancePassed', '{{standards}} compliant', {
standards: complianceInfo.passedStandards.join(', ')
})
: t('getPdfInfo.summary.complianceChecked', 'Standards verified ({{failed}} failed)', {
failed: complianceInfo.failedCount
})
: t('getPdfInfo.summary.noCompliance', 'No Compliance Standards');
// Helper to get first page data

View File

@@ -55,16 +55,16 @@ export interface PdfPermissions {
/** Compliance section */
export interface PdfCompliance {
'IsPDF/ACompliant'?: boolean;
'PDF/AConformanceLevel'?: string;
'IsPDF/AValidated'?: boolean;
'IsPDF/XCompliant'?: boolean;
'IsPDF/ECompliant'?: boolean;
'IsPDF/VTCompliant'?: boolean;
'IsPDF/UACompliant'?: boolean;
'IsPDF/BCompliant'?: boolean;
'IsPDF/SECCompliant'?: boolean;
[key: string]: unknown;
"IsPDF/ACompliant"?: boolean;
"PDF/AConformanceLevel"?: string;
"IsPDF/UACompliant"?: boolean;
"IsPDF/XCompliant"?: boolean;
"IsPDF/ECompliant"?: boolean;
"IsPDF/VTCompliant"?: boolean;
"IsPDF/BCompliant"?: boolean;
"IsPDF/SECCompliant"?: boolean;
// VeraPDF verified content - keys will be standard IDs like "pdfa-2b", "pdfua-1"
[key: string]: boolean | string | unknown;
}
/** Font info within a page */
@@ -207,14 +207,19 @@ export interface PdfTocEntry {
[key: string]: unknown;
}
/** Compliance summary entry: the verification outcome for one standard. */
export interface PdfComplianceSummary {
/** Identifier of the standard that was checked. */
Standard: string;
/** Whether the document was found compliant with that standard. */
Compliant: boolean;
/** Summary text describing the verification result. */
Summary: string;
}
/** Summary data section */
export interface PdfSummaryData {
encrypted?: boolean;
restrictedPermissions?: string[];
restrictedPermissionsCount?: number;
standardCompliance?: string;
standardPurpose?: string;
standardValidationPassed?: boolean;
Compliance?: PdfComplianceSummary[];
}
/** Form fields section */