Stirling-PDF/frontend/src/core/utils/textDiff.ts
EthanHealy01 a5e2b54274
Feature/v2/compare tool (#4751)
# Description of Changes

- Adds the compare tool.
---

## Checklist

### General

- [ ] I have read the [Contribution
Guidelines](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/CONTRIBUTING.md)
- [ ] I have read the [Stirling-PDF Developer
Guide](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/devGuide/DeveloperGuide.md)
(if applicable)
- [ ] I have read the [How to add new languages to
Stirling-PDF](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/devGuide/HowToAddNewLanguage.md)
(if applicable)
- [ ] I have performed a self-review of my own code
- [ ] My changes generate no new warnings

### Documentation

- [ ] I have updated relevant docs on [Stirling-PDF's doc
repo](https://github.com/Stirling-Tools/Stirling-Tools.github.io/blob/main/docs/)
(if functionality has heavily changed)
- [ ] I have read the section [Add New Translation
Tags](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/devGuide/HowToAddNewLanguage.md#add-new-translation-tags)
(for new translation tags only)

### UI Changes (if applicable)

- [ ] Screenshots or videos demonstrating the UI changes are attached
(e.g., as comments or direct attachments in the PR)

### Testing (if applicable)

- [ ] I have tested my changes locally. Refer to the [Testing
Guide](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/devGuide/DeveloperGuide.md#6-testing)
for more details.

---------

Co-authored-by: James Brunton <jbrunton96@gmail.com>
2025-11-12 14:54:01 +00:00

51 lines
1.6 KiB
TypeScript

// Shared text diff and normalization utilities for compare tool
/**
 * Reports whether `word` should be attached directly to the text before it,
 * i.e. without a separating space, when words are joined back together.
 *
 * @param word - The word about to be appended.
 * @returns `true` when `word` starts with one of the closing punctuation
 *   characters `. , ! ? ; : ) ] }` or with an apostrophe; `false` otherwise
 *   (including for the empty string).
 */
export const shouldConcatWithoutSpace = (word: string) => {
  const startsWithClosingPunctuation = /^[.,!?;:)\]}]/.test(word);
  // The apostrophe check also covers the possessive suffix `'s`, so no
  // separate equality test for it is needed.
  const startsWithApostrophe = word.startsWith("'");
  return startsWithClosingPunctuation || startsWithApostrophe;
};
/**
 * Appends `word` to `existing` and returns the combined text.
 *
 * @param existing - Text accumulated so far; when it is empty, `word` is
 *   returned on its own.
 * @param word - The word to add.
 * @returns `existing` followed by `word`, separated by a single space unless
 *   `shouldConcatWithoutSpace(word)` says the word attaches directly.
 */
export const appendWord = (existing: string, word: string) => {
  if (existing) {
    const separator = shouldConcatWithoutSpace(word) ? '' : ' ';
    return `${existing}${separator}${word}`;
  }
  return word;
};
/**
 * Splits `text` into words on runs of whitespace.
 *
 * @param text - The text to split.
 * @returns The non-empty pieces in their original order; an empty or
 *   whitespace-only input yields an empty array.
 */
export const tokenize = (text: string): string[] => {
  const words: string[] = [];
  for (const piece of text.split(/\s+/)) {
    // Leading or trailing whitespace makes split() emit empty strings; skip them.
    if (piece) words.push(piece);
  }
  return words;
};
/**
 * How a word relates to the two inputs of a diff: `unchanged` when it is
 * matched in both, `removed` when it comes only from the first input, and
 * `added` when it comes only from the second.
 */
type TokenType = 'unchanged' | 'removed' | 'added';
/** One word of diff output together with its classification. */
export interface LocalToken { type: TokenType; text: string }
/**
 * Builds the longest-common-subsequence length table for two word lists.
 *
 * @param a - First word list.
 * @param b - Second word list.
 * @returns A table with `a.length + 1` rows and `b.length + 1` columns where
 *   entry `[i][j]` is the LCS length of the first `i` words of `a` and the
 *   first `j` words of `b`. Row 0 and column 0 are all zeros.
 */
const buildLcsMatrix = (a: string[], b: string[]) => {
  const height = a.length + 1;
  const width = b.length + 1;
  const table: number[][] = Array.from({ length: height }, () =>
    new Array<number>(width).fill(0),
  );
  for (let row = 1; row < height; row++) {
    const wordA = a[row - 1];
    for (let col = 1; col < width; col++) {
      if (wordA === b[col - 1]) {
        // Matching words extend the best result of both shorter prefixes.
        table[row][col] = table[row - 1][col - 1] + 1;
      } else {
        // Otherwise carry over the better of dropping a word from either side.
        table[row][col] = Math.max(table[row][col - 1], table[row - 1][col]);
      }
    }
  }
  return table;
};
/**
 * Computes a word-level diff between two word lists using their
 * longest-common-subsequence table.
 *
 * @param a - Words of the first (original) text.
 * @param b - Words of the second (changed) text.
 * @returns Tokens in reading order: `unchanged` for words matched in both
 *   lists, `removed` for words only in `a`, and `added` for words only in `b`.
 */
export const diffWords = (a: string[], b: string[]): LocalToken[] => {
  const matrix = buildLcsMatrix(a, b);
  // Backtracking walks from the last word to the first, so tokens are
  // collected in reverse and flipped once at the end. This avoids re-shifting
  // the whole array on every step, as a front insertion would.
  const reversedTokens: LocalToken[] = [];
  let i = a.length;
  let j = b.length;
  while (i > 0 || j > 0) {
    if (i > 0 && j > 0 && a[i - 1] === b[j - 1]) {
      reversedTokens.push({ type: 'unchanged', text: a[i - 1] });
      i -= 1;
      j -= 1;
    } else if (j > 0 && (i === 0 || matrix[i][j] === matrix[i][j - 1])) {
      // Dropping b's word keeps the LCS length (or `a` is exhausted): it was added.
      reversedTokens.push({ type: 'added', text: b[j - 1] });
      j -= 1;
    } else {
      // Reaching here implies i > 0: either `b` is exhausted, or both lists
      // have words left and a's word is the one outside the LCS.
      reversedTokens.push({ type: 'removed', text: a[i - 1] });
      i -= 1;
    }
  }
  return reversedTokens.reverse();
};