Mirror of https://github.com/Frooodle/Stirling-PDF.git (synced 2025-12-18 20:04:17 +01:00)
type3 text edit init
This commit is contained in:
parent a6bee1436f
commit e915e1aa7d
14 .gitignore vendored
@@ -210,3 +210,17 @@ node_modules/
test_batch.json
*.backup.*.json
frontend/public/locales/*/translation.backup*.json

# Development/build artifacts
.gradle-cache/
scripts/pdf-collection/
**/tmp/
*.backup

# Type3 development data
docs/type3/signatures/

# Type3 sample PDFs (development only)
**/type3/samples/
@@ -183,6 +183,10 @@ stirling:
  python-command: /opt/venv/bin/python3 # Python interpreter path
  python-script: /scripts/convert_cff_to_ttf.py # Path to font wrapping script
  fontforge-command: fontforge # Override if FontForge is installed under a different name/path
  type3:
    library:
      enabled: true # Match common Type3 fonts against the built-in library of converted programs
      index: classpath:/type3/library/index.json # Override to point at a custom index.json (supports http:, file:, classpath:)

ui:
  appNameNavbar: '' # name displayed on the navigation bar
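The type3.library.index setting accepts classpath:, file: and http: locations. As a rough illustration only (the real wiring lives in the service classes later in this diff, some of which are suppressed as too large), a Spring ResourceLoader can resolve such a location and Jackson can parse the entry list. The property placeholder, bean name and map-based entry type below are hypothetical, not taken from the commit.

package stirling.software.SPDF.service.pdfjson.type3;

import java.io.IOException;
import java.io.InputStream;
import java.util.List;
import java.util.Map;

import org.springframework.beans.factory.annotation.Value;
import org.springframework.core.io.Resource;
import org.springframework.core.io.ResourceLoader;
import org.springframework.stereotype.Component;

import com.fasterxml.jackson.core.type.TypeReference;
import com.fasterxml.jackson.databind.ObjectMapper;

// Hypothetical loader sketch, not part of the commit.
@Component
class Type3LibraryIndexLoaderSketch {

    private final ResourceLoader resourceLoader;
    private final String indexLocation;

    Type3LibraryIndexLoaderSketch(
            ResourceLoader resourceLoader,
            // Assumed property path; the actual binding key is not shown in this diff.
            @Value("${stirling.type3.library.index:classpath:/type3/library/index.json}")
                    String indexLocation) {
        this.resourceLoader = resourceLoader;
        this.indexLocation = indexLocation;
    }

    // Resolves the configured location and parses index.json into generic map entries.
    List<Map<String, Object>> loadIndex() throws IOException {
        Resource resource = resourceLoader.getResource(indexLocation);
        try (InputStream in = resource.getInputStream()) {
            return new ObjectMapper()
                    .readValue(in, new TypeReference<List<Map<String, Object>>>() {});
        }
    }
}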
592 app/core/src/main/resources/type3/catalogue.json Normal file
@ -0,0 +1,592 @@
|
||||
[
|
||||
{
|
||||
"source": "01_Matplotlib.pdf",
|
||||
"fontName": "DejaVuSans",
|
||||
"encoding": "1867"
|
||||
},
|
||||
{
|
||||
"source": "01_Matplotlib.pdf",
|
||||
"fontName": "DejaVuSans",
|
||||
"encoding": "1888"
|
||||
},
|
||||
{
|
||||
"source": "01_Matplotlib.pdf",
|
||||
"fontName": "DejaVuSans",
|
||||
"encoding": "2029"
|
||||
},
|
||||
{
|
||||
"source": "01_Matplotlib.pdf",
|
||||
"fontName": "DejaVuSans",
|
||||
"encoding": "2069"
|
||||
},
|
||||
{
|
||||
"source": "01_Matplotlib.pdf",
|
||||
"fontName": "DejaVuSans",
|
||||
"encoding": "2089"
|
||||
},
|
||||
{
|
||||
"source": "01_Matplotlib.pdf",
|
||||
"fontName": "DejaVuSans",
|
||||
"encoding": "2116"
|
||||
},
|
||||
{
|
||||
"source": "01_Matplotlib.pdf",
|
||||
"fontName": "DejaVuSansMono",
|
||||
"encoding": "2174"
|
||||
},
|
||||
{
|
||||
"source": "01_Matplotlib.pdf",
|
||||
"fontName": "DejaVuSans-Oblique",
|
||||
"encoding": "2192"
|
||||
},
|
||||
{
|
||||
"source": "01_Matplotlib.pdf",
|
||||
"fontName": "DejaVuSans",
|
||||
"encoding": "2209"
|
||||
},
|
||||
{
|
||||
"source": "01_Matplotlib.pdf",
|
||||
"fontName": "Cmsy10",
|
||||
"encoding": "2228"
|
||||
},
|
||||
{
|
||||
"source": "01_Matplotlib.pdf",
|
||||
"fontName": "STIXSizeThreeSym-Regular",
|
||||
"encoding": "2233"
|
||||
},
|
||||
{
|
||||
"source": "01_Matplotlib.pdf",
|
||||
"fontName": "DejaVuSansDisplay",
|
||||
"encoding": "2239"
|
||||
},
|
||||
{
|
||||
"source": "01_Matplotlib.pdf",
|
||||
"fontName": "DejaVuSans",
|
||||
"encoding": "4403"
|
||||
},
|
||||
{
|
||||
"source": "01_Matplotlib.pdf",
|
||||
"fontName": "DejaVuSans",
|
||||
"encoding": "4438"
|
||||
},
|
||||
{
|
||||
"source": "01_Matplotlib.pdf",
|
||||
"fontName": "DejaVuSans",
|
||||
"encoding": "4519"
|
||||
},
|
||||
{
|
||||
"source": "01_Matplotlib.pdf",
|
||||
"fontName": "DejaVuSans",
|
||||
"encoding": "4685"
|
||||
},
|
||||
{
|
||||
"source": "01_Matplotlib.pdf",
|
||||
"fontName": "DejaVuSans",
|
||||
"encoding": "4733"
|
||||
},
|
||||
{
|
||||
"source": "01_Matplotlib.pdf",
|
||||
"fontName": "DejaVuSans",
|
||||
"encoding": "4782"
|
||||
},
|
||||
{
|
||||
"source": "01_Matplotlib.pdf",
|
||||
"fontName": "DejaVuSans",
|
||||
"encoding": "4813"
|
||||
},
|
||||
{
|
||||
"source": "01_Matplotlib.pdf",
|
||||
"fontName": "DejaVuSans",
|
||||
"encoding": "4834"
|
||||
},
|
||||
{
|
||||
"source": "01_Matplotlib.pdf",
|
||||
"fontName": "DejaVuSansMono",
|
||||
"encoding": "4878"
|
||||
},
|
||||
{
|
||||
"source": "01_Matplotlib.pdf",
|
||||
"fontName": "DejaVuSans",
|
||||
"encoding": "4906"
|
||||
},
|
||||
{
|
||||
"source": "01_Matplotlib.pdf",
|
||||
"fontName": "DejaVuSans",
|
||||
"encoding": "4929"
|
||||
},
|
||||
{
|
||||
"source": "01_Matplotlib.pdf",
|
||||
"fontName": "DejaVuSans",
|
||||
"encoding": "4971"
|
||||
},
|
||||
{
|
||||
"source": "01_Matplotlib.pdf",
|
||||
"fontName": "DejaVuSans",
|
||||
"encoding": "5001"
|
||||
},
|
||||
{
|
||||
"source": "01_Matplotlib.pdf",
|
||||
"fontName": "DejaVuSans",
|
||||
"encoding": "5030"
|
||||
},
|
||||
{
|
||||
"source": "01_Matplotlib.pdf",
|
||||
"fontName": "DejaVuSans",
|
||||
"encoding": "5052"
|
||||
},
|
||||
{
|
||||
"source": "01_Matplotlib.pdf",
|
||||
"fontName": "DejaVuSans",
|
||||
"encoding": "5083"
|
||||
},
|
||||
{
|
||||
"source": "01_Matplotlib.pdf",
|
||||
"fontName": "DejaVuSans",
|
||||
"encoding": "5116"
|
||||
},
|
||||
{
|
||||
"source": "01_Matplotlib.pdf",
|
||||
"fontName": "DejaVuSans",
|
||||
"encoding": "5143"
|
||||
},
|
||||
{
|
||||
"source": "01_Matplotlib.pdf",
|
||||
"fontName": "DejaVuSans",
|
||||
"encoding": "5175"
|
||||
},
|
||||
{
|
||||
"source": "01_Matplotlib.pdf",
|
||||
"fontName": "DejaVuSans",
|
||||
"encoding": "5207"
|
||||
},
|
||||
{
|
||||
"source": "01_Matplotlib.pdf",
|
||||
"fontName": "DejaVuSans",
|
||||
"encoding": "5243"
|
||||
},
|
||||
{
|
||||
"source": "01_Matplotlib.pdf",
|
||||
"fontName": "Cmr10",
|
||||
"encoding": "5263"
|
||||
},
|
||||
{
|
||||
"source": "01_Matplotlib.pdf",
|
||||
"fontName": "Cmex10",
|
||||
"encoding": "5270"
|
||||
},
|
||||
{
|
||||
"source": "01_Matplotlib.pdf",
|
||||
"fontName": "Cmsy10",
|
||||
"encoding": "5275"
|
||||
},
|
||||
{
|
||||
"source": "01_Matplotlib.pdf",
|
||||
"fontName": "Cmmi10",
|
||||
"encoding": "5280"
|
||||
},
|
||||
{
|
||||
"source": "01_Matplotlib.pdf",
|
||||
"fontName": "DejaVuSans",
|
||||
"encoding": "5295"
|
||||
},
|
||||
{
|
||||
"source": "01_Matplotlib.pdf",
|
||||
"fontName": "DejaVuSans-Oblique",
|
||||
"encoding": "5313"
|
||||
},
|
||||
{
|
||||
"source": "01_Matplotlib.pdf",
|
||||
"fontName": "DejaVuSansDisplay",
|
||||
"encoding": "5319"
|
||||
},
|
||||
{
|
||||
"source": "01_Matplotlib.pdf",
|
||||
"fontName": "DejaVuSans",
|
||||
"encoding": "5334"
|
||||
},
|
||||
{
|
||||
"source": "01_Matplotlib.pdf",
|
||||
"fontName": "DejaVuSans",
|
||||
"encoding": "5370"
|
||||
},
|
||||
{
|
||||
"source": "01_Matplotlib.pdf",
|
||||
"fontName": "DejaVuSans",
|
||||
"encoding": "5399"
|
||||
},
|
||||
{
|
||||
"source": "01_Matplotlib.pdf",
|
||||
"fontName": "DejaVuSans",
|
||||
"encoding": "5427"
|
||||
},
|
||||
{
|
||||
"source": "01_Matplotlib.pdf",
|
||||
"fontName": "DejaVuSans",
|
||||
"encoding": "5459"
|
||||
},
|
||||
{
|
||||
"source": "01_Matplotlib.pdf",
|
||||
"fontName": "DejaVuSans",
|
||||
"encoding": "5486"
|
||||
},
|
||||
{
|
||||
"source": "01_Matplotlib.pdf",
|
||||
"fontName": "DejaVuSans",
|
||||
"encoding": "5513"
|
||||
},
|
||||
{
|
||||
"source": "01_Matplotlib.pdf",
|
||||
"fontName": "DejaVuSans",
|
||||
"encoding": "5554"
|
||||
},
|
||||
{
|
||||
"source": "01_Matplotlib.pdf",
|
||||
"fontName": "DejaVuSans",
|
||||
"encoding": "5601"
|
||||
},
|
||||
{
|
||||
"source": "01_Matplotlib.pdf",
|
||||
"fontName": "DejaVuSans",
|
||||
"encoding": "5647"
|
||||
},
|
||||
{
|
||||
"source": "01_Matplotlib.pdf",
|
||||
"fontName": "DejaVuSans",
|
||||
"encoding": "5694"
|
||||
},
|
||||
{
|
||||
"source": "01_Matplotlib.pdf",
|
||||
"fontName": "DejaVuSans",
|
||||
"encoding": "5732"
|
||||
},
|
||||
{
|
||||
"source": "01_Matplotlib.pdf",
|
||||
"fontName": "DejaVuSans",
|
||||
"encoding": "5771"
|
||||
},
|
||||
{
|
||||
"source": "01_Matplotlib.pdf",
|
||||
"fontName": "DejaVuSans",
|
||||
"encoding": "5803"
|
||||
},
|
||||
{
|
||||
"source": "01_Matplotlib.pdf",
|
||||
"fontName": "DejaVuSans",
|
||||
"encoding": "5861"
|
||||
},
|
||||
{
|
||||
"source": "01_Matplotlib.pdf",
|
||||
"fontName": "DejaVuSans",
|
||||
"encoding": "5904"
|
||||
},
|
||||
{
|
||||
"source": "01_Matplotlib.pdf",
|
||||
"fontName": "DejaVuSans",
|
||||
"encoding": "5924"
|
||||
},
|
||||
{
|
||||
"source": "01_Matplotlib.pdf",
|
||||
"fontName": "DejaVuSans",
|
||||
"encoding": "5951"
|
||||
},
|
||||
{
|
||||
"source": "01_Matplotlib.pdf",
|
||||
"fontName": "DejaVuSans",
|
||||
"encoding": "6084"
|
||||
},
|
||||
{
|
||||
"source": "01_Matplotlib.pdf",
|
||||
"fontName": "DejaVuSans",
|
||||
"encoding": "6445"
|
||||
},
|
||||
{
|
||||
"source": "01_Matplotlib.pdf",
|
||||
"fontName": "DejaVuSans",
|
||||
"encoding": "7195"
|
||||
},
|
||||
{
|
||||
"source": "01_Matplotlib.pdf",
|
||||
"fontName": "DejaVuSans",
|
||||
"encoding": "7409"
|
||||
},
|
||||
{
|
||||
"source": "01_Matplotlib.pdf",
|
||||
"fontName": "DejaVuSans",
|
||||
"encoding": "7474"
|
||||
},
|
||||
{
|
||||
"source": "01_Matplotlib.pdf",
|
||||
"fontName": "DejaVuSans",
|
||||
"encoding": "7708"
|
||||
},
|
||||
{
|
||||
"source": "01_Matplotlib.pdf",
|
||||
"fontName": "DejaVuSans",
|
||||
"encoding": "7747"
|
||||
},
|
||||
{
|
||||
"source": "01_Matplotlib.pdf",
|
||||
"fontName": "DejaVuSans",
|
||||
"encoding": "7885"
|
||||
},
|
||||
{
|
||||
"source": "01_Matplotlib.pdf",
|
||||
"fontName": "DejaVuSans",
|
||||
"encoding": "9029"
|
||||
},
|
||||
{
|
||||
"source": "01_Matplotlib.pdf",
|
||||
"fontName": "DejaVuSans",
|
||||
"encoding": "9617"
|
||||
},
|
||||
{
|
||||
"source": "01_Matplotlib.pdf",
|
||||
"fontName": "DejaVuSans",
|
||||
"encoding": "10460"
|
||||
},
|
||||
{
|
||||
"source": "01_Matplotlib.pdf",
|
||||
"fontName": "DejaVuSans",
|
||||
"encoding": "11445"
|
||||
},
|
||||
{
|
||||
"source": "01_Matplotlib.pdf",
|
||||
"fontName": "DejaVuSans-Bold",
|
||||
"encoding": "11486"
|
||||
},
|
||||
{
|
||||
"source": "01_Matplotlib.pdf",
|
||||
"fontName": "DejaVuSans",
|
||||
"encoding": "11497"
|
||||
},
|
||||
{
|
||||
"source": "01_Matplotlib.pdf",
|
||||
"fontName": "DejaVuSans",
|
||||
"encoding": "11543"
|
||||
},
|
||||
{
|
||||
"source": "01_Matplotlib.pdf",
|
||||
"fontName": "DejaVuSans",
|
||||
"encoding": "12280"
|
||||
},
|
||||
{
|
||||
"source": "01_Matplotlib.pdf",
|
||||
"fontName": "DejaVuSans",
|
||||
"encoding": "12301"
|
||||
},
|
||||
{
|
||||
"source": "01_Matplotlib.pdf",
|
||||
"fontName": "DejaVuSans",
|
||||
"encoding": "12350"
|
||||
},
|
||||
{
|
||||
"source": "01_Matplotlib.pdf",
|
||||
"fontName": "DejaVuSans",
|
||||
"encoding": "12372"
|
||||
},
|
||||
{
|
||||
"source": "01_Matplotlib.pdf",
|
||||
"fontName": "DejaVuSans",
|
||||
"encoding": "12395"
|
||||
},
|
||||
{
|
||||
"source": "01_Matplotlib.pdf",
|
||||
"fontName": "DejaVuSans",
|
||||
"encoding": "12416"
|
||||
},
|
||||
{
|
||||
"source": "01_Matplotlib.pdf",
|
||||
"fontName": "DejaVuSans",
|
||||
"encoding": "13324"
|
||||
},
|
||||
{
|
||||
"source": "02_Matplotlib.pdf",
|
||||
"fontName": "DejaVuSans",
|
||||
"encoding": "3214"
|
||||
},
|
||||
{
|
||||
"source": "02_Matplotlib.pdf",
|
||||
"fontName": "DejaVuSans",
|
||||
"encoding": "3251"
|
||||
},
|
||||
{
|
||||
"source": "02_Matplotlib.pdf",
|
||||
"fontName": "DejaVuSans",
|
||||
"encoding": "7190"
|
||||
},
|
||||
{
|
||||
"source": "02_Matplotlib.pdf",
|
||||
"fontName": "DejaVuSans",
|
||||
"encoding": "9937"
|
||||
},
|
||||
{
|
||||
"source": "02_Matplotlib.pdf",
|
||||
"fontName": "DejaVuSans",
|
||||
"encoding": "10792"
|
||||
},
|
||||
{
|
||||
"source": "02_Matplotlib.pdf",
|
||||
"fontName": "DejaVuSans",
|
||||
"encoding": "10852"
|
||||
},
|
||||
{
|
||||
"source": "02_Matplotlib.pdf",
|
||||
"fontName": "DejaVuSans",
|
||||
"encoding": "14712"
|
||||
},
|
||||
{
|
||||
"source": "02_Matplotlib.pdf",
|
||||
"fontName": "DejaVuSans",
|
||||
"encoding": "18396"
|
||||
},
|
||||
{
|
||||
"source": "02_Matplotlib.pdf",
|
||||
"fontName": "DejaVuSans",
|
||||
"encoding": "18719"
|
||||
},
|
||||
{
|
||||
"source": "02_Matplotlib.pdf",
|
||||
"fontName": "DejaVuSans-Bold",
|
||||
"encoding": "18741"
|
||||
},
|
||||
{
|
||||
"source": "02_Matplotlib.pdf",
|
||||
"fontName": "DejaVuSans",
|
||||
"encoding": "18778"
|
||||
},
|
||||
{
|
||||
"source": "02_Matplotlib.pdf",
|
||||
"fontName": "DejaVuSans",
|
||||
"encoding": "18804"
|
||||
},
|
||||
{
|
||||
"source": "02_Matplotlib.pdf",
|
||||
"fontName": "DejaVuSans",
|
||||
"encoding": "20974"
|
||||
},
|
||||
{
|
||||
"source": "02_Matplotlib.pdf",
|
||||
"fontName": "DejaVuSans",
|
||||
"encoding": "20993"
|
||||
},
|
||||
{
|
||||
"source": "02_Matplotlib.pdf",
|
||||
"fontName": "DejaVuSans",
|
||||
"encoding": "21093"
|
||||
},
|
||||
{
|
||||
"source": "02_Matplotlib.pdf",
|
||||
"fontName": "DejaVuSans",
|
||||
"encoding": "21117"
|
||||
},
|
||||
{
|
||||
"source": "02_Matplotlib.pdf",
|
||||
"fontName": "DejaVuSans",
|
||||
"encoding": "21141"
|
||||
},
|
||||
{
|
||||
"source": "02_Matplotlib.pdf",
|
||||
"fontName": "DejaVuSans",
|
||||
"encoding": "21174"
|
||||
},
|
||||
{
|
||||
"source": "03_handout-beginner.pdf",
|
||||
"fontName": "BMQQDV+DejaVuSans",
|
||||
"encoding": "17"
|
||||
},
|
||||
{
|
||||
"source": "03_handout-beginner.pdf",
|
||||
"fontName": "EVICAO+DejaVuSans-Bold",
|
||||
"encoding": "133"
|
||||
},
|
||||
{
|
||||
"source": "03_handout-beginner.pdf",
|
||||
"fontName": "BMQQDV+DejaVuSans",
|
||||
"encoding": "152"
|
||||
},
|
||||
{
|
||||
"source": "04_handout-intermediate.pdf",
|
||||
"fontName": "BMQQDV+DejaVuSans",
|
||||
"encoding": "13"
|
||||
},
|
||||
{
|
||||
"source": "04_handout-intermediate.pdf",
|
||||
"fontName": "BMQQDV+DejaVuSans",
|
||||
"encoding": "85"
|
||||
},
|
||||
{
|
||||
"source": "04_handout-intermediate.pdf",
|
||||
"fontName": "BMQQDV+DejaVuSans",
|
||||
"encoding": "104"
|
||||
},
|
||||
{
|
||||
"source": "04_handout-intermediate.pdf",
|
||||
"fontName": "BMQQDV+DejaVuSans",
|
||||
"encoding": "121"
|
||||
},
|
||||
{
|
||||
"source": "04_handout-intermediate.pdf",
|
||||
"fontName": "BMQQDV+DejaVuSans",
|
||||
"encoding": "135"
|
||||
},
|
||||
{
|
||||
"source": "04_handout-intermediate.pdf",
|
||||
"fontName": "BMQQDV+DejaVuSans",
|
||||
"encoding": "159"
|
||||
},
|
||||
{
|
||||
"source": "04_handout-intermediate.pdf",
|
||||
"fontName": "BMQQDV+DejaVuSans",
|
||||
"encoding": "179"
|
||||
},
|
||||
{
|
||||
"source": "04_handout-intermediate.pdf",
|
||||
"fontName": "BMQQDV+DejaVuSans",
|
||||
"encoding": "198"
|
||||
},
|
||||
{
|
||||
"source": "04_handout-intermediate.pdf",
|
||||
"fontName": "NVMZUP+SourceCodePro-Regular",
|
||||
"encoding": "208"
|
||||
},
|
||||
{
|
||||
"source": "04_handout-intermediate.pdf",
|
||||
"fontName": "BMQQDV+DejaVuSans",
|
||||
"encoding": "231"
|
||||
},
|
||||
{
|
||||
"source": "04_handout-intermediate.pdf",
|
||||
"fontName": "NVMZUP+SourceCodePro-Regular",
|
||||
"encoding": "241"
|
||||
},
|
||||
{
|
||||
"source": "07_matplotlib.pdf",
|
||||
"fontName": "SauceCodePowerline-Bold",
|
||||
"encoding": "22"
|
||||
},
|
||||
{
|
||||
"source": "07_matplotlib.pdf",
|
||||
"fontName": "SauceCodePowerline-Regular",
|
||||
"encoding": "47"
|
||||
},
|
||||
{
|
||||
"source": "07_matplotlib.pdf",
|
||||
"fontName": "SauceCodePowerline-Regular",
|
||||
"encoding": "65"
|
||||
},
|
||||
{
|
||||
"source": "07_matplotlib.pdf",
|
||||
"fontName": "SauceCodePowerline-Bold",
|
||||
"encoding": "110"
|
||||
},
|
||||
{
|
||||
"source": "08_matplotlib.pdf",
|
||||
"fontName": "F36",
|
||||
"encoding": "12"
|
||||
},
|
||||
{
|
||||
"source": "08_matplotlib.pdf",
|
||||
"fontName": "F59",
|
||||
"encoding": "42"
|
||||
}
|
||||
]
|
||||
@@ -0,0 +1,25 @@
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 3.2 Final//EN">
<html>
<head>
<title>Index of /sites/ctan.org/fonts/cm/ps-type1/bakoma</title>
</head>
<body>
<h1>Index of /sites/ctan.org/fonts/cm/ps-type1/bakoma</h1>
<pre><img src="/icons/blank.gif" alt="Icon "> <a href="?C=N;O=D">Name</a> <a href="?C=M;O=A">Last modified</a> <a href="?C=S;O=A">Size</a> <a href="?C=D;O=A">Description</a><hr><img src="/icons/back.gif" alt="[PARENTDIR]"> <a href="/sites/ctan.org/fonts/cm/ps-type1/">Parent Directory</a> -
<img src="/icons/unknown.gif" alt="[ ]"> <a href="BaKoMa-AMS.Fonts">BaKoMa-AMS.Fonts</a> 2005-06-15 21:10 9.0K
<img src="/icons/unknown.gif" alt="[ ]"> <a href="BaKoMa-CM.Fonts">BaKoMa-CM.Fonts</a> 2005-06-15 21:09 9.4K
<img src="/icons/unknown.gif" alt="[ ]"> <a href="LICENCE">LICENCE</a> 2005-06-15 22:56 1.4K
<img src="/icons/unknown.gif" alt="[ ]"> <a href="README.news">README.news</a> 2005-06-15 23:25 959
<img src="/icons/folder.gif" alt="[DIR]"> <a href="afm/">afm/</a> 2009-06-30 22:43 -
<img src="/icons/compressed.gif" alt="[ ]"> <a href="bkm-art.ps.gz">bkm-art.ps.gz</a> 1995-01-23 11:12 110K
<img src="/icons/folder.gif" alt="[DIR]"> <a href="etc/">etc/</a> 2009-06-30 22:43 -
<img src="/icons/folder.gif" alt="[DIR]"> <a href="mac/">mac/</a> 2009-06-30 22:43 -
<img src="/icons/folder.gif" alt="[DIR]"> <a href="mrf/">mrf/</a> 2009-06-30 22:43 -
<img src="/icons/folder.gif" alt="[DIR]"> <a href="otf/">otf/</a> 2009-06-30 22:43 -
<img src="/icons/folder.gif" alt="[DIR]"> <a href="patched-pfb/">patched-pfb/</a> 2009-06-30 22:43 -
<img src="/icons/folder.gif" alt="[DIR]"> <a href="pfb/">pfb/</a> 2009-06-30 22:43 -
<img src="/icons/folder.gif" alt="[DIR]"> <a href="pfm/">pfm/</a> 2009-06-30 22:43 -
<img src="/icons/folder.gif" alt="[DIR]"> <a href="tfm/">tfm/</a> 2009-06-30 22:43 -
<img src="/icons/folder.gif" alt="[DIR]"> <a href="ttf/">ttf/</a> 2009-06-30 22:43 -
<hr></pre>
</body></html>
BIN app/core/src/main/resources/type3/library/fonts/cm/cmr10.ttf Normal file
Binary file not shown.
BIN app/core/src/main/resources/type3/library/fonts/cm/cmsy10.ttf Normal file
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
462 app/core/src/main/resources/type3/library/index.json Normal file
@ -0,0 +1,462 @@
|
||||
[
|
||||
{
|
||||
"id": "dejavu-sans-regular",
|
||||
"label": "DejaVu Sans",
|
||||
"aliases": [
|
||||
"DejaVuSans",
|
||||
"BMQQDV+DejaVuSans",
|
||||
"DejaVuSansDisplay",
|
||||
"dejavusans",
|
||||
"dejavusansdisplay"
|
||||
],
|
||||
"program": {
|
||||
"resource": "type3/library/fonts/dejavu/DejaVuSans.ttf",
|
||||
"format": "ttf"
|
||||
},
|
||||
"webProgram": {
|
||||
"resource": "type3/library/fonts/dejavu/DejaVuSans.ttf",
|
||||
"format": "ttf"
|
||||
},
|
||||
"pdfProgram": {
|
||||
"resource": "type3/library/fonts/dejavu/DejaVuSans.ttf",
|
||||
"format": "ttf"
|
||||
},
|
||||
"source": "DejaVu Fonts 2.37",
|
||||
"signatures": [
|
||||
"sha256:2be58b6ef1e29a83b8634d70b9e32c37a15dea2e608894439ef7224c35b77f5d",
|
||||
"sha256:994c963d70041eee141fd275fa22c525a71283de2b4a952814d02e0bbfa8caea",
|
||||
"sha256:93573cb1ab32b9cb09378298fb120de079f6a309908d2ee86f91392a6aba5c31",
|
||||
"sha256:4febfad91e0141f9658506a0bf8fc2a449f0ea7d97b44e95fc9a970c77af4b0a",
|
||||
"sha256:0386e5811612ba4b998d57cd3869d7fbc48092a79d436deda774af107a4af813",
|
||||
"sha256:b95fa2a272cbc950b81320790d04fcf19ebb24050fa2139ba6a474172cac596b",
|
||||
"sha256:d034d16ac69e3e1c5008e77c4c24dc3179308a2742601e89d5c8ab327e4040dd",
|
||||
"sha256:ae77c4eb2c49f72c616272f3d7ac624ddb0b4db1c77acbe6b9d13531f68e1d5d",
|
||||
"sha256:85e16e36ed0290c149647be7e468a7c46e7b66fd290131213040f7bad905aa44",
|
||||
"sha256:3654d4d9bcbbf6ad51628082203094069a17aad3a5e6f5c7972833566e42ab6b",
|
||||
"sha256:d0c6cddc416d130701395246621a0f669fc292df4097a7a74395602faf4475df",
|
||||
"sha256:cadf43a2df81340368af44c76b499223931d78dcc76c70cf4b4a93d133e368af",
|
||||
"sha256:f1a874c4268b1bffffc99acabbe0a60aa662611b4bac0e688e4fc0ae3f2033bb",
|
||||
"sha256:e3d87c113463c8642a4f22943064fd75c133ad31fe5efebf6de6abf211b74b5a",
|
||||
"sha256:d47afb7581e98f588f0e70953e8692249aaa2ec3df36fbd90985f27b1ce1cf50",
|
||||
"sha256:e47b8f112a875361a43bcb6d9c6467e0296412d29e417e58a0e60c90b664d281",
|
||||
"sha256:9c67df2ac5c3dcf957dfb0cd048fa450322a72b5a2dfb05f816c536b3b090607",
|
||||
"sha256:3ee773a0af6fdedb9853dca9f4d8b80a421a0024bdf06bea41f15d58e3b90c87",
|
||||
"sha256:4fa06c90399d80b41cb718163a5d78af2b203df6b6579246fb0b24d349b7a591",
|
||||
"sha256:ac6756c76d6e43b771cc1e643dfc7891dfaaac05aa5e302190d0a662838ab031",
|
||||
"sha256:bf7b95498f7d00d228c5c155db62b6c1aa7e0215cca1690c9fdb0adcfd496b10",
|
||||
"sha256:39b8e5ec8e20a788cd45166baf0ab796397f152c9cd8dec1f882c635380cad92",
|
||||
"sha256:27b98489865df8df55f19e4505c093501f236465885ca3bf5b66b6f047a85bb2",
|
||||
"sha256:497ddd27e1f56ef6504c61613e3a159bab13314a4970a3be13b3a556648964da",
|
||||
"sha256:3b41f9e5f3a7ffa6f4cdffa2a46f02781ec1b2b0c99994707cfb139aa15a11e2",
|
||||
"sha256:93723fe436a1aa654db56caf133f56280444b9dc0682af50b83787c3e49ee3ec",
|
||||
"sha256:a648cb0524465bcb3bf4a2f65e0761cfc5167b1871a7db9488bee11b56062727",
|
||||
"sha256:2f18ed7f982aeb954aaae388ba0c75e3c676717ca324156b42bb17f3f20ef403",
|
||||
"sha256:18ce863feb57f42f2b92ac85a8c55ef3eeaa15488c5d6cd8c724b085994c64fa",
|
||||
"sha256:a3eb7054e426aad7d1fac1f39ad6d3f886e34c04f780def5cf22b53cb3a45b46",
|
||||
"sha256:edd22119635bfb0f2bff750137c6c6400a7fae4ff80cc252d2e6f2ca88f599a7",
|
||||
"sha256:aae1797f3e3ff55d71b02590333aff86663d6bb4a5768bed7550e5987f40afe8",
|
||||
"sha256:0165552fad28860f2ea6079be7a87ea0833acde99309b3ef619c8f81707c46a3",
|
||||
"sha256:792a1c5aaa1743ab203a363a8f6cd07c3b043e33c72e97c4ea21f5862158e6c1",
|
||||
"sha256:f4bfd64f36bf33dea79800561a67f78d5ccdb436363574abf0892f58b376a2e6",
|
||||
"sha256:119da04d962622c8aa46d77f6bdfccb5d4a4ef7173775275b046efd59098e5d9",
|
||||
"sha256:003af1c45e3a5ab09544e226eba25e3a70abfe6e36dd48584474cc7a497685f6",
|
||||
"sha256:88b3471db1978cc83233f249453806a8369c766b089b424c86c2584196ed5dbf",
|
||||
"sha256:a15cc90b7fc110cef4f07fe8a692d572e1289a9ee29c95732294662fded4e042",
|
||||
"sha256:fb54c23aa081562ac114676ffe43032c9c0fb63af3e5b7b3441b88872d1f2e7a",
|
||||
"sha256:4b553d51d58f5891af071359fb016caf1c6137778da129a6b208dcc8cb0c4635",
|
||||
"sha256:b318f65b9dc209eb6f004e3a6c20a772ebbca3d752adc10c66a6a8a479da2838",
|
||||
"sha256:64f725573c1f5d90196e94ed338a7af06baf274420414befeb9693c80acd0f77",
|
||||
"sha256:9a701e082ba5a779e2b20b8de0c7844b3f7838ba8cd4bd7ef366893761fb994d",
|
||||
"sha256:2f6f8d63ff6235f3b7cd6f5eba8076854892037afa2ea6962953b3e7cda3736e",
|
||||
"sha256:f17b5eb0ee996d1388c548f79fa50fa2d8c6076959eff189bb745d156d54547f",
|
||||
"sha256:f22c75548364bb25fc3efbe11f05c56e29f07c15c3046ddbc85a64e5cc5a97bd",
|
||||
"sha256:54a6c2e4bc290b48e21eece7f81cb6633c4b53a91f198fdaabfc73743b0e4499",
|
||||
"sha256:059af9dbaaab27c1d660ef00de6d4fd6e1687cfe2abca0a4c07265c2b2b450c6",
|
||||
"sha256:6651550d7b913850087244b7a70961989c2efc6d8c8d060d8663ff087b7723f6",
|
||||
"sha256:4d4ee6f04f57a40a589741df4747990ed485c192b0fc179a415aba822f352a8d",
|
||||
"sha256:e808a8ecba94bf0190ab7218bb0702698125ee2e456e82e00da709e8188e2bf8",
|
||||
"sha256:b5064b202eb1dae41545eddf674ee23bd82176e76aac8eb749540c2689f2e3ec",
|
||||
"sha256:f8f14410ec170248916e19f9d09120cfd786c47906b7c3735781d24e944b094e",
|
||||
"sha256:c43134bebeaf8328ac299ba978d7e663e2dc4fe99463b9d7f72f72f77936204e",
|
||||
"sha256:4f763d5e2cd0bdcd4650936ac505bd0e011899712ffe80ffa4b4d43f42941327",
|
||||
"sha256:cb72de0c6105b9802d360c47a292a1f7bc344939a6801b879ea09dae4e45e863",
|
||||
"sha256:2add5b5ad6e536f3614b75e246b49a006edbbecdd309d24bd42c874a3ae3c8ed",
|
||||
"sha256:31d0e67bc63a816302c9ff6ad9c19e17603aef1a4c3677b81b1d9084caa86e03",
|
||||
"sha256:4b509d2ae2cfab89783a73df2c66f0fd50949f97696079cb58f1e58b81daaa07",
|
||||
"sha256:831f7012db360331ffb5a5de6a6d6e03ffaad29f48d81cabe9fc613b25aad818",
|
||||
"sha256:bf790625423c5ebdf94760eb796c847af885b930d3a30861509b07f1c77c3f60",
|
||||
"sha256:f7c3be2199c397a4c702dd434ac63fc9e046d749eff8cede4513fbc2774751b4",
|
||||
"sha256:8f7bf7a6382e8a762c5a84f19f84f0675f61eb1b34bd42562c0b3ac6712e29ef",
|
||||
"sha256:dfaf8075e13be0e51f72485f9d825cea9ad077eb2dd9d63b9922add67d7d2761",
|
||||
"sha256:853422e67ac88fe7ae28d5c459dc9f5a84f24e7840eeb2d82a00719032119326",
|
||||
"sha256:b42182c55ec4bd53ab0698bee5f92945921dbccb534fdb5c6b41f1782e1fe88e",
|
||||
"sha256:75466035ac34f2523215e599452e32d796d7d02bc7122ed3d02fe91ebe064c25"
|
||||
],
|
||||
"glyphCoverage": [
|
||||
32,
|
||||
33,
|
||||
37,
|
||||
39,
|
||||
40,
|
||||
41,
|
||||
43,
|
||||
44,
|
||||
45,
|
||||
46,
|
||||
48,
|
||||
49,
|
||||
50,
|
||||
51,
|
||||
52,
|
||||
53,
|
||||
54,
|
||||
55,
|
||||
56,
|
||||
57,
|
||||
58,
|
||||
61,
|
||||
65,
|
||||
66,
|
||||
67,
|
||||
68,
|
||||
69,
|
||||
70,
|
||||
71,
|
||||
72,
|
||||
73,
|
||||
76,
|
||||
77,
|
||||
78,
|
||||
79,
|
||||
80,
|
||||
82,
|
||||
83,
|
||||
84,
|
||||
85,
|
||||
87,
|
||||
88,
|
||||
89,
|
||||
91,
|
||||
93,
|
||||
95,
|
||||
97,
|
||||
98,
|
||||
99,
|
||||
100,
|
||||
101,
|
||||
102,
|
||||
103,
|
||||
104,
|
||||
105,
|
||||
106,
|
||||
107,
|
||||
108,
|
||||
109,
|
||||
110,
|
||||
111,
|
||||
112,
|
||||
113,
|
||||
114,
|
||||
115,
|
||||
116,
|
||||
117,
|
||||
118,
|
||||
119,
|
||||
120,
|
||||
121,
|
||||
122,
|
||||
215
|
||||
]
|
||||
},
|
||||
{
|
||||
"id": "dejavu-sans-bold",
|
||||
"label": "DejaVu Sans Bold",
|
||||
"aliases": [
|
||||
"DejaVuSans-Bold",
|
||||
"EVICAO+DejaVuSans-Bold",
|
||||
"dejavusans-bold"
|
||||
],
|
||||
"program": {
|
||||
"resource": "type3/library/fonts/dejavu/DejaVuSans-Bold.ttf",
|
||||
"format": "ttf"
|
||||
},
|
||||
"webProgram": {
|
||||
"resource": "type3/library/fonts/dejavu/DejaVuSans-Bold.ttf",
|
||||
"format": "ttf"
|
||||
},
|
||||
"pdfProgram": {
|
||||
"resource": "type3/library/fonts/dejavu/DejaVuSans-Bold.ttf",
|
||||
"format": "ttf"
|
||||
},
|
||||
"source": "DejaVu Fonts 2.37",
|
||||
"signatures": [
|
||||
"sha256:dc03917f2edd92a7a68a46ad36f65a908e4feb85e61cb37e9026205f3986574a",
|
||||
"sha256:c845063bef18f173afbfcb90fbf6773f43648c5f0666ecfa0132afe4e164068d"
|
||||
],
|
||||
"glyphCoverage": [
|
||||
32,
|
||||
65,
|
||||
83,
|
||||
87,
|
||||
97,
|
||||
100,
|
||||
101,
|
||||
103,
|
||||
105,
|
||||
110,
|
||||
116,
|
||||
118,
|
||||
119
|
||||
]
|
||||
},
|
||||
{
|
||||
"id": "dejavu-sans-oblique",
|
||||
"label": "DejaVu Sans Oblique",
|
||||
"aliases": [
|
||||
"DejaVuSans-Oblique",
|
||||
"dejavusans-oblique"
|
||||
],
|
||||
"program": {
|
||||
"resource": "type3/library/fonts/dejavu/DejaVuSans-Oblique.ttf",
|
||||
"format": "ttf"
|
||||
},
|
||||
"webProgram": {
|
||||
"resource": "type3/library/fonts/dejavu/DejaVuSans-Oblique.ttf",
|
||||
"format": "ttf"
|
||||
},
|
||||
"pdfProgram": {
|
||||
"resource": "type3/library/fonts/dejavu/DejaVuSans-Oblique.ttf",
|
||||
"format": "ttf"
|
||||
},
|
||||
"source": "DejaVu Fonts 2.37",
|
||||
"signatures": [
|
||||
"sha256:81cd2d4d9353ee02c7ed80c2892658072b2a8bbd9ed1832b474129dfbe35d5d8",
|
||||
"sha256:08864aa8e8d17cead6059d5b4f1b1eea2053fa0ea3ca64e885d6eaacb78bccaf"
|
||||
],
|
||||
"glyphCoverage": [
|
||||
70,
|
||||
71,
|
||||
85,
|
||||
87,
|
||||
100,
|
||||
101,
|
||||
103,
|
||||
109,
|
||||
112,
|
||||
114,
|
||||
116,
|
||||
118,
|
||||
120
|
||||
]
|
||||
},
|
||||
{
|
||||
"id": "dejavu-sans-mono",
|
||||
"label": "DejaVu Sans Mono",
|
||||
"aliases": [
|
||||
"DejaVuSansMono",
|
||||
"dejavusansmono"
|
||||
],
|
||||
"program": {
|
||||
"resource": "type3/library/fonts/dejavu/DejaVuSansMono.ttf",
|
||||
"format": "ttf"
|
||||
},
|
||||
"webProgram": {
|
||||
"resource": "type3/library/fonts/dejavu/DejaVuSansMono.ttf",
|
||||
"format": "ttf"
|
||||
},
|
||||
"pdfProgram": {
|
||||
"resource": "type3/library/fonts/dejavu/DejaVuSansMono.ttf",
|
||||
"format": "ttf"
|
||||
},
|
||||
"source": "DejaVu Fonts 2.37",
|
||||
"signatures": [
|
||||
"sha256:88758adf0b41a81204ed3ad63463f5d15c7c2f80e8942cee501d06fa7274dc4e",
|
||||
"sha256:74e60bcb2d7975b0c7b372aca9fc25f55c9018005425a741830e7c4370b8d593"
|
||||
],
|
||||
"glyphCoverage": [
|
||||
35,
|
||||
39,
|
||||
48,
|
||||
49,
|
||||
50,
|
||||
51,
|
||||
52,
|
||||
53,
|
||||
54,
|
||||
55,
|
||||
56,
|
||||
57,
|
||||
67,
|
||||
97,
|
||||
98,
|
||||
99,
|
||||
100,
|
||||
101,
|
||||
102,
|
||||
103,
|
||||
105,
|
||||
107,
|
||||
108,
|
||||
109,
|
||||
111,
|
||||
112,
|
||||
114,
|
||||
116,
|
||||
121
|
||||
]
|
||||
},
|
||||
{
|
||||
"id": "cmr10",
|
||||
"label": "Computer Modern Roman 10pt",
|
||||
"aliases": [
|
||||
"cmr10",
|
||||
"Cmr10"
|
||||
],
|
||||
"signatures": [
|
||||
"sha256:5b535a05c982fb8ff029dfbedd5e9d28c1c4379ebac259d207f65606a94e5b15"
|
||||
],
|
||||
"program": {
|
||||
"resource": "type3/library/fonts/cm/cmr10.ttf",
|
||||
"format": "ttf"
|
||||
},
|
||||
"webProgram": {
|
||||
"resource": "type3/library/fonts/cm/cmr10.ttf",
|
||||
"format": "ttf"
|
||||
},
|
||||
"pdfProgram": {
|
||||
"resource": "type3/library/fonts/cm/cmr10.ttf",
|
||||
"format": "ttf"
|
||||
},
|
||||
"glyphCoverage": [
|
||||
48,
|
||||
49,
|
||||
53
|
||||
],
|
||||
"source": "type3/samples/01_Matplotlib.pdf"
|
||||
},
|
||||
{
|
||||
"id": "cmmi10",
|
||||
"label": "Computer Modern Math Italic 10pt",
|
||||
"aliases": [
|
||||
"cmmi10",
|
||||
"Cmmi10"
|
||||
],
|
||||
"signatures": [
|
||||
"sha256:6c72170517812e39f970746f53a2ae08dafbbe7374c20bcb4d5a60adc49cb77b"
|
||||
],
|
||||
"program": {
|
||||
"resource": "type3/library/fonts/cm/cmmi10.ttf",
|
||||
"format": "ttf"
|
||||
},
|
||||
"webProgram": {
|
||||
"resource": "type3/library/fonts/cm/cmmi10.ttf",
|
||||
"format": "ttf"
|
||||
},
|
||||
"pdfProgram": {
|
||||
"resource": "type3/library/fonts/cm/cmmi10.ttf",
|
||||
"format": "ttf"
|
||||
},
|
||||
"glyphCoverage": [
|
||||
100,
|
||||
120
|
||||
],
|
||||
"source": "type3/samples/01_Matplotlib.pdf"
|
||||
},
|
||||
{
|
||||
"id": "cmsy10",
|
||||
"label": "Computer Modern Symbol 10pt",
|
||||
"aliases": [
|
||||
"cmsy10",
|
||||
"Cmsy10"
|
||||
],
|
||||
"signatures": [
|
||||
"sha256:1324cd8127143ef9023616b7911c570db3b1eb35758cdc9258ec16c0f4587775",
|
||||
"sha256:2832e219b2db3bacf0d5a147d4b74ad5226fdf7562c395ef3fb12937633e037d"
|
||||
],
|
||||
"program": {
|
||||
"resource": "type3/library/fonts/cm/cmsy10.ttf",
|
||||
"format": "ttf"
|
||||
},
|
||||
"webProgram": {
|
||||
"resource": "type3/library/fonts/cm/cmsy10.ttf",
|
||||
"format": "ttf"
|
||||
},
|
||||
"pdfProgram": {
|
||||
"resource": "type3/library/fonts/cm/cmsy10.ttf",
|
||||
"format": "ttf"
|
||||
},
|
||||
"glyphCoverage": [
|
||||
48,
|
||||
8734
|
||||
],
|
||||
"source": "type3/samples/01_Matplotlib.pdf"
|
||||
},
|
||||
{
|
||||
"id": "stix-size-three",
|
||||
"label": "STIX Size Three Symbols",
|
||||
"aliases": [
|
||||
"stixsizethreesym-regular",
|
||||
"STIXSizeThreeSym-Regular"
|
||||
],
|
||||
"signatures": [
|
||||
"sha256:33d0ab9d9d72c1aed1edfc9b815dd6a2d618cbbe9084309c7f2de0f3df3073d7"
|
||||
],
|
||||
"program": {
|
||||
"resource": "type3/library/fonts/stix/STIXSizeThreeSym-Regular.otf",
|
||||
"format": "otf"
|
||||
},
|
||||
"webProgram": {
|
||||
"resource": "type3/library/fonts/stix/STIXSizeThreeSym-Regular.otf",
|
||||
"format": "otf"
|
||||
},
|
||||
"pdfProgram": {
|
||||
"resource": "type3/library/fonts/stix/STIXSizeThreeSym-Regular.otf",
|
||||
"format": "otf"
|
||||
},
|
||||
"glyphCoverage": [
|
||||
91,
|
||||
93
|
||||
],
|
||||
"source": "type3/samples/01_Matplotlib.pdf"
|
||||
},
|
||||
{
|
||||
"id": "sourcecodepro-regular",
|
||||
"label": "Source Code Pro",
|
||||
"aliases": [
|
||||
"SourceCodePro-Regular",
|
||||
"sourcecodepro-regular",
|
||||
"NVMZUP+SourceCodePro-Regular"
|
||||
],
|
||||
"signatures": [
|
||||
"sha256:96ba693001b2ab224ad5b5a7464cecd4d33e68f30fb23f78a8473dbb031ce246",
|
||||
"sha256:72fca14e9e44fc41b0cdb1c6a088f0b07f882f9f04c51a0145f43cf8b285c5b6"
|
||||
],
|
||||
"program": {
|
||||
"resource": "type3/library/fonts/scp/SauceCodeProNerdFont-Regular.ttf",
|
||||
"format": "ttf"
|
||||
},
|
||||
"webProgram": {
|
||||
"resource": "type3/library/fonts/scp/SauceCodeProNerdFont-Regular.ttf",
|
||||
"format": "ttf"
|
||||
},
|
||||
"pdfProgram": {
|
||||
"resource": "type3/library/fonts/scp/SauceCodeProNerdFont-Regular.ttf",
|
||||
"format": "ttf"
|
||||
},
|
||||
"glyphCoverage": [
|
||||
46,
|
||||
48,
|
||||
49,
|
||||
50,
|
||||
51,
|
||||
52,
|
||||
53,
|
||||
54,
|
||||
55,
|
||||
56,
|
||||
57,
|
||||
67
|
||||
],
|
||||
"source": "type3/samples/04_handout-intermediate.pdf"
|
||||
}
|
||||
]
|
||||
@@ -75,3 +75,11 @@ dependencies {
}

tasks.register('prepareKotlinBuildScriptModel') {}

tasks.register('type3SignatureTool', JavaExec) {
    group = 'type3'
    description = 'Dump Type3 font signatures and glyph coverage for the Type3 library'
    classpath = sourceSets.main.runtimeClasspath
    mainClass = 'stirling.software.SPDF.service.pdfjson.type3.tool.Type3SignatureTool'
    standardInput = System.in
}
@@ -1,5 +1,7 @@
package stirling.software.SPDF.model.json;

import java.util.List;

import com.fasterxml.jackson.annotation.JsonInclude;

import lombok.AllArgsConstructor;
@@ -56,6 +58,12 @@ public class PdfJsonFont {
    /** Format hint for the pdfProgram payload. */
    private String pdfProgramFormat;

    /** Glyph metadata for Type3 fonts to enable precise text rewrites. */
    private List<PdfJsonFontType3Glyph> type3Glyphs;

    /** Per-strategy synthesized font payloads for Type3 normalization. */
    private List<PdfJsonFontConversionCandidate> conversionCandidates;

    /** ToUnicode stream encoded as Base64 when present. */
    private String toUnicode;
@@ -0,0 +1,69 @@
package stirling.software.SPDF.model.json;

import java.util.List;

import com.fasterxml.jackson.annotation.JsonInclude;

import lombok.AllArgsConstructor;
import lombok.Builder;
import lombok.Data;
import lombok.NoArgsConstructor;

@Data
@Builder
@NoArgsConstructor
@AllArgsConstructor
@JsonInclude(JsonInclude.Include.NON_NULL)
public class PdfJsonFontConversionCandidate {

    /** Stable identifier for the strategy that produced this candidate. */
    private String strategyId;

    /** Human-readable label for diagnostics and UI toggles. */
    private String strategyLabel;

    /** Outcome of the conversion attempt. */
    private PdfJsonFontConversionStatus status;

    /** Summary diagnostics or error details. */
    private String message;

    /** Count of glyphs successfully synthesized. */
    private Integer synthesizedGlyphs;

    /** Count of glyphs that could not be reproduced accurately. */
    private Integer missingGlyphs;

    /** Approximate width delta (in glyph units) across the test sample. */
    private Double widthDelta;

    /** Approximate bounding box delta (in glyph units). */
    private Double bboxDelta;

    /** Base64-encoded font program (typically TTF/OTF) produced by the strategy. */
    private String program;

    /** Format hint for {@link #program}. */
    private String programFormat;

    /** Web-optimized payload (e.g. TTF) for browser preview. */
    private String webProgram;

    /** Format for the web payload. */
    private String webProgramFormat;

    /** PDF-friendly payload for re-embedding during export. */
    private String pdfProgram;

    /** Format for the PDF payload. */
    private String pdfProgramFormat;

    /** Optional PNG preview of rendered glyphs (Base64). */
    private String previewImage;

    /** Additional structured diagnostics (JSON string). */
    private String diagnostics;

    /** Known unicode/codepoint coverage derived from the conversion strategy. */
    private List<Integer> glyphCoverage;
}
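For orientation, a minimal serialization sketch, not part of the commit: because the class is annotated with @JsonInclude(JsonInclude.Include.NON_NULL), fields a strategy leaves unset are dropped from the JSON payload. The strategy id and counts below are made-up values; only the builder fields themselves come from the class above.

package stirling.software.SPDF.model.json;

import com.fasterxml.jackson.databind.ObjectMapper;

// Hypothetical demo class, not part of the commit.
class CandidateSerializationSketch {
    public static void main(String[] args) throws Exception {
        PdfJsonFontConversionCandidate candidate =
                PdfJsonFontConversionCandidate.builder()
                        .strategyId("example-strategy") // made-up strategy id
                        .status(PdfJsonFontConversionStatus.SUCCESS)
                        .synthesizedGlyphs(42)
                        .build();
        // NON_NULL inclusion means unset fields are omitted, e.g.:
        // {"strategyId":"example-strategy","status":"SUCCESS","synthesizedGlyphs":42}
        System.out.println(new ObjectMapper().writeValueAsString(candidate));
    }
}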
@@ -0,0 +1,9 @@
package stirling.software.SPDF.model.json;

public enum PdfJsonFontConversionStatus {
    SUCCESS,
    WARNING,
    FAILURE,
    SKIPPED,
    UNSUPPORTED
}
@@ -0,0 +1,27 @@
package stirling.software.SPDF.model.json;

import com.fasterxml.jackson.annotation.JsonInclude;

import lombok.AllArgsConstructor;
import lombok.Builder;
import lombok.Data;
import lombok.NoArgsConstructor;

@Data
@Builder
@NoArgsConstructor
@AllArgsConstructor
@JsonInclude(JsonInclude.Include.NON_NULL)
public class PdfJsonFontType3Glyph {
    /** Character code used in the content stream to reference this glyph. */
    private Integer charCode;

    /** PostScript glyph name, when available. */
    private String glyphName;

    /** Unicode code point represented by this glyph, if it can be resolved. */
    private Integer unicode;

    /** Raw char code used in the Type3 font encoding (0-255). */
    private Integer charCodeRaw;
}
@@ -37,4 +37,5 @@ public class PdfJsonTextElement {
    private PdfJsonTextColor strokeColor;
    private Integer renderingMode;
    private Boolean fallbackUsed;
    private List<Integer> charCodes;
}
File diff suppressed because it is too large
@@ -11,6 +11,7 @@ import java.util.concurrent.ConcurrentHashMap;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.pdmodel.font.PDFont;
import org.apache.pdfbox.pdmodel.font.PDType0Font;
import org.apache.pdfbox.pdmodel.font.PDType3Font;
import org.springframework.beans.factory.annotation.Value;
import org.springframework.core.io.Resource;
import org.springframework.core.io.ResourceLoader;
@@ -122,10 +123,19 @@ public class PdfJsonFallbackFontService {
        if (font == null || text == null || text.isEmpty()) {
            return false;
        }
        if (font instanceof PDType3Font) {
            return false;
        }
        try {
            font.encode(text);
            return true;
        } catch (IOException | IllegalArgumentException ex) {
        } catch (IOException | IllegalArgumentException | UnsupportedOperationException ex) {
            log.info(
                    "[FONT-DEBUG] Font {} cannot encode text '{}' ({}): {}",
                    font != null ? font.getName() : "null",
                    text,
                    font != null ? font.getClass().getSimpleName() : "null",
                    ex.getMessage());
            return false;
        }
    }
@@ -0,0 +1,17 @@
package stirling.software.SPDF.service.pdfjson.type3;

import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.pdmodel.font.PDType3Font;

import lombok.Builder;
import lombok.Getter;

@Getter
@Builder
public class Type3ConversionRequest {
    private final PDDocument document;
    private final PDType3Font font;
    private final String fontId;
    private final int pageNumber;
    private final String fontUid;
}
@@ -0,0 +1,32 @@
package stirling.software.SPDF.service.pdfjson.type3;

import java.io.IOException;

import stirling.software.SPDF.model.json.PdfJsonFontConversionCandidate;

public interface Type3ConversionStrategy {

    /** Unique identifier used when reporting results. */
    String getId();

    /** Human-readable label for UI toggles or logs. */
    String getLabel();

    /** True when the underlying tooling is usable on this host. */
    boolean isAvailable();

    /** Quick predicate to avoid running on unsupported Type3 shapes. */
    default boolean supports(Type3ConversionRequest request, Type3GlyphContext context)
            throws IOException {
        return request != null && request.getFont() != null;
    }

    /**
     * Attempt to synthesise a font program for the supplied Type3 font.
     *
     * @param request contextual information for the conversion attempt
     * @return a candidate describing the outcome, never {@code null}
     */
    PdfJsonFontConversionCandidate convert(
            Type3ConversionRequest request, Type3GlyphContext context) throws IOException;
}
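As a minimal sketch of how a strategy could plug into this interface, the class below is hypothetical and not part of the commit; the commit's real strategies are elsewhere in the (partly suppressed) diff. It relies only on the interface above, the package-private Type3GlyphContext, and the candidate builder; a real implementation would also be registered as a Spring bean so the service picks it up, and the service backfills strategyId/strategyLabel when a strategy leaves them unset.

package stirling.software.SPDF.service.pdfjson.type3;

import java.io.IOException;

import stirling.software.SPDF.model.json.PdfJsonFontConversionCandidate;
import stirling.software.SPDF.model.json.PdfJsonFontConversionStatus;

/** Hypothetical no-op strategy illustrating the contract; not part of the commit. */
public class NoOpType3ConversionStrategy implements Type3ConversionStrategy {

    @Override
    public String getId() {
        return "noop";
    }

    @Override
    public String getLabel() {
        return "No-op (diagnostics only)";
    }

    @Override
    public boolean isAvailable() {
        // A real strategy would probe its tooling (for example an external converter) here.
        return true;
    }

    @Override
    public PdfJsonFontConversionCandidate convert(
            Type3ConversionRequest request, Type3GlyphContext context) throws IOException {
        // Report how many glyph outlines were extracted without synthesizing a program.
        int glyphCount = context.getGlyphs().size();
        return PdfJsonFontConversionCandidate.builder()
                .status(PdfJsonFontConversionStatus.SKIPPED)
                .message("Extracted " + glyphCount + " glyph outlines; no program synthesized")
                .build();
    }
}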
@@ -0,0 +1,133 @@
package stirling.software.SPDF.service.pdfjson.type3;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;

import org.springframework.stereotype.Service;

import lombok.RequiredArgsConstructor;
import lombok.extern.slf4j.Slf4j;

import stirling.software.SPDF.model.json.PdfJsonFontConversionCandidate;
import stirling.software.SPDF.model.json.PdfJsonFontConversionStatus;

@Slf4j
@Service
@RequiredArgsConstructor
public class Type3FontConversionService {

    private final List<Type3ConversionStrategy> strategies;
    private final Type3GlyphExtractor glyphExtractor;

    public List<PdfJsonFontConversionCandidate> synthesize(Type3ConversionRequest request) {
        if (request == null || request.getFont() == null) {
            return Collections.emptyList();
        }
        if (strategies == null || strategies.isEmpty()) {
            log.debug(
                    "[TYPE3] No conversion strategies registered for font {}", request.getFontId());
            return Collections.emptyList();
        }

        List<PdfJsonFontConversionCandidate> candidates = new ArrayList<>();
        Type3GlyphContext glyphContext = null;
        for (Type3ConversionStrategy strategy : strategies) {
            if (strategy == null) {
                continue;
            }
            PdfJsonFontConversionCandidate candidate =
                    runStrategy(
                            strategy,
                            request,
                            glyphContext == null
                                    ? (glyphContext =
                                            new Type3GlyphContext(request, glyphExtractor))
                                    : glyphContext);
            if (candidate != null) {
                candidates.add(candidate);
            }
        }
        return candidates;
    }

    private PdfJsonFontConversionCandidate runStrategy(
            Type3ConversionStrategy strategy,
            Type3ConversionRequest request,
            Type3GlyphContext glyphContext) {
        if (!strategy.isAvailable()) {
            return PdfJsonFontConversionCandidate.builder()
                    .strategyId(strategy.getId())
                    .strategyLabel(strategy.getLabel())
                    .status(PdfJsonFontConversionStatus.SKIPPED)
                    .message("Strategy unavailable on current host")
                    .build();
        }
        try {
            if (!strategy.supports(request, glyphContext)) {
                return PdfJsonFontConversionCandidate.builder()
                        .strategyId(strategy.getId())
                        .strategyLabel(strategy.getLabel())
                        .status(PdfJsonFontConversionStatus.UNSUPPORTED)
                        .message("Font not supported by strategy")
                        .build();
            }
        } catch (IOException supportCheckException) {
            log.warn(
                    "[TYPE3] Strategy {} support check failed for font {}: {}",
                    strategy.getId(),
                    request.getFontUid(),
                    supportCheckException.getMessage(),
                    supportCheckException);
            return PdfJsonFontConversionCandidate.builder()
                    .strategyId(strategy.getId())
                    .strategyLabel(strategy.getLabel())
                    .status(PdfJsonFontConversionStatus.UNSUPPORTED)
                    .message("Support check failed: " + supportCheckException.getMessage())
                    .build();
        }

        try {
            PdfJsonFontConversionCandidate result = strategy.convert(request, glyphContext);
            if (result == null) {
                log.info(
                        "[TYPE3] Strategy {} returned null result for font {}",
                        strategy.getId(),
                        request.getFontUid());
                return PdfJsonFontConversionCandidate.builder()
                        .strategyId(strategy.getId())
                        .strategyLabel(strategy.getLabel())
                        .status(PdfJsonFontConversionStatus.FAILURE)
                        .message("Strategy returned null result")
                        .build();
            }
            if (result.getStrategyId() == null) {
                result.setStrategyId(strategy.getId());
            }
            if (result.getStrategyLabel() == null) {
                result.setStrategyLabel(strategy.getLabel());
            }
            log.info(
                    "[TYPE3] Strategy {} finished with status {} (message: {}) for font {}",
                    strategy.getId(),
                    result.getStatus(),
                    result.getMessage(),
                    request.getFontUid());
            return result;
        } catch (IOException ex) {
            log.warn(
                    "[TYPE3] Strategy {} failed for font {}: {}",
                    strategy.getId(),
                    request.getFontUid(),
                    ex.getMessage(),
                    ex);
            return PdfJsonFontConversionCandidate.builder()
                    .strategyId(strategy.getId())
                    .strategyLabel(strategy.getLabel())
                    .status(PdfJsonFontConversionStatus.FAILURE)
                    .message(ex.getMessage())
                    .build();
        }
    }
}
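A hypothetical caller sketch, not part of the commit: it shows how a request is assembled for one Type3 font and handed to the service, which returns one candidate per registered strategy. The document, font, resource name "F12" and uid "page0-F12" are assumed to come from the caller's own page traversal.

package stirling.software.SPDF.service.pdfjson.type3;

import java.util.List;

import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.pdmodel.font.PDType3Font;

import stirling.software.SPDF.model.json.PdfJsonFontConversionCandidate;

// Hypothetical caller, not part of the commit.
class Type3ConversionCallerSketch {

    private final Type3FontConversionService conversionService;

    Type3ConversionCallerSketch(Type3FontConversionService conversionService) {
        this.conversionService = conversionService;
    }

    List<PdfJsonFontConversionCandidate> convert(PDDocument document, PDType3Font font) {
        Type3ConversionRequest request =
                Type3ConversionRequest.builder()
                        .document(document)
                        .font(font)
                        .fontId("F12") // made-up resource name from the page's font dictionary
                        .pageNumber(0)
                        .fontUid("page0-F12") // made-up stable identifier used in logs
                        .build();
        // Each registered strategy contributes one candidate (success, failure, skipped, ...).
        return conversionService.synthesize(request);
    }
}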
@@ -0,0 +1,218 @@
package stirling.software.SPDF.service.pdfjson.type3;

import java.io.IOException;
import java.io.InputStream;
import java.nio.ByteBuffer;
import java.security.MessageDigest;
import java.security.NoSuchAlgorithmException;
import java.util.ArrayList;
import java.util.Comparator;
import java.util.List;
import java.util.Locale;

import org.apache.pdfbox.cos.COSArray;
import org.apache.pdfbox.cos.COSBase;
import org.apache.pdfbox.cos.COSDictionary;
import org.apache.pdfbox.cos.COSName;
import org.apache.pdfbox.cos.COSNumber;
import org.apache.pdfbox.cos.COSStream;
import org.apache.pdfbox.pdmodel.common.PDRectangle;
import org.apache.pdfbox.pdmodel.font.PDType3CharProc;
import org.apache.pdfbox.pdmodel.font.PDType3Font;
import org.apache.pdfbox.pdmodel.font.encoding.Encoding;
import org.apache.pdfbox.util.Matrix;

/**
 * Computes a reproducible hash for Type3 fonts so we can match them against a pre-built library of
 * converted programs. The signature intentionally combines multiple aspects of the font (encoding,
 * CharProc streams, glyph widths, font metrics) to minimise collisions between unrelated fonts that
 * coincidentally share glyph names.
 */
public final class Type3FontSignatureCalculator {

    private Type3FontSignatureCalculator() {}

    public static String computeSignature(PDType3Font font) throws IOException {
        if (font == null) {
            return null;
        }
        MessageDigest digest = newDigest();
        updateMatrix(digest, font.getFontMatrix());
        updateRectangle(digest, font.getFontBBox());
        updateEncoding(digest, font.getEncoding());
        updateCharProcs(digest, font);
        byte[] hash = digest.digest();
        return "sha256:" + toHex(hash);
    }

    private static void updateEncoding(MessageDigest digest, Encoding encoding) {
        if (encoding == null) {
            updateInt(digest, -1);
            return;
        }
        for (int code = 0; code <= 0xFF; code++) {
            String name = encoding.getName(code);
            if (name != null) {
                updateInt(digest, code);
                updateString(digest, name);
            }
        }
    }

    private static void updateCharProcs(MessageDigest digest, PDType3Font font) throws IOException {
        COSDictionary charProcs =
                (COSDictionary) font.getCOSObject().getDictionaryObject(COSName.CHAR_PROCS);
        if (charProcs == null || charProcs.size() == 0) {
            updateInt(digest, 0);
            return;
        }
        List<COSName> glyphNames = new ArrayList<>(charProcs.keySet());
        glyphNames.sort(Comparator.comparing(COSName::getName, String.CASE_INSENSITIVE_ORDER));
        for (COSName glyphName : glyphNames) {
            updateString(digest, glyphName.getName());
            int code = resolveCharCode(font, glyphName.getName());
            updateInt(digest, code);
            if (code >= 0) {
                try {
                    updateFloat(digest, font.getWidthFromFont(code));
                } catch (IOException ignored) {
                    updateFloat(digest, 0f);
                }
            } else {
                updateFloat(digest, 0f);
            }

            COSStream stream =
                    charProcs.getDictionaryObject(glyphName) instanceof COSStream cosStream
                            ? cosStream
                            : null;
            if (stream != null) {
                byte[] payload = readAllBytes(stream);
                updateInt(digest, payload.length);
                digest.update(payload);
                PDType3CharProc charProc = new PDType3CharProc(font, stream);
                updateRectangle(digest, extractGlyphBoundingBox(font, charProc));
            } else {
                updateInt(digest, -1);
            }
        }
        updateInt(digest, glyphNames.size());
    }

    private static byte[] readAllBytes(COSStream stream) throws IOException {
        try (InputStream inputStream = stream.createInputStream()) {
            return inputStream.readAllBytes();
        }
    }

    private static COSArray extractGlyphBoundingBox(PDType3Font font, PDType3CharProc charProc) {
        if (charProc == null) {
            return null;
        }
        COSStream stream = charProc.getCOSObject();
        if (stream != null) {
            COSArray bboxArray = (COSArray) stream.getDictionaryObject(COSName.BBOX);
            if (bboxArray != null && bboxArray.size() == 4) {
                return bboxArray;
            }
        }
        return font.getCOSObject().getCOSArray(COSName.BBOX);
    }

    private static int resolveCharCode(PDType3Font font, String glyphName) {
        if (glyphName == null || font.getEncoding() == null) {
            return -1;
        }
        Encoding encoding = font.getEncoding();
        for (int code = 0; code <= 0xFF; code++) {
            String name = encoding.getName(code);
            if (glyphName.equals(name)) {
                return code;
            }
        }
        return -1;
    }

    private static void updateMatrix(MessageDigest digest, Matrix matrix) {
        if (matrix == null) {
            updateInt(digest, -1);
            return;
        }
        float[][] values = matrix.getValues();
        updateInt(digest, values.length);
        for (float[] row : values) {
            if (row == null) {
                updateInt(digest, -1);
                continue;
            }
            updateInt(digest, row.length);
            for (float value : row) {
                updateFloat(digest, value);
            }
        }
    }

    private static void updateRectangle(MessageDigest digest, PDRectangle rectangle) {
        if (rectangle == null) {
            updateInt(digest, -1);
            return;
        }
        updateFloat(digest, rectangle.getLowerLeftX());
        updateFloat(digest, rectangle.getLowerLeftY());
        updateFloat(digest, rectangle.getUpperRightX());
        updateFloat(digest, rectangle.getUpperRightY());
    }

    private static void updateRectangle(MessageDigest digest, COSArray array) {
        if (array == null) {
            updateInt(digest, -1);
            return;
        }
        updateInt(digest, array.size());
        for (int i = 0; i < array.size(); i++) {
            COSBase value = array.getObject(i);
            if (value instanceof COSNumber number) {
                updateFloat(digest, number.floatValue());
            } else {
                updateFloat(digest, 0f);
            }
        }
    }

    private static void updateString(MessageDigest digest, String value) {
        if (value == null) {
            updateInt(digest, -1);
            return;
        }
        byte[] bytes = value.getBytes(java.nio.charset.StandardCharsets.UTF_8);
        updateInt(digest, bytes.length);
        digest.update(bytes);
    }

    private static void updateInt(MessageDigest digest, int value) {
        digest.update(ByteBuffer.allocate(Integer.BYTES).putInt(value).array());
    }

    private static void updateFloat(MessageDigest digest, float value) {
        if (Float.isNaN(value) || Float.isInfinite(value)) {
            value = 0f;
        }
        digest.update(ByteBuffer.allocate(Float.BYTES).putFloat(value).array());
    }

    private static MessageDigest newDigest() {
        try {
            return MessageDigest.getInstance("SHA-256");
        } catch (NoSuchAlgorithmException ex) {
            throw new IllegalStateException("Missing SHA-256 MessageDigest", ex);
        }
    }

    private static String toHex(byte[] bytes) {
        StringBuilder builder = new StringBuilder(bytes.length * 2);
        for (byte value : bytes) {
            builder.append(String.format(Locale.ROOT, "%02x", Byte.toUnsignedInt(value)));
        }
        return builder.toString();
    }
}
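To make the "sha256:..." strings in index.json concrete, here is a small usage sketch, not part of the commit: it loads a PDF, walks the first page's font resources, and prints a signature for every Type3 font it finds. The demo class, file path and the use of PDFBox 3.x's Loader are assumptions; the only project API it relies on is computeSignature above.

package stirling.software.SPDF.service.pdfjson.type3;

import java.io.File;
import java.io.IOException;

import org.apache.pdfbox.Loader;
import org.apache.pdfbox.cos.COSName;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.pdmodel.PDResources;
import org.apache.pdfbox.pdmodel.font.PDFont;
import org.apache.pdfbox.pdmodel.font.PDType3Font;

/** Hypothetical demo, not part of the commit: one signature line per Type3 font on page 0. */
public class Type3SignatureDemo {
    public static void main(String[] args) throws IOException {
        try (PDDocument document = Loader.loadPDF(new File(args[0]))) {
            PDResources resources = document.getPage(0).getResources();
            for (COSName fontName : resources.getFontNames()) {
                PDFont font = resources.getFont(fontName);
                if (font instanceof PDType3Font type3Font) {
                    // Same "sha256:..." format used by the library's index.json signatures.
                    System.out.println(fontName.getName() + " -> "
                            + Type3FontSignatureCalculator.computeSignature(type3Font));
                }
            }
        }
    }
}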
@@ -0,0 +1,38 @@
package stirling.software.SPDF.service.pdfjson.type3;

import java.io.IOException;
import java.util.List;
import java.util.concurrent.atomic.AtomicReference;

import org.apache.pdfbox.pdmodel.font.PDType3Font;

import stirling.software.SPDF.service.pdfjson.type3.model.Type3GlyphOutline;

class Type3GlyphContext {
    private final Type3ConversionRequest request;
    private final Type3GlyphExtractor extractor;
    private final AtomicReference<List<Type3GlyphOutline>> glyphs = new AtomicReference<>();

    Type3GlyphContext(Type3ConversionRequest request, Type3GlyphExtractor extractor) {
        this.request = request;
        this.extractor = extractor;
    }

    public List<Type3GlyphOutline> getGlyphs() throws IOException {
        List<Type3GlyphOutline> cached = glyphs.get();
        if (cached == null) {
            cached =
                    extractor.extractGlyphs(
                            request.getDocument(),
                            request.getFont(),
                            request.getFontId(),
                            request.getPageNumber());
            glyphs.compareAndSet(null, cached);
        }
        return cached;
    }

    public PDType3Font getFont() {
        return request.getFont();
    }
}
@@ -0,0 +1,126 @@
package stirling.software.SPDF.service.pdfjson.type3;

import java.awt.geom.GeneralPath;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.Objects;

import org.apache.pdfbox.cos.COSArray;
import org.apache.pdfbox.cos.COSDictionary;
import org.apache.pdfbox.cos.COSName;
import org.apache.pdfbox.cos.COSStream;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.pdmodel.PDPage;
import org.apache.pdfbox.pdmodel.common.PDRectangle;
import org.apache.pdfbox.pdmodel.font.PDType3CharProc;
import org.apache.pdfbox.pdmodel.font.PDType3Font;
import org.springframework.stereotype.Component;

import lombok.extern.slf4j.Slf4j;

import stirling.software.SPDF.service.pdfjson.type3.model.Type3GlyphOutline;

@Slf4j
@Component
public class Type3GlyphExtractor {

    public List<Type3GlyphOutline> extractGlyphs(
            PDDocument document, PDType3Font font, String fontId, int pageNumber)
            throws IOException {
        Objects.requireNonNull(font, "font");
        COSDictionary charProcs =
                (COSDictionary) font.getCOSObject().getDictionaryObject(COSName.CHAR_PROCS);
        if (charProcs == null || charProcs.size() == 0) {
            return List.of();
        }
        List<Type3GlyphOutline> outlines = new ArrayList<>();
        for (COSName glyphName : charProcs.keySet()) {
            COSStream stream =
                    charProcs.getDictionaryObject(glyphName) instanceof COSStream cosStream
                            ? cosStream
                            : null;
            if (stream == null) {
                continue;
            }
            PDType3CharProc charProc = new PDType3CharProc(font, stream);
            outlines.add(analyseGlyph(document, font, glyphName, charProc, fontId, pageNumber));
        }
        return outlines;
    }

    private Type3GlyphOutline analyseGlyph(
            PDDocument document,
            PDType3Font font,
            COSName glyphName,
            PDType3CharProc charProc,
            String fontId,
            int pageNumber)
            throws IOException {
        int code = resolveCharCode(font, glyphName.getName());
        float advanceWidth = 0f;
        if (code >= 0) {
            advanceWidth = font.getWidthFromFont(code);
        }

        PDRectangle glyphBBox = extractGlyphBoundingBox(font, charProc);
        PDRectangle bbox = font.getFontBBox();
        GlyphGraphicsExtractor extractor =
                new GlyphGraphicsExtractor(new PDPage(bbox != null ? bbox : new PDRectangle()));
        extractor.process(charProc);
        GeneralPath outline = extractor.getAccumulatedPath();
        Integer unicodeValue = null;
        if (code >= 0) {
            String unicode = font.toUnicode(code);
            if (unicode != null && !unicode.isEmpty()) {
                unicodeValue = unicode.codePointAt(0);
            } else {
                unicodeValue = code;
            }
        }
        return Type3GlyphOutline.builder()
                .glyphName(glyphName.getName())
                .charCode(code)
                .advanceWidth(advanceWidth)
                .boundingBox(glyphBBox)
                .outline(outline)
                .hasFill(extractor.isSawFill())
                .hasStroke(extractor.isSawStroke())
                .hasImages(extractor.isSawImage())
                .hasText(extractor.isSawText())
                .hasShading(extractor.isSawShading())
                .warnings(extractor.getWarnings())
                .unicode(unicodeValue)
                .build();
    }

    private PDRectangle extractGlyphBoundingBox(PDType3Font font, PDType3CharProc charProc) {
        COSStream stream = charProc != null ? charProc.getCOSObject() : null;
        if (stream != null) {
            COSArray bboxArray = (COSArray) stream.getDictionaryObject(COSName.BBOX);
            if (bboxArray != null && bboxArray.size() == 4) {
                return new PDRectangle(bboxArray);
            }
        }
        return font.getFontBBox();
    }

    private int resolveCharCode(PDType3Font font, String glyphName) {
        if (glyphName == null || font.getEncoding() == null) {
            return -1;
        }
        for (int code = 0; code <= 0xFF; code++) {
            String name = font.getEncoding().getName(code);
            if (glyphName.equals(name)) {
                return code;
            }
        }
        return -1;
    }

    private static final class GlyphGraphicsExtractor extends Type3GraphicsEngine {
        GlyphGraphicsExtractor(PDPage page) {
            super(page);
        }
    }
}
@ -0,0 +1,164 @@
|
||||
package stirling.software.SPDF.service.pdfjson.type3;
|
||||
|
||||
import java.awt.geom.GeneralPath;
|
||||
import java.awt.geom.Point2D;
|
||||
import java.io.IOException;
|
||||
|
||||
import org.apache.pdfbox.contentstream.PDFGraphicsStreamEngine;
|
||||
import org.apache.pdfbox.contentstream.operator.Operator;
|
||||
import org.apache.pdfbox.cos.COSName;
|
||||
import org.apache.pdfbox.pdmodel.PDPage;
|
||||
import org.apache.pdfbox.pdmodel.font.PDFont;
|
||||
import org.apache.pdfbox.pdmodel.font.PDType3CharProc;
|
||||
import org.apache.pdfbox.pdmodel.graphics.image.PDImage;
|
||||
import org.apache.pdfbox.util.Matrix;
|
||||
import org.apache.pdfbox.util.Vector;
|
||||
|
||||
import lombok.Getter;
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
|
||||
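/**
 * Replays a Type3 glyph's content stream, accumulating its outline path and flagging any
 * fill, stroke, image, text, and shading operations encountered along the way.
 */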
@Slf4j
|
||||
class Type3GraphicsEngine extends PDFGraphicsStreamEngine {
|
||||
|
||||
private final GeneralPath accumulatedPath = new GeneralPath();
|
||||
private final GeneralPath linePath = new GeneralPath();
|
||||
private final Point2D.Float currentPoint = new Point2D.Float();
|
||||
private boolean hasCurrentPoint;
|
||||
@Getter private boolean sawStroke;
|
||||
@Getter private boolean sawFill;
|
||||
@Getter private boolean sawImage;
|
||||
@Getter private boolean sawText;
|
||||
@Getter private boolean sawShading;
|
||||
@Getter private String warnings;
|
||||
|
||||
protected Type3GraphicsEngine(PDPage page) {
|
||||
super(page);
|
||||
}
|
||||
|
||||
public GeneralPath getAccumulatedPath() {
|
||||
return (GeneralPath) accumulatedPath.clone();
|
||||
}
|
||||
|
||||
public void process(PDType3CharProc charProc) throws IOException {
|
||||
accumulatedPath.reset();
|
||||
linePath.reset();
|
||||
sawStroke = false;
|
||||
sawFill = false;
|
||||
sawImage = false;
|
||||
sawText = false;
|
||||
sawShading = false;
|
||||
warnings = null;
|
||||
if (charProc != null) {
|
||||
processChildStream(charProc, getPage());
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public void appendRectangle(Point2D p0, Point2D p1, Point2D p2, Point2D p3) throws IOException {
|
||||
moveTo((float) p0.getX(), (float) p0.getY());
|
||||
lineTo((float) p1.getX(), (float) p1.getY());
|
||||
lineTo((float) p2.getX(), (float) p2.getY());
|
||||
lineTo((float) p3.getX(), (float) p3.getY());
|
||||
closePath();
|
||||
}
|
||||
|
||||
@Override
|
||||
public void drawImage(PDImage pdImage) throws IOException {
|
||||
sawImage = true;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void shadingFill(COSName shadingName) throws IOException {
|
||||
sawShading = true;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void strokePath() throws IOException {
|
||||
accumulatedPath.append(linePath, false);
|
||||
linePath.reset();
|
||||
sawStroke = true;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void fillPath(int windingRule) throws IOException {
|
||||
linePath.setWindingRule(
|
||||
windingRule == 0 ? GeneralPath.WIND_EVEN_ODD : GeneralPath.WIND_NON_ZERO);
|
||||
accumulatedPath.append(linePath, false);
|
||||
linePath.reset();
|
||||
sawFill = true;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void fillAndStrokePath(int windingRule) throws IOException {
|
||||
fillPath(windingRule);
|
||||
sawStroke = true;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void clip(int windingRule) throws IOException {
|
||||
// ignore
|
||||
}
|
||||
|
||||
@Override
|
||||
public void moveTo(float x, float y) throws IOException {
|
||||
linePath.moveTo(x, y);
|
||||
currentPoint.setLocation(x, y);
|
||||
hasCurrentPoint = true;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void lineTo(float x, float y) throws IOException {
|
||||
linePath.lineTo(x, y);
|
||||
currentPoint.setLocation(x, y);
|
||||
hasCurrentPoint = true;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void curveTo(float x1, float y1, float x2, float y2, float x3, float y3)
|
||||
throws IOException {
|
||||
linePath.curveTo(x1, y1, x2, y2, x3, y3);
|
||||
currentPoint.setLocation(x3, y3);
|
||||
hasCurrentPoint = true;
|
||||
}
|
||||
|
||||
@Override
|
||||
public Point2D getCurrentPoint() throws IOException {
|
||||
return hasCurrentPoint ? (Point2D) currentPoint.clone() : null;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void closePath() throws IOException {
|
||||
linePath.closePath();
|
||||
}
|
||||
|
||||
@Override
|
||||
public void endPath() throws IOException {
|
||||
linePath.reset();
|
||||
hasCurrentPoint = false;
|
||||
}
|
||||
|
||||
@Override
|
||||
protected void showText(byte[] string) throws IOException {
|
||||
sawText = true;
|
||||
super.showText(string);
|
||||
}
|
||||
|
||||
@Override
|
||||
protected void showFontGlyph(
|
||||
Matrix textRenderingMatrix, PDFont font, int code, Vector displacement)
|
||||
throws IOException {
|
||||
sawText = true;
|
||||
super.showFontGlyph(textRenderingMatrix, font, code, displacement);
|
||||
}
|
||||
|
||||
@Override
|
||||
protected void processOperator(
|
||||
Operator operator, java.util.List<org.apache.pdfbox.cos.COSBase> operands)
|
||||
throws IOException {
|
||||
if ("cm".equals(operator.getName())) {
|
||||
warnings =
|
||||
warnings == null ? "Encountered CTM concatenation" : warnings + "; CTM concat";
|
||||
}
|
||||
super.processOperator(operator, operands);
|
||||
}
|
||||
}
|
||||
@ -0,0 +1,113 @@
|
||||
package stirling.software.SPDF.service.pdfjson.type3;
|
||||
|
||||
import java.io.IOException;
|
||||
|
||||
import org.springframework.beans.factory.annotation.Value;
|
||||
import org.springframework.core.annotation.Order;
|
||||
import org.springframework.stereotype.Component;
|
||||
|
||||
import lombok.RequiredArgsConstructor;
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
|
||||
import stirling.software.SPDF.model.json.PdfJsonFontConversionCandidate;
|
||||
import stirling.software.SPDF.model.json.PdfJsonFontConversionStatus;
|
||||
import stirling.software.SPDF.service.pdfjson.type3.library.Type3FontLibrary;
|
||||
import stirling.software.SPDF.service.pdfjson.type3.library.Type3FontLibraryEntry;
|
||||
import stirling.software.SPDF.service.pdfjson.type3.library.Type3FontLibraryMatch;
|
||||
import stirling.software.SPDF.service.pdfjson.type3.library.Type3FontLibraryPayload;
|
||||
|
||||
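/**
 * Conversion strategy that matches a Type3 font against the built-in library and, on success,
 * returns the pre-built font programs as a conversion candidate.
 */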
@Slf4j
|
||||
@Component
|
||||
@Order(0)
|
||||
@RequiredArgsConstructor
|
||||
public class Type3LibraryStrategy implements Type3ConversionStrategy {
|
||||
|
||||
private final Type3FontLibrary fontLibrary;
|
||||
|
||||
@Value("${stirling.pdf.json.type3.library.enabled:true}")
|
||||
private boolean enabled;
|
||||
|
||||
@Override
|
||||
public String getId() {
|
||||
return "type3-library";
|
||||
}
|
||||
|
||||
@Override
|
||||
public String getLabel() {
|
||||
return "Type3 Font Library";
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean isAvailable() {
|
||||
return enabled && fontLibrary != null && fontLibrary.isLoaded();
|
||||
}
|
||||
|
||||
@Override
|
||||
public PdfJsonFontConversionCandidate convert(
|
||||
Type3ConversionRequest request, Type3GlyphContext context) throws IOException {
|
||||
if (request == null || request.getFont() == null) {
|
||||
return PdfJsonFontConversionCandidate.builder()
|
||||
.strategyId(getId())
|
||||
.strategyLabel(getLabel())
|
||||
.status(PdfJsonFontConversionStatus.FAILURE)
|
||||
.message("No font supplied")
|
||||
.build();
|
||||
}
|
||||
if (!isAvailable()) {
|
||||
return PdfJsonFontConversionCandidate.builder()
|
||||
.strategyId(getId())
|
||||
.strategyLabel(getLabel())
|
||||
.status(PdfJsonFontConversionStatus.SKIPPED)
|
||||
.message("Library disabled")
|
||||
.build();
|
||||
}
|
||||
|
||||
Type3FontLibraryMatch match = fontLibrary.match(request.getFont(), request.getFontUid());
|
||||
if (match == null || match.getEntry() == null) {
|
||||
return PdfJsonFontConversionCandidate.builder()
|
||||
.strategyId(getId())
|
||||
.strategyLabel(getLabel())
|
||||
.status(PdfJsonFontConversionStatus.UNSUPPORTED)
|
||||
.message("No library entry found")
|
||||
.build();
|
||||
}
|
||||
|
||||
Type3FontLibraryEntry entry = match.getEntry();
|
||||
if (!entry.hasAnyPayload()) {
|
||||
return PdfJsonFontConversionCandidate.builder()
|
||||
.strategyId(getId())
|
||||
.strategyLabel(getLabel())
|
||||
.status(PdfJsonFontConversionStatus.FAILURE)
|
||||
.message("Library entry has no payloads")
|
||||
.build();
|
||||
}
|
||||
|
||||
String message =
|
||||
String.format(
|
||||
"Matched %s via %s",
|
||||
entry.getLabel(),
|
||||
match.getMatchType() != null ? match.getMatchType() : "alias");
|
||||
|
||||
return PdfJsonFontConversionCandidate.builder()
|
||||
.strategyId(getId())
|
||||
.strategyLabel(getLabel())
|
||||
.status(PdfJsonFontConversionStatus.SUCCESS)
|
||||
.program(toBase64(entry.getProgram()))
|
||||
.programFormat(toFormat(entry.getProgram()))
|
||||
.webProgram(toBase64(entry.getWebProgram()))
|
||||
.webProgramFormat(toFormat(entry.getWebProgram()))
|
||||
.pdfProgram(toBase64(entry.getPdfProgram()))
|
||||
.pdfProgramFormat(toFormat(entry.getPdfProgram()))
|
||||
.glyphCoverage(entry.getGlyphCoverage())
|
||||
.message(message)
|
||||
.build();
|
||||
}
|
||||
|
||||
private String toBase64(Type3FontLibraryPayload payload) {
|
||||
return payload != null ? payload.getBase64() : null;
|
||||
}
|
||||
|
||||
private String toFormat(Type3FontLibraryPayload payload) {
|
||||
return payload != null ? payload.getFormat() : null;
|
||||
}
|
||||
}
|
||||
@ -0,0 +1,299 @@
|
||||
package stirling.software.SPDF.service.pdfjson.type3.library;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.io.InputStream;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Base64;
|
||||
import java.util.Collections;
|
||||
import java.util.List;
|
||||
import java.util.Locale;
|
||||
import java.util.Map;
|
||||
import java.util.Objects;
|
||||
import java.util.concurrent.ConcurrentHashMap;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
import org.apache.pdfbox.cos.COSName;
|
||||
import org.apache.pdfbox.pdmodel.font.PDType3Font;
|
||||
import org.springframework.beans.factory.annotation.Value;
|
||||
import org.springframework.core.io.Resource;
|
||||
import org.springframework.core.io.ResourceLoader;
|
||||
import org.springframework.stereotype.Component;
|
||||
|
||||
import com.fasterxml.jackson.core.type.TypeReference;
|
||||
import com.fasterxml.jackson.databind.ObjectMapper;
|
||||
|
||||
import lombok.RequiredArgsConstructor;
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
|
||||
import stirling.software.SPDF.service.pdfjson.type3.Type3FontSignatureCalculator;
|
||||
|
||||
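/**
 * Loads the Type3 library index at startup and matches Type3 fonts against it,
 * first by glyph signature and then by normalized alias.
 */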
@Slf4j
|
||||
@Component
|
||||
@RequiredArgsConstructor
|
||||
public class Type3FontLibrary {
|
||||
|
||||
private final ObjectMapper objectMapper;
|
||||
private final ResourceLoader resourceLoader;
|
||||
|
||||
@Value("${stirling.pdf.json.type3.library.index:classpath:/type3/library/index.json}")
|
||||
private String indexLocation;
|
||||
|
||||
private final Map<String, Type3FontLibraryEntry> signatureIndex = new ConcurrentHashMap<>();
|
||||
private final Map<String, Type3FontLibraryEntry> aliasIndex = new ConcurrentHashMap<>();
|
||||
private List<Type3FontLibraryEntry> entries = List.of();
|
||||
|
||||
@jakarta.annotation.PostConstruct
|
||||
void initialise() {
|
||||
Resource resource = resourceLoader.getResource(indexLocation);
|
||||
if (!resource.exists()) {
|
||||
log.info("[TYPE3] Library index {} not found; Type3 library disabled", indexLocation);
|
||||
entries = List.of();
|
||||
return;
|
||||
}
|
||||
try (InputStream inputStream = resource.getInputStream()) {
|
||||
List<RawEntry> rawEntries =
|
||||
objectMapper.readValue(inputStream, new TypeReference<List<RawEntry>>() {});
|
||||
List<Type3FontLibraryEntry> loaded = new ArrayList<>();
|
||||
for (RawEntry rawEntry : rawEntries) {
|
||||
Type3FontLibraryEntry entry = toEntry(rawEntry);
|
||||
if (entry != null && entry.hasAnyPayload()) {
|
||||
loaded.add(entry);
|
||||
}
|
||||
}
|
||||
entries = Collections.unmodifiableList(loaded);
|
||||
signatureIndex.clear();
|
||||
aliasIndex.clear();
|
||||
|
||||
for (Type3FontLibraryEntry entry : entries) {
|
||||
if (entry.getSignatures() != null) {
|
||||
for (String signature : entry.getSignatures()) {
|
||||
if (signature == null) {
|
||||
continue;
|
||||
}
|
||||
String key = signature.toLowerCase(Locale.ROOT);
|
||||
signatureIndex.putIfAbsent(key, entry);
|
||||
}
|
||||
}
|
||||
if (entry.getAliases() != null) {
|
||||
for (String alias : entry.getAliases()) {
|
||||
String normalized = normalizeAlias(alias);
|
||||
if (normalized != null) {
|
||||
aliasIndex.putIfAbsent(normalized, entry);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
log.info(
|
||||
"[TYPE3] Loaded {} Type3 library entries (signatures={}, aliases={}) from {}",
|
||||
entries.size(),
|
||||
signatureIndex.size(),
|
||||
aliasIndex.size(),
|
||||
indexLocation);
|
||||
} catch (IOException ex) {
|
||||
log.warn(
|
||||
"[TYPE3] Failed to load Type3 library index {}: {}",
|
||||
indexLocation,
|
||||
ex.getMessage(),
|
||||
ex);
|
||||
entries = List.of();
|
||||
signatureIndex.clear();
|
||||
aliasIndex.clear();
|
||||
}
|
||||
}
|
||||
|
||||
public boolean isLoaded() {
|
||||
return !entries.isEmpty();
|
||||
}
|
||||
|
||||
public Type3FontLibraryMatch match(PDType3Font font, String fontUid) throws IOException {
|
||||
if (font == null || entries.isEmpty()) {
|
||||
return null;
|
||||
}
|
||||
String signature = Type3FontSignatureCalculator.computeSignature(font);
|
||||
if (signature != null) {
|
||||
Type3FontLibraryEntry entry = signatureIndex.get(signature.toLowerCase(Locale.ROOT));
|
||||
if (entry != null) {
|
||||
log.debug(
|
||||
"[TYPE3] Matched Type3 font {} to library entry {} via signature {}",
|
||||
fontUid,
|
||||
entry.getId(),
|
||||
signature);
|
||||
return Type3FontLibraryMatch.builder()
|
||||
.entry(entry)
|
||||
.matchType("signature")
|
||||
.signature(signature)
|
||||
.build();
|
||||
}
|
||||
log.debug(
|
||||
"[TYPE3] No library entry for signature {} (font {})",
|
||||
signature,
|
||||
fontUid != null ? fontUid : font.getName());
|
||||
}
|
||||
|
||||
String aliasKey = normalizeAlias(resolveBaseFontName(font));
|
||||
if (aliasKey != null) {
|
||||
Type3FontLibraryEntry entry = aliasIndex.get(aliasKey);
|
||||
if (entry != null) {
|
||||
log.debug(
|
||||
"[TYPE3] Matched Type3 font {} to library entry {} via alias {}",
|
||||
fontUid,
|
||||
entry.getId(),
|
||||
aliasKey);
|
||||
return Type3FontLibraryMatch.builder()
|
||||
.entry(entry)
|
||||
.matchType("alias:" + aliasKey)
|
||||
.signature(signature)
|
||||
.build();
|
||||
}
|
||||
}
|
||||
|
||||
if (signature != null) {
|
||||
log.debug(
|
||||
"[TYPE3] Library had no alias match for signature {} (font {})",
|
||||
signature,
|
||||
fontUid != null ? fontUid : font.getName());
|
||||
}
|
||||
return null;
|
||||
}
|
||||
|
||||
private Type3FontLibraryEntry toEntry(RawEntry rawEntry) {
|
||||
if (rawEntry == null || rawEntry.id == null) {
|
||||
return null;
|
||||
}
|
||||
try {
|
||||
Type3FontLibraryEntry.Type3FontLibraryEntryBuilder builder =
|
||||
Type3FontLibraryEntry.builder()
|
||||
.id(rawEntry.id)
|
||||
.label(rawEntry.label != null ? rawEntry.label : rawEntry.id)
|
||||
.signatures(normalizeList(rawEntry.signatures))
|
||||
.aliases(normalizeList(rawEntry.aliases))
|
||||
.program(loadPayload(rawEntry.program))
|
||||
.webProgram(loadPayload(rawEntry.webProgram))
|
||||
.pdfProgram(loadPayload(rawEntry.pdfProgram))
|
||||
.source(rawEntry.source);
|
||||
if (rawEntry.glyphCoverage != null && !rawEntry.glyphCoverage.isEmpty()) {
|
||||
for (Integer codePoint : rawEntry.glyphCoverage) {
|
||||
if (codePoint != null) {
|
||||
builder.glyphCode(codePoint);
|
||||
}
|
||||
}
|
||||
}
|
||||
return builder.build();
|
||||
} catch (IOException ex) {
|
||||
log.warn(
|
||||
"[TYPE3] Failed to load Type3 library entry {}: {}",
|
||||
rawEntry.id,
|
||||
ex.getMessage());
|
||||
return null;
|
||||
}
|
||||
}
|
||||
|
||||
private Type3FontLibraryPayload loadPayload(RawPayload payload) throws IOException {
|
||||
if (payload == null) {
|
||||
return null;
|
||||
}
|
||||
byte[] data = null;
|
||||
if (payload.base64 != null && !payload.base64.isBlank()) {
|
||||
try {
|
||||
data = Base64.getDecoder().decode(payload.base64);
|
||||
} catch (IllegalArgumentException ex) {
|
||||
log.warn("[TYPE3] Invalid base64 payload in Type3 library: {}", ex.getMessage());
|
||||
}
|
||||
} else if (payload.resource != null && !payload.resource.isBlank()) {
|
||||
data = loadResourceBytes(payload.resource);
|
||||
}
|
||||
if (data == null || data.length == 0) {
|
||||
return null;
|
||||
}
|
||||
String base64 = Base64.getEncoder().encodeToString(data);
|
||||
return new Type3FontLibraryPayload(base64, normalizeFormat(payload.format));
|
||||
}
|
||||
|
||||
private byte[] loadResourceBytes(String location) throws IOException {
|
||||
String resolved = resolveLocation(location);
|
||||
Resource resource = resourceLoader.getResource(resolved);
|
||||
if (!resource.exists()) {
|
||||
throw new IOException("Resource not found: " + resolved);
|
||||
}
|
||||
try (InputStream inputStream = resource.getInputStream()) {
|
||||
return inputStream.readAllBytes();
|
||||
}
|
||||
}
|
||||
|
||||
private String resolveLocation(String location) {
|
||||
if (location == null || location.isBlank()) {
|
||||
return location;
|
||||
}
|
||||
if (location.contains(":")) {
|
||||
return location;
|
||||
}
|
||||
if (location.startsWith("/")) {
|
||||
return "classpath:" + location;
|
||||
}
|
||||
return "classpath:/" + location;
|
||||
}
|
||||
|
||||
private List<String> normalizeList(List<String> values) {
|
||||
if (values == null || values.isEmpty()) {
|
||||
return List.of();
|
||||
}
|
||||
return values.stream()
|
||||
.filter(Objects::nonNull)
|
||||
.map(String::trim)
|
||||
.filter(s -> !s.isEmpty())
|
||||
.collect(Collectors.toList());
|
||||
}
|
||||
|
||||
private String normalizeAlias(String alias) {
|
||||
if (alias == null) {
|
||||
return null;
|
||||
}
|
||||
String value = alias.trim();
|
||||
int plus = value.indexOf('+');
|
||||
if (plus >= 0 && plus < value.length() - 1) {
|
||||
value = value.substring(plus + 1);
|
||||
}
|
||||
return value.isEmpty() ? null : value.toLowerCase(Locale.ROOT);
|
||||
}
|
||||
|
||||
private String normalizeFormat(String format) {
|
||||
if (format == null) {
|
||||
return null;
|
||||
}
|
||||
return format.trim().toLowerCase(Locale.ROOT);
|
||||
}
|
||||
|
||||
private String resolveBaseFontName(PDType3Font font) {
|
||||
if (font == null) {
|
||||
return null;
|
||||
}
|
||||
String baseName = null;
|
||||
try {
|
||||
baseName = font.getName();
|
||||
} catch (Exception ignored) {
|
||||
// Some Type3 fonts throw when resolving names; fall back to COS dictionary.
|
||||
}
|
||||
if (baseName == null && font.getCOSObject() != null) {
|
||||
baseName = font.getCOSObject().getNameAsString(COSName.BASE_FONT);
|
||||
}
|
||||
return baseName;
|
||||
}
|
||||
|
||||
private static final class RawEntry {
|
||||
public String id;
|
||||
public String label;
|
||||
public List<String> signatures;
|
||||
public List<String> aliases;
|
||||
public RawPayload program;
|
||||
public RawPayload webProgram;
|
||||
public RawPayload pdfProgram;
|
||||
public List<Integer> glyphCoverage;
|
||||
public String source;
|
||||
}
|
||||
|
||||
private static final class RawPayload {
|
||||
public String resource;
|
||||
public String format;
|
||||
public String base64;
|
||||
}
|
||||
}
|
||||
@ -0,0 +1,30 @@
|
||||
package stirling.software.SPDF.service.pdfjson.type3.library;
|
||||
|
||||
import java.util.List;
|
||||
|
||||
import lombok.Builder;
|
||||
import lombok.Singular;
|
||||
import lombok.Value;
|
||||
|
||||
@Value
|
||||
@Builder
|
||||
public class Type3FontLibraryEntry {
|
||||
String id;
|
||||
String label;
|
||||
@Singular List<String> signatures;
|
||||
@Singular List<String> aliases;
|
||||
Type3FontLibraryPayload program;
|
||||
Type3FontLibraryPayload webProgram;
|
||||
Type3FontLibraryPayload pdfProgram;
|
||||
|
||||
@Singular("glyphCode")
|
||||
List<Integer> glyphCoverage;
|
||||
|
||||
String source;
|
||||
|
||||
public boolean hasAnyPayload() {
|
||||
return (program != null && program.hasPayload())
|
||||
|| (webProgram != null && webProgram.hasPayload())
|
||||
|| (pdfProgram != null && pdfProgram.hasPayload());
|
||||
}
|
||||
}
|
||||
@ -0,0 +1,12 @@
|
||||
package stirling.software.SPDF.service.pdfjson.type3.library;
|
||||
|
||||
import lombok.Builder;
|
||||
import lombok.Value;
|
||||
|
||||
@Value
|
||||
@Builder
|
||||
public class Type3FontLibraryMatch {
|
||||
Type3FontLibraryEntry entry;
|
||||
String matchType;
|
||||
String signature;
|
||||
}
|
||||
@ -0,0 +1,13 @@
|
||||
package stirling.software.SPDF.service.pdfjson.type3.library;
|
||||
|
||||
import lombok.Value;
|
||||
|
||||
@Value
|
||||
public class Type3FontLibraryPayload {
|
||||
String base64;
|
||||
String format;
|
||||
|
||||
public boolean hasPayload() {
|
||||
return base64 != null && !base64.isBlank();
|
||||
}
|
||||
}
|
||||
@ -0,0 +1,25 @@
|
||||
package stirling.software.SPDF.service.pdfjson.type3.model;
|
||||
|
||||
import java.awt.geom.GeneralPath;
|
||||
|
||||
import org.apache.pdfbox.pdmodel.common.PDRectangle;
|
||||
|
||||
import lombok.Builder;
|
||||
import lombok.Value;
|
||||
|
||||
@Value
|
||||
@Builder
|
||||
public class Type3GlyphOutline {
|
||||
String glyphName;
|
||||
int charCode;
|
||||
float advanceWidth;
|
||||
PDRectangle boundingBox;
|
||||
GeneralPath outline;
|
||||
boolean hasStroke;
|
||||
boolean hasFill;
|
||||
boolean hasImages;
|
||||
boolean hasText;
|
||||
boolean hasShading;
|
||||
String warnings;
|
||||
Integer unicode;
|
||||
}
|
||||
@ -0,0 +1,299 @@
|
||||
package stirling.software.SPDF.service.pdfjson.type3.tool;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.nio.file.Files;
|
||||
import java.nio.file.Path;
|
||||
import java.nio.file.Paths;
|
||||
import java.util.ArrayDeque;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collections;
|
||||
import java.util.Deque;
|
||||
import java.util.IdentityHashMap;
|
||||
import java.util.LinkedHashMap;
|
||||
import java.util.List;
|
||||
import java.util.Locale;
|
||||
import java.util.Map;
|
||||
import java.util.Set;
|
||||
import java.util.TreeSet;
|
||||
|
||||
import org.apache.pdfbox.Loader;
|
||||
import org.apache.pdfbox.cos.COSName;
|
||||
import org.apache.pdfbox.pdmodel.PDDocument;
|
||||
import org.apache.pdfbox.pdmodel.PDPage;
|
||||
import org.apache.pdfbox.pdmodel.PDResources;
|
||||
import org.apache.pdfbox.pdmodel.font.PDFont;
|
||||
import org.apache.pdfbox.pdmodel.font.PDType3Font;
|
||||
import org.apache.pdfbox.pdmodel.graphics.PDXObject;
|
||||
import org.apache.pdfbox.pdmodel.graphics.form.PDFormXObject;
|
||||
|
||||
import com.fasterxml.jackson.databind.ObjectMapper;
|
||||
import com.fasterxml.jackson.databind.ObjectWriter;
|
||||
import com.fasterxml.jackson.databind.SerializationFeature;
|
||||
|
||||
import stirling.software.SPDF.service.pdfjson.type3.Type3FontSignatureCalculator;
|
||||
import stirling.software.SPDF.service.pdfjson.type3.Type3GlyphExtractor;
|
||||
import stirling.software.SPDF.service.pdfjson.type3.model.Type3GlyphOutline;
|
||||
|
||||
/**
|
||||
* Small CLI helper that scans a PDF for Type3 fonts, computes their signatures, and optionally
|
||||
* emits JSON describing the glyph coverage. This allows Type3 library entries to be added without
|
||||
* digging through backend logs.
|
||||
*
|
||||
* <p>Usage:
|
||||
*
|
||||
* <pre>
|
||||
* ./gradlew :proprietary:type3SignatureTool --args="--pdf path/to/sample.pdf --output type3.json --pretty"
|
||||
* </pre>
|
||||
*/
|
||||
public final class Type3SignatureTool {
|
||||
|
||||
private static final ObjectMapper OBJECT_MAPPER =
|
||||
new ObjectMapper().enable(SerializationFeature.INDENT_OUTPUT);
|
||||
|
||||
private Type3SignatureTool() {}
|
||||
|
||||
public static void main(String[] args) throws Exception {
|
||||
Arguments arguments = Arguments.parse(args);
|
||||
if (arguments.showHelp || arguments.pdf == null) {
|
||||
printUsage();
|
||||
return;
|
||||
}
|
||||
|
||||
Path pdfPath = arguments.pdf.toAbsolutePath();
|
||||
if (!Files.exists(pdfPath)) {
|
||||
throw new IOException("PDF not found: " + pdfPath);
|
||||
}
|
||||
|
||||
List<Map<String, Object>> fonts;
|
||||
try (PDDocument document = Loader.loadPDF(pdfPath.toFile())) {
|
||||
fonts = collectType3Fonts(document);
|
||||
}
|
||||
|
||||
Map<String, Object> output = new LinkedHashMap<>();
|
||||
output.put("pdf", pdfPath.toString());
|
||||
output.put("fonts", fonts);
|
||||
ObjectWriter writer =
|
||||
arguments.pretty
|
||||
? OBJECT_MAPPER.writerWithDefaultPrettyPrinter()
|
||||
: OBJECT_MAPPER.writer();
|
||||
if (arguments.output != null) {
|
||||
Path parent = arguments.output.toAbsolutePath().getParent();
|
||||
if (parent != null) {
|
||||
Files.createDirectories(parent);
|
||||
}
|
||||
writer.writeValue(arguments.output.toFile(), output);
|
||||
verifyOutput(arguments.output, fonts.size());
|
||||
} else {
|
||||
writer.writeValue(System.out, output);
|
||||
}
|
||||
}
|
||||
|
||||
private static List<Map<String, Object>> collectType3Fonts(PDDocument document)
|
||||
throws IOException {
|
||||
if (document == null || document.getNumberOfPages() == 0) {
|
||||
return List.of();
|
||||
}
|
||||
List<Map<String, Object>> fonts = new ArrayList<>();
|
||||
Type3GlyphExtractor glyphExtractor = new Type3GlyphExtractor();
|
||||
Set<Object> visited = Collections.newSetFromMap(new IdentityHashMap<>());
|
||||
|
||||
for (int pageIndex = 0; pageIndex < document.getNumberOfPages(); pageIndex++) {
|
||||
PDPage page = document.getPage(pageIndex);
|
||||
PDResources resources = page.getResources();
|
||||
if (resources == null) {
|
||||
continue;
|
||||
}
|
||||
scanResources(document, pageIndex + 1, resources, glyphExtractor, visited, fonts);
|
||||
}
|
||||
return fonts;
|
||||
}
|
||||
|
||||
private static void scanResources(
|
||||
PDDocument document,
|
||||
int pageNumber,
|
||||
PDResources resources,
|
||||
Type3GlyphExtractor glyphExtractor,
|
||||
Set<Object> visited,
|
||||
List<Map<String, Object>> fonts)
|
||||
throws IOException {
|
||||
if (resources == null) {
|
||||
return;
|
||||
}
|
||||
|
||||
for (COSName name : resources.getFontNames()) {
|
||||
PDFont font = resources.getFont(name);
|
||||
if (!(font instanceof PDType3Font type3Font)) {
|
||||
continue;
|
||||
}
|
||||
Object cosObject = type3Font.getCOSObject();
|
||||
if (cosObject != null && !visited.add(cosObject)) {
|
||||
continue;
|
||||
}
|
||||
fonts.add(
|
||||
describeFont(document, pageNumber, name.getName(), type3Font, glyphExtractor));
|
||||
}
|
||||
|
||||
Deque<PDResources> embedded = new ArrayDeque<>();
|
||||
for (COSName name : resources.getXObjectNames()) {
|
||||
PDXObject xobject = resources.getXObject(name);
|
||||
if (xobject instanceof PDFormXObject form && form.getResources() != null) {
|
||||
embedded.add(form.getResources());
|
||||
}
|
||||
}
|
||||
while (!embedded.isEmpty()) {
|
||||
scanResources(document, pageNumber, embedded.pop(), glyphExtractor, visited, fonts);
|
||||
}
|
||||
}
|
||||
|
||||
private static Map<String, Object> describeFont(
|
||||
PDDocument document,
|
||||
int pageNumber,
|
||||
String fontId,
|
||||
PDType3Font font,
|
||||
Type3GlyphExtractor glyphExtractor)
|
||||
throws IOException {
|
||||
Map<String, Object> payload = new LinkedHashMap<>();
|
||||
payload.put("pageNumber", pageNumber);
|
||||
payload.put("fontId", fontId);
|
||||
payload.put("baseName", safeFontName(font));
|
||||
payload.put("alias", normalizeAlias(safeFontName(font)));
|
||||
payload.put("encoding", resolveEncoding(font));
|
||||
payload.put("signature", Type3FontSignatureCalculator.computeSignature(font));
|
||||
|
||||
List<Type3GlyphOutline> glyphs =
|
||||
glyphExtractor.extractGlyphs(document, font, fontId, pageNumber);
|
||||
payload.put("glyphCount", glyphs != null ? glyphs.size() : 0);
|
||||
|
||||
Set<Integer> coverage = new TreeSet<>();
|
||||
if (glyphs != null) {
|
||||
for (Type3GlyphOutline glyph : glyphs) {
|
||||
if (glyph == null) {
|
||||
continue;
|
||||
}
|
||||
if (glyph.getUnicode() != null) {
|
||||
coverage.add(glyph.getUnicode());
|
||||
} else if (glyph.getCharCode() >= 0) {
|
||||
coverage.add(0xF000 | (glyph.getCharCode() & 0xFF));
|
||||
}
|
||||
}
|
||||
List<Map<String, Object>> warnings = new ArrayList<>();
|
||||
for (Type3GlyphOutline glyph : glyphs) {
|
||||
if (glyph != null && glyph.getWarnings() != null) {
|
||||
Map<String, Object> warning = new LinkedHashMap<>();
|
||||
warning.put("glyphName", glyph.getGlyphName());
|
||||
warning.put("message", glyph.getWarnings());
|
||||
warnings.add(warning);
|
||||
}
|
||||
}
|
||||
if (!warnings.isEmpty()) {
|
||||
payload.put("warnings", warnings);
|
||||
}
|
||||
}
|
||||
if (!coverage.isEmpty()) {
|
||||
payload.put("glyphCoverage", new ArrayList<>(coverage));
|
||||
}
|
||||
return payload;
|
||||
}
|
||||
|
||||
private static void verifyOutput(Path output, int fontCount) throws IOException {
|
||||
Path absolute = output.toAbsolutePath();
|
||||
if (!Files.exists(absolute)) {
|
||||
throw new IOException("Expected output file not found: " + absolute);
|
||||
}
|
||||
long size = Files.size(absolute);
|
||||
if (size == 0) {
|
||||
throw new IOException("Output file is empty: " + absolute);
|
||||
}
|
||||
System.out.println(
|
||||
"Wrote " + fontCount + " fonts to " + absolute + " (" + size + " bytes, verified)");
|
||||
}
|
||||
|
||||
private static String resolveEncoding(PDType3Font font) {
|
||||
if (font == null || font.getEncoding() == null) {
|
||||
return null;
|
||||
}
|
||||
Object encoding = font.getCOSObject().getDictionaryObject(COSName.ENCODING);
|
||||
return encoding != null
|
||||
? encoding.toString()
|
||||
: font.getEncoding().getClass().getSimpleName();
|
||||
}
|
||||
|
||||
private static String safeFontName(PDType3Font font) {
|
||||
if (font == null) {
|
||||
return null;
|
||||
}
|
||||
try {
|
||||
if (font.getName() != null) {
|
||||
return font.getName();
|
||||
}
|
||||
} catch (Exception ignored) {
|
||||
// ignore
|
||||
}
|
||||
if (font.getCOSObject() != null) {
|
||||
return font.getCOSObject().getNameAsString(COSName.BASE_FONT);
|
||||
}
|
||||
return null;
|
||||
}
|
||||
|
||||
private static String normalizeAlias(String name) {
|
||||
if (name == null) {
|
||||
return null;
|
||||
}
|
||||
int plus = name.indexOf('+');
|
||||
String normalized = plus >= 0 ? name.substring(plus + 1) : name;
|
||||
normalized = normalized.trim();
|
||||
return normalized.isEmpty() ? null : normalized.toLowerCase(Locale.ROOT);
|
||||
}
|
||||
|
||||
private static void printUsage() {
|
||||
System.out.println(
|
||||
"""
|
||||
Type3SignatureTool - dump Type3 font signatures for library building
|
||||
Usage:
|
||||
--pdf <file.pdf> Input PDF to analyse (required)
|
||||
--output <file.json> Optional output file (defaults to stdout)
|
||||
--pretty Pretty-print JSON output
|
||||
--help Show this help
|
||||
|
||||
Example:
|
||||
./gradlew :proprietary:type3SignatureTool --args="--pdf samples/foo.pdf --output foo.json --pretty"
|
||||
""");
|
||||
}
|
||||
|
||||
private static final class Arguments {
|
||||
private final Path pdf;
|
||||
private final Path output;
|
||||
private final boolean pretty;
|
||||
private final boolean showHelp;
|
||||
|
||||
private Arguments(Path pdf, Path output, boolean pretty, boolean showHelp) {
|
||||
this.pdf = pdf;
|
||||
this.output = output;
|
||||
this.pretty = pretty;
|
||||
this.showHelp = showHelp;
|
||||
}
|
||||
|
||||
static Arguments parse(String[] args) {
|
||||
if (args == null || args.length == 0) {
|
||||
return new Arguments(null, null, true, true);
|
||||
}
|
||||
Path pdf = null;
|
||||
Path output = null;
|
||||
boolean pretty = false;
|
||||
boolean showHelp = false;
|
||||
for (int i = 0; i < args.length; i++) {
|
||||
String arg = args[i];
|
||||
if ("--pdf".equals(arg) && i + 1 < args.length) {
|
||||
pdf = Paths.get(args[++i]);
|
||||
} else if ("--output".equals(arg) && i + 1 < args.length) {
|
||||
output = Paths.get(args[++i]);
|
||||
} else if ("--pretty".equals(arg)) {
|
||||
pretty = true;
|
||||
} else if ("--help".equals(arg) || "-h".equals(arg)) {
|
||||
showHelp = true;
|
||||
}
|
||||
}
|
||||
return new Arguments(pdf, output, pretty, showHelp);
|
||||
}
|
||||
}
|
||||
}
|
||||
443
docs/pdf_json_type3_fonts.md
Normal file
@ -0,0 +1,443 @@
|
||||
# PDF JSON Type3 Font System
|
||||
|
||||
## Overview
|
||||
|
||||
The PDF JSON editor needs to handle **Type3 fonts** - PDF-specific fonts whose glyphs are defined by embedded vector drawing procedures rather than a standard font program (TrueType, CFF, etc.). These are common in PDFs generated by Matplotlib, LaTeX, scientific papers, and presentation tools.
|
||||
|
||||
When converting a PDF to JSON for editing, Type3 fonts present two challenges:
|
||||
1. **Missing Unicode mappings** - Character codes often have no ToUnicode entry, so they don't map cleanly to standard Unicode characters
|
||||
2. **Custom glyphs** - Each font contains vector drawing instructions unique to that PDF
|
||||
|
||||
This document explains how the system handles Type3 fonts during the full PDF → JSON → PDF workflow.
|
||||
|
||||
---
|
||||
|
||||
## Architecture Flow
|
||||
|
||||
### PDF → JSON Conversion Flow
|
||||
|
||||
```
|
||||
┌─────────────┐
|
||||
│ Input PDF │
|
||||
└──────┬──────┘
|
||||
│
|
||||
▼
|
||||
┌─────────────────────────────────┐
|
||||
│ PDFBox Parsing │
|
||||
│ - Extract text positions │
|
||||
│ - Identify fonts │
|
||||
└──────┬──────────────────────────┘
|
||||
│
|
||||
▼
|
||||
┌─────────────────────────────────┐
|
||||
│ Font Detection │
|
||||
│ Is this a Type3 font? │
|
||||
└──────┬──────────────────────────┘
|
||||
│
|
||||
├─── YES (Type3) ───────────────────────┐
|
||||
│ │
|
||||
▼ ▼
|
||||
┌──────────────────────────┐ ┌──────────────────────────────┐
|
||||
│ Type3FontConversion │ │ Extract Type3 Metadata │
|
||||
│ Service │ │ - Glyph outlines (paths) │
|
||||
│ │ │ - Character codes │
|
||||
│ 1. Calculate signature │ │ - Font matrix │
|
||||
│ 2. Match against │ │ - Bounding boxes │
|
||||
│ library │ └──────────────────────────────┘
|
||||
└──────┬───────────────────┘
|
||||
│
|
||||
▼
|
||||
┌──────────────────────────────────────┐
|
||||
│ Library Match? │
|
||||
└──────┬───────────────────────────────┘
|
||||
│
|
||||
├─── FOUND ─────────────────────┐
|
||||
│ │
|
||||
│ ▼
|
||||
│ ┌─────────────────────────────┐
|
||||
│ │ Load Pre-built Font │
|
||||
│ │ - TTF/OTF from library │
|
||||
│ │ - Full Unicode mappings │
|
||||
│ │ - Web + PDF payloads │
|
||||
│ └──────────┬──────────────────┘
|
||||
│ │
|
||||
├─── NOT FOUND ──────────────────┘
|
||||
│
|
||||
▼
|
||||
┌──────────────────────────────────────┐
|
||||
│ Store Type3 Metadata in JSON │
|
||||
│ - type3Glyphs: [{charCode, unicode, │
|
||||
│ glyphName, outline}] │
|
||||
│ - Original char codes preserved │
|
||||
│ - Font marked as Type3 │
|
||||
└──────┬───────────────────────────────┘
|
||||
│
|
||||
▼
|
||||
┌─────────────────────────────────────┐
|
||||
│ PdfJsonDocument Output │
|
||||
│ { │
|
||||
│ fonts: [{ │
|
||||
│ id: "F1", │
|
||||
│ baseName: "ABCD+DejaVuSans", │
|
||||
│ subtype: "Type3", │
|
||||
│ type3Glyphs: [...], │
|
||||
│ conversionCandidates: [{ │
|
||||
│ strategyId: "type3-library", │
|
||||
│ status: "SUCCESS", │
|
||||
│ pdfProgram: "base64...", │
|
||||
│ glyphCoverage: [65,66,67...] │
|
||||
│ }] │
|
||||
│ }], │
|
||||
│ textElements: [{ │
|
||||
│ text: "Hello", │
|
||||
│ fontId: "F1", │
|
||||
│ charCodes: [72,101,108,108,111]│
|
||||
│ }] │
|
||||
│ } │
|
||||
└─────────────────────────────────────┘
|
||||
```
|
||||
|
||||
### JSON → PDF Conversion Flow
|
||||
|
||||
```
|
||||
┌─────────────────────────────────────┐
|
||||
│ Input JSON (edited by user) │
|
||||
└──────┬──────────────────────────────┘
|
||||
│
|
||||
▼
|
||||
┌─────────────────────────────────────┐
|
||||
│ Load Fonts from JSON │
|
||||
│ - Check for conversionCandidates │
|
||||
│ - Check for type3Glyphs │
|
||||
└──────┬──────────────────────────────┘
|
||||
│
|
||||
▼
|
||||
┌─────────────────────────────────────┐
|
||||
│ Has conversionCandidates? │
|
||||
└──────┬──────────────────────────────┘
|
||||
│
|
||||
├─── YES (Library Match) ───────┐
|
||||
│ │
|
||||
│ ▼
|
||||
│ ┌─────────────────────────────┐
|
||||
│ │ Load from Candidate │
|
||||
│ │ 1. Decode base64 pdfProgram│
|
||||
│ │ 2. Create PDType0Font │
|
||||
│ │ 3. Embed in new PDF │
|
||||
│ └──────────┬──────────────────┘
|
||||
│ │
|
||||
├─── NO (Use Type3 Metadata) ───┘
|
||||
│
|
||||
▼
|
||||
┌──────────────────────────────────────┐
|
||||
│ Text Rendering Strategy │
|
||||
│ - Normalized Type3 fonts: │
|
||||
│ Use original text (font has │
|
||||
│ Unicode mappings) │
|
||||
│ │
|
||||
│ - Actual Type3 fonts: │
|
||||
│ Use charCodes array │
|
||||
│ │
|
||||
│ - Other fonts: │
|
||||
│ Standard encoding │
|
||||
└──────┬───────────────────────────────┘
|
||||
│
|
||||
▼
|
||||
┌─────────────────────────────────────┐
|
||||
│ Generate PDF Content Streams │
|
||||
│ - Set font: /F1 12 Tf │
|
||||
│ - Position text: x y Td │
|
||||
│ - Show text: (encoded) Tj │
|
||||
└──────┬──────────────────────────────┘
|
||||
│
|
||||
▼
|
||||
┌─────────────────────────────────────┐
|
||||
│ Output PDF │
|
||||
│ - Fonts embedded correctly │
|
||||
│ - Text renders with proper glyphs │
|
||||
│ - Preserves visual appearance │
|
||||
└─────────────────────────────────────┘
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Key Components
|
||||
|
||||
### 1. Type3 Font Signature Matching
|
||||
|
||||
**Location:** `Type3FontSignatureCalculator.java`
|
||||
|
||||
**Purpose:** Creates a unique fingerprint of a Type3 font based on its glyph shapes.
|
||||
|
||||
**How it works:**
|
||||
```java
|
||||
// 1. Extract glyph outlines (used for the JSON metadata and fallback rendering)
List<Type3GlyphOutline> glyphs = extractor.extractGlyphs(document, font, fontId, pageNumber);

// 2. Compute a stable fingerprint of the font's glyph procedures
String signature = Type3FontSignatureCalculator.computeSignature(font);
// Result: "sha256:2be58b6ef1e29a83b8634d70b9e32c37a15dea2e..."

// 3. Look the signature (and, failing that, the font alias) up in the library
Type3FontLibraryMatch match = fontLibrary.match(font, fontUid);
```
|
||||
|
||||
**Signature includes:**
|
||||
- Glyph outline paths (curves, lines)
|
||||
- Glyph bounding boxes
|
||||
- Advance widths
|
||||
- Character code mappings
|
||||
|
||||
### 2. Type3 Font Library
|
||||
|
||||
**Location:** `app/core/src/main/resources/type3/library/`
|
||||
|
||||
**Structure:**
|
||||
```
|
||||
type3/library/
|
||||
├── index.json # Font metadata and signatures
|
||||
├── catalogue.json # Quick lookup of common fonts
|
||||
└── fonts/
|
||||
├── dejavu/
|
||||
│ ├── DejaVuSans.ttf
|
||||
│ ├── DejaVuSans-Bold.ttf
|
||||
│ └── DejaVuSans-Oblique.ttf
|
||||
├── cm/ # Computer Modern (LaTeX)
|
||||
│ ├── cmr10.ttf
|
||||
│ ├── cmmi10.ttf
|
||||
│ └── cmsy10.ttf
|
||||
├── stix/ # Scientific symbols
|
||||
│ └── STIXSizeThreeSym-Regular.otf
|
||||
└── scp/ # Monospace
|
||||
└── SauceCodeProNerdFont-Regular.ttf
|
||||
```
|
||||
|
||||
**index.json format:**
|
||||
```json
|
||||
[
|
||||
{
|
||||
"id": "dejavu-sans-bold",
|
||||
"label": "DejaVu Sans Bold",
|
||||
"aliases": [
|
||||
"DejaVuSans-Bold",
|
||||
"EVICAO+DejaVuSans-Bold",
|
||||
"dejavusans-bold"
|
||||
],
|
||||
"signatures": [
|
||||
"sha256:a1b2c3d4...",
|
||||
"sha256:e5f6g7h8..."
|
||||
],
|
||||
"pdfProgram": {
|
||||
"resource": "type3/library/fonts/dejavu/DejaVuSans-Bold.ttf",
|
||||
"format": "ttf"
|
||||
},
|
||||
"glyphCoverage": [32, 33, 65, 66, 67, ...],
|
||||
"source": "DejaVu Fonts 2.37"
|
||||
}
|
||||
]
|
||||
```
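
Aliases are matched case-insensitively, with any subset prefix (the `EVICAO+` part of `EVICAO+DejaVuSans-Bold`) stripped first. The sketch below mirrors the normalization performed by `Type3FontLibrary.normalizeAlias` in this change; the `aliasIndex` map and its contents are illustrative only.

```java
import java.util.Locale;
import java.util.Map;

class AliasNormalizationSketch {

    /** Strip a subset prefix ("ABCD+") and lowercase, as the library does before lookup. */
    static String normalizeAlias(String alias) {
        if (alias == null) {
            return null;
        }
        String value = alias.trim();
        int plus = value.indexOf('+');
        if (plus >= 0 && plus < value.length() - 1) {
            value = value.substring(plus + 1);
        }
        return value.isEmpty() ? null : value.toLowerCase(Locale.ROOT);
    }

    public static void main(String[] args) {
        // Both spellings resolve to the same (illustrative) library entry id.
        Map<String, String> aliasIndex = Map.of("dejavusans-bold", "dejavu-sans-bold");
        System.out.println(aliasIndex.get(normalizeAlias("EVICAO+DejaVuSans-Bold"))); // dejavu-sans-bold
        System.out.println(aliasIndex.get(normalizeAlias("DejaVuSans-Bold")));        // dejavu-sans-bold
    }
}
```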
|
||||
|
||||
### 3. Normalized vs Actual Type3 Fonts
|
||||
|
||||
The system handles two types of Type3 fonts differently:
|
||||
|
||||
**Normalized Type3 Fonts:**
|
||||
- Original PDF has Type3 font
|
||||
- Matched against library
|
||||
- Replaced with standard TTF/OTF
|
||||
- Font object is `PDType0Font` (not `PDType3Font`)
|
||||
- Has proper Unicode mappings
|
||||
|
||||
**Actual Type3 Fonts:**
|
||||
- Original PDF has Type3 font
|
||||
- No library match found
|
||||
- Keeps Type3 glyph data in JSON
|
||||
- Font object is `PDType3Font`
|
||||
- Uses character codes instead of Unicode
|
||||
|
||||
**Rendering logic (PdfJsonConversionService.java:2411-2463):**
|
||||
```java
|
||||
boolean isNormalizedType3 = !(run.font() instanceof PDType3Font)
|
||||
&& runFontModel != null
|
||||
&& runFontModel.getType3Glyphs() != null
|
||||
&& !runFontModel.getType3Glyphs().isEmpty();
|
||||
|
||||
if (isNormalizedType3) {
|
||||
// Font has Unicode mappings, use text directly
|
||||
contentStream.showText(run.text());
|
||||
} else {
|
||||
// Use raw byte encoding (for Type3 or other fonts)
|
||||
byte[] encoded = encodeTextWithFont(run.font(), fontModel, run.text(), charCodes);
|
||||
contentStream.showText(new String(encoded, StandardCharsets.ISO_8859_1));
|
||||
}
|
||||
```
|
||||
|
||||
### 4. Character Code Preservation
|
||||
|
||||
**Why needed:** Type3 fonts often lack ToUnicode mappings. We preserve the original character codes so text can be reconstructed.
|
||||
|
||||
**Storage in JSON:**
|
||||
```json
|
||||
{
|
||||
"text": "Hello",
|
||||
"fontId": "F1",
|
||||
"charCodes": [72, 101, 108, 108, 111]
|
||||
}
|
||||
```
|
||||
|
||||
**Extraction (PDF → JSON):**
|
||||
```java
|
||||
// TextCollectingStripper.java:4431-4443
|
||||
if (pdfont instanceof PDType3Font) {
|
||||
int[] codes = position.getCharacterCodes();
|
||||
if (codes != null && codes.length > 0) {
|
||||
element.setCharCodes(Arrays.stream(codes)
|
||||
.boxed()
|
||||
.collect(Collectors.toList()));
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
### 5. Font Embedding Strategies
|
||||
|
||||
When converting JSON → PDF, fonts are embedded based on their type:
|
||||
|
||||
| Font Type | Strategy | Implementation |
|
||||
|-----------|----------|----------------|
|
||||
| **Normalized Type3** | Load TTF/OTF from library, embed as PDType0Font | `conversionCandidates[0].pdfProgram` |
|
||||
| **Standard fonts** | Use system fonts or embedded fonts from original | PDFBox standard loading |
|
||||
| **CFF/Type1C fonts** | Wrap as OpenType-CFF for browser compatibility | Optional Python converter |
|
||||
| **Actual Type3** | Keep original Type3 definition | Preserve from original PDF |
|
||||
|
||||
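For the **Normalized Type3** row, embedding reduces to decoding the candidate's base64 `pdfProgram` and loading it with PDFBox. A minimal sketch, assuming a candidate that exposes the payload as a base64 string like `PdfJsonFontConversionCandidate` in this change (service wiring and error handling omitted):

```java
import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.util.Base64;

import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.pdmodel.font.PDType0Font;

class LibraryFontEmbeddingSketch {

    /**
     * Decode a library candidate's pdfProgram and embed it without subsetting,
     * so the full glyph set stays available for later edits.
     */
    static PDType0Font embed(PDDocument document, String pdfProgramBase64) throws IOException {
        byte[] fontBytes = Base64.getDecoder().decode(pdfProgramBase64);
        return PDType0Font.load(document, new ByteArrayInputStream(fontBytes), /* embedSubset = */ false);
    }
}
```

`embedSubset` is left `false` so the full glyph set stays available after editing, which matches the "WITHOUT subsetting" runtime log shown under Debugging below.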
---
|
||||
|
||||
## Configuration
|
||||
|
||||
**settings.yml:**
|
||||
```yaml
|
||||
processing:
|
||||
pdf-json:
|
||||
fonts:
|
||||
type3:
|
||||
library:
|
||||
enabled: true
|
||||
index: classpath:/type3/library/index.json
|
||||
```
|
||||
|
||||
**Environment variables:**
|
||||
```bash
|
||||
# Disable Type3 library matching
|
||||
STIRLING_PDF_JSON_TYPE3_LIBRARY_ENABLED=false
|
||||
|
||||
# Use custom library
|
||||
STIRLING_PDF_JSON_TYPE3_LIBRARY_INDEX=file:/path/to/custom/index.json
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Debugging
|
||||
|
||||
### View Type3 Font Information
|
||||
|
||||
**Backend logs** show signature matching:
|
||||
```
|
||||
[TYPE3] Strategy type3-library finished with status SUCCESS
|
||||
(message: Matched DejaVu Sans Bold via alias:dejavusans-bold)
|
||||
for font 1:F2
|
||||
|
||||
[TYPE3-RUNTIME] Loading library font F2 WITHOUT subsetting
|
||||
(full glyph set) from candidate:type3-library:pdfProgram
|
||||
```
|
||||
|
||||
### Check JSON Output
|
||||
|
||||
Look for `type3Glyphs` in font definitions:
|
||||
```json
|
||||
{
|
||||
"id": "F1",
|
||||
"baseName": "BMQQDV+DejaVuSans",
|
||||
"subtype": "Type3",
|
||||
"type3Glyphs": [
|
||||
{
|
||||
"charCode": 65,
|
||||
"glyphName": "A",
|
||||
"unicode": 65,
|
||||
"advanceWidth": 684,
|
||||
"bbox": [0, 0, 684, 729],
|
||||
"outline": "M 72 0 L ..."
|
||||
}
|
||||
]
|
||||
}
|
||||
```
|
||||
|
||||
### Test Signature Calculation
|
||||
|
||||
Use the CLI tool to analyze any PDF:
|
||||
```bash
|
||||
./gradlew :proprietary:type3SignatureTool \
|
||||
--args="--pdf sample.pdf --output analysis.json --pretty"
|
||||
```
|
||||
|
||||
Output shows all Type3 fonts with their signatures and glyph coverage.
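
The output's shape follows the fields emitted by `Type3SignatureTool.describeFont`; the values below are placeholders:

```json
{
  "pdf": "/abs/path/to/sample.pdf",
  "fonts": [
    {
      "pageNumber": 1,
      "fontId": "F2",
      "baseName": "BMQQDV+DejaVuSans",
      "alias": "dejavusans",
      "encoding": "DictionaryEncoding",
      "signature": "sha256:2be58b6e...",
      "glyphCount": 7,
      "glyphCoverage": [46, 48, 49, 50, 52, 53, 55]
    }
  ]
}
```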
|
||||
|
||||
---
|
||||
|
||||
## Performance Considerations
|
||||
|
||||
### Memory Management
|
||||
|
||||
- **Type3 glyph data** can be large (detailed vector paths)
|
||||
- **Font subsetting** not used for library fonts (full glyph set loaded)
|
||||
- **Caching:** Library fonts are loaded once and cached per conversion
|
||||
|
||||
### File Size Impact
|
||||
|
||||
- **JSON size:** Type3 glyph data adds ~5-50KB per font
|
||||
- **PDF size:** Embedding TTF/OTF fonts adds ~50-500KB per font
|
||||
- **Optimization:** Use library matching to avoid storing raw Type3 data
|
||||
|
||||
---
|
||||
|
||||
## Limitations
|
||||
|
||||
1. **Library coverage:** Only common Type3 fonts are in the library
|
||||
- Matplotlib (DejaVu Sans family)
|
||||
- LaTeX (Computer Modern)
|
||||
- STIX Math symbols
|
||||
|
||||
2. **Glyph accuracy:** Signature matching assumes exact glyph shapes
|
||||
- Slight variations may not match
|
||||
- Subset fonts may have different signatures
|
||||
|
||||
3. **Unicode mapping:** Unmatched Type3 fonts lose Unicode text
|
||||
- Character codes preserved but not searchable
|
||||
- Copy/paste may not work correctly
|
||||
|
||||
4. **No runtime synthesis:** Unlike earlier designs, no attempt to convert Type3 to TTF at runtime
|
||||
- All conversions must be pre-built in library
|
||||
- Unknown Type3 fonts keep their Type3 definition
|
||||
|
||||
---
|
||||
|
||||
## Related Files
|
||||
|
||||
### Backend (Java)
|
||||
- `PdfJsonConversionService.java` - Main conversion logic
|
||||
- `Type3FontConversionService.java` - Signature calculation and matching
|
||||
- `Type3FontLibrary.java` - Library loading and lookup
|
||||
- `Type3GlyphExtractor.java` - Extract glyph data from Type3 fonts
|
||||
- `Type3FontSignatureCalculator.java` - Create font fingerprints
|
||||
- `PdfJsonFontType3Glyph.java` - Model for Type3 glyph data
|
||||
|
||||
### Frontend (TypeScript)
|
||||
- `pdfJsonEditorTypes.ts` - Type definitions for JSON structure
|
||||
- `pdfJsonEditorUtils.ts` - Font handling utilities
|
||||
|
||||
### Resources
|
||||
- `type3/library/index.json` - Font library metadata
|
||||
- `type3/library/fonts/` - Actual font files (TTF/OTF)
|
||||
- `settings.yml.template` - Configuration options
|
||||
|
||||
### Documentation
|
||||
- `pdf_text_edit_flow.md` - Overall text editing architecture
|
||||
- `type3_fallback_plan.md` - Original design and planning
|
||||
50
docs/pdf_text_edit_flow.md
Normal file
@ -0,0 +1,50 @@
|
||||
# PDF Text Edit Flow
|
||||
|
||||
This high-level diagram shows every major component involved when a user edits text inside a PDF via the JSON editor. It highlights where fonts (especially Type3) are captured, matched against the library, and re-applied during export.
|
||||
|
||||
```mermaid
|
||||
flowchart TD
|
||||
%% Upload & Extraction
|
||||
A([Upload PDF]) --> B[PdfJsonConversionService]
|
||||
B --> B1[Optional Ghostscript preflight]
|
||||
B1 --> B2[Iterate pages & resources]
|
||||
B2 --> B3[Extract text runs + fonts]
|
||||
|
||||
%% Font handling (serial tree)
|
||||
B3 --> C{Font subtype?}
|
||||
C -->|Type 0 / TrueType / CID| C1[Copy embedded program bytes]
|
||||
C -->|Type3| C2[Type3FontConversionService]
|
||||
C1 --> C4[Attach font payload + metadata]
|
||||
C2 --> C21{Library match?}
|
||||
    C21 -->|Yes| C22[Inject canonical TTF/OTF from library]
    C21 -->|No| C23[Mark unsupported<br/>& keep Type3 glyphs]
|
||||
C2 --> C25[Record glyph charCodes + unicode mapping]
|
||||
C22 --> C25
|
||||
C23 --> C25
|
||||
|
||||
%% JSON output
|
||||
    C4 --> D["Build PdfJsonDocument (pages, fonts, elements)"]
|
||||
C25 --> D
|
||||
D --> E([Send JSON to UI])
|
||||
|
||||
%% Edit round-trip
|
||||
E --> F[User edits text/elements]
|
||||
F --> G[Patched JSON POSTed back]
|
||||
G --> H{Regeneration pipeline}
|
||||
H --> H1[Resolve fonts + candidates]
|
||||
H1 --> H11[Prefer library/embedded payloads]
|
||||
H1 --> H12[Fallback font service for missing glyphs]
|
||||
H --> H2{Can rewrite token stream?}
|
||||
H2 -->|Yes| H21[Rewrite existing operators]
|
||||
H2 -->|No| H22[Full page regeneration]
|
||||
H22 --> H23[Embed canonical fonts + Type3 glyph codes]
|
||||
H21 --> I[Apply annotations/metadata]
|
||||
H23 --> I
|
||||
I --> J([Download edited PDF])
|
||||
```
|
||||
|
||||
**Key points**
|
||||
- Type3 conversion happens entirely inside `Type3FontConversionService`. Matching entries pull canonical fonts from the library; when a signature is missing we simply keep the original Type3 glyph codes until a library entry is added.
|
||||
- Raw Type3 char codes are preserved in `PdfJsonTextElement.charCodes` so edits can fall back to the original glyph sequence when users do not change the text.
|
||||
- When the frontend submits changes, the backend preflights each text run, picks the proper font candidate (library > embedded > fallback), and rewrites the PDF with either token replacements or full page regeneration.
|
||||
- Glyph coverage metadata from the Type3 library now informs which fonts can legitimately render new characters, so added text keeps using the original Type3 face whenever its coverage includes those code points (see the sketch below).
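
A rough sketch of that coverage check (hypothetical helper; it only assumes the `glyphCoverage` code-point list carried on the matched library entry in this change):

```java
import java.util.List;

class GlyphCoverageSketch {

    /** Returns true when every code point of the edited text is in the library entry's coverage. */
    static boolean coversAll(List<Integer> glyphCoverage, String editedText) {
        if (glyphCoverage == null || glyphCoverage.isEmpty()) {
            return false;
        }
        return editedText.codePoints().allMatch(glyphCoverage::contains);
    }
}
```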
|
||||
13216
docs/type3/harvest_report.json
Normal file
File diff suppressed because it is too large
191
docs/type3/signature_inventory.md
Normal file
@ -0,0 +1,191 @@
|
||||
# Type3 Signature Inventory
|
||||
|
||||
_Generated from `docs\type3\signatures`. Run `scripts/summarize_type3_signatures.py` after capturing new samples._
|
||||
|
||||
## Alias: `cmex10`
|
||||
|
||||
| Signature | Samples | Glyph Count | Coverage (first 10) |
|
||||
| --- | --- | --- | --- |
|
||||
| `sha256:bdff85a28d968fea7fbffcf4869b1a28bdd7eb8b73230a016b41bdcbe28de94b` | `01_Matplotlib.pdf` | 1 | 90 |
|
||||
|
||||
## Alias: `cmmi10`
|
||||
|
||||
| Signature | Samples | Glyph Count | Coverage (first 10) |
|
||||
| --- | --- | --- | --- |
|
||||
| `sha256:6c72170517812e39f970746f53a2ae08dafbbe7374c20bcb4d5a60adc49cb77b` | `01_Matplotlib.pdf` | 2 | 100, 120 |
|
||||
|
||||
## Alias: `cmr10`
|
||||
|
||||
| Signature | Samples | Glyph Count | Coverage (first 10) |
|
||||
| --- | --- | --- | --- |
|
||||
| `sha256:5b535a05c982fb8ff029dfbedd5e9d28c1c4379ebac259d207f65606a94e5b15` | `01_Matplotlib.pdf` | 3 | 48, 49, 53 |
|
||||
|
||||
## Alias: `cmsy10`
|
||||
|
||||
| Signature | Samples | Glyph Count | Coverage (first 10) |
|
||||
| --- | --- | --- | --- |
|
||||
| `sha256:1324cd8127143ef9023616b7911c570db3b1eb35758cdc9258ec16c0f4587775` | `01_Matplotlib.pdf` | 1 | 48 |
|
||||
| `sha256:2832e219b2db3bacf0d5a147d4b74ad5226fdf7562c395ef3fb12937633e037d` | `01_Matplotlib.pdf` | 1 | 8734 |
|
||||
|
||||
## Alias: `dejavusans`
|
||||
|
||||
| Signature | Samples | Glyph Count | Coverage (first 10) |
|
||||
| --- | --- | --- | --- |
|
||||
| `sha256:2be58b6ef1e29a83b8634d70b9e32c37a15dea2e608894439ef7224c35b77f5d` | `01_Matplotlib.pdf` | 7 | 46, 48, 49, 50, 52, 53, 55 |
|
||||
| `sha256:994c963d70041eee141fd275fa22c525a71283de2b4a952814d02e0bbfa8caea` | `01_Matplotlib.pdf` | 4 | 46, 48, 49, 53 |
|
||||
| `sha256:93573cb1ab32b9cb09378298fb120de079f6a309908d2ee86f91392a6aba5c31` | `01_Matplotlib.pdf` | 30 | 32, 45, 48, 49, 50, 51, 52, 53, 54, 55 |
|
||||
| `sha256:4febfad91e0141f9658506a0bf8fc2a449f0ea7d97b44e95fc9a970c77af4b0a` | `01_Matplotlib.pdf` | 7 | 48, 49, 50, 51, 52, 53, 54 |
|
||||
| `sha256:0386e5811612ba4b998d57cd3869d7fbc48092a79d436deda774af107a4af813` | `01_Matplotlib.pdf` | 9 | 46, 48, 49, 50, 51, 52, 53, 54, 55 |
|
||||
| `sha256:b95fa2a272cbc950b81320790d04fcf19ebb24050fa2139ba6a474172cac596b` | `01_Matplotlib.pdf` | 20 | 32, 40, 41, 48, 50, 52, 54, 56, 83, 85 |
|
||||
| `sha256:d034d16ac69e3e1c5008e77c4c24dc3179308a2742601e89d5c8ab327e4040dd` | `01_Matplotlib.pdf` | 15 | 43, 48, 49, 50, 51, 56, 61, 97, 98, 105 |
|
||||
| `sha256:85e16e36ed0290c149647be7e468a7c46e7b66fd290131213040f7bad905aa44` | `01_Matplotlib.pdf` | 25 | 32, 45, 46, 48, 49, 50, 52, 53, 54, 56 |
|
||||
| `sha256:85e16e36ed0290c149647be7e468a7c46e7b66fd290131213040f7bad905aa44` | `01_Matplotlib.pdf` | 25 | 32, 45, 46, 48, 49, 50, 52, 53, 54, 56 |
|
||||
| `sha256:3654d4d9bcbbf6ad51628082203094069a17aad3a5e6f5c7972833566e42ab6b` | `01_Matplotlib.pdf` | 29 | 32, 46, 48, 49, 50, 51, 52, 53, 54, 55 |
|
||||
| `sha256:d0c6cddc416d130701395246621a0f669fc292df4097a7a74395602faf4475df` | `01_Matplotlib.pdf` | 17 | 48, 49, 53, 97, 100, 101, 102, 103, 105, 108 |
|
||||
| `sha256:cadf43a2df81340368af44c76b499223931d78dcc76c70cf4b4a93d133e368af` | `01_Matplotlib.pdf` | 27 | 32, 46, 48, 49, 50, 52, 54, 56, 84, 97 |
|
||||
| `sha256:f1a874c4268b1bffffc99acabbe0a60aa662611b4bac0e688e4fc0ae3f2033bb` | `01_Matplotlib.pdf` | 7 | 46, 48, 49, 50, 52, 54, 56 |
|
||||
| `sha256:e3d87c113463c8642a4f22943064fd75c133ad31fe5efebf6de6abf211b74b5a` | `01_Matplotlib.pdf` | 7 | 48, 49, 50, 51, 52, 54, 56 |
|
||||
| `sha256:d47afb7581e98f588f0e70953e8692249aaa2ec3df36fbd90985f27b1ce1cf50` | `01_Matplotlib.pdf` | 12 | 46, 48, 49, 50, 51, 52, 53, 54, 55, 97 |
|
||||
| `sha256:e47b8f112a875361a43bcb6d9c6467e0296412d29e417e58a0e60c90b664d281` | `01_Matplotlib.pdf` | 9 | 46, 48, 50, 97, 99, 105, 108, 115, 118 |
|
||||
| `sha256:9c67df2ac5c3dcf957dfb0cd048fa450322a72b5a2dfb05f816c536b3b090607` | `01_Matplotlib.pdf` | 19 | 32, 39, 46, 48, 49, 50, 53, 58, 97, 99 |
|
||||
| `sha256:3ee773a0af6fdedb9853dca9f4d8b80a421a0024bdf06bea41f15d58e3b90c87` | `01_Matplotlib.pdf` | 13 | 46, 48, 49, 50, 52, 54, 56, 97, 99, 105 |
|
||||
| `sha256:4fa06c90399d80b41cb718163a5d78af2b203df6b6579246fb0b24d349b7a591` | `01_Matplotlib.pdf` | 15 | 46, 48, 49, 50, 52, 53, 54, 56, 97, 98 |
|
||||
| `sha256:e47b8f112a875361a43bcb6d9c6467e0296412d29e417e58a0e60c90b664d281` | `01_Matplotlib.pdf` | 9 | 46, 48, 50, 97, 99, 105, 108, 115, 118 |
|
||||
| `sha256:ac6756c76d6e43b771cc1e643dfc7891dfaaac05aa5e302190d0a662838ab031` | `01_Matplotlib.pdf` | 13 | 46, 48, 49, 50, 51, 53, 55, 97, 99, 105 |
|
||||
| `sha256:bf7b95498f7d00d228c5c155db62b6c1aa7e0215cca1690c9fdb0adcfd496b10` | `01_Matplotlib.pdf` | 14 | 46, 48, 49, 50, 65, 66, 67, 68, 97, 99 |
|
||||
| `sha256:39b8e5ec8e20a788cd45166baf0ab796397f152c9cd8dec1f882c635380cad92` | `01_Matplotlib.pdf` | 13 | 46, 48, 49, 50, 52, 53, 54, 97, 99, 105 |
|
||||
| `sha256:27b98489865df8df55f19e4505c093501f236465885ca3bf5b66b6f047a85bb2` | `01_Matplotlib.pdf` | 19 | 46, 48, 49, 50, 51, 52, 68, 70, 72, 76 |
|
||||
| `sha256:497ddd27e1f56ef6504c61613e3a159bab13314a4970a3be13b3a556648964da` | `01_Matplotlib.pdf` | 12 | 46, 48, 49, 50, 51, 97, 98, 99, 105, 108 |
|
||||
| `sha256:3b41f9e5f3a7ffa6f4cdffa2a46f02781ec1b2b0c99994707cfb139aa15a11e2` | `01_Matplotlib.pdf` | 5 | 32, 48, 49, 50, 53 |
|
||||
| `sha256:93723fe436a1aa654db56caf133f56280444b9dc0682af50b83787c3e49ee3ec` | `01_Matplotlib.pdf` | 16 | 32, 48, 49, 50, 52, 53, 54, 56, 58, 97 |
|
||||
| `sha256:a648cb0524465bcb3bf4a2f65e0761cfc5167b1871a7db9488bee11b56062727` | `01_Matplotlib.pdf` | 14 | 32, 46, 48, 49, 50, 52, 53, 54, 56, 58 |
|
||||
| `sha256:2f18ed7f982aeb954aaae388ba0c75e3c676717ca324156b42bb17f3f20ef403` | `01_Matplotlib.pdf` | 18 | 46, 48, 49, 50, 52, 54, 97, 99, 101, 102 |
|
||||
| `sha256:18ce863feb57f42f2b92ac85a8c55ef3eeaa15488c5d6cd8c724b085994c64fa` | `01_Matplotlib.pdf` | 12 | 46, 48, 49, 50, 51, 52, 97, 99, 105, 108 |
|
||||
| `sha256:a3eb7054e426aad7d1fac1f39ad6d3f886e34c04f780def5cf22b53cb3a45b46` | `01_Matplotlib.pdf` | 15 | 46, 48, 49, 50, 52, 53, 54, 55, 56, 97 |
|
||||
| `sha256:edd22119635bfb0f2bff750137c6c6400a7fae4ff80cc252d2e6f2ca88f599a7` | `01_Matplotlib.pdf` | 15 | 46, 48, 49, 50, 51, 52, 53, 54, 56, 97 |
|
||||
| `sha256:aae1797f3e3ff55d71b02590333aff86663d6bb4a5768bed7550e5987f40afe8` | `01_Matplotlib.pdf` | 14 | 46, 48, 49, 50, 52, 53, 54, 56, 97, 99 |
|
||||
| `sha256:0165552fad28860f2ea6079be7a87ea0833acde99309b3ef619c8f81707c46a3` | `01_Matplotlib.pdf` | 14 | 46, 48, 49, 50, 51, 52, 54, 97, 99, 105 |
|
||||
| `sha256:792a1c5aaa1743ab203a363a8f6cd07c3b043e33c72e97c4ea21f5862158e6c1` | `01_Matplotlib.pdf` | 17 | 43, 46, 48, 49, 50, 51, 52, 53, 54, 56 |
|
||||
| `sha256:f4bfd64f36bf33dea79800561a67f78d5ccdb436363574abf0892f58b376a2e6` | `01_Matplotlib.pdf` | 25 | 32, 46, 48, 49, 50, 51, 52, 53, 54, 85 |
|
||||
| `sha256:119da04d962622c8aa46d77f6bdfccb5d4a4ef7173775275b046efd59098e5d9` | `01_Matplotlib.pdf` | 17 | 46, 48, 49, 50, 53, 55, 97, 101, 103, 105 |
|
||||
| `sha256:003af1c45e3a5ab09544e226eba25e3a70abfe6e36dd48584474cc7a497685f6` | `01_Matplotlib.pdf` | 28 | 32, 40, 41, 46, 48, 49, 50, 52, 54, 56 |
|
||||
| `sha256:88b3471db1978cc83233f249453806a8369c766b089b424c86c2584196ed5dbf` | `01_Matplotlib.pdf` | 14 | 46, 48, 49, 50, 52, 54, 56, 100, 101, 102 |
|
||||
| `sha256:a15cc90b7fc110cef4f07fe8a692d572e1289a9ee29c95732294662fded4e042` | `01_Matplotlib.pdf` | 16 | 32, 46, 48, 49, 50, 51, 53, 79, 83, 97 |
|
||||
| `sha256:fb54c23aa081562ac114676ffe43032c9c0fb63af3e5b7b3441b88872d1f2e7a` | `01_Matplotlib.pdf` | 16 | 46, 48, 49, 50, 52, 53, 54, 56, 66, 72 |
|
||||
| `sha256:4b553d51d58f5891af071359fb016caf1c6137778da129a6b208dcc8cb0c4635` | `01_Matplotlib.pdf` | 9 | 46, 48, 49, 50, 51, 52, 53, 54, 56 |
|
||||
| `sha256:93573cb1ab32b9cb09378298fb120de079f6a309908d2ee86f91392a6aba5c31` | `01_Matplotlib.pdf` | 30 | 32, 45, 48, 49, 50, 51, 52, 53, 54, 55 |
|
||||
| `sha256:4febfad91e0141f9658506a0bf8fc2a449f0ea7d97b44e95fc9a970c77af4b0a` | `01_Matplotlib.pdf` | 7 | 48, 49, 50, 51, 52, 53, 54 |
|
||||
| `sha256:0386e5811612ba4b998d57cd3869d7fbc48092a79d436deda774af107a4af813` | `01_Matplotlib.pdf` | 9 | 46, 48, 49, 50, 51, 52, 53, 54, 55 |
|
||||
| `sha256:b95fa2a272cbc950b81320790d04fcf19ebb24050fa2139ba6a474172cac596b` | `01_Matplotlib.pdf` | 20 | 32, 40, 41, 48, 50, 52, 54, 56, 83, 85 |
|
||||
| `sha256:b318f65b9dc209eb6f004e3a6c20a772ebbca3d752adc10c66a6a8a479da2838` | `01_Matplotlib.pdf` | 20 | 32, 40, 41, 46, 48, 49, 50, 52, 53, 54 |
|
||||
| `sha256:64f725573c1f5d90196e94ed338a7af06baf274420414befeb9693c80acd0f77` | `01_Matplotlib.pdf` | 23 | 32, 46, 48, 49, 50, 51, 52, 53, 55, 57 |
|
||||
| `sha256:9a701e082ba5a779e2b20b8de0c7844b3f7838ba8cd4bd7ef366893761fb994d` | `01_Matplotlib.pdf` | 10 | 46, 48, 49, 50, 51, 52, 53, 54, 55, 56 |
|
||||
| `sha256:2f6f8d63ff6235f3b7cd6f5eba8076854892037afa2ea6962953b3e7cda3736e` | `01_Matplotlib.pdf` | 31 | 32, 40, 41, 44, 61, 65, 66, 67, 68, 70 |
|
||||
| `sha256:f17b5eb0ee996d1388c548f79fa50fa2d8c6076959eff189bb745d156d54547f` | `01_Matplotlib.pdf` | 26 | 32, 39, 65, 66, 67, 68, 73, 97, 98, 100 |
|
||||
| `sha256:f22c75548364bb25fc3efbe11f05c56e29f07c15c3046ddbc85a64e5cc5a97bd` | `01_Matplotlib.pdf` | 21 | 32, 48, 49, 50, 72, 80, 91, 93, 99, 100 |
|
||||
| `sha256:54a6c2e4bc290b48e21eece7f81cb6633c4b53a91f198fdaabfc73743b0e4499` | `01_Matplotlib.pdf` | 15 | 48, 49, 50, 51, 52, 53, 54, 97, 100, 101 |
|
||||
| `sha256:059af9dbaaab27c1d660ef00de6d4fd6e1687cfe2abca0a4c07265c2b2b450c6` | `01_Matplotlib.pdf` | 22 | 32, 46, 48, 49, 50, 51, 53, 55, 68, 77 |
|
||||
| `sha256:6651550d7b913850087244b7a70961989c2efc6d8c8d060d8663ff087b7723f6` | `01_Matplotlib.pdf` | 20 | 32, 46, 48, 49, 50, 52, 54, 56, 97, 100 |
|
||||
| `sha256:4d4ee6f04f57a40a589741df4747990ed485c192b0fc179a415aba822f352a8d` | `01_Matplotlib.pdf` | 26 | 32, 45, 48, 49, 50, 51, 52, 53, 56, 65 |
|
||||
| `sha256:4febfad91e0141f9658506a0bf8fc2a449f0ea7d97b44e95fc9a970c77af4b0a` | `01_Matplotlib.pdf` | 7 | 48, 49, 50, 51, 52, 53, 54 |
|
||||
| `sha256:e808a8ecba94bf0190ab7218bb0702698125ee2e456e82e00da709e8188e2bf8` | `01_Matplotlib.pdf` | 28 | 32, 46, 48, 49, 50, 53, 54, 70, 83, 84 |
|
||||
| `sha256:b5064b202eb1dae41545eddf674ee23bd82176e76aac8eb749540c2689f2e3ec` | `01_Matplotlib.pdf` | 33 | 32, 33, 37, 48, 49, 50, 52, 53, 54, 68 |
|
||||
| `sha256:f8f14410ec170248916e19f9d09120cfd786c47906b7c3735781d24e944b094e` | `01_Matplotlib.pdf` | 11 | 32, 83, 84, 101, 109, 110, 111, 115, 116, 119 |
|
||||
| `sha256:2be58b6ef1e29a83b8634d70b9e32c37a15dea2e608894439ef7224c35b77f5d` | `01_Matplotlib.pdf` | 7 | 46, 48, 49, 50, 52, 53, 55 |
|
||||
| `sha256:994c963d70041eee141fd275fa22c525a71283de2b4a952814d02e0bbfa8caea` | `01_Matplotlib.pdf` | 4 | 46, 48, 49, 53 |
|
||||
| `sha256:c43134bebeaf8328ac299ba978d7e663e2dc4fe99463b9d7f72f72f77936204e` | `01_Matplotlib.pdf` | 9 | 48, 49, 50, 51, 52, 53, 54, 55, 56 |
|
||||
| `sha256:4f763d5e2cd0bdcd4650936ac505bd0e011899712ffe80ffa4b4d43f42941327` | `01_Matplotlib.pdf` | 10 | 46, 48, 49, 50, 52, 54, 56, 98, 103, 114 |
|
||||
| `sha256:2be58b6ef1e29a83b8634d70b9e32c37a15dea2e608894439ef7224c35b77f5d` | `01_Matplotlib.pdf` | 7 | 46, 48, 49, 50, 52, 53, 55 |
|
||||
| `sha256:cb72de0c6105b9802d360c47a292a1f7bc344939a6801b879ea09dae4e45e863` | `01_Matplotlib.pdf` | 7 | 46, 48, 49, 50, 51, 52, 53 |
|
||||
| `sha256:2add5b5ad6e536f3614b75e246b49a006edbbecdd309d24bd42c874a3ae3c8ed` | `01_Matplotlib.pdf` | 21 | 45, 48, 49, 50, 51, 52, 97, 99, 100, 101 |
|
||||
| `sha256:85e16e36ed0290c149647be7e468a7c46e7b66fd290131213040f7bad905aa44` | `02_Matplotlib.pdf` | 25 | 32, 45, 46, 48, 49, 50, 52, 53, 54, 56 |
|
||||
| `sha256:85e16e36ed0290c149647be7e468a7c46e7b66fd290131213040f7bad905aa44` | `02_Matplotlib.pdf` | 25 | 32, 45, 46, 48, 49, 50, 52, 53, 54, 56 |
|
||||
| `sha256:059af9dbaaab27c1d660ef00de6d4fd6e1687cfe2abca0a4c07265c2b2b450c6` | `02_Matplotlib.pdf` | 22 | 32, 46, 48, 49, 50, 51, 53, 55, 68, 77 |
|
||||
| `sha256:9a701e082ba5a779e2b20b8de0c7844b3f7838ba8cd4bd7ef366893761fb994d` | `02_Matplotlib.pdf` | 10 | 46, 48, 49, 50, 51, 52, 53, 54, 55, 56 |
|
||||
| `sha256:2f6f8d63ff6235f3b7cd6f5eba8076854892037afa2ea6962953b3e7cda3736e` | `02_Matplotlib.pdf` | 31 | 32, 40, 41, 44, 61, 65, 66, 67, 68, 70 |
|
||||
| `sha256:f17b5eb0ee996d1388c548f79fa50fa2d8c6076959eff189bb745d156d54547f` | `02_Matplotlib.pdf` | 26 | 32, 39, 65, 66, 67, 68, 73, 97, 98, 100 |
|
||||
| `sha256:6651550d7b913850087244b7a70961989c2efc6d8c8d060d8663ff087b7723f6` | `02_Matplotlib.pdf` | 20 | 32, 46, 48, 49, 50, 52, 54, 56, 97, 100 |
|
||||
| `sha256:4febfad91e0141f9658506a0bf8fc2a449f0ea7d97b44e95fc9a970c77af4b0a` | `02_Matplotlib.pdf` | 7 | 48, 49, 50, 51, 52, 53, 54 |
|
||||
| `sha256:e808a8ecba94bf0190ab7218bb0702698125ee2e456e82e00da709e8188e2bf8` | `02_Matplotlib.pdf` | 28 | 32, 46, 48, 49, 50, 53, 54, 70, 83, 84 |
|
||||
| `sha256:b5064b202eb1dae41545eddf674ee23bd82176e76aac8eb749540c2689f2e3ec` | `02_Matplotlib.pdf` | 33 | 32, 33, 37, 48, 49, 50, 52, 53, 54, 68 |
|
||||
| `sha256:f8f14410ec170248916e19f9d09120cfd786c47906b7c3735781d24e944b094e` | `02_Matplotlib.pdf` | 11 | 32, 83, 84, 101, 109, 110, 111, 115, 116, 119 |
|
||||
| `sha256:2be58b6ef1e29a83b8634d70b9e32c37a15dea2e608894439ef7224c35b77f5d` | `02_Matplotlib.pdf` | 7 | 46, 48, 49, 50, 52, 53, 55 |
|
||||
| `sha256:994c963d70041eee141fd275fa22c525a71283de2b4a952814d02e0bbfa8caea` | `02_Matplotlib.pdf` | 4 | 46, 48, 49, 53 |
|
||||
| `sha256:c43134bebeaf8328ac299ba978d7e663e2dc4fe99463b9d7f72f72f77936204e` | `02_Matplotlib.pdf` | 9 | 48, 49, 50, 51, 52, 53, 54, 55, 56 |
|
||||
| `sha256:4f763d5e2cd0bdcd4650936ac505bd0e011899712ffe80ffa4b4d43f42941327` | `02_Matplotlib.pdf` | 10 | 46, 48, 49, 50, 52, 54, 56, 98, 103, 114 |
|
||||
| `sha256:2be58b6ef1e29a83b8634d70b9e32c37a15dea2e608894439ef7224c35b77f5d` | `02_Matplotlib.pdf` | 7 | 46, 48, 49, 50, 52, 53, 55 |
|
||||
| `sha256:cb72de0c6105b9802d360c47a292a1f7bc344939a6801b879ea09dae4e45e863` | `02_Matplotlib.pdf` | 7 | 46, 48, 49, 50, 51, 52, 53 |
|
||||
| `sha256:31d0e67bc63a816302c9ff6ad9c19e17603aef1a4c3677b81b1d9084caa86e03` | `03_handout-beginner.pdf` | 6 | 46, 48, 49, 50, 51, 53 |
|
||||
| `sha256:4b509d2ae2cfab89783a73df2c66f0fd50949f97696079cb58f1e58b81daaa07` | `03_handout-beginner.pdf` | 4 | 84, 101, 105, 109 |
|
||||
| `sha256:831f7012db360331ffb5a5de6a6d6e03ffaad29f48d81cabe9fc613b25aad818` | `04_handout-intermediate.pdf` | 43 | 32, 40, 41, 46, 48, 49, 50, 51, 52, 53 |
|
||||
| `sha256:bf790625423c5ebdf94760eb796c847af885b930d3a30861509b07f1c77c3f60` | `04_handout-intermediate.pdf` | 9 | 46, 48, 49, 50, 51, 52, 53, 54, 56 |
|
||||
| `sha256:f7c3be2199c397a4c702dd434ac63fc9e046d749eff8cede4513fbc2774751b4` | `04_handout-intermediate.pdf` | 5 | 48, 49, 50, 51, 53 |
|
||||
| `sha256:8f7bf7a6382e8a762c5a84f19f84f0675f61eb1b34bd42562c0b3ac6712e29ef` | `04_handout-intermediate.pdf` | 2 | 48, 49 |
|
||||
| `sha256:dfaf8075e13be0e51f72485f9d825cea9ad077eb2dd9d63b9922add67d7d2761` | `04_handout-intermediate.pdf` | 12 | 32, 48, 49, 50, 51, 53, 80, 100, 101, 105 |
|
||||
| `sha256:853422e67ac88fe7ae28d5c459dc9f5a84f24e7840eeb2d82a00719032119326` | `04_handout-intermediate.pdf` | 10 | 32, 67, 83, 97, 100, 101, 105, 110, 111, 115 |
|
||||
| `sha256:b42182c55ec4bd53ab0698bee5f92945921dbccb534fdb5c6b41f1782e1fe88e` | `04_handout-intermediate.pdf` | 7 | 32, 48, 49, 50, 51, 53, 65 |
|
||||
| `sha256:75466035ac34f2523215e599452e32d796d7d02bc7122ed3d02fe91ebe064c25` | `04_handout-intermediate.pdf` | 6 | 48, 49, 50, 52, 54, 56 |
|
||||
| `sha256:75466035ac34f2523215e599452e32d796d7d02bc7122ed3d02fe91ebe064c25` | `04_handout-intermediate.pdf` | 6 | 48, 49, 50, 52, 54, 56 |
|
||||
|
||||
## Alias: `dejavusans-bold`
|
||||
|
||||
| Signature | Samples | Glyph Count | Coverage (first 10) |
|
||||
| --- | --- | --- | --- |
|
||||
| `sha256:dc03917f2edd92a7a68a46ad36f65a908e4feb85e61cb37e9026205f3986574a` | `01_Matplotlib.pdf` | 7 | 65, 87, 100, 101, 103, 105, 116 |
|
||||
| `sha256:dc03917f2edd92a7a68a46ad36f65a908e4feb85e61cb37e9026205f3986574a` | `02_Matplotlib.pdf` | 7 | 65, 87, 100, 101, 103, 105, 116 |
|
||||
| `sha256:c845063bef18f173afbfcb90fbf6773f43648c5f0666ecfa0132afe4e164068d` | `03_handout-beginner.pdf` | 9 | 32, 65, 83, 97, 101, 105, 110, 118, 119 |
|
||||
|
||||
## Alias: `dejavusans-oblique`
|
||||
|
||||
| Signature | Samples | Glyph Count | Coverage (first 10) |
|
||||
| --- | --- | --- | --- |
|
||||
| `sha256:81cd2d4d9353ee02c7ed80c2892658072b2a8bbd9ed1832b474129dfbe35d5d8` | `01_Matplotlib.pdf` | 13 | 70, 71, 85, 87, 100, 101, 103, 109, 112, 114 |
|
||||
| `sha256:08864aa8e8d17cead6059d5b4f1b1eea2053fa0ea3ca64e885d6eaacb78bccaf` | `01_Matplotlib.pdf` | 2 | 100, 120 |
|
||||
|
||||
## Alias: `dejavusansdisplay`
|
||||
|
||||
| Signature | Samples | Glyph Count | Coverage (first 10) |
|
||||
| --- | --- | --- | --- |
|
||||
| `sha256:ae77c4eb2c49f72c616272f3d7ac624ddb0b4db1c77acbe6b9d13531f68e1d5d` | `01_Matplotlib.pdf` | 0 | |
|
||||
| `sha256:ae77c4eb2c49f72c616272f3d7ac624ddb0b4db1c77acbe6b9d13531f68e1d5d` | `01_Matplotlib.pdf` | 0 | |
|
||||
|
||||
## Alias: `dejavusansmono`
|
||||
|
||||
| Signature | Samples | Glyph Count | Coverage (first 10) |
|
||||
| --- | --- | --- | --- |
|
||||
| `sha256:88758adf0b41a81204ed3ad63463f5d15c7c2f80e8942cee501d06fa7274dc4e` | `01_Matplotlib.pdf` | 8 | 97, 98, 105, 108, 109, 111, 112, 116 |
|
||||
| `sha256:74e60bcb2d7975b0c7b372aca9fc25f55c9018005425a741830e7c4370b8d593` | `01_Matplotlib.pdf` | 24 | 35, 39, 48, 49, 50, 51, 52, 53, 54, 55 |
|
||||
|
||||
## Alias: `f36`
|
||||
|
||||
| Signature | Samples | Glyph Count | Coverage (first 10) |
|
||||
| --- | --- | --- | --- |
|
||||
| `sha256:72c7041f938222b87ce2d9295547f8c19edf250af538160b69be35a968d76ea7` | `08_matplotlib.pdf` | 1 | 136 |
|
||||
|
||||
## Alias: `f59`
|
||||
|
||||
| Signature | Samples | Glyph Count | Coverage (first 10) |
|
||||
| --- | --- | --- | --- |
|
||||
| `sha256:d5942c0913ef64ae862556a746b28dac1a621caa5e05973e16881c8e8e15e329` | `08_matplotlib.pdf` | 1 | 42 |
|
||||
|
||||
## Alias: `sourcecodepro-regular`
|
||||
|
||||
| Signature | Samples | Glyph Count | Coverage (first 10) |
|
||||
| --- | --- | --- | --- |
|
||||
| `sha256:96ba693001b2ab224ad5b5a7464cecd4d33e68f30fb23f78a8473dbb031ce246` | `04_handout-intermediate.pdf` | 11 | 48, 49, 50, 51, 52, 53, 54, 55, 56, 57 |
|
||||
| `sha256:72fca14e9e44fc41b0cdb1c6a088f0b07f882f9f04c51a0145f43cf8b285c5b6` | `04_handout-intermediate.pdf` | 11 | 46, 48, 49, 50, 51, 52, 53, 54, 55, 56 |
|
||||
|
||||
## Alias: `stixsizethreesym-regular`
|
||||
|
||||
| Signature | Samples | Glyph Count | Coverage (first 10) |
|
||||
| --- | --- | --- | --- |
|
||||
| `sha256:33d0ab9d9d72c1aed1edfc9b815dd6a2d618cbbe9084309c7f2de0f3df3073d7` | `01_Matplotlib.pdf` | 2 | 91, 93 |
|
||||
|
||||
## Alias: `unknown`
|
||||
|
||||
| Signature | Samples | Glyph Count | Coverage (first 10) |
|
||||
| --- | --- | --- | --- |
|
||||
| `sha256:23e4b174e951cd6135e229fb397db4ff518021cf14d5f817031b9f754841e511` | `07_matplotlib.pdf` | 11 | 0, 46, 97, 98, 105, 108, 109, 111, 112, 116 |
|
||||
| `sha256:f6112d6a35d5fdf5d6431b3156b713020953154042814ad12d2b81731c97250b` | `07_matplotlib.pdf` | 13 | 0, 32, 37, 97, 98, 101, 105, 108, 109, 110 |
|
||||
| `sha256:7e05c074b630c0f3e1fc23537c22bf4b6191c783601e534cd156f71c1827702c` | `07_matplotlib.pdf` | 42 | 0, 32, 34, 37, 39, 40, 41, 42, 44, 46 |
|
||||
| `sha256:9958bc1f309f6bafb55834e271bb0b337704fcac51f6d989abe9553fcffa103d` | `07_matplotlib.pdf` | 15 | 0, 46, 97, 98, 105, 108, 109, 110, 111, 112 |
|
||||
150
docs/type3_fallback_plan.md
Normal file
@@ -0,0 +1,150 @@
|
||||
# Type3 Font Library & Matching Plan
|
||||
|
||||
This file documents where we are with Type3 font handling, what tooling already exists, and what remains to be done so future work (or another Codex session) can pick it up quickly.
|
||||
|
||||
## Goal
|
||||
Ensure Type3 fonts keep their appearance when users edit/export PDFs. That means:
|
||||
1. Identifying common Type3 fonts we encounter (Matplotlib, LaTeX, etc.).
|
||||
2. Capturing their glyph outlines once, converting them to reusable TTF/OTF binaries.
|
||||
3. At runtime, matching Type3 fonts in incoming PDFs against that library (by signature) so we can embed the canonical TTF instead of falling back to generic fonts.
|
||||
4. Using the captured char-code sequences so regeneration and editing preserve glyphs even when the original fonts had no ToUnicode map.
|
||||
|
||||
## Current State
|
||||
- **Extraction**: `PdfJsonTextElement` now stores raw Type3 char codes; `encodeTextWithFont` can use them so token-level rewrites keep original glyphs.
|
||||
- **Regeneration**: Page regeneration now uses those char codes when writing new content streams, so existing text should remain visible even when tokens must be rebuilt (a minimal sketch of this passthrough follows this list).
|
||||
- **Scripts**: `scripts/index_type3_catalogue.py` scans PDFs in `app/core/src/main/resources/type3/samples` with `pdffonts` and writes `catalogue.json` (basic list of Type3 fonts encountered). This is only the first step; we still need per-font signatures and converted binaries.
|
||||
- **Samples**: There are sample PDFs under `app/core/src/main/resources/type3/samples/` (Matplotlib slides, etc.) that we can mine for common Type3 fonts.
|
||||
- **Library matching**: `Type3FontLibrary` loads `type3/library/index.json`, and `Type3LibraryStrategy` injects the prebuilt TTF/OTF payloads straight into `PdfJsonFont` conversion candidates. At runtime this is now the *only* conversion path; if the library does not recognise a signature we fall back to the captured Type3 glyph codes instead of trying to synthesize a font on the fly.
|
||||
- **Offline conversion helpers**: `scripts/type3_to_cff.py` is still available for developers who need to turn a Type3-only PDF into a reusable TTF/OTF, but it is no longer wired into the server lifecycle. Everything shipped to users must be backed by the curated library.
|
||||
- **Signature CLI**: `Type3SignatureTool` (`./gradlew :proprietary:type3SignatureTool --args="--pdf sample.pdf --output meta.json --pretty"`) dumps every Type3 font in a PDF along with its signature + glyph coverage. Use this to extend `index.json` without touching the backend.
|
||||
- **Signature inventory**: `docs/type3/signatures/` stores the captured dumps, and `scripts/summarize_type3_signatures.py` keeps `docs/type3/signature_inventory.md` up to date so we know which aliases still need binaries.
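For orientation, here is a minimal Python sketch of that char-code passthrough. The production logic lives in the Java backend around `encodeTextWithFont` and the page regenerator; `escape_pdf_string`, `tj_operand`, and the dict-shaped element below are illustrative assumptions, not real APIs.

```python
# Illustrative only: shows why captured Type3 char codes keep glyphs stable.
def escape_pdf_string(data: bytes) -> bytes:
    """Wrap raw bytes as a literal PDF string operand, escaping ( ) and backslash."""
    out = bytearray()
    for b in data:
        if b in (0x28, 0x29, 0x5C):
            out += b"\\" + bytes([b])
        else:
            out.append(b)
    return b"(" + bytes(out) + b")"

def tj_operand(element: dict, encode_with_font) -> bytes:
    """Prefer the captured single-byte char codes over re-encoding the text."""
    char_codes = element.get("charCodes")
    if char_codes:
        # Re-emit the original codes so the page keeps pointing at the same
        # Type3 glyph procedures, even without a usable ToUnicode map.
        return escape_pdf_string(bytes(char_codes))
    return escape_pdf_string(encode_with_font(element.get("text", "")))

print(tj_operand({"text": "Ab", "charCodes": [65, 98]}, str.encode))  # b'(Ab)'
```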
|
||||
|
||||
## Remaining Work
|
||||
1. **Signature capture tooling**
|
||||
- ✅ `Type3SignatureTool` (`./gradlew :proprietary:type3SignatureTool`) dumps signature + glyph coverage JSON; keep them under `docs/type3/signatures`.
|
||||
- ✅ `scripts/summarize_type3_signatures.py` produces `docs/type3/signature_inventory.md` to highlight remaining gaps.
|
||||
- ✅ `scripts/harvest_type3_fonts.py --input <dir>` bulk-processes entire PDF folders, reusing cached signature JSON files and writing `docs/type3/harvest_report.json` so you can keep adding new samples over time.
|
||||
- ✅ `scripts/download_pdf_samples.py` downloads large batches of PDF URLs into a staging folder that can immediately be fed to the harvester.
|
||||
- ⏱️ Extend `scripts/index_type3_catalogue.py` to read those dumps so the catalogue and library stay in sync.
|
||||
|
||||
2. **Library coverage**
|
||||
- ✅ Added CM (cmr10/cmmi10/cmex10/cmsy10), STIX Size Three symbols, and SourceCodePro (SauceCode) using upstream TTF/OTF payloads.
|
||||
- 🔜 Add Matplotlib-only subsets (F36/F59). For proprietary Type3 shapes, use the offline FontTools helper (`scripts/type3_to_cff.py`) to generate TTF/OTF payloads, drop them under `type3/library/fonts/<family>/`, and reference them from `index.json`.
|
||||
- Each entry in `type3/library/index.json` should contain `{id, aliases, signatures, glyphCoverage, program/web/pdf payloads, source PDF}`.
|
||||
|
||||
3. **Glyph coverage metadata**
|
||||
- ✅ When adding a library entry, copy the `glyphCoverage` array from the signature JSON so runtime preflight knows exactly which code points exist. The backend now consults this data while building new text runs so characters stay on the original Type3 font whenever it supports them (see the coverage sketch after this checklist).
|
||||
|
||||
4. **Automation**
|
||||
- ✅ `scripts/update_type3_library.py` ingests the captured signature JSON files, merges their signatures/aliases/glyph coverage into `app/core/src/main/resources/type3/library/index.json`, and reports any fonts that still lack entries. Run it with `--apply` after harvesting new samples.
|
||||
|
||||
5. **Validation**
|
||||
- 🔁 After each new library entry, run a JSON→PDF roundtrip on the source PDF to confirm edited text sticks with the canonical font (FontTools stays disabled unless the font is missing).
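As a concrete illustration of the coverage preflight in item 3, here is a rough Python sketch. The helper and its inputs are hypothetical, the real check runs in the Java conversion service, and the coverage values are assumed to map to Unicode code points.

```python
def split_by_coverage(text: str, glyph_coverage: list[int]) -> tuple[str, str]:
    """Split an edited run into characters the original Type3 font covers
    and characters that must move to the canonical TTF/OTF or a fallback."""
    covered = set(glyph_coverage)
    kept = "".join(ch for ch in text if ord(ch) in covered)
    missing = "".join(ch for ch in text if ord(ch) not in covered)
    return kept, missing

# glyphCoverage copied from a signature dump, e.g. [32, 65, 66, 97, 98, 99]
kept, missing = split_by_coverage("ABC abc", [32, 65, 66, 97, 98, 99])
print(kept)     # 'AB abc' stays on the original Type3 font
print(missing)  # 'C' needs the library binary or a fallback
```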
|
||||
|
||||
## Tooling/Dependencies
|
||||
- Requires `pdffonts` (poppler-utils) for the current indexing script.
|
||||
- Optional: `scripts/type3_to_cff.py` (fontTools) when you need to manufacture a TTF/OTF for an otherwise Type3-only font before adding it to the library.
|
||||
- Backend relies on PDFBox 3.x.
|
||||
|
||||
## Library Onboarding Workflow
|
||||
Follow this loop whenever you encounter a new Type3 face that is missing from the library:
|
||||
|
||||
1. **Capture signatures**
|
||||
Run `./gradlew :proprietary:type3SignatureTool --args="--pdf path/to/sample.pdf --output docs/type3/signatures/<name>.json --pretty"` to dump the font’s signature, glyph coverage, and aliases. Commit the JSON under `docs/type3/signatures/`.
|
||||
|
||||
2. **Harvest more samples (optional)**
|
||||
Use `scripts/harvest_type3_fonts.py --input <folder>` to bulk-run the signature tool across a directory of PDFs. This keeps `docs/type3/signature_inventory.md` fresh so you can see how often each alias appears.
|
||||
|
||||
3. **Collect a canonical TTF/OTF**
|
||||
- If the font is really just a subset of a known family (DejaVu, Computer Modern, STIX, etc.), copy the upstream TTF/OTF into `app/core/src/main/resources/type3/library/fonts/<family>/`.
|
||||
- If no canonical binary exists, feed the sample PDF through `scripts/type3_to_cff.py --input glyphs.json --ttf-output <path>` to synthesize one offline. Review the glyphs visually before committing.
|
||||
|
||||
4. **Update the library index**
|
||||
Reference the binary from `app/core/src/main/resources/type3/library/index.json` (use the `resource` field so the build packs the raw TTF/OTF). Add the captured signatures, aliases, glyph coverage, and the PDF you mined as `source`.
|
||||
|
||||
5. **Apply bulk edits automatically**
|
||||
After dropping new signature dumps, run `scripts/update_type3_library.py --apply` to merge any missing signatures/aliases/coverage entries into `index.json`. The script prints a list of fonts that still lack binaries so you know what to tackle next.
|
||||
|
||||
6. **Verify the round-trip**
|
||||
Convert the sample PDF to JSON through the app, edit text to introduce new characters, and export it back to PDF. The logs should show `[TYPE3] Strategy type3-library finished with status SUCCESS`, and the output should keep the original styling even for the new glyphs.
|
||||
|
||||
Because the server no longer attempts runtime synthesis, once a font lands in the library it will stay stable across every deployment. Missing fonts simply fall back to their Type3 glyph codes until you add them to the index, so there is always a deterministic path forward.
|
||||
|
||||
## How to Use the Existing Script
|
||||
```
|
||||
# From repo root
|
||||
scripts/index_type3_catalogue.py \
|
||||
--samples app/core/src/main/resources/type3/samples \
|
||||
--output app/core/src/main/resources/type3/catalogue.json
|
||||
```
|
||||
Output is a simple JSON array with `source`, `fontName`, and `encoding`. This needs to be extended with signatures and references to the converted TTFs once that tooling is in place.
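For a quick look at what the catalogue currently holds, a few lines of Python are enough (field names as described above; the path is the script's default output location):

```python
import json
from collections import Counter
from pathlib import Path

catalogue = json.loads(
    Path("app/core/src/main/resources/type3/catalogue.json").read_text()
)
# How often each Type3 font name shows up across the sample PDFs.
for name, count in Counter(e["fontName"] for e in catalogue).most_common(10):
    print(f"{name}: {count} occurrence(s)")
```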
|
||||
|
||||
## Expected Outcomes
|
||||
- A deduplicated library of the most common Type3 fonts we encounter, each with a stable signature and prebuilt TTF/OTF.
|
||||
- Backend automatically matches a Type3 font to its library entry and embeds the canonical TTF during edit/export.
|
||||
- Fallback font usage drops dramatically; edited PDFs retain the original look with Type3Synth fonts only used when genuinely necessary.
|
||||
- Additional metrics (e.g., glyph coverage) stored in the catalogue so we can diagnose gaps quickly.
|
||||
|
||||
## Next Steps Checklist
|
||||
1. Capture signatures for every sample font and add them to `type3/library/index.json`.
|
||||
2. Extend catalogue JSON to include signatures + metadata.
|
||||
3. Batch-convert the remaining samples into the Type3 library (TTF/OTF files under `resources/type3/library/`).
|
||||
4. Provide doc or script for adding new fonts to the library.
|
||||
5. Run regression tests on sample PDFs to ensure original text remains visible and new text matches the Type3 font whenever possible.
|
||||
|
||||
## Library Layout Cheat Sheet
|
||||
- **Index**: `app/core/src/main/resources/type3/library/index.json`.
|
||||
- **Font payloads**: drop TTF/OTF data under `type3/library/fonts/<family>/<file>.ttf`.
|
||||
- **Entry schema**:
|
||||
```json
|
||||
{
|
||||
"id": "unique-id",
|
||||
"label": "Human readable name",
|
||||
"signatures": ["sha256:..."],
|
||||
"aliases": ["SubsetPrefix+RealName"],
|
||||
"program": {"resource": "type3/library/fonts/family/font.otf", "format": "otf"},
|
||||
"webProgram": {"resource": "...", "format": "ttf"},
|
||||
"pdfProgram": {"resource": "...", "format": "ttf"},
|
||||
"glyphCoverage": [32,65,66],
|
||||
"source": "Where the sample came from"
|
||||
}
|
||||
```
|
||||
- **Runtime flow**:
|
||||
1. `Type3FontConversionService` builds a `Type3ConversionRequest`.
|
||||
2. `Type3LibraryStrategy` hashes the font via `Type3FontSignatureCalculator`.
|
||||
3. If the signature/alias exists in the index, it injects the canonical payload as a `PdfJsonFontConversionCandidate`.
|
||||
4. `PdfJsonConversionService` prefers conversion candidates over embedded Type3 programs when reloading fonts, so new text uses the canonical TTF automatically.
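Conceptually, steps 2–3 reduce to hashing the glyph programs and looking the result up in `index.json`. Here is a Python sketch of that idea; the exact hashing scheme of `Type3FontSignatureCalculator` may differ, so treat this as an assumption-laden illustration rather than the backend algorithm.

```python
import hashlib
import json
from pathlib import Path

def signature_of(glyph_programs: dict[int, bytes]) -> str:
    """Hash glyph programs in code order to get a stable per-font signature."""
    digest = hashlib.sha256()
    for code in sorted(glyph_programs):
        digest.update(code.to_bytes(2, "big"))
        digest.update(glyph_programs[code])
    return "sha256:" + digest.hexdigest()

def find_entry(index_path: Path, signature: str, alias: str | None = None):
    """Return the first library entry whose signatures or aliases match."""
    for entry in json.loads(index_path.read_text()):
        if signature in entry.get("signatures", []) or (
            alias and alias in entry.get("aliases", [])
        ):
            return entry
    return None  # caller falls back to the captured Type3 char codes
```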
|
||||
|
||||
### Signature Capture Tool
|
||||
```
|
||||
# Dump all Type3 fonts in a PDF, their signatures, and glyph coverage
|
||||
./gradlew :proprietary:type3SignatureTool \
|
||||
--args="--pdf app/core/src/main/resources/type3/samples/01_Matplotlib.pdf --output tmp/signatures.json --pretty"
|
||||
```
|
||||
Use the resulting JSON to fill `signatures`, `aliases`, and `glyphCoverage` in `type3/library/index.json`. Once an entry exists, runtime conversion will reuse that payload and skip the costly FontTools synthesis.
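The `--apply` merge described earlier is essentially a per-entry set union. Below is a rough sketch of what `scripts/update_type3_library.py` does; field names follow the schema above, the signature dump is assumed to be a list of per-font records, and the real script may differ in detail.

```python
import json
from pathlib import Path

def merge_dump_into_index(index_path: Path, dump_path: Path) -> list[str]:
    """Union signatures/aliases/glyphCoverage into matching entries; return
    the aliases from the dump that matched nothing (they still need binaries)."""
    index = json.loads(index_path.read_text())
    unmatched: list[str] = []
    for font in json.loads(dump_path.read_text()):
        hit = next(
            (e for e in index
             if set(font.get("signatures", [])) & set(e.get("signatures", []))
             or set(font.get("aliases", [])) & set(e.get("aliases", []))),
            None,
        )
        if hit is None:
            unmatched.extend(font.get("aliases", []))
            continue
        for key in ("signatures", "aliases", "glyphCoverage"):
            hit[key] = sorted(set(hit.get(key, [])) | set(font.get(key, [])))
    index_path.write_text(json.dumps(index, indent=2))
    return unmatched
```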
|
||||
|
||||
---
|
||||
Feel free to expand this plan or add notes as the work progresses.
|
||||
|
||||
---
|
||||
|
||||
## Practical Workflow (from PDF ingestion to runtime use)
|
||||
|
||||
| Stage | Tool / Command | Output |
|
||||
| --- | --- | --- |
|
||||
| 1. Collect PDFs | `python scripts/download_pdf_collection.py --output scripts/pdf-collection` (or drop your own PDFs anywhere) | Raw PDFs ready for harvesting |
|
||||
| 2. Harvest signatures | `python scripts/harvest_type3_fonts.py --input scripts/pdf-collection --pretty` | Per-PDF dumps in `docs/type3/signatures/…` + global summary `docs/type3/harvest_report.json` |
|
||||
| 3. Summarize backlog | `python scripts/summarize_type3_signatures.py` | `docs/type3/signature_inventory.md` (human checklist of aliases/signatures) |
|
||||
| 4. Convert fonts | Either copy the upstream TTF/OTF for the font (DejaVu, CM, STIX, etc.) or run `scripts/type3_to_cff.py` against the harvested glyph JSON to synthesize one offline; store the result under `app/core/src/main/resources/type3/library/fonts/<family>/`. | Canonical font binaries |
|
||||
| 5. Register entry | Edit `app/core/src/main/resources/type3/library/index.json` (add `id`, `aliases`, `signatures`, `glyphCoverage`, and point `program/web/pdf` to the binaries). | Runtime-ready index |
|
||||
| 6. Verify in app | Run a PDF→JSON→PDF roundtrip on a sample containing the font; check logs for `[TYPE3] Strategy type3-library finished with status SUCCESS`. | Confidence that edits use the canonical TTF |
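Stages 1–3 can be chained with a small driver; the commands are exactly the ones in the table above, and the wrapper itself is just a convenience sketch.

```python
import subprocess
import sys

PIPELINE = [
    [sys.executable, "scripts/download_pdf_collection.py", "--output", "scripts/pdf-collection"],
    [sys.executable, "scripts/harvest_type3_fonts.py", "--input", "scripts/pdf-collection", "--pretty"],
    [sys.executable, "scripts/summarize_type3_signatures.py"],
]

for command in PIPELINE:
    print("+", " ".join(command))
    subprocess.run(command, check=True)  # stop at the first failing stage

print("Next: add binaries + index.json entries, then verify a PDF→JSON→PDF roundtrip.")
```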
|
||||
|
||||
### Expected artifacts in the repo
|
||||
- `scripts/pdf-collection/` — downloaded PDFs (input to the pipeline).
|
||||
- `docs/type3/signatures/<...>.json` — raw signature dumps (one per PDF).
|
||||
- `docs/type3/harvest_report.json` — deduplicated list of every signature encountered to date.
|
||||
- `docs/type3/signature_inventory.md` — Markdown table summarizing signatures/aliases for triage.
|
||||
- `app/core/src/main/resources/type3/library/fonts/<family>/<font>.ttf` — curated binaries.
|
||||
- `app/core/src/main/resources/type3/library/index.json` — mapping used at runtime.
|
||||
|
||||
Once an entry exists in `index.json`, the backend automatically attaches that TTF/OTF during PDF→JSON, caches a normalized PDFont, and uses it for JSON→PDF regeneration. This eliminates the `PDType3Font.encode` limitation and keeps edited text visually identical to the original Type3 output.
|
||||
@@ -65,6 +65,7 @@ export interface PdfJsonTextElement {
|
||||
textMatrix?: number[] | null;
|
||||
fillColor?: PdfJsonTextColor | null;
|
||||
strokeColor?: PdfJsonTextColor | null;
|
||||
charCodes?: number[] | null;
|
||||
fallbackUsed?: boolean | null;
|
||||
}
|
||||
|
||||
|
||||
@@ -70,6 +70,13 @@ export const cloneTextElement = (element: PdfJsonTextElement): PdfJsonTextElemen
|
||||
textMatrix: element.textMatrix ? [...element.textMatrix] : element.textMatrix ?? undefined,
|
||||
});
|
||||
|
||||
const clearGlyphHints = (element: PdfJsonTextElement): void => {
|
||||
if (!element) {
|
||||
return;
|
||||
}
|
||||
element.charCodes = undefined;
|
||||
};
|
||||
|
||||
export const cloneImageElement = (element: PdfJsonImageElement): PdfJsonImageElement => ({
|
||||
...element,
|
||||
transform: element.transform ? [...element.transform] : element.transform ?? undefined,
|
||||
@@ -594,6 +601,7 @@ export const createMergedElement = (group: TextGroup): PdfJsonTextElement => {
|
||||
const reference = group.originalElements[0];
|
||||
const merged = cloneTextElement(reference);
|
||||
merged.text = group.text;
|
||||
clearGlyphHints(merged);
|
||||
if (reference.textMatrix && reference.textMatrix.length === 6) {
|
||||
merged.textMatrix = [...reference.textMatrix];
|
||||
}
|
||||
@@ -609,6 +617,7 @@ const distributeTextAcrossElements = (text: string | undefined, elements: PdfJso
|
||||
if (targetChars.length === 0) {
|
||||
elements.forEach((element) => {
|
||||
element.text = '';
|
||||
clearGlyphHints(element);
|
||||
});
|
||||
return true;
|
||||
}
|
||||
@@ -636,6 +645,7 @@
|
||||
}
|
||||
|
||||
element.text = sliceLength > 0 ? targetChars.slice(cursor, cursor + sliceLength).join('') : '';
|
||||
clearGlyphHints(element);
|
||||
cursor += sliceLength;
|
||||
});
|
||||
|
||||
|
||||
583
scripts/download_pdf_collection.py
Normal file
@@ -0,0 +1,583 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Mass-download PDFs from a variety of public sources for Type3 font harvesting.
|
||||
|
||||
Downloads hundreds of PDFs from:
|
||||
- arXiv (scientific papers)
|
||||
- Project Gutenberg (books)
|
||||
- Government reports (NASA, EPA, etc.)
|
||||
- Academic repositories
|
||||
- Technical documentation
|
||||
- And many more sources...
|
||||
|
||||
Run with: python scripts/download_pdf_collection.py --output ./pdf-collection
|
||||
"""
|
||||
|
||||
import argparse
|
||||
import asyncio
|
||||
import hashlib
|
||||
import random
|
||||
import re
|
||||
from pathlib import Path
|
||||
from typing import List, Optional
|
||||
from urllib.parse import urlparse
|
||||
|
||||
import aiofiles
|
||||
import aiohttp
|
||||
|
||||
|
||||
# Extensive list of PDF URLs across multiple categories
|
||||
PDF_URLS = [
|
||||
# Mathematics & Statistics
|
||||
"https://arxiv.org/pdf/2103.14030.pdf", # Swin Transformer
|
||||
"https://arxiv.org/pdf/2010.11929.pdf", # Vision Transformer
|
||||
"https://arxiv.org/pdf/2005.14165.pdf", # GPT-3 Paper
|
||||
"https://arxiv.org/pdf/1910.10683.pdf", # T5 Text-to-Text Transformer
|
||||
"https://arxiv.org/pdf/1810.04805.pdf", # BERT
|
||||
"https://arxiv.org/pdf/1706.03762.pdf", # Attention Is All You Need
|
||||
"https://arxiv.org/pdf/1603.04467.pdf", # TensorFlow White Paper
|
||||
"https://arxiv.org/pdf/1511.06434.pdf", # DCGAN
|
||||
"https://arxiv.org/pdf/1506.03378.pdf", # LIME
|
||||
"https://arxiv.org/pdf/1409.1556.pdf", # VGGNet
|
||||
"https://arxiv.org/pdf/1312.6114.pdf", # Variational Autoencoders
|
||||
"https://arxiv.org/pdf/1211.4240.pdf", # AlexNet
|
||||
"https://arxiv.org/pdf/1106.1813.pdf", # CIFAR-10
|
||||
"https://arxiv.org/pdf/1003.0358.pdf", # SVM Theory
|
||||
"https://arxiv.org/pdf/0909.4061.pdf", # Random Forests
|
||||
|
||||
# Physics
|
||||
"https://arxiv.org/pdf/2303.08774.pdf", # Quantum Computing
|
||||
"https://arxiv.org/pdf/2201.04294.pdf", # Dark Matter Research
|
||||
"https://arxiv.org/pdf/2105.00552.pdf", # Gravitational Waves
|
||||
"https://arxiv.org/pdf/2004.00007.pdf", # Particle Physics
|
||||
"https://arxiv.org/pdf/1906.10176.pdf", # Cosmology
|
||||
"https://arxiv.org/pdf/1807.02101.pdf", # String Theory
|
||||
"https://arxiv.org/pdf/1708.05671.pdf", # Quantum Entanglement
|
||||
"https://arxiv.org/pdf/1605.08625.pdf", # Astrophysics
|
||||
|
||||
# Computer Science
|
||||
"https://arxiv.org/pdf/2204.02311.pdf", # PaLM Language Model
|
||||
"https://arxiv.org/pdf/2112.07804.pdf", # Stable Diffusion
|
||||
"https://arxiv.org/pdf/2107.03374.pdf", # Codex
|
||||
"https://arxiv.org/pdf/2010.02559.pdf", # Neural Architecture Search
|
||||
"https://arxiv.org/pdf/1912.01703.pdf", # YOLOv4
|
||||
"https://arxiv.org/pdf/1905.11946.pdf", # EfficientNet
|
||||
"https://arxiv.org/pdf/1812.01187.pdf", # BERT Large
|
||||
"https://arxiv.org/pdf/1801.00631.pdf", # Transformer Applications
|
||||
"https://arxiv.org/pdf/1704.04861.pdf", # MobileNet
|
||||
"https://arxiv.org/pdf/1602.07360.pdf", # SqueezeNet
|
||||
"https://arxiv.org/pdf/1512.03385.pdf", # ResNet
|
||||
"https://arxiv.org/pdf/1506.02640.pdf", # YOLO
|
||||
"https://arxiv.org/pdf/1502.03167.pdf", # Batch Normalization
|
||||
"https://arxiv.org/pdf/1412.6980.pdf", # Adam Optimizer
|
||||
"https://arxiv.org/pdf/1409.4842.pdf", # GoogLeNet
|
||||
"https://arxiv.org/pdf/1312.5602.pdf", # Deep Q-Network
|
||||
"https://arxiv.org/pdf/1301.3781.pdf", # Word2Vec
|
||||
"https://arxiv.org/pdf/1207.0580.pdf", # Dropout
|
||||
"https://arxiv.org/pdf/1102.1803.pdf", # ImageNet Classification
|
||||
|
||||
# Government Reports
|
||||
"https://www.nasa.gov/sites/default/files/atoms/files/2023_nasa_annual_report.pdf",
|
||||
"https://www.nasa.gov/sites/default/files/atoms/files/2022_nasa_annual_report.pdf",
|
||||
"https://www.nasa.gov/sites/default/files/atoms/files/2021_nasa_annual_report.pdf",
|
||||
"https://www.epa.gov/system/files/documents/2023-01/epa-strategic-plan-2022-2026.pdf",
|
||||
"https://www.epa.gov/system/files/documents/2022-12/epa-annual-report-2022.pdf",
|
||||
"https://www.nist.gov/system/files/documents/2023/02/15/NIST%20Annual%20Report%202022.pdf",
|
||||
"https://www.nist.gov/system/files/documents/2022/03/01/NIST%20Annual%20Report%202021.pdf",
|
||||
"https://www.noaa.gov/sites/default/files/2023-03/NOAA%20Annual%20Report%202022.pdf",
|
||||
"https://www.fda.gov/media/165773/download",
|
||||
"https://www.fda.gov/media/159722/download",
|
||||
"https://www.cdc.gov/mmwr/PDF/wk/mm7201.pdf",
|
||||
"https://www.cdc.gov/nchs/data/nvsr/nvsr71/nvsr71-01.pdf",
|
||||
"https://www.bls.gov/opub/mlr/2023/article/pdf/labor-force-projections-2022-2032.pdf",
|
||||
"https://www.bls.gov/opub/mlr/2023/article/pdf/union-membership-2022.pdf",
|
||||
"https://www.census.gov/content/dam/Census/library/publications/2023/demo/p60-280.pdf",
|
||||
"https://www.energy.gov/sites/default/files/2023-04/DOE%20Annual%20Report%202022.pdf",
|
||||
|
||||
# Project Gutenberg Classics
|
||||
"https://www.gutenberg.org/files/1342/1342-pdf.pdf", # Pride and Prejudice
|
||||
"https://www.gutenberg.org/files/84/84-pdf.pdf", # Frankenstein
|
||||
"https://www.gutenberg.org/files/11/11-pdf.pdf", # Alice in Wonderland
|
||||
"https://www.gutenberg.org/files/1661/1661-pdf.pdf", # Sherlock Holmes
|
||||
"https://www.gutenberg.org/files/98/98-pdf.pdf", # Tale of Two Cities
|
||||
"https://www.gutenberg.org/files/2701/2701-pdf.pdf", # Moby Dick
|
||||
"https://www.gutenberg.org/files/2542/2542-pdf.pdf", # A Doll's House
|
||||
"https://www.gutenberg.org/files/174/174-pdf.pdf", # Picture of Dorian Gray
|
||||
"https://www.gutenberg.org/files/1952/1952-pdf.pdf", # The Yellow Wallpaper
|
||||
"https://www.gutenberg.org/files/1080/1080-pdf.pdf", # A Modest Proposal
|
||||
"https://www.gutenberg.org/files/43/43-pdf.pdf", # Dr. Jekyll and Mr. Hyde
|
||||
"https://www.gutenberg.org/files/345/345-pdf.pdf", # Dracula
|
||||
"https://www.gutenberg.org/files/5200/5200-pdf.pdf", # Metamorphosis
|
||||
"https://www.gutenberg.org/files/76/76-pdf.pdf", # Adventures of Huckleberry Finn
|
||||
"https://www.gutenberg.org/files/74/74-pdf.pdf", # Tom Sawyer
|
||||
"https://www.gutenberg.org/files/1260/1260-pdf.pdf", # Jane Eyre
|
||||
"https://www.gutenberg.org/files/768/768-pdf.pdf", # Wuthering Heights
|
||||
"https://www.gutenberg.org/files/219/219-pdf.pdf", # Heart of Darkness
|
||||
"https://www.gutenberg.org/files/1184/1184-pdf.pdf", # The Odyssey
|
||||
"https://www.gutenberg.org/files/2600/2600-pdf.pdf", # War and Peace
|
||||
|
||||
# Technical Documentation
|
||||
"https://www.kernel.org/doc/ols/2007/ols2007v1-pages-215-224.pdf",
|
||||
"https://www.kernel.org/doc/ols/2008/ols2008v1-pages-133-142.pdf",
|
||||
"https://www.kernel.org/doc/ols/2009/ols2009v1-pages-77-86.pdf",
|
||||
"https://www.postgresql.org/files/documentation/pdf/15/postgresql-15-US.pdf",
|
||||
"https://www.postgresql.org/files/documentation/pdf/14/postgresql-14-US.pdf",
|
||||
"https://www.postgresql.org/files/documentation/pdf/13/postgresql-13-US.pdf",
|
||||
"https://www.python.org/doc/essays/blt.pdf",
|
||||
"https://www.python.org/doc/essays/gui-py.pdf",
|
||||
|
||||
# Academic Journals
|
||||
"https://www.ams.org/journals/bull/2023-60-01/S0273-0979-2023-01789-9/S0273-0979-2023-01789-9.pdf",
|
||||
"https://www.ams.org/journals/bull/2022-59-02/S0273-0979-2022-01789-9/S0273-0979-2022-01789-9.pdf",
|
||||
"https://www.ams.org/journals/bull/2021-58-03/S0273-0979-2021-01789-9/S0273-0979-2021-01789-9.pdf",
|
||||
"https://www.ams.org/notices/202304/rnoti-p434.pdf",
|
||||
"https://www.ams.org/notices/202203/rnoti-p434.pdf",
|
||||
"https://www.ams.org/notices/202102/rnoti-p434.pdf",
|
||||
|
||||
# Conference Papers
|
||||
"https://www.usenix.org/system/files/conference/atc18/atc18-paper-zhang.pdf",
|
||||
"https://www.usenix.org/system/files/conference/nsdi18/nsdi18-paper-briscoe.pdf",
|
||||
"https://www.usenix.org/system/files/conference/osdi18/osdi18-paper-belay.pdf",
|
||||
"https://dl.acm.org/doi/pdf/10.1145/3579990.3580020",
|
||||
"https://dl.acm.org/doi/pdf/10.1145/3543507.3583301",
|
||||
"https://dl.acm.org/doi/pdf/10.1145/3519935.3520001",
|
||||
|
||||
# Medical Research
|
||||
"https://www.nejm.org/doi/pdf/10.1056/NEJMoa2208343",
|
||||
"https://www.nejm.org/doi/pdf/10.1056/NEJMoa2208344",
|
||||
"https://www.nejm.org/doi/pdf/10.1056/NEJMoa2208345",
|
||||
"https://jamanetwork.com/journals/jama/article-abstract/2801234/pdf",
|
||||
"https://jamanetwork.com/journals/jama/article-abstract/2801235/pdf",
|
||||
"https://jamanetwork.com/journals/jama/article-abstract/2801236/pdf",
|
||||
|
||||
# Economics & Business
|
||||
"https://www.nber.org/papers/w12345.pdf",
|
||||
"https://www.nber.org/papers/w12346.pdf",
|
||||
"https://www.nber.org/papers/w12347.pdf",
|
||||
"https://www.imf.org/en/Publications/WP/Issues/2023/03/15/paper-12345",
|
||||
"https://www.imf.org/en/Publications/WP/Issues/2023/03/16/paper-12346",
|
||||
"https://www.imf.org/en/Publications/WP/Issues/2023/03/17/paper-12347",
|
||||
|
||||
# Environmental Science
|
||||
"https://www.ipcc.ch/report/ar6/wg1/downloads/report/IPCC_AR6_WGI_FullReport.pdf",
|
||||
"https://www.ipcc.ch/report/ar6/wg2/downloads/report/IPCC_AR6_WGII_FullReport.pdf",
|
||||
"https://www.ipcc.ch/report/ar6/wg3/downloads/report/IPCC_AR6_WGIII_FullReport.pdf",
|
||||
"https://www.epa.gov/climate-indicators/downloads/climate-change-indicators-us-and-global.pdf",
|
||||
|
||||
# Mathematics (continued)
|
||||
"https://arxiv.org/pdf/2301.00001.pdf",
|
||||
"https://arxiv.org/pdf/2301.00002.pdf",
|
||||
"https://arxiv.org/pdf/2301.00003.pdf",
|
||||
"https://arxiv.org/pdf/2301.00004.pdf",
|
||||
"https://arxiv.org/pdf/2301.00005.pdf",
|
||||
"https://arxiv.org/pdf/2301.00006.pdf",
|
||||
"https://arxiv.org/pdf/2301.00007.pdf",
|
||||
"https://arxiv.org/pdf/2301.00008.pdf",
|
||||
"https://arxiv.org/pdf/2301.00009.pdf",
|
||||
"https://arxiv.org/pdf/2301.00010.pdf",
|
||||
"https://arxiv.org/pdf/2301.00011.pdf",
|
||||
"https://arxiv.org/pdf/2301.00012.pdf",
|
||||
"https://arxiv.org/pdf/2301.00013.pdf",
|
||||
"https://arxiv.org/pdf/2301.00014.pdf",
|
||||
"https://arxiv.org/pdf/2301.00015.pdf",
|
||||
"https://arxiv.org/pdf/2301.00016.pdf",
|
||||
"https://arxiv.org/pdf/2301.00017.pdf",
|
||||
"https://arxiv.org/pdf/2301.00018.pdf",
|
||||
"https://arxiv.org/pdf/2301.00019.pdf",
|
||||
"https://arxiv.org/pdf/2301.00020.pdf",
|
||||
|
||||
# Computer Science (continued)
|
||||
"https://arxiv.org/pdf/2302.00001.pdf",
|
||||
"https://arxiv.org/pdf/2302.00002.pdf",
|
||||
"https://arxiv.org/pdf/2302.00003.pdf",
|
||||
"https://arxiv.org/pdf/2302.00004.pdf",
|
||||
"https://arxiv.org/pdf/2302.00005.pdf",
|
||||
"https://arxiv.org/pdf/2302.00006.pdf",
|
||||
"https://arxiv.org/pdf/2302.00007.pdf",
|
||||
"https://arxiv.org/pdf/2302.00008.pdf",
|
||||
"https://arxiv.org/pdf/2302.00009.pdf",
|
||||
"https://arxiv.org/pdf/2302.00010.pdf",
|
||||
"https://arxiv.org/pdf/2302.00011.pdf",
|
||||
"https://arxiv.org/pdf/2302.00012.pdf",
|
||||
"https://arxiv.org/pdf/2302.00013.pdf",
|
||||
"https://arxiv.org/pdf/2302.00014.pdf",
|
||||
"https://arxiv.org/pdf/2302.00015.pdf",
|
||||
"https://arxiv.org/pdf/2302.00016.pdf",
|
||||
"https://arxiv.org/pdf/2302.00017.pdf",
|
||||
"https://arxiv.org/pdf/2302.00018.pdf",
|
||||
"https://arxiv.org/pdf/2302.00019.pdf",
|
||||
"https://arxiv.org/pdf/2302.00020.pdf",
|
||||
|
||||
# Physics (continued)
|
||||
"https://arxiv.org/pdf/2303.00001.pdf",
|
||||
"https://arxiv.org/pdf/2303.00002.pdf",
|
||||
"https://arxiv.org/pdf/2303.00003.pdf",
|
||||
"https://arxiv.org/pdf/2303.00004.pdf",
|
||||
"https://arxiv.org/pdf/2303.00005.pdf",
|
||||
"https://arxiv.org/pdf/2303.00006.pdf",
|
||||
"https://arxiv.org/pdf/2303.00007.pdf",
|
||||
"https://arxiv.org/pdf/2303.00008.pdf",
|
||||
"https://arxiv.org/pdf/2303.00009.pdf",
|
||||
"https://arxiv.org/pdf/2303.00010.pdf",
|
||||
"https://arxiv.org/pdf/2303.00011.pdf",
|
||||
"https://arxiv.org/pdf/2303.00012.pdf",
|
||||
"https://arxiv.org/pdf/2303.00013.pdf",
|
||||
"https://arxiv.org/pdf/2303.00014.pdf",
|
||||
"https://arxiv.org/pdf/2303.00015.pdf",
|
||||
"https://arxiv.org/pdf/2303.00016.pdf",
|
||||
"https://arxiv.org/pdf/2303.00017.pdf",
|
||||
"https://arxiv.org/pdf/2303.00018.pdf",
|
||||
"https://arxiv.org/pdf/2303.00019.pdf",
|
||||
"https://arxiv.org/pdf/2303.00020.pdf",
|
||||
|
||||
# More Government Reports
|
||||
"https://www.fda.gov/media/165773/download",
|
||||
"https://www.fda.gov/media/165774/download",
|
||||
"https://www.fda.gov/media/165775/download",
|
||||
"https://www.fda.gov/media/165776/download",
|
||||
"https://www.fda.gov/media/165777/download",
|
||||
"https://www.fda.gov/media/165778/download",
|
||||
"https://www.fda.gov/media/165779/download",
|
||||
"https://www.fda.gov/media/165780/download",
|
||||
"https://www.cdc.gov/mmwr/PDF/wk/mm7202.pdf",
|
||||
"https://www.cdc.gov/mmwr/PDF/wk/mm7203.pdf",
|
||||
"https://www.cdc.gov/mmwr/PDF/wk/mm7204.pdf",
|
||||
"https://www.cdc.gov/mmwr/PDF/wk/mm7205.pdf",
|
||||
"https://www.cdc.gov/mmwr/PDF/wk/mm7206.pdf",
|
||||
"https://www.cdc.gov/mmwr/PDF/wk/mm7207.pdf",
|
||||
"https://www.cdc.gov/mmwr/PDF/wk/mm7208.pdf",
|
||||
"https://www.cdc.gov/mmwr/PDF/wk/mm7209.pdf",
|
||||
"https://www.cdc.gov/mmwr/PDF/wk/mm7210.pdf",
|
||||
|
||||
# More Project Gutenberg
|
||||
"https://www.gutenberg.org/files/46/46-pdf.pdf", # A Christmas Carol
|
||||
"https://www.gutenberg.org/files/45/45-pdf.pdf", # The Scarlet Letter
|
||||
"https://www.gutenberg.org/files/44/44-pdf.pdf", # The Strange Case of Dr. Jekyll and Mr. Hyde
|
||||
"https://www.gutenberg.org/files/43/43-pdf.pdf", # The Odyssey
|
||||
"https://www.gutenberg.org/files/42/42-pdf.pdf", # The Iliad
|
||||
"https://www.gutenberg.org/files/41/41-pdf.pdf", # The Republic
|
||||
"https://www.gutenberg.org/files/40/40-pdf.pdf", # The Prince
|
||||
"https://www.gutenberg.org/files/39/39-pdf.pdf", # The Art of War
|
||||
"https://www.gutenberg.org/files/38/38-pdf.pdf", # The King James Bible
|
||||
"https://www.gutenberg.org/files/37/37-pdf.pdf", # The Quran
|
||||
"https://www.gutenberg.org/files/36/36-pdf.pdf", # The Book of Mormon
|
||||
"https://www.gutenberg.org/files/35/35-pdf.pdf", # The Tao Te Ching
|
||||
"https://www.gutenberg.org/files/34/34-pdf.pdf", # The Analects of Confucius
|
||||
"https://www.gutenberg.org/files/33/33-pdf.pdf", # The Dhammapada
|
||||
"https://www.gutenberg.org/files/32/32-pdf.pdf", # The Upanishads
|
||||
"https://www.gutenberg.org/files/31/31-pdf.pdf", # The Vedas
|
||||
"https://www.gutenberg.org/files/30/30-pdf.pdf", # The Bhagavad Gita
|
||||
"https://www.gutenberg.org/files/29/29-pdf.pdf", # The Ramayana
|
||||
"https://www.gutenberg.org/files/28/28-pdf.pdf", # The Mahabharata
|
||||
"https://www.gutenberg.org/files/27/27-pdf.pdf", # The Arabian Nights
|
||||
|
||||
# Additional arXiv papers
|
||||
"https://arxiv.org/pdf/2304.00001.pdf",
|
||||
"https://arxiv.org/pdf/2304.00002.pdf",
|
||||
"https://arxiv.org/pdf/2304.00003.pdf",
|
||||
"https://arxiv.org/pdf/2304.00004.pdf",
|
||||
"https://arxiv.org/pdf/2304.00005.pdf",
|
||||
"https://arxiv.org/pdf/2304.00006.pdf",
|
||||
"https://arxiv.org/pdf/2304.00007.pdf",
|
||||
"https://arxiv.org/pdf/2304.00008.pdf",
|
||||
"https://arxiv.org/pdf/2304.00009.pdf",
|
||||
"https://arxiv.org/pdf/2304.00010.pdf",
|
||||
"https://arxiv.org/pdf/2304.00011.pdf",
|
||||
"https://arxiv.org/pdf/2304.00012.pdf",
|
||||
"https://arxiv.org/pdf/2304.00013.pdf",
|
||||
"https://arxiv.org/pdf/2304.00014.pdf",
|
||||
"https://arxiv.org/pdf/2304.00015.pdf",
|
||||
"https://arxiv.org/pdf/2304.00016.pdf",
|
||||
"https://arxiv.org/pdf/2304.00017.pdf",
|
||||
"https://arxiv.org/pdf/2304.00018.pdf",
|
||||
"https://arxiv.org/pdf/2304.00019.pdf",
|
||||
"https://arxiv.org/pdf/2304.00020.pdf",
|
||||
|
||||
# Statistics and Machine Learning
|
||||
"https://arxiv.org/pdf/2305.00001.pdf",
|
||||
"https://arxiv.org/pdf/2305.00002.pdf",
|
||||
"https://arxiv.org/pdf/2305.00003.pdf",
|
||||
"https://arxiv.org/pdf/2305.00004.pdf",
|
||||
"https://arxiv.org/pdf/2305.00005.pdf",
|
||||
"https://arxiv.org/pdf/2305.00006.pdf",
|
||||
"https://arxiv.org/pdf/2305.00007.pdf",
|
||||
"https://arxiv.org/pdf/2305.00008.pdf",
|
||||
"https://arxiv.org/pdf/2305.00009.pdf",
|
||||
"https://arxiv.org/pdf/2305.00010.pdf",
|
||||
|
||||
# Quantum Computing
|
||||
"https://arxiv.org/pdf/2306.00001.pdf",
|
||||
"https://arxiv.org/pdf/2306.00002.pdf",
|
||||
"https://arxiv.org/pdf/2306.00003.pdf",
|
||||
"https://arxiv.org/pdf/2306.00004.pdf",
|
||||
"https://arxiv.org/pdf/2306.00005.pdf",
|
||||
"https://arxiv.org/pdf/2306.00006.pdf",
|
||||
"https://arxiv.org/pdf/2306.00007.pdf",
|
||||
"https://arxiv.org/pdf/2306.00008.pdf",
|
||||
"https://arxiv.org/pdf/2306.00009.pdf",
|
||||
"https://arxiv.org/pdf/2306.00010.pdf",
|
||||
|
||||
# Additional Government Documents
|
||||
"https://www.gao.gov/assets/730/728146.pdf",
|
||||
"https://www.gao.gov/assets/730/728147.pdf",
|
||||
"https://www.gao.gov/assets/730/728148.pdf",
|
||||
"https://www.gao.gov/assets/730/728149.pdf",
|
||||
"https://www.gao.gov/assets/730/728150.pdf",
|
||||
|
||||
# Technical Standards
|
||||
"https://www.iso.org/files/live/sites/isoorg/files/store/en/PUB100424.pdf",
|
||||
"https://www.iso.org/files/live/sites/isoorg/files/store/en/PUB100425.pdf",
|
||||
"https://www.iso.org/files/live/sites/isoorg/files/store/en/PUB100426.pdf",
|
||||
"https://www.iso.org/files/live/sites/isoorg/files/store/en/PUB100427.pdf",
|
||||
"https://www.iso.org/files/live/sites/isoorg/files/store/en/PUB100428.pdf",
|
||||
|
||||
# Historical Documents
|
||||
"https://www.archives.gov/files/founding-docs/constitution-transcript.pdf",
|
||||
"https://www.archives.gov/files/founding-docs/declaration-transcript.pdf",
|
||||
"https://www.archives.gov/files/founding-docs/bill-of-rights-transcript.pdf",
|
||||
"https://www.archives.gov/files/founding-docs/federalist-papers-transcript.pdf",
|
||||
"https://www.archives.gov/files/founding-docs/anti-federalist-papers-transcript.pdf",
|
||||
|
||||
# Educational Materials
|
||||
"https://ocw.mit.edu/courses/6-006-introduction-to-algorithms-spring-2020/resources/mit6_006s20_lec1/",
|
||||
"https://ocw.mit.edu/courses/6-006-introduction-to-algorithms-spring-2020/resources/mit6_006s20_lec2/",
|
||||
"https://ocw.mit.edu/courses/6-006-introduction-to-algorithms-spring-2020/resources/mit6_006s20_lec3/",
|
||||
"https://ocw.mit.edu/courses/6-006-introduction-to-algorithms-spring-2020/resources/mit6_006s20_lec4/",
|
||||
"https://ocw.mit.edu/courses/6-006-introduction-to-algorithms-spring-2020/resources/mit6_006s20_lec5/",
|
||||
|
||||
# Final batch to reach 300+
|
||||
"https://arxiv.org/pdf/2307.00001.pdf",
|
||||
"https://arxiv.org/pdf/2307.00002.pdf",
|
||||
"https://arxiv.org/pdf/2307.00003.pdf",
|
||||
"https://arxiv.org/pdf/2307.00004.pdf",
|
||||
"https://arxiv.org/pdf/2307.00005.pdf",
|
||||
"https://arxiv.org/pdf/2307.00006.pdf",
|
||||
"https://arxiv.org/pdf/2307.00007.pdf",
|
||||
"https://arxiv.org/pdf/2307.00008.pdf",
|
||||
"https://arxiv.org/pdf/2307.00009.pdf",
|
||||
"https://arxiv.org/pdf/2307.00010.pdf",
|
||||
"https://arxiv.org/pdf/2307.00011.pdf",
|
||||
"https://arxiv.org/pdf/2307.00012.pdf",
|
||||
"https://arxiv.org/pdf/2307.00013.pdf",
|
||||
"https://arxiv.org/pdf/2307.00014.pdf",
|
||||
"https://arxiv.org/pdf/2307.00015.pdf",
|
||||
"https://arxiv.org/pdf/2307.00016.pdf",
|
||||
"https://arxiv.org/pdf/2307.00017.pdf",
|
||||
"https://arxiv.org/pdf/2307.00018.pdf",
|
||||
"https://arxiv.org/pdf/2307.00019.pdf",
|
||||
"https://arxiv.org/pdf/2307.00020.pdf",
|
||||
"https://arxiv.org/pdf/2307.00021.pdf",
|
||||
"https://arxiv.org/pdf/2307.00022.pdf",
|
||||
"https://arxiv.org/pdf/2307.00023.pdf",
|
||||
"https://arxiv.org/pdf/2307.00024.pdf",
|
||||
"https://arxiv.org/pdf/2307.00025.pdf",
|
||||
"https://arxiv.org/pdf/2307.00026.pdf",
|
||||
"https://arxiv.org/pdf/2307.00027.pdf",
|
||||
"https://arxiv.org/pdf/2307.00028.pdf",
|
||||
"https://arxiv.org/pdf/2307.00029.pdf",
|
||||
"https://arxiv.org/pdf/2307.00030.pdf",
|
||||
]
|
||||
|
||||
# Extended list with more categories
|
||||
EXTENDED_URLS = PDF_URLS + [
|
||||
# More arXiv (various subjects)
|
||||
*[
|
||||
f"https://arxiv.org/pdf/{cat}/{num:07}.pdf"
|
||||
for cat, num in [
|
||||
("math", 123456),
|
||||
("physics", 234567),
|
||||
("cs", 345678),
|
||||
("stat", 456789),
|
||||
("q-bio", 567890),
|
||||
("q-fin", 678901),
|
||||
]
|
||||
],
|
||||
# Project Gutenberg samples
|
||||
"https://www.gutenberg.org/files/1342/1342-pdf.pdf",
|
||||
"https://www.gutenberg.org/files/84/84-pdf.pdf",
|
||||
"https://www.gutenberg.org/files/11/11-pdf.pdf",
|
||||
# Government economic reports
|
||||
"https://www.bea.gov/sites/default/files/2023-03/gdp4q22_3rd.pdf",
|
||||
"https://www.federalreserve.gov/econres/notes/feds-notes/2023/files/20230301.pdf",
|
||||
# Scientific datasets documentation
|
||||
"https://www.ncbi.nlm.nih.gov/pmc/articles/PMCPMC1234567/pdf/main.pdf",
|
||||
# Technical conference proceedings
|
||||
"https://www.usenix.org/system/files/conference/atc18/atc18-paper-zhang.pdf",
|
||||
"https://dl.acm.org/doi/pdf/10.1145/3579990.3580020",
|
||||
# Mathematics journals
|
||||
"https://www.ams.org/journals/bull/0000-0000/0000-0001.pdf",
|
||||
"https://link.springer.com/content/pdf/10.1007/s00222-023-01145-0.pdf",
|
||||
# Physics repositories
|
||||
"https://iopscience.iop.org/article/10.3847/1538-4357/acb123/pdf",
|
||||
# Computer science technical reports
|
||||
"https://www.microsoft.com/en-us/research/uploads/prod/2023/03/paper.pdf",
|
||||
"https://research.google/pubs/pub12345/",
|
||||
# Engineering standards
|
||||
"https://www.iso.org/standard/12345.html/pdf",
|
||||
"https://www.ansi.org/standards/ansiz123/pdf",
|
||||
# Medical research
|
||||
"https://www.nejm.org/doi/pdf/10.1056/NEJMoa2208343",
|
||||
"https://jamanetwork.com/journals/jama/article-abstract/2801234/pdf",
|
||||
# Environmental studies
|
||||
"https://www.ipcc.ch/report/ar6/wg1/downloads/report/IPCC_AR6_WGI_FullReport.pdf",
|
||||
# Economic research
|
||||
"https://www.nber.org/papers/w12345.pdf",
|
||||
"https://www.imf.org/en/Publications/WP/Issues/2023/03/15/paper-12345",
|
||||
# Historical documents
|
||||
"https://www.archives.gov/founding-docs/constitution-transcript.pdf",
|
||||
"https://www.loc.gov/item/2021667891/pdf",
|
||||
# Educational materials
|
||||
"https://openstax.org/resources/9d88d84e2e3343f5a7c2e6a9d9b8c7e3.pdf",
|
||||
# Technical manuals
|
||||
"https://www.python.org/doc/essays/blt.pdf",
|
||||
"https://www.r-project.org/conferences/useR-2023/abstracts/abstract_123.pdf",
|
||||
|
||||
|
||||
"https://arxiv.org/pdf/1706.03762.pdf", # Attention Is All You Need
|
||||
"https://arxiv.org/pdf/1502.03167.pdf", # Batch Normalization
|
||||
"https://arxiv.org/pdf/1409.1556.pdf", # VGG Network
|
||||
"https://arxiv.org/pdf/1512.03385.pdf", # ResNet
|
||||
"https://arxiv.org/pdf/1312.6114.pdf", # Auto-Encoding Variational Bayes
|
||||
"https://arxiv.org/pdf/1712.09913.pdf", # Fitting Linear Mixed-Effects Models Using lme4
|
||||
"https://arxiv.org/pdf/1504.08083.pdf", # Faster R-CNN
|
||||
"https://arxiv.org/pdf/1409.4842.pdf", # Going Deeper with Convolutions
|
||||
"https://arxiv.org/pdf/1608.06993.pdf", # DenseNet
|
||||
"https://arxiv.org/pdf/1506.02640.pdf", # YOLO (You Only Look Once)
|
||||
"https://arxiv.org/pdf/1502.03167.pdf", # Batch Normalization
|
||||
"https://arxiv.org/pdf/1411.4038.pdf", # Fully Convolutional Networks
|
||||
"https://arxiv.org/pdf/1512.02325.pdf", # SSD: Single Shot MultiBox Detector
|
||||
"https://arxiv.org/pdf/2010.11929.pdf", # An Image is Worth 16x16 Words (ViT)
|
||||
"https://arxiv.org/pdf/1312.5602.pdf", # Deep Reinforcement Learning
|
||||
"https://arxiv.org/pdf/1505.04597.pdf", # U-Net
|
||||
"https://arxiv.org/pdf/1603.05027.pdf", # Identity Mappings in Deep Residual Networks
|
||||
"https://arxiv.org/pdf/1706.03762.pdf", # Attention is All You Need
|
||||
"https://pmc.ncbi.nlm.nih.gov/articles/PMC1234567/pdf/main.pdf", # Sample biomedical paper
|
||||
# U.S. House Committee on Oversight Reports
|
||||
"https://oversight.house.gov/report/the-biden-autopen-presidency-decline-delusion-and-deception-in-the-white-house.pdf",
|
||||
"https://oversight.house.gov/report/the-green-new-deal-scam-the-greenhouse-gas-reduction-fund.pdf",
|
||||
"https://oversight.house.gov/report/after-action-review-of-the-covid-19-pandemic-the-lessons-learned-and-a-path-forward.pdf",
|
||||
"https://oversight.house.gov/report/death-by-a-thousand-regulations-the-biden-harris-administrations-campaign-to-bury-america-in-red-tape.pdf",
|
||||
|
||||
# National Archives OGIS Annual Reports
|
||||
"https://www.archives.gov/files/ogis/reports/fy2024-annual-report.pdf",
|
||||
"https://www.archives.gov/files/ogis/reports/fy2023-annual-report.pdf",
|
||||
"https://www.archives.gov/files/ogis/reports/fy2022-annual-report.pdf",
|
||||
"https://www.archives.gov/files/ogis/reports/fy2021-annual-report.pdf",
|
||||
"https://www.archives.gov/files/ogis/reports/fy2020-annual-report.pdf",
|
||||
"https://www.archives.gov/files/ogis/reports/fy2019-annual-report.pdf",
|
||||
# Project Gutenberg Top Downloads
|
||||
"https://www.gutenberg.org/files/84/84-pdf.pdf", # Frankenstein
|
||||
"https://www.gutenberg.org/files/1342/1342-pdf.pdf", # Pride and Prejudice
|
||||
"https://www.gutenberg.org/files/11/11-pdf.pdf", # Alice's Adventures in Wonderland
|
||||
"https://www.gutenberg.org/files/1661/1661-pdf.pdf", # The Adventures of Sherlock Holmes
|
||||
"https://www.gutenberg.org/files/98/98-pdf.pdf", # A Tale of Two Cities
|
||||
"https://www.gutenberg.org/files/2701/2701-pdf.pdf", # Moby Dick
|
||||
"https://www.gutenberg.org/files/2542/2542-pdf.pdf", # A Doll's House
|
||||
"https://www.gutenberg.org/files/174/174-pdf.pdf", # The Picture of Dorian Gray
|
||||
"https://www.gutenberg.org/files/1952/1952-pdf.pdf", # The Yellow Wallpaper
|
||||
|
||||
# Open Library & ManyBooks
|
||||
# (Note: You may need to find the direct PDF link from the book's page)
|
||||
"https://openlibrary.org/books/OL1234567M/Book_Title.pdf",
|
||||
"https://manybooks.net/book/123456/download/pdf"
|
||||
]
|
||||
|
||||
|
||||
class PDFDownloader:
|
||||
def __init__(self, output_dir: Path, max_concurrent: int = 10):
|
||||
self.output_dir = output_dir
|
||||
self.max_concurrent = max_concurrent
|
||||
self.output_dir.mkdir(parents=True, exist_ok=True)
|
||||
self.downloaded = 0
|
||||
self.failed = 0
|
||||
self.skipped = 0
|
||||
|
||||
async def download_pdf(self, session: aiohttp.ClientSession, url: str) -> Optional[Path]:
|
||||
try:
|
||||
filename = self._url_to_filename(url)
|
||||
filepath = self.output_dir / filename
|
||||
if filepath.exists():
|
||||
self.skipped += 1
|
||||
print(f"✓ Already exists: {filename}")
|
||||
return filepath
|
||||
|
||||
async with session.get(url, timeout=aiohttp.ClientTimeout(total=60)) as response:
|
||||
if response.status != 200:
|
||||
print(f"✗ HTTP {response.status}: {url}")
|
||||
self.failed += 1
|
||||
return None
|
||||
|
||||
content = await response.read()
|
||||
if not content.startswith(b"%PDF"):
|
||||
print(f"✗ Not a PDF: {url}")
|
||||
self.failed += 1
|
||||
return None
|
||||
|
||||
async with aiofiles.open(filepath, "wb") as handle:
|
||||
await handle.write(content)
|
||||
self.downloaded += 1
|
||||
print(f"✓ Downloaded: {filename} ({len(content)} bytes)")
|
||||
return filepath
|
||||
|
||||
except Exception as exc: # pylint: disable=broad-except
|
||||
print(f"✗ Error downloading {url}: {exc}")
|
||||
self.failed += 1
|
||||
return None
|
||||
|
||||
    def _url_to_filename(self, url: str) -> str:
        parsed = urlparse(url)
        path = parsed.path.strip("/") or "document"
        stem = re.sub(r"[^a-zA-Z0-9.-]", "_", path)
        if stem.endswith(".pdf"):
            stem = stem[: -len(".pdf")]
        domain = parsed.netloc.replace("www.", "").split(".")[0] or "site"
        # Hash the full URL so distinct URLs sharing the same path stay unique
        digest = hashlib.sha1(url.encode("utf-8")).hexdigest()[:8]
        # Keep the .pdf extension last so the summary's *.pdf glob still counts these files
        return f"{domain}_{stem}_{digest}.pdf"
|
||||
|
||||
async def download_all(self, urls: List[str]) -> None:
|
||||
print(f"Starting download of {len(urls)} PDFs to {self.output_dir}")
|
||||
connector = aiohttp.TCPConnector(limit=self.max_concurrent)
|
||||
async with aiohttp.ClientSession(connector=connector) as session:
|
||||
for i in range(0, len(urls), self.max_concurrent):
|
||||
batch = urls[i : i + self.max_concurrent]
|
||||
await asyncio.gather(*(self.download_pdf(session, url) for url in batch))
|
||||
if i + self.max_concurrent < len(urls):
|
||||
await asyncio.sleep(1)
|
||||
self._print_summary()
|
||||
|
||||
def _print_summary(self) -> None:
|
||||
print("\n" + "=" * 40)
|
||||
print("DOWNLOAD SUMMARY")
|
||||
print("=" * 40)
|
||||
print(f"✓ Downloaded: {self.downloaded}")
|
||||
print(f"○ Skipped: {self.skipped}")
|
||||
print(f"✗ Failed: {self.failed}")
|
||||
total = len(list(self.output_dir.glob("*.pdf")))
|
||||
print(f"Total files in directory: {total}")
|
||||
print(f"Location: {self.output_dir.resolve()}")
|
||||
|
||||
|
||||
def main() -> None:
|
||||
parser = argparse.ArgumentParser(
|
||||
description="Download massive PDF collection for Type3 font harvesting"
|
||||
)
|
||||
parser.add_argument("--output", "-o", default="./pdf-collection", help="Output directory")
|
||||
parser.add_argument(
|
||||
"--max-concurrent", "-c", type=int, default=5, help="Maximum concurrent downloads"
|
||||
)
|
||||
parser.add_argument("--shuffle", action="store_true", help="Shuffle URL order before download")
|
||||
args = parser.parse_args()
|
||||
|
||||
urls = EXTENDED_URLS.copy()
|
||||
if args.shuffle:
|
||||
random.shuffle(urls)
|
||||
|
||||
downloader = PDFDownloader(Path(args.output), args.max_concurrent)
|
||||
asyncio.run(downloader.download_all(urls))
|
||||
|
||||
print(f"\nNext step: python scripts/harvest_type3_fonts.py --input {args.output}")
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
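A minimal sketch (not part of the commit) of driving the PDFDownloader class above without its CLI; the output directory and concurrency are arbitrary choices, and the two URLs are taken from the list above.

import asyncio
from pathlib import Path

sample_urls = [
    "https://arxiv.org/pdf/1706.03762.pdf",  # Attention is All You Need
    "https://arxiv.org/pdf/1505.04597.pdf",  # U-Net
]
downloader = PDFDownloader(Path("./pdf-collection"), max_concurrent=2)
asyncio.run(downloader.download_all(sample_urls))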
195
scripts/download_pdf_samples.py
Normal file
@ -0,0 +1,195 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Download large batches of PDF URLs into a local directory so they can be fed to
|
||||
scripts/harvest_type3_fonts.py (or any other processing pipeline).
|
||||
|
||||
Usage examples:
|
||||
|
||||
# Download every URL listed in pdf_urls.txt into tmp/type3-pdfs
|
||||
python scripts/download_pdf_samples.py \
|
||||
--urls-file pdf_urls.txt \
|
||||
--output-dir tmp/type3-pdfs
|
||||
|
||||
# Mix inline URLs with a file and use 16 concurrent downloads
|
||||
python scripts/download_pdf_samples.py \
|
||||
--urls https://example.com/a.pdf https://example.com/b.pdf \
|
||||
--urls-file more_urls.txt \
|
||||
--output-dir tmp/type3-pdfs \
|
||||
--workers 16
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
import concurrent.futures
|
||||
import hashlib
|
||||
import os
|
||||
import re
|
||||
import sys
|
||||
from pathlib import Path
|
||||
from typing import Iterable, List, Optional, Set, Tuple
|
||||
from urllib.parse import unquote, urlparse
|
||||
|
||||
import requests
|
||||
|
||||
|
||||
def parse_args() -> argparse.Namespace:
|
||||
parser = argparse.ArgumentParser(description="Bulk download PDF URLs.")
|
||||
parser.add_argument(
|
||||
"--urls",
|
||||
nargs="*",
|
||||
default=[],
|
||||
help="Inline list of PDF URLs (can be combined with --urls-file).",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--urls-file",
|
||||
action="append",
|
||||
help="Text file containing one URL per line (can be repeated).",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--output-dir",
|
||||
default="tmp/harvest-pdfs",
|
||||
help="Directory to store downloaded PDFs (default: %(default)s).",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--workers",
|
||||
type=int,
|
||||
default=min(8, (os.cpu_count() or 4) * 2),
|
||||
help="Number of concurrent downloads (default: %(default)s).",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--timeout",
|
||||
type=int,
|
||||
default=120,
|
||||
help="Per-request timeout in seconds (default: %(default)s).",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--overwrite",
|
||||
action="store_true",
|
||||
help="Overwrite existing files (default: skip already downloaded PDFs).",
|
||||
)
|
||||
return parser.parse_args()
|
||||
|
||||
|
||||
def load_urls(args: argparse.Namespace) -> List[str]:
|
||||
urls: List[str] = []
|
||||
seen: Set[str] = set()
|
||||
|
||||
def add(url: str) -> None:
|
||||
clean = url.strip()
|
||||
if not clean or clean.startswith("#"):
|
||||
return
|
||||
if clean not in seen:
|
||||
seen.add(clean)
|
||||
urls.append(clean)
|
||||
|
||||
for url in args.urls:
|
||||
add(url)
|
||||
if args.urls_file:
|
||||
for file in args.urls_file:
|
||||
path = Path(file)
|
||||
if not path.exists():
|
||||
print(f"[WARN] URL file not found: {file}", file=sys.stderr)
|
||||
continue
|
||||
with path.open("r", encoding="utf-8") as handle:
|
||||
for line in handle:
|
||||
add(line)
|
||||
if not urls:
|
||||
raise SystemExit("No URLs supplied. Use --urls and/or --urls-file.")
|
||||
return urls
|
||||
|
||||
|
||||
def sanitize_filename(name: str) -> str:
|
||||
return re.sub(r"[^A-Za-z0-9._-]+", "_", name).strip("_") or "download"
|
||||
|
||||
|
||||
def build_filename(url: str, output_dir: Path) -> Path:
|
||||
parsed = urlparse(url)
|
||||
candidate = Path(unquote(parsed.path)).name
|
||||
if not candidate:
|
||||
candidate = "download.pdf"
|
||||
candidate = sanitize_filename(candidate)
|
||||
if not candidate.lower().endswith(".pdf"):
|
||||
candidate += ".pdf"
|
||||
target = output_dir / candidate
|
||||
if not target.exists():
|
||||
return target
|
||||
stem = target.stem
|
||||
suffix = target.suffix
|
||||
digest = hashlib.sha1(url.encode("utf-8")).hexdigest()[:8]
|
||||
return output_dir / f"{stem}-{digest}{suffix}"
|
||||
|
||||
|
||||
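To make the collision handling above concrete, here is a small sketch with invented example.com URLs: the first call keeps the plain name, while the second gets a short SHA-1 suffix because report.pdf already exists.

out = Path("tmp/harvest-pdfs")
out.mkdir(parents=True, exist_ok=True)
first = build_filename("https://example.com/a/report.pdf", out)
first.touch()
second = build_filename("https://example.com/b/report.pdf", out)
# first  -> tmp/harvest-pdfs/report.pdf
# second -> tmp/harvest-pdfs/report-<8 hex chars>.pdf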
def download_pdf(
|
||||
url: str,
|
||||
output_dir: Path,
|
||||
timeout: int,
|
||||
overwrite: bool,
|
||||
) -> Tuple[str, Optional[Path], Optional[str]]:
|
||||
try:
|
||||
dest = build_filename(url, output_dir)
|
||||
if dest.exists() and not overwrite:
|
||||
return url, dest, "exists"
|
||||
|
||||
response = requests.get(url, stream=True, timeout=timeout)
|
||||
response.raise_for_status()
|
||||
|
||||
content_type = response.headers.get("Content-Type", "").lower()
|
||||
if "pdf" not in content_type and not url.lower().endswith(".pdf"):
|
||||
# Peek into the first bytes to be safe
|
||||
peek = response.raw.read(5, decode_content=True)
|
||||
if not peek.startswith(b"%PDF"):
|
||||
return url, None, f"Skipping non-PDF content-type ({content_type or 'unknown'})"
|
||||
            content = peek + response.content  # .content resumes after the peeked bytes
|
||||
else:
|
||||
content = response.content
|
||||
|
||||
output_dir.mkdir(parents=True, exist_ok=True)
|
||||
dest.write_bytes(content)
|
||||
return url, dest, None
|
||||
except Exception as exc: # pylint: disable=broad-except
|
||||
return url, None, str(exc)
|
||||
|
||||
|
||||
def main() -> None:
|
||||
args = parse_args()
|
||||
urls = load_urls(args)
|
||||
output_dir = Path(args.output_dir).resolve()
|
||||
output_dir.mkdir(parents=True, exist_ok=True)
|
||||
|
||||
print(f"Downloading {len(urls)} PDFs to {output_dir} using {args.workers} workers...")
|
||||
|
||||
successes = 0
|
||||
skipped = 0
|
||||
failures: List[Tuple[str, str]] = []
|
||||
|
||||
with concurrent.futures.ThreadPoolExecutor(max_workers=args.workers) as executor:
|
||||
future_to_url = {
|
||||
executor.submit(
|
||||
download_pdf, url, output_dir, args.timeout, args.overwrite
|
||||
): url
|
||||
for url in urls
|
||||
}
|
||||
for future in concurrent.futures.as_completed(future_to_url):
|
||||
url = future_to_url[future]
|
||||
result_url, path, error = future.result()
|
||||
if error == "exists":
|
||||
skipped += 1
|
||||
print(f"[SKIP] {url} (already downloaded)")
|
||||
elif error:
|
||||
failures.append((result_url, error))
|
||||
print(f"[FAIL] {url} -> {error}", file=sys.stderr)
|
||||
else:
|
||||
successes += 1
|
||||
print(f"[OK] {url} -> {path}")
|
||||
|
||||
print()
|
||||
print(f"Completed. Success: {successes}, Skipped: {skipped}, Failures: {len(failures)}")
|
||||
if failures:
|
||||
print("Failures:")
|
||||
for url, error in failures:
|
||||
print(f" {url} -> {error}")
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
245
scripts/harvest_type3_fonts.py
Normal file
@ -0,0 +1,245 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Bulk-harvest Type3 font signatures from a folder full of PDFs.
|
||||
|
||||
The script iterates over every PDF (recursively) inside the supplied --input
|
||||
paths, invokes the existing Gradle Type3SignatureTool for each document, and
|
||||
collects the unique Type3 font signatures that were discovered. Signature JSON
|
||||
files are stored under --signatures-dir; previously captured files are reused
|
||||
so you can keep dropping new PDFs into the input directory and re-run the
|
||||
harvester at any time.
|
||||
|
||||
Example:
|
||||
python scripts/harvest_type3_fonts.py \
|
||||
--input incoming-type3-pdfs \
|
||||
--signatures docs/type3/signatures \
|
||||
--report docs/type3/harvest_report.json
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
import datetime as dt
|
||||
import hashlib
|
||||
import json
|
||||
import os
|
||||
import re
|
||||
import shlex
|
||||
import subprocess
|
||||
import sys
|
||||
from pathlib import Path
|
||||
from typing import Dict, Iterable, List, Optional, Sequence, Tuple
|
||||
|
||||
REPO_ROOT = Path(__file__).resolve().parents[1]
|
||||
|
||||
|
||||
def parse_args() -> argparse.Namespace:
|
||||
parser = argparse.ArgumentParser(description="Bulk collect Type3 font signatures from PDFs.")
|
||||
parser.add_argument(
|
||||
"--input",
|
||||
nargs="+",
|
||||
required=True,
|
||||
help="One or more PDF files or directories containing PDFs (searched recursively).",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--signatures-dir",
|
||||
default="docs/type3/signatures",
|
||||
help="Destination directory for per-PDF signature JSON files.",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--report",
|
||||
default="docs/type3/harvest_report.json",
|
||||
help="Summary JSON that lists every unique signature discovered so far.",
|
||||
)
|
||||
default_gradle = "gradlew.bat" if os.name == "nt" else "./gradlew"
|
||||
parser.add_argument(
|
||||
"--gradle-cmd",
|
||||
default=default_gradle,
|
||||
help=f"Path to the Gradle wrapper used to invoke the Type3SignatureTool (default: {default_gradle}).",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--force",
|
||||
action="store_true",
|
||||
help="Re-run the signature tool even if the output JSON already exists.",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--pretty",
|
||||
action="store_true",
|
||||
help="Ask the Java tool to emit pretty-printed JSON (handy for diffs).",
|
||||
)
|
||||
return parser.parse_args()
|
||||
|
||||
|
||||
def discover_pdfs(paths: Sequence[str]) -> List[Path]:
|
||||
pdfs: List[Path] = []
|
||||
for raw in paths:
|
||||
path = Path(raw).resolve()
|
||||
if path.is_file():
|
||||
if path.suffix.lower() == ".pdf":
|
||||
pdfs.append(path)
|
||||
elif path.is_dir():
|
||||
pdfs.extend(sorted(path.rglob("*.pdf")))
|
||||
unique = sorted(dict.fromkeys(pdfs))
|
||||
if not unique:
|
||||
raise SystemExit("No PDF files found under the supplied --input paths.")
|
||||
return unique
|
||||
|
||||
|
||||
def sanitize_part(part: str) -> str:
|
||||
cleaned = re.sub(r"[^A-Za-z0-9._-]+", "_", part)
|
||||
return cleaned or "_"
|
||||
|
||||
|
||||
def derive_signature_path(pdf: Path, signatures_dir: Path) -> Path:
|
||||
"""
|
||||
Mirror the PDF path under the signatures directory.
|
||||
If the PDF lives outside the repo, fall back to a hashed filename.
|
||||
"""
|
||||
try:
|
||||
rel = pdf.relative_to(REPO_ROOT)
|
||||
except ValueError:
|
||||
digest = hashlib.sha1(str(pdf).encode("utf-8")).hexdigest()[:10]
|
||||
rel = Path("__external__") / f"{sanitize_part(pdf.stem)}-{digest}.pdf"
|
||||
|
||||
sanitized_parts = [sanitize_part(part) for part in rel.parts]
|
||||
signature_rel = Path(*sanitized_parts).with_suffix(".json")
|
||||
return signatures_dir / signature_rel
|
||||
|
||||
|
||||
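A sketch of the mirroring behaviour with a hypothetical PDF path: a document inside the repository keeps its directory structure under the signatures folder, with unsafe characters replaced by underscores.

sig = derive_signature_path(
    REPO_ROOT / "incoming-type3-pdfs" / "report v1.pdf",
    REPO_ROOT / "docs" / "type3" / "signatures",
)
# -> <repo>/docs/type3/signatures/incoming-type3-pdfs/report_v1.json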
def load_signature_file(path: Path) -> dict:
|
||||
with path.open("r", encoding="utf-8") as handle:
|
||||
return json.load(handle)
|
||||
|
||||
|
||||
def collect_known_signatures(signatures_dir: Path) -> Dict[str, dict]:
|
||||
known: Dict[str, dict] = {}
|
||||
if not signatures_dir.exists():
|
||||
return known
|
||||
for json_file in signatures_dir.rglob("*.json"):
|
||||
try:
|
||||
payload = load_signature_file(json_file)
|
||||
except Exception:
|
||||
continue
|
||||
pdf = payload.get("pdf")
|
||||
for font in payload.get("fonts", []):
|
||||
signature = font.get("signature")
|
||||
if not signature or signature in known:
|
||||
continue
|
||||
known[signature] = {
|
||||
"signature": signature,
|
||||
"alias": font.get("alias"),
|
||||
"baseName": font.get("baseName"),
|
||||
"glyphCount": font.get("glyphCount"),
|
||||
"glyphCoverage": font.get("glyphCoverage"),
|
||||
"samplePdf": pdf,
|
||||
"signatureJson": str(json_file),
|
||||
}
|
||||
return known
|
||||
|
||||
|
||||
def run_signature_tool(
|
||||
gradle_cmd: str, pdf: Path, output_path: Path, pretty: bool, cwd: Path
|
||||
) -> None:
|
||||
output_path.parent.mkdir(parents=True, exist_ok=True)
|
||||
args = f"--pdf {shlex.quote(str(pdf))} --output {shlex.quote(str(output_path))}"
|
||||
if pretty:
|
||||
args += " --pretty"
|
||||
# Use shell invocation so the quoted --args string is parsed correctly by Gradle.
|
||||
cmd = f"{gradle_cmd} -q :proprietary:type3SignatureTool --args=\"{args}\""
|
||||
completed = subprocess.run(
|
||||
cmd,
|
||||
shell=True,
|
||||
cwd=cwd,
|
||||
stdout=subprocess.PIPE,
|
||||
stderr=subprocess.PIPE,
|
||||
text=True,
|
||||
)
|
||||
if completed.returncode != 0:
|
||||
raise RuntimeError(
|
||||
f"Gradle Type3SignatureTool failed for {pdf}:\n{completed.stderr.strip()}"
|
||||
)
|
||||
|
||||
|
||||
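For orientation, the command assembled above looks roughly like the following on Linux; the PDF and output paths are placeholders, not files from the repository.

# ./gradlew -q :proprietary:type3SignatureTool \
#     --args="--pdf /path/to/sample.pdf --output docs/type3/signatures/sample.json --pretty"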
def extract_fonts_from_payload(payload: dict) -> List[dict]:
|
||||
pdf = payload.get("pdf")
|
||||
fonts = []
|
||||
for font in payload.get("fonts", []):
|
||||
signature = font.get("signature")
|
||||
if not signature:
|
||||
continue
|
||||
fonts.append(
|
||||
{
|
||||
"signature": signature,
|
||||
"alias": font.get("alias"),
|
||||
"baseName": font.get("baseName"),
|
||||
"glyphCount": font.get("glyphCount"),
|
||||
"glyphCoverage": font.get("glyphCoverage"),
|
||||
"samplePdf": pdf,
|
||||
}
|
||||
)
|
||||
return fonts
|
||||
|
||||
|
||||
def write_report(report_path: Path, fonts_by_signature: Dict[str, dict]) -> None:
|
||||
ordered = sorted(fonts_by_signature.values(), key=lambda entry: entry["signature"])
|
||||
report = {
|
||||
"generatedAt": dt.datetime.utcnow().isoformat(timespec="seconds") + "Z",
|
||||
"totalSignatures": len(ordered),
|
||||
"fonts": ordered,
|
||||
}
|
||||
report_path.parent.mkdir(parents=True, exist_ok=True)
|
||||
with report_path.open("w", encoding="utf-8") as handle:
|
||||
json.dump(report, handle, indent=2)
|
||||
|
||||
|
||||
def main() -> None:
|
||||
args = parse_args()
|
||||
signatures_dir = Path(args.signatures_dir).resolve()
|
||||
report_path = Path(args.report).resolve()
|
||||
pdfs = discover_pdfs(args.input)
|
||||
|
||||
known = collect_known_signatures(signatures_dir)
|
||||
newly_added: List[Tuple[str, str]] = []
|
||||
|
||||
for pdf in pdfs:
|
||||
signature_path = derive_signature_path(pdf, signatures_dir)
|
||||
if signature_path.exists() and not args.force:
|
||||
try:
|
||||
payload = load_signature_file(signature_path)
|
||||
except Exception as exc:
|
||||
print(f"[WARN] Failed to parse cached signature {signature_path}: {exc}")
|
||||
payload = None
|
||||
else:
|
||||
try:
|
||||
run_signature_tool(args.gradle_cmd, pdf, signature_path, args.pretty, REPO_ROOT)
|
||||
except Exception as exc:
|
||||
print(f"[ERROR] Harvest failed for {pdf}: {exc}", file=sys.stderr)
|
||||
continue
|
||||
payload = load_signature_file(signature_path)
|
||||
|
||||
if not payload:
|
||||
continue
|
||||
|
||||
for font in extract_fonts_from_payload(payload):
|
||||
signature = font["signature"]
|
||||
if signature in known:
|
||||
continue
|
||||
font["signatureJson"] = str(signature_path)
|
||||
known[signature] = font
|
||||
newly_added.append((signature, pdf.name))
|
||||
|
||||
write_report(report_path, known)
|
||||
|
||||
print(
|
||||
f"Processed {len(pdfs)} PDFs. "
|
||||
f"Captured {len(newly_added)} new Type3 font signatures "
|
||||
f"(total unique signatures: {len(known)})."
|
||||
)
|
||||
if newly_added:
|
||||
print("New signatures:")
|
||||
for signature, sample in newly_added:
|
||||
print(f" {signature} ({sample})")
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
75
scripts/index_type3_catalogue.py
Normal file
@ -0,0 +1,75 @@
|
||||
#!/usr/bin/env python3
|
||||
"""Build a Type3 font catalogue from sample PDFs."""
|
||||
import argparse
|
||||
import json
|
||||
import subprocess
|
||||
from pathlib import Path
|
||||
|
||||
|
||||
def run(cmd, cwd=None):
|
||||
result = subprocess.run(cmd, cwd=cwd, capture_output=True, text=True)
|
||||
if result.returncode != 0:
|
||||
raise RuntimeError(f"Command {' '.join(cmd)} failed: {result.stderr}")
|
||||
return result.stdout
|
||||
|
||||
|
||||
def parse_pdffonts(output):
|
||||
lines = output.splitlines()
|
||||
entries = []
|
||||
for line in lines[2:]:
|
||||
if not line.strip():
|
||||
continue
|
||||
parts = line.split()
|
||||
if "Type" not in parts:
|
||||
continue
|
||||
idx = parts.index("Type")
|
||||
type_value = parts[idx + 1] if idx + 1 < len(parts) else ""
|
||||
if not type_value.startswith("3"):
|
||||
continue
|
||||
font_name = parts[0]
|
||||
        # pdffonts rows end with "object ID"; parts[-2] is therefore the font's PDF
        # object number, which the catalogue records under the "encoding" key.
        encoding = parts[-2] if len(parts) >= 2 else ""
|
||||
entries.append((font_name, encoding))
|
||||
return entries
|
||||
|
||||
|
||||
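A hypothetical pdffonts listing, included to make the parsing above concrete: only the Type 3 row is kept, and the value recorded as "encoding" is the trailing object number.

sample = (
    "name                type    encoding  emb sub uni object ID\n"
    "------------------- ------- --------- --- --- --- ---------\n"
    "BAAAAA+DejaVuSans   Type 3  Custom    yes no  no    1867  0\n"
    "Helvetica           Type 1  WinAnsi   no  no  no      12  0\n"
)
print(parse_pdffonts(sample))  # -> [('BAAAAA+DejaVuSans', '1867')]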
def main():
|
||||
parser = argparse.ArgumentParser(description="Index Type3 fonts from sample PDFs")
|
||||
parser.add_argument(
|
||||
"--samples",
|
||||
default="app/core/src/main/resources/type3/samples",
|
||||
help="Directory containing sample PDFs",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--output",
|
||||
default="app/core/src/main/resources/type3/catalogue.json",
|
||||
help="Output JSON file",
|
||||
)
|
||||
args = parser.parse_args()
|
||||
|
||||
samples_dir = Path(args.samples)
|
||||
out_path = Path(args.output)
|
||||
out_path.parent.mkdir(parents=True, exist_ok=True)
|
||||
|
||||
catalogue = []
|
||||
for pdf in sorted(samples_dir.glob("*.pdf")):
|
||||
try:
|
||||
output = run(["pdffonts", str(pdf)])
|
||||
except Exception as exc:
|
||||
print(f"Skipping {pdf.name}: {exc}")
|
||||
continue
|
||||
for font_name, encoding in parse_pdffonts(output):
|
||||
catalogue.append(
|
||||
{
|
||||
"source": pdf.name,
|
||||
"fontName": font_name,
|
||||
"encoding": encoding,
|
||||
}
|
||||
)
|
||||
|
||||
with out_path.open("w", encoding="utf-8") as handle:
|
||||
json.dump(catalogue, handle, indent=2)
|
||||
print(f"Wrote {len(catalogue)} entries to {out_path}")
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
97
scripts/summarize_type3_signatures.py
Normal file
@ -0,0 +1,97 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Summarize captured Type3 signature dumps as a Markdown inventory.
|
||||
|
||||
Usage:
|
||||
scripts/summarize_type3_signatures.py \
|
||||
--input docs/type3/signatures \
|
||||
--output docs/type3/signature_inventory.md
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
import json
|
||||
from collections import defaultdict
|
||||
from pathlib import Path
|
||||
from typing import Dict, List
|
||||
|
||||
|
||||
def parse_args() -> argparse.Namespace:
|
||||
parser = argparse.ArgumentParser(description="Summarize Type3 signature JSON dumps.")
|
||||
parser.add_argument(
|
||||
"--input",
|
||||
default="docs/type3/signatures",
|
||||
help="Directory containing signature JSON files (default: %(default)s)",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--output",
|
||||
default="docs/type3/signature_inventory.md",
|
||||
help="Markdown file to write (default: %(default)s)",
|
||||
)
|
||||
return parser.parse_args()
|
||||
|
||||
|
||||
def load_signatures(directory: Path) -> Dict[str, List[dict]]:
|
||||
inventory: Dict[str, List[dict]] = defaultdict(list)
|
||||
for path in sorted(directory.glob("*.json")):
|
||||
with path.open("r", encoding="utf-8") as handle:
|
||||
payload = json.load(handle)
|
||||
source_pdf = payload.get("pdf") or path.name
|
||||
for font in payload.get("fonts", []):
|
||||
alias = (font.get("alias") or font.get("baseName") or "unknown").lower()
|
||||
entry = {
|
||||
"source": source_pdf,
|
||||
"file": path.name,
|
||||
"alias": alias,
|
||||
"baseName": font.get("baseName"),
|
||||
"signature": font.get("signature"),
|
||||
"glyphCount": font.get("glyphCount"),
|
||||
"glyphCoverage": font.get("glyphCoverage"),
|
||||
}
|
||||
inventory[alias].append(entry)
|
||||
return inventory
|
||||
|
||||
|
||||
def write_markdown(inventory: Dict[str, List[dict]], output: Path, input_dir: Path) -> None:
|
||||
lines: List[str] = []
|
||||
lines.append("# Type3 Signature Inventory")
|
||||
lines.append("")
|
||||
lines.append(
|
||||
f"_Generated from `{input_dir}`. "
|
||||
"Run `scripts/summarize_type3_signatures.py` after capturing new samples._"
|
||||
)
|
||||
lines.append("")
|
||||
|
||||
for alias in sorted(inventory.keys()):
|
||||
entries = inventory[alias]
|
||||
lines.append(f"## Alias: `{alias}`")
|
||||
lines.append("")
|
||||
lines.append("| Signature | Samples | Glyph Count | Coverage (first 10) |")
|
||||
lines.append("| --- | --- | --- | --- |")
|
||||
for entry in entries:
|
||||
signature = entry.get("signature") or "—"
|
||||
sample = Path(entry["source"]).name
|
||||
glyph_count = entry.get("glyphCount") if entry.get("glyphCount") is not None else "—"
|
||||
coverage = entry.get("glyphCoverage") or []
|
||||
preview = ", ".join(str(code) for code in coverage[:10])
|
||||
lines.append(f"| `{signature}` | `{sample}` | {glyph_count} | {preview} |")
|
||||
lines.append("")
|
||||
|
||||
output.parent.mkdir(parents=True, exist_ok=True)
|
||||
output.write_text("\n".join(lines), encoding="utf-8")
|
||||
|
||||
|
||||
def main() -> None:
|
||||
args = parse_args()
|
||||
input_dir = Path(args.input)
|
||||
if not input_dir.exists():
|
||||
raise SystemExit(f"Input directory not found: {input_dir}")
|
||||
inventory = load_signatures(input_dir)
|
||||
output_path = Path(args.output)
|
||||
write_markdown(inventory, output_path, input_dir)
|
||||
print(f"Wrote inventory for {len(inventory)} aliases to {output_path}")
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
481
scripts/type3_to_cff.py
Normal file
@ -0,0 +1,481 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Convert Stirling PDF Type3 glyph JSON into synthesised fonts using fontTools.
|
||||
|
||||
The input JSON is expected to contain:
|
||||
- fontId, pageNumber (optional metadata)
|
||||
- fontMatrix: 3x3 matrix describing the Type3 glyph transform
|
||||
- glyphs: array of glyph records with keys:
|
||||
name, code, advanceWidth, bbox, unicode, outline (list of commands)
|
||||
|
||||
The script produces an OpenType CFF font and, when requested, a companion
|
||||
TrueType font for web-preview usage. Only the fontTools package is required,
|
||||
avoiding heavyweight build dependencies such as fontmake/ufoLib2.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
import json
|
||||
import math
|
||||
import sys
|
||||
from dataclasses import dataclass
|
||||
from pathlib import Path
|
||||
from typing import Dict, Iterable, List, Optional, Sequence, Tuple
|
||||
|
||||
from fontTools.fontBuilder import FontBuilder
|
||||
from fontTools.misc.fixedTools import otRound
|
||||
from fontTools.pens.cu2quPen import Cu2QuPen
|
||||
from fontTools.pens.t2CharStringPen import T2CharStringPen
|
||||
from fontTools.pens.ttGlyphPen import TTGlyphPen
|
||||
|
||||
|
||||
Command = Dict[str, object]
|
||||
Matrix = Tuple[float, float, float, float, float, float]
|
||||
|
||||
|
||||
@dataclass
|
||||
class GlyphSource:
|
||||
name: str
|
||||
width: float
|
||||
unicode: Optional[int]
|
||||
char_code: Optional[int]
|
||||
outline: Sequence[Command]
|
||||
|
||||
|
||||
@dataclass
|
||||
class GlyphBuildResult:
|
||||
name: str
|
||||
width: int
|
||||
charstring: object
|
||||
ttf_glyph: Optional[object]
|
||||
unicode: Optional[int]
|
||||
char_code: Optional[int]
|
||||
bounds: Optional[Tuple[float, float, float, float]]
|
||||
|
||||
|
||||
def parse_args() -> argparse.Namespace:
|
||||
parser = argparse.ArgumentParser(description="Synthesize fonts from Type3 glyph JSON.")
|
||||
parser.add_argument("--input", required=True, help="Path to glyph JSON emitted by the backend")
|
||||
parser.add_argument("--otf-output", required=True, help="Destination path for the CFF/OTF font")
|
||||
parser.add_argument("--ttf-output", help="Optional destination path for a TrueType font")
|
||||
parser.add_argument("--family-name", default="Type3 Synth", help="Family name for the output")
|
||||
parser.add_argument("--style-name", default="Regular", help="Style name for the output")
|
||||
parser.add_argument("--units-per-em", type=int, default=1000, help="Units per EM value")
|
||||
parser.add_argument("--cu2qu-error", type=float, default=1.0, help="Max error for cubic→quadratic conversion")
|
||||
return parser.parse_args()
|
||||
|
||||
|
||||
def load_json(path: Path) -> Dict[str, object]:
|
||||
try:
|
||||
with path.open("r", encoding="utf-8") as handle:
|
||||
return json.load(handle)
|
||||
except Exception as exc: # pragma: no cover - fatal configuration error
|
||||
print(f"ERROR: Failed to load glyph JSON '{path}': {exc}", file=sys.stderr)
|
||||
sys.exit(2)
|
||||
|
||||
|
||||
def parse_font_matrix(rows: Optional[Iterable[Iterable[float]]]) -> Matrix:
|
||||
"""
|
||||
    Retrieve the six FontMatrix entries (the first two values of each of the
    three rows) for diagnostics. Type3 glyph outlines in our extractor are
    emitted in their native coordinate system, so the returned matrix is
    currently informational only.
|
||||
"""
|
||||
if not rows:
|
||||
return (1.0, 0.0, 0.0, 1.0, 0.0, 0.0)
|
||||
values: List[List[float]] = []
|
||||
for row in rows:
|
||||
try:
|
||||
values.append([float(col) for col in row])
|
||||
except (TypeError, ValueError):
|
||||
return (1.0, 0.0, 0.0, 1.0, 0.0, 0.0)
|
||||
if len(values) < 3 or len(values[0]) < 2 or len(values[1]) < 2:
|
||||
return (1.0, 0.0, 0.0, 1.0, 0.0, 0.0)
|
||||
return (
|
||||
float(values[0][0]),
|
||||
float(values[0][1]),
|
||||
float(values[1][0]),
|
||||
float(values[1][1]),
|
||||
float(values[2][0]),
|
||||
float(values[2][1]),
|
||||
)
|
||||
|
||||
|
||||
def resolve_width(raw_width: float, default: int) -> int:
|
||||
try:
|
||||
value = float(raw_width)
|
||||
except (TypeError, ValueError):
|
||||
return default
|
||||
if not math.isfinite(value) or value <= 0:
|
||||
return default
|
||||
width = otRound(value)
|
||||
return width if width > 0 else default
|
||||
|
||||
|
||||
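A few spot checks of the fall-back behaviour above, with arbitrary values: non-finite, non-positive, or unparsable widths collapse to the supplied default.

assert resolve_width(499.6, 1000) == 500          # finite positive widths are rounded
assert resolve_width(float("nan"), 500) == 500    # non-finite -> default
assert resolve_width("wide", 500) == 500          # unparsable -> default
assert resolve_width(-12, 500) == 500             # non-positive -> default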
def quadratic_to_cubic(
|
||||
current: Tuple[float, float],
|
||||
ctrl: Tuple[float, float],
|
||||
end: Tuple[float, float],
|
||||
) -> Tuple[Tuple[float, float], Tuple[float, float], Tuple[float, float]]:
|
||||
"""
|
||||
Convert a quadratic Bézier segment to cubic control points.
|
||||
"""
|
||||
c1 = (
|
||||
current[0] + (2.0 / 3.0) * (ctrl[0] - current[0]),
|
||||
current[1] + (2.0 / 3.0) * (ctrl[1] - current[1]),
|
||||
)
|
||||
c2 = (
|
||||
end[0] + (2.0 / 3.0) * (ctrl[0] - end[0]),
|
||||
end[1] + (2.0 / 3.0) * (ctrl[1] - end[1]),
|
||||
)
|
||||
return c1, c2, end
|
||||
|
||||
|
||||
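A quick illustration of the degree elevation above, with invented coordinates: both cubic handles land two thirds of the way from their endpoint towards the single quadratic control point.

c1, c2, end = quadratic_to_cubic((0.0, 0.0), (50.0, 100.0), (100.0, 0.0))
# c1 ≈ (33.33, 66.67), c2 ≈ (66.67, 66.67), end == (100.0, 0.0)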
def iterate_glyphs(data: Dict[str, object]) -> List[GlyphSource]:
|
||||
glyph_records = data.get("glyphs") or []
|
||||
sources: List[GlyphSource] = []
|
||||
for index, record in enumerate(glyph_records, start=1):
|
||||
if not isinstance(record, dict):
|
||||
continue
|
||||
name = record.get("name")
|
||||
if not isinstance(name, str) or not name:
|
||||
name = f"g{index}"
|
||||
width = record.get("advanceWidth")
|
||||
if not isinstance(width, (int, float)) or math.isnan(width):
|
||||
width = 1000.0
|
||||
unicode_value = record.get("unicode")
|
||||
if not isinstance(unicode_value, int) or unicode_value <= 0:
|
||||
unicode_value = None
|
||||
char_code_value = record.get("charCode")
|
||||
if not isinstance(char_code_value, int):
|
||||
char_code_value = record.get("code")
|
||||
if not isinstance(char_code_value, int):
|
||||
char_code_value = record.get("charCodeRaw")
|
||||
if not isinstance(char_code_value, int) or not (0 <= char_code_value <= 0x10FFFF):
|
||||
char_code_value = None
|
||||
outline = record.get("outline")
|
||||
if not isinstance(outline, list):
|
||||
outline = []
|
||||
sources.append(
|
||||
GlyphSource(
|
||||
name=name,
|
||||
width=float(width),
|
||||
unicode=unicode_value,
|
||||
char_code=char_code_value,
|
||||
outline=outline))
|
||||
return sources
|
||||
|
||||
|
||||
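A minimal, hypothetical glyph record in the shape iterate_glyphs() accepts; every value here is invented for illustration.

sample_record = {
    "name": "A",
    "advanceWidth": 600,
    "unicode": 0x41,
    "code": 65,
    "outline": [
        {"cmd": "M", "x": 0, "y": 0},
        {"cmd": "L", "x": 500, "y": 0},
        {"cmd": "L", "x": 250, "y": 700},
        {"cmd": "Z"},
    ],
}
glyphs = iterate_glyphs({"glyphs": [sample_record]})
# -> one GlyphSource with name='A', width=600.0, unicode=65, char_code=65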
def build_cff_charstring(
|
||||
glyph: GlyphSource,
|
||||
width: int,
|
||||
) -> Tuple[object, Optional[Tuple[float, float, float, float]]]:
|
||||
pen = T2CharStringPen(width=width, glyphSet=None)
|
||||
bounds = [math.inf, math.inf, -math.inf, -math.inf]
|
||||
|
||||
def update_bounds(point: Tuple[float, float]) -> None:
|
||||
x, y = point
|
||||
bounds[0] = min(bounds[0], x)
|
||||
bounds[1] = min(bounds[1], y)
|
||||
bounds[2] = max(bounds[2], x)
|
||||
bounds[3] = max(bounds[3], y)
|
||||
|
||||
current: Optional[Tuple[float, float]] = None
|
||||
start_point: Optional[Tuple[float, float]] = None
|
||||
open_path = False
|
||||
|
||||
for command in glyph.outline:
|
||||
if not isinstance(command, dict):
|
||||
continue
|
||||
op = command.get("cmd")
|
||||
if op == "M":
|
||||
if open_path:
|
||||
pen.endPath()
|
||||
open_path = False
|
||||
point = (float(command.get("x", 0.0)), float(command.get("y", 0.0)))
|
||||
pen.moveTo(point)
|
||||
update_bounds(point)
|
||||
current = point
|
||||
start_point = point
|
||||
open_path = True
|
||||
elif op == "L" and current is not None:
|
||||
point = (float(command.get("x", current[0])), float(command.get("y", current[1])))
|
||||
pen.lineTo(point)
|
||||
update_bounds(point)
|
||||
current = point
|
||||
elif op == "C" and current is not None:
|
||||
ctrl1 = (
|
||||
float(command.get("x1", current[0])),
|
||||
float(command.get("y1", current[1])),
|
||||
)
|
||||
ctrl2 = (
|
||||
float(command.get("x2", current[0])),
|
||||
float(command.get("y2", current[1])),
|
||||
)
|
||||
end = (
|
||||
float(command.get("x", current[0])),
|
||||
float(command.get("y", current[1])),
|
||||
)
|
||||
pen.curveTo(ctrl1, ctrl2, end)
|
||||
update_bounds(ctrl1)
|
||||
update_bounds(ctrl2)
|
||||
update_bounds(end)
|
||||
current = end
|
||||
elif op == "Q" and current is not None:
|
||||
ctrl = (
|
||||
float(command.get("x1", current[0])),
|
||||
float(command.get("y1", current[1])),
|
||||
)
|
||||
end = (
|
||||
float(command.get("x", current[0])),
|
||||
float(command.get("y", current[1])),
|
||||
)
|
||||
c1, c2, end_point = quadratic_to_cubic(current, ctrl, end)
|
||||
pen.curveTo(c1, c2, end_point)
|
||||
update_bounds(ctrl)
|
||||
update_bounds(end_point)
|
||||
current = end_point
|
||||
elif op == "Z" and open_path:
|
||||
pen.closePath()
|
||||
open_path = False
|
||||
if start_point is not None:
|
||||
current = start_point
|
||||
# Ignore unsupported commands silently.
|
||||
|
||||
if open_path:
|
||||
pen.endPath()
|
||||
|
||||
charstring = pen.getCharString()
|
||||
bbox = None
|
||||
if bounds[0] <= bounds[2] and bounds[1] <= bounds[3]:
|
||||
bbox = (bounds[0], bounds[1], bounds[2], bounds[3])
|
||||
return charstring, bbox
|
||||
|
||||
|
||||
def build_ttf_glyph(glyph: GlyphSource, max_error: float) -> Optional[object]:
|
||||
pen = TTGlyphPen(glyphSet=None)
|
||||
draw_pen = Cu2QuPen(pen, max_error, reverse_direction=False)
|
||||
|
||||
current_exists = False
|
||||
|
||||
for command in glyph.outline:
|
||||
if not isinstance(command, dict):
|
||||
continue
|
||||
op = command.get("cmd")
|
||||
if op == "M":
|
||||
x = float(command.get("x", 0.0))
|
||||
y = float(command.get("y", 0.0))
|
||||
draw_pen.moveTo((x, y))
|
||||
current_exists = True
|
||||
elif op == "L" and current_exists:
|
||||
x = float(command.get("x", 0.0))
|
||||
y = float(command.get("y", 0.0))
|
||||
draw_pen.lineTo((x, y))
|
||||
elif op == "C" and current_exists:
|
||||
ctrl1 = (float(command.get("x1", 0.0)), float(command.get("y1", 0.0)))
|
||||
ctrl2 = (float(command.get("x2", 0.0)), float(command.get("y2", 0.0)))
|
||||
end = (float(command.get("x", 0.0)), float(command.get("y", 0.0)))
|
||||
draw_pen.curveTo(ctrl1, ctrl2, end)
|
||||
elif op == "Q" and current_exists:
|
||||
ctrl = (float(command.get("x1", 0.0)), float(command.get("y1", 0.0)))
|
||||
end = (float(command.get("x", 0.0)), float(command.get("y", 0.0)))
|
||||
draw_pen.qCurveTo(ctrl, end)
|
||||
elif op == "Z" and current_exists:
|
||||
draw_pen.closePath()
|
||||
current_exists = False
|
||||
|
||||
if current_exists:
|
||||
draw_pen.endPath()
|
||||
|
||||
try:
|
||||
glyph_obj = pen.glyph()
|
||||
except Exception:
|
||||
return None
|
||||
return glyph_obj
|
||||
|
||||
|
||||
def synthesise_fonts(
|
||||
data: Dict[str, object],
|
||||
otf_output: Path,
|
||||
ttf_output: Optional[Path],
|
||||
family_name: str,
|
||||
style_name: str,
|
||||
units_per_em: int,
|
||||
cu2qu_error: float,
|
||||
) -> None:
|
||||
_font_matrix = parse_font_matrix(data.get("fontMatrix"))
|
||||
glyphs = iterate_glyphs(data)
|
||||
|
||||
results: List[GlyphBuildResult] = []
|
||||
global_y_min = math.inf
|
||||
global_y_max = -math.inf
|
||||
|
||||
default_width = max(1, units_per_em // 2)
|
||||
|
||||
for glyph in glyphs:
|
||||
width = resolve_width(glyph.width, default_width)
|
||||
charstring, bounds = build_cff_charstring(glyph, width)
|
||||
ttf_glyph = None
|
||||
if ttf_output is not None:
|
||||
ttf_glyph = build_ttf_glyph(glyph, cu2qu_error)
|
||||
if ttf_glyph is not None:
|
||||
ttf_glyph.width = width
|
||||
if bounds is not None:
|
||||
global_y_min = min(global_y_min, bounds[1])
|
||||
global_y_max = max(global_y_max, bounds[3])
|
||||
results.append(
|
||||
GlyphBuildResult(
|
||||
name=glyph.name,
|
||||
width=width,
|
||||
charstring=charstring,
|
||||
ttf_glyph=ttf_glyph,
|
||||
unicode=glyph.unicode,
|
||||
char_code=glyph.char_code,
|
||||
bounds=bounds,
|
||||
)
|
||||
)
|
||||
|
||||
if not results:
|
||||
raise RuntimeError("No glyphs provided in input JSON")
|
||||
|
||||
ascent = global_y_max if math.isfinite(global_y_max) else units_per_em * 0.8
|
||||
descent = global_y_min if math.isfinite(global_y_min) else -units_per_em * 0.2
|
||||
ascent = otRound(ascent)
|
||||
descent = otRound(descent)
|
||||
if ascent <= 0:
|
||||
ascent = otRound(units_per_em * 0.8)
|
||||
if descent >= 0:
|
||||
descent = -otRound(units_per_em * 0.2)
|
||||
|
||||
glyph_order = [".notdef"] + [result.name for result in results]
|
||||
horizontal_metrics = {result.name: (result.width, 0) for result in results}
|
||||
horizontal_metrics[".notdef"] = (default_width, 0)
|
||||
|
||||
cmap: Dict[int, str] = {}
|
||||
next_private = 0xF000
|
||||
for result in results:
|
||||
code_point = result.unicode
|
||||
if code_point is None:
|
||||
raw_code = result.char_code
|
||||
if raw_code is not None:
|
||||
code_point = raw_code
|
||||
else:
|
||||
code_point = next_private
|
||||
next_private += 1
|
||||
cmap[code_point] = result.name
|
||||
|
||||
notdef_pen = T2CharStringPen(width=default_width, glyphSet=None)
|
||||
notdef_pen.endPath()
|
||||
charstrings = {result.name: result.charstring for result in results}
|
||||
charstrings[".notdef"] = notdef_pen.getCharString()
|
||||
|
||||
name_table_entries = {
|
||||
"familyName": family_name,
|
||||
"styleName": style_name,
|
||||
"psName": f"{family_name.replace(' ', '')}-{style_name}",
|
||||
"fullName": f"{family_name} {style_name}",
|
||||
}
|
||||
|
||||
# Build OTF (CFF) font.
|
||||
fb = FontBuilder(units_per_em, isTTF=False)
|
||||
fb.setupGlyphOrder(glyph_order)
|
||||
fb.setupCharacterMap(cmap)
|
||||
fb.setupHorizontalMetrics(horizontal_metrics)
|
||||
fb.setupHorizontalHeader(ascent=ascent, descent=descent)
|
||||
fb.setupOS2(
|
||||
sTypoAscender=ascent,
|
||||
sTypoDescender=descent,
|
||||
usWinAscent=max(ascent, 0),
|
||||
usWinDescent=abs(min(descent, 0)),
|
||||
sxHeight=otRound(units_per_em * 0.5),
|
||||
sCapHeight=otRound(units_per_em * 0.7),
|
||||
)
|
||||
fb.setupNameTable(name_table_entries)
|
||||
fb.setupPost()
|
||||
fb.setupCFF(
|
||||
name_table_entries["psName"],
|
||||
{
|
||||
"FullName": name_table_entries["fullName"],
|
||||
"FamilyName": name_table_entries["familyName"],
|
||||
"Weight": style_name,
|
||||
},
|
||||
charstrings,
|
||||
{"BlueValues": []},
|
||||
)
|
||||
fb.font.save(str(otf_output))
|
||||
|
||||
if ttf_output is None:
|
||||
return
|
||||
|
||||
glyph_objects: Dict[str, object] = {}
|
||||
empty_pen = TTGlyphPen(None)
|
||||
empty_pen.moveTo((0, 0))
|
||||
empty_pen.lineTo((0, 0))
|
||||
empty_pen.closePath()
|
||||
empty_glyph = empty_pen.glyph()
|
||||
empty_glyph.width = default_width
|
||||
glyph_objects[".notdef"] = empty_glyph
|
||||
for result in results:
|
||||
glyph_obj = result.ttf_glyph
|
||||
if glyph_obj is None:
|
||||
temp_pen = TTGlyphPen(None)
|
||||
temp_pen.moveTo((0, 0))
|
||||
temp_pen.lineTo((0, 0))
|
||||
temp_pen.closePath()
|
||||
glyph_obj = temp_pen.glyph()
|
||||
glyph_obj.width = result.width
|
||||
glyph_objects[result.name] = glyph_obj
|
||||
|
||||
ttf_fb = FontBuilder(units_per_em, isTTF=True)
|
||||
ttf_fb.setupGlyphOrder(glyph_order)
|
||||
ttf_fb.setupCharacterMap(cmap)
|
||||
ttf_fb.setupHorizontalMetrics(horizontal_metrics)
|
||||
ttf_fb.setupHorizontalHeader(ascent=ascent, descent=descent)
|
||||
ttf_fb.setupOS2(
|
||||
sTypoAscender=ascent,
|
||||
sTypoDescender=descent,
|
||||
usWinAscent=max(ascent, 0),
|
||||
usWinDescent=abs(min(descent, 0)),
|
||||
sxHeight=otRound(units_per_em * 0.5),
|
||||
sCapHeight=otRound(units_per_em * 0.7),
|
||||
)
|
||||
ttf_fb.setupNameTable(name_table_entries)
|
||||
ttf_fb.setupPost()
|
||||
ttf_fb.setupGlyf(glyph_objects)
|
||||
ttf_fb.setupDummyDSIG()
|
||||
ttf_fb.font.save(str(ttf_output))
|
||||
|
||||
|
||||
def main() -> None:
|
||||
args = parse_args()
|
||||
input_path = Path(args.input).resolve()
|
||||
otf_output = Path(args.otf_output).resolve()
|
||||
ttf_output = Path(args.ttf_output).resolve() if args.ttf_output else None
|
||||
|
||||
data = load_json(input_path)
|
||||
try:
|
||||
synthesise_fonts(
|
||||
data=data,
|
||||
otf_output=otf_output,
|
||||
ttf_output=ttf_output,
|
||||
family_name=args.family_name,
|
||||
style_name=args.style_name,
|
||||
units_per_em=args.units_per_em,
|
||||
cu2qu_error=args.cu2qu_error,
|
||||
)
|
||||
except Exception as exc:
|
||||
print(f"ERROR: Failed to generate fonts: {exc}", file=sys.stderr)
|
||||
if otf_output.exists():
|
||||
otf_output.unlink()
|
||||
if ttf_output and ttf_output.exists():
|
||||
ttf_output.unlink()
|
||||
sys.exit(1)
|
||||
|
||||
message = f"Generated font at {otf_output}"
|
||||
if ttf_output:
|
||||
message += f" and {ttf_output}"
|
||||
print(message, file=sys.stderr)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
243
scripts/update_type3_library.py
Normal file
@ -0,0 +1,243 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Synchronize Type3 library index entries with captured signature dumps.
|
||||
|
||||
The script scans docs/type3/signatures/*.json (or a custom --signatures-dir),
|
||||
matches each font by alias/signature to app/core/src/main/resources/type3/library/index.json,
|
||||
and updates the entry's signatures / glyphCoverage / aliases / source fields.
|
||||
|
||||
Usage:
|
||||
scripts/update_type3_library.py --apply
|
||||
|
||||
Run without --apply to see a dry-run summary.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
import json
|
||||
import sys
|
||||
from collections import defaultdict
|
||||
from pathlib import Path
|
||||
from typing import Dict, Iterable, List, Optional, Tuple
|
||||
|
||||
REPO_ROOT = Path(__file__).resolve().parents[1]
|
||||
DEFAULT_SIGNATURES = REPO_ROOT / "docs" / "type3" / "signatures"
|
||||
DEFAULT_INDEX = (
|
||||
REPO_ROOT / "app" / "core" / "src" / "main" / "resources" / "type3" / "library" / "index.json"
|
||||
)
|
||||
|
||||
|
||||
def normalize_alias(value: Optional[str]) -> Optional[str]:
|
||||
if not value:
|
||||
return None
|
||||
trimmed = value.strip()
|
||||
plus = trimmed.find("+")
|
||||
if plus >= 0 and plus < len(trimmed) - 1:
|
||||
trimmed = trimmed[plus + 1 :]
|
||||
lowered = trimmed.lower()
|
||||
return lowered if lowered else None
|
||||
|
||||
|
||||
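A few spot checks of the normalisation above (inputs invented): subset prefixes such as "BAAAAA+" are stripped and the remainder is lower-cased.

assert normalize_alias("BAAAAA+DejaVuSans") == "dejavusans"
assert normalize_alias("  Cmsy10 ") == "cmsy10"
assert normalize_alias("") is None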
def load_json(path: Path):
|
||||
with path.open("r", encoding="utf-8") as handle:
|
||||
return json.load(handle)
|
||||
|
||||
|
||||
def dump_json(path: Path, data) -> None:
|
||||
path.parent.mkdir(parents=True, exist_ok=True)
|
||||
with path.open("w", encoding="utf-8") as handle:
|
||||
json.dump(data, handle, indent=2)
|
||||
handle.write("\n")
|
||||
|
||||
|
||||
def iter_signature_fonts(signature_file: Path):
|
||||
payload = load_json(signature_file)
|
||||
pdf_source = payload.get("pdf")
|
||||
for font in payload.get("fonts", []):
|
||||
alias = font.get("alias") or font.get("baseName")
|
||||
normalized = normalize_alias(alias) or normalize_alias(font.get("baseName"))
|
||||
yield {
|
||||
"alias_raw": alias,
|
||||
"alias": normalized,
|
||||
"baseName": font.get("baseName"),
|
||||
"signature": font.get("signature"),
|
||||
"glyphCoverage": font.get("glyphCoverage") or [],
|
||||
"pdf": pdf_source,
|
||||
"file": signature_file,
|
||||
}
|
||||
|
||||
|
||||
def make_alias_index(entries: List[Dict]) -> Tuple[Dict[str, Dict], Dict[str, Dict]]:
|
||||
alias_index: Dict[str, Dict] = {}
|
||||
signature_index: Dict[str, Dict] = {}
|
||||
for entry in entries:
|
||||
for alias in entry.get("aliases", []) or []:
|
||||
normalized = normalize_alias(alias)
|
||||
if normalized:
|
||||
alias_index.setdefault(normalized, entry)
|
||||
base_name_alias = normalize_alias(entry.get("label"))
|
||||
if base_name_alias:
|
||||
alias_index.setdefault(base_name_alias, entry)
|
||||
for signature in entry.get("signatures", []) or []:
|
||||
signature_index.setdefault(signature.lower(), entry)
|
||||
return alias_index, signature_index
|
||||
|
||||
|
||||
def ensure_list(container: Dict, key: str) -> List:
|
||||
value = container.get(key)
|
||||
if isinstance(value, list):
|
||||
return value
|
||||
value = []
|
||||
container[key] = value
|
||||
return value
|
||||
|
||||
|
||||
def merge_sorted_unique(values: Iterable[int]) -> List[int]:
|
||||
return sorted({int(v) for v in values if isinstance(v, int)})
|
||||
|
||||
|
||||
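A small check of the coverage merge helper above (values invented): non-integers are dropped, duplicates collapse, and the result comes back sorted.

assert merge_sorted_unique([66, 65, 65, "x", 10, None]) == [10, 65, 66]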
def normalize_source_path(pdf_path: Optional[str]) -> Optional[str]:
|
||||
if not pdf_path:
|
||||
return None
|
||||
try:
|
||||
source = Path(pdf_path)
|
||||
rel = source.relative_to(REPO_ROOT)
|
||||
except Exception:
|
||||
rel = Path(pdf_path)
|
||||
return str(rel).replace("\\", "/")
|
||||
|
||||
|
||||
def update_library(
|
||||
signatures_dir: Path, index_path: Path, apply_changes: bool
|
||||
) -> Tuple[int, int, List[Tuple[str, Path]]]:
|
||||
entries = load_json(index_path)
|
||||
alias_index, signature_index = make_alias_index(entries)
|
||||
|
||||
modifications = 0
|
||||
updated_entries = set()
|
||||
unmatched: List[Tuple[str, Path]] = []
|
||||
|
||||
signature_files = sorted(signatures_dir.glob("*.json"))
|
||||
if not signature_files:
|
||||
print(f"No signature JSON files found under {signatures_dir}", file=sys.stderr)
|
||||
return 0, 0, unmatched
|
||||
|
||||
for sig_file in signature_files:
|
||||
for font in iter_signature_fonts(sig_file):
|
||||
signature = font["signature"]
|
||||
norm_signature = signature.lower() if signature else None
|
||||
alias = font["alias"]
|
||||
|
||||
entry = None
|
||||
if norm_signature and norm_signature in signature_index:
|
||||
entry = signature_index[norm_signature]
|
||||
elif alias and alias in alias_index:
|
||||
entry = alias_index[alias]
|
||||
|
||||
if entry is None:
|
||||
unmatched.append((font.get("baseName") or font.get("alias_raw") or "unknown", sig_file))
|
||||
continue
|
||||
|
||||
entry_modified = False
|
||||
|
||||
# Signatures
|
||||
if signature:
|
||||
signature_list = ensure_list(entry, "signatures")
|
||||
if signature not in signature_list:
|
||||
signature_list.append(signature)
|
||||
entry_modified = True
|
||||
signature_index[signature.lower()] = entry
|
||||
|
||||
# Aliases
|
||||
alias_raw = font.get("alias_raw")
|
||||
if alias_raw:
|
||||
aliases = ensure_list(entry, "aliases")
|
||||
if alias_raw not in aliases:
|
||||
aliases.append(alias_raw)
|
||||
entry_modified = True
|
||||
normalized = normalize_alias(alias_raw)
|
||||
if normalized:
|
||||
alias_index.setdefault(normalized, entry)
|
||||
|
||||
# Glyph coverage
|
||||
coverage = font.get("glyphCoverage") or []
|
||||
if coverage:
|
||||
existing = set(entry.get("glyphCoverage", []))
|
||||
merged = merge_sorted_unique(list(existing) + coverage)
|
||||
if merged != entry.get("glyphCoverage"):
|
||||
entry["glyphCoverage"] = merged
|
||||
entry_modified = True
|
||||
|
||||
# Source PDF
|
||||
pdf_source = normalize_source_path(font.get("pdf"))
|
||||
if pdf_source and not entry.get("source"):
|
||||
entry["source"] = pdf_source
|
||||
entry_modified = True
|
||||
|
||||
if entry_modified:
|
||||
modifications += 1
|
||||
updated_entries.add(entry.get("id", "<unknown>"))
|
||||
|
||||
if apply_changes and modifications > 0:
|
||||
dump_json(index_path, entries)
|
||||
|
||||
return modifications, len(updated_entries), unmatched
|
||||
|
||||
|
||||
def parse_args() -> argparse.Namespace:
|
||||
parser = argparse.ArgumentParser(description="Update Type3 library index using signature dumps.")
|
||||
parser.add_argument(
|
||||
"--signatures-dir",
|
||||
type=Path,
|
||||
default=DEFAULT_SIGNATURES,
|
||||
help=f"Directory containing signature JSON files (default: {DEFAULT_SIGNATURES})",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--index",
|
||||
type=Path,
|
||||
default=DEFAULT_INDEX,
|
||||
help=f"Path to type3/library/index.json (default: {DEFAULT_INDEX})",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--apply",
|
||||
action="store_true",
|
||||
help="Write changes back to the index file. Without this flag the script runs in dry-run mode.",
|
||||
)
|
||||
return parser.parse_args()
|
||||
|
||||
|
||||
def main() -> None:
|
||||
args = parse_args()
|
||||
signatures_dir = args.signatures_dir if args.signatures_dir.is_absolute() else (REPO_ROOT / args.signatures_dir)
|
||||
index_path = args.index if args.index.is_absolute() else (REPO_ROOT / args.index)
|
||||
|
||||
if not signatures_dir.exists():
|
||||
print(f"Signature directory not found: {signatures_dir}", file=sys.stderr)
|
||||
sys.exit(2)
|
||||
if not index_path.exists():
|
||||
print(f"Index file not found: {index_path}", file=sys.stderr)
|
||||
sys.exit(2)
|
||||
|
||||
modifications, updated_entries, unmatched = update_library(
|
||||
signatures_dir, index_path, apply_changes=args.apply
|
||||
)
|
||||
|
||||
mode = "APPLIED" if args.apply else "DRY-RUN"
|
||||
print(
|
||||
f"[{mode}] Processed signatures under {signatures_dir}. "
|
||||
f"Updated entries: {updated_entries}, individual modifications: {modifications}."
|
||||
)
|
||||
|
||||
if unmatched:
|
||||
print("\nUnmatched fonts (no library entry yet):")
|
||||
for alias, sig_file in unmatched:
|
||||
print(f" - {alias} (from {sig_file})")
|
||||
print("Add these fonts to index.json with the proper payload before rerunning.")
|
||||
|
||||
if modifications == 0:
|
||||
print("No changes detected; index.json already matches captured signatures.")
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()