1
2
3
4 __version__=''' $Id: pdfutils.py 3660 2010-02-08 18:17:33Z damian $ '''
5 __doc__=''
6
7
8
9 import os
10 from reportlab import rl_config
11 from reportlab.lib.utils import getStringIO, ImageReader
12
13 LINEEND = '\015\012'
14
16 for i in xrange(0,len(src),chunkSize):
17 dst.append(src[i:i+chunkSize])
18 return dst
19
20
21
22
23
24
25
# Lookup tables keyed by the image reader's ``mode`` string.
#   _mode2cs:  name written after /CS in the inline-image dictionary.
#   _mode2bpp: bytes of raw data per pixel (8 bits per component); used to
#              sanity-check the length of the raw pixel buffer.
_mode2cs = {
    'RGB': 'RGB',
    'CMYK': 'CMYK',
    'L': 'G',
}
_mode2bpp = {
    'RGB': 3,
    'CMYK': 4,
    'L': 1,
}
29 import zlib
30 img = ImageReader(filename)
31 if IMG is not None: IMG.append(img)
32
33 imgwidth, imgheight = img.getSize()
34 raw = img.getRGBData()
35
36 code = []
37 append = code.append
38
39 append('BI')
40 append('/W %s /H %s /BPC 8 /CS /%s /F [/A85 /Fl]' % (imgwidth, imgheight,_mode2cs[img.mode]))
41 append('ID')
42
43 assert len(raw) == imgwidth * imgheight*_mode2bpp[img.mode], "Wrong amount of data for image"
44 compressed = zlib.compress(raw)
45 encoded = _AsciiBase85Encode(compressed)
46
47
48 _chunker(encoded,code)
49
50 append('EI')
51 return code
52
73
74
76 """Preprocesses one or more image files.
77
78 Accepts either a filespec ('C:\mydir\*.jpg') or a list
79 of image filenames, crunches them all to save time. Run this
80 to save huge amounts of time when repeatedly building image
81 documents."""
82
83 import types, glob
84
85 if type(spec) is types.StringType:
86 filelist = glob.glob(spec)
87 else:
88 filelist = spec
89
90 for filename in filelist:
91 if cachedImageExists(filename):
92 if rl_config.verbose:
93 print 'cached version of %s already exists' % filename
94 else:
95 cacheImageFile(filename)
96
97
99 """Determines if a cached image already exists for a given file.
100
101 Determines if a cached image exists which has the same name
102 and equal or newer date to the given file."""
103 cachedname = os.path.splitext(filename)[0] + '.a85'
104 if os.path.isfile(cachedname):
105
106 original_date = os.stat(filename)[8]
107 cached_date = os.stat(cachedname)[8]
108 if original_date > cached_date:
109 return 0
110 else:
111 return 1
112 else:
113 return 0
114
115
116
117
118
119
120
121
122 try:
123 from _rl_accel import escapePDF, _instanceEscapePDF
124 _escape = escapePDF
125 except ImportError:
126 try:
127 from reportlab.lib._rl_accel import escapePDF, _instanceEscapePDF
128 _escape = escapePDF
129 except ImportError:
130 _instanceEscapePDF=None
131 if rl_config.sys_version>='2.1':
132 _ESCAPEDICT={}
133 for c in xrange(0,256):
134 if c<32 or c>=127:
135 _ESCAPEDICT[chr(c)]= '\\%03o' % c
136 elif c in (ord('\\'),ord('('),ord(')')):
137 _ESCAPEDICT[chr(c)] = '\\'+chr(c)
138 else:
139 _ESCAPEDICT[chr(c)] = chr(c)
140 del c
141
143 return ''.join(map(lambda c, d=_ESCAPEDICT: d[c],s))
144 else:
146 """Escapes some PDF symbols (in fact, parenthesis).
147 PDF escapes are almost like Python ones, but brackets
148 need slashes before them too. Uses Python's repr function
149 and chops off the quotes first."""
150 return repr(s)[1:-1].replace('(','\(').replace(')','\)')
151
153 """Normalizes different line end character(s).
154
155 Ensures all instances of CR, LF and CRLF end up as
156 the specified one."""
157
158 return (text
159 .replace('\015\012', unlikely)
160 .replace('\015', unlikely)
161 .replace(text, '\012', unlikely)
162 .replace(text, unlikely, desired))
163
164
166 """Encodes input using ASCII-Hex coding.
167
168 This is a verbose encoding used for binary data within
169 a PDF file. One byte binary becomes two bytes of ASCII.
170 Helper function used by images."""
171 output = getStringIO()
172 for char in input:
173 output.write('%02x' % ord(char))
174 output.write('>')
175 return output.getvalue()
176
177
179 """Decodes input using ASCII-Hex coding.
180
181 Not used except to provide a test of the inverse function."""
182
183
184 stripped = ''.join(input.split())
185 assert stripped[-1] == '>', 'Invalid terminator for Ascii Hex Stream'
186 stripped = stripped[:-1]
187 assert len(stripped) % 2 == 0, 'Ascii Hex stream has odd number of bytes'
188
189 return ''.join([chr(int(stripped[i:i+2],16)) for i in xrange(0,len(stripped),2)])
190
191 if 1:
193 """Encodes input using ASCII-Base85 coding.
194
195 This is a compact encoding used for binary data within
196 a PDF file. Four bytes of binary data become five bytes of
197 ASCII. This is the default method used for encoding images."""
198
199 whole_word_count, remainder_size = divmod(len(input), 4)
200 cut = 4 * whole_word_count
201 body, lastbit = input[0:cut], input[cut:]
202
203 out = [].append
204 for i in xrange(whole_word_count):
205 offset = i*4
206 b1 = ord(body[offset])
207 b2 = ord(body[offset+1])
208 b3 = ord(body[offset+2])
209 b4 = ord(body[offset+3])
210
211 if b1<128:
212 num = (((((b1<<8)|b2)<<8)|b3)<<8)|b4
213 else:
214 num = 16777216L * b1 + 65536 * b2 + 256 * b3 + b4
215
216 if num == 0:
217
218 out('z')
219 else:
220
221 temp, c5 = divmod(num, 85)
222 temp, c4 = divmod(temp, 85)
223 temp, c3 = divmod(temp, 85)
224 c1, c2 = divmod(temp, 85)
225 assert ((85**4) * c1) + ((85**3) * c2) + ((85**2) * c3) + (85*c4) + c5 == num, 'dodgy code!'
226 out(chr(c1+33))
227 out(chr(c2+33))
228 out(chr(c3+33))
229 out(chr(c4+33))
230 out(chr(c5+33))
231
232
233
234
235
236
237 if remainder_size > 0:
238 while len(lastbit) < 4:
239 lastbit = lastbit + '\000'
240 b1 = ord(lastbit[0])
241 b2 = ord(lastbit[1])
242 b3 = ord(lastbit[2])
243 b4 = ord(lastbit[3])
244
245 num = 16777216L * b1 + 65536 * b2 + 256 * b3 + b4
246
247
248 temp, c5 = divmod(num, 85)
249 temp, c4 = divmod(temp, 85)
250 temp, c3 = divmod(temp, 85)
251 c1, c2 = divmod(temp, 85)
252
253
254
255 lastword = chr(c1+33) + chr(c2+33) + chr(c3+33) + chr(c4+33) + chr(c5+33)
256
257 out(lastword[0:remainder_size + 1])
258
259
260 out('~>')
261 return ''.join(out.__self__)
262
264 """Decodes input using ASCII-Base85 coding.
265
266 This is not used - Acrobat Reader decodes for you
267 - but a round trip is essential for testing."""
268
269 stripped = ''.join(input.split())
270
271 assert stripped[-2:] == '~>', 'Invalid terminator for Ascii Base 85 Stream'
272 stripped = stripped[:-2]
273
274
275 stripped = stripped.replace('z','!!!!!')
276
277 whole_word_count, remainder_size = divmod(len(stripped), 5)
278
279
280 cut = 5 * whole_word_count
281 body, lastbit = stripped[0:cut], stripped[cut:]
282
283 out = [].append
284 for i in xrange(whole_word_count):
285 offset = i*5
286 c1 = ord(body[offset]) - 33
287 c2 = ord(body[offset+1]) - 33
288 c3 = ord(body[offset+2]) - 33
289 c4 = ord(body[offset+3]) - 33
290 c5 = ord(body[offset+4]) - 33
291
292 num = ((85L**4) * c1) + ((85**3) * c2) + ((85**2) * c3) + (85*c4) + c5
293
294 temp, b4 = divmod(num,256)
295 temp, b3 = divmod(temp,256)
296 b1, b2 = divmod(temp, 256)
297
298 assert num == 16777216 * b1 + 65536 * b2 + 256 * b3 + b4, 'dodgy code!'
299 out(chr(b1))
300 out(chr(b2))
301 out(chr(b3))
302 out(chr(b4))
303
304
305 if remainder_size > 0:
306 while len(lastbit) < 5:
307 lastbit = lastbit + '!'
308 c1 = ord(lastbit[0]) - 33
309 c2 = ord(lastbit[1]) - 33
310 c3 = ord(lastbit[2]) - 33
311 c4 = ord(lastbit[3]) - 33
312 c5 = ord(lastbit[4]) - 33
313 num = (((85*c1+c2)*85+c3)*85+c4)*85L + (c5
314 +(0,0,0xFFFFFF,0xFFFF,0xFF)[remainder_size])
315 temp, b4 = divmod(num,256)
316 temp, b3 = divmod(temp,256)
317 b1, b2 = divmod(temp, 256)
318 assert num == 16777216 * b1 + 65536 * b2 + 256 * b3 + b4, 'dodgy code!'
319
320
321
322
323
324
325 if remainder_size == 2:
326 lastword = chr(b1)
327 elif remainder_size == 3:
328 lastword = chr(b1) + chr(b2)
329 elif remainder_size == 4:
330 lastword = chr(b1) + chr(b2) + chr(b3)
331 else:
332 lastword = ''
333 out(lastword)
334
335
336 return ''.join(out.__self__)
337
338 try:
339 from _rl_accel import _AsciiBase85Encode
340 except ImportError:
341 try:
342 from reportlab.lib._rl_accel import _AsciiBase85Encode
343 except ImportError:
344 _AsciiBase85Encode = _AsciiBase85EncodePYTHON
345
346 try:
347 from _rl_accel import _AsciiBase85Decode
348 except ImportError:
349 try:
350 from reportlab.lib._rl_accel import _AsciiBase85Decode
351 except ImportError:
352 _AsciiBase85Decode = _AsciiBase85DecodePYTHON
353
def _wrap(input, columns=60):
    """Wraps input at a given column size by inserting LINEEND characters.

    The input is cut into consecutive pieces of at most ``columns``
    characters which are then joined with LINEEND.  If the final piece
    would consist of a single character, the last character of the
    preceding piece is moved onto it so that the output never ends with
    a one-character line.

    input   -- the string to wrap (may be empty).
    columns -- maximum piece length; must be at least 1.

    Returns the wrapped string.  An empty input yields an empty string
    and a one-character input is returned unchanged.

    Raises ValueError if ``columns`` is less than 1, because no progress
    through the input could be made with such a width.
    """
    if columns < 1:
        raise ValueError('columns must be a positive integer, got %r' % (columns,))

    output = [input[pos:pos + columns] for pos in range(0, len(input), columns)]

    # Only rebalance when there is a previous piece to borrow from; this
    # guard is what keeps empty and one-character inputs from indexing
    # past the start of the list.
    if len(output) > 1 and len(output[-1]) == 1:
        output[-2:] = [output[-2][:-1], output[-2][-1] + output[-1]]
    return LINEEND.join(output)
368
369
370
371
372
373
374
375
376
377
378
379
381 "Read width, height and number of components from open JPEG file."
382
383 import struct
384 from pdfdoc import PDFError
385
386
387
388 validMarkers = [0xC0, 0xC1, 0xC2]
389
390
391 noParamMarkers = \
392 [ 0xD0, 0xD1, 0xD2, 0xD3, 0xD4, 0xD5, 0xD6, 0xD7, 0xD8, 0x01 ]
393
394
395 unsupportedMarkers = \
396 [ 0xC3, 0xC5, 0xC6, 0xC7, 0xC8, 0xC9, 0xCA, 0xCB, 0xCD, 0xCE, 0xCF ]
397
398
399 done = 0
400 while not done:
401 x = struct.unpack('B', image.read(1))
402 if x[0] == 0xFF:
403 x = struct.unpack('B', image.read(1))
404
405
406 if x[0] in validMarkers:
407 image.seek(2, 1)
408 x = struct.unpack('B', image.read(1))
409 if x[0] != 8:
410 raise PDFError('JPEG must have 8 bits per component')
411 y = struct.unpack('BB', image.read(2))
412 height = (y[0] << 8) + y[1]
413 y = struct.unpack('BB', image.read(2))
414 width = (y[0] << 8) + y[1]
415 y = struct.unpack('B', image.read(1))
416 color = y[0]
417 return width, height, color
418 elif x[0] in unsupportedMarkers:
419 raise PDFError('JPEG Unsupported JPEG marker: %0.2x' % x[0])
420 elif x[0] not in noParamMarkers:
421
422
423 x = struct.unpack('BB', image.read(2))
424 image.seek( (x[0] << 8) + x[1] - 2, 1)
425
428 assert k, 'Argument k should be a non empty string'
429 self._k = k
430 self._klen = len(k)
431 self._n = int(n) or 7
432
434 return self.__rotate(_AsciiBase85Encode(''.join(map(chr,self.__fusc(map(ord,s))))),self._n)
435
437 return ''.join(map(chr,self.__fusc(map(ord,_AsciiBase85Decode(self.__rotate(s,-self._n))))))
438
440 l = len(s)
441 if n<0: n = l+n
442 n %= l
443 if not n: return s
444 return s[-n:]+s[:l-n]
445
447 slen = len(s)
448 return map(lambda x,y: x ^ y,s,map(ord,((int(slen/self._klen)+1)*self._k)[:slen]))
449