Package reportlab :: Package pdfbase :: Module pdfutils
[frames | no frames]

Source Code for Module reportlab.pdfbase.pdfutils

  1  #Copyright ReportLab Europe Ltd. 2000-2004 
  2  #see license.txt for license details 
  3  #history http://www.reportlab.co.uk/cgi-bin/viewcvs.cgi/public/reportlab/trunk/reportlab/pdfbase/pdfutils.py 
  4  __version__=''' $Id: pdfutils.py 3660 2010-02-08 18:17:33Z damian $ ''' 
  5  __doc__='' 
  6  # pdfutils.py - everything to do with images, streams, 
  7  # compression, and some constants 
  8   
  9  import os 
 10  from reportlab import rl_config 
 11  from reportlab.lib.utils import getStringIO, ImageReader 
 12   
# Line terminator used by this module when joining and splitting lines of
# output: carriage return (octal 015) followed by line feed (octal 012).
LINEEND = '\015\012'
 14   
def _chunker(src,dst=None,chunkSize=60):
    """Split src into consecutive slices of at most chunkSize items.

    The slices are appended to dst, which is also the return value.
    When dst is omitted (or None) a fresh list is created for every
    call; a list supplied by the caller is extended in place.
    """
    # A literal [] default would be created once and shared by every
    # call, so chunks from earlier calls would leak into later results.
    if dst is None:
        dst = []
    dst.extend([src[i:i+chunkSize] for i in range(0, len(src), chunkSize)])
    return dst
##########################################################
#
#   Image compression helpers.  Preprocessing a directory
#   of images will offer a vast speedup.
#
##########################################################

# image mode -> colour space name written after /CS in the inline image header
_mode2cs = {
    'RGB': 'RGB',
    'CMYK': 'CMYK',
    'L': 'G',
}

# image mode -> number of data bytes per pixel (one byte per component)
_mode2bpp = {
    'RGB': 3,
    'CMYK': 4,
    'L': 1,
}
def makeA85Image(filename,IMG=None):
    """Build the lines of a PDF inline image for the given image file.

    The image is opened with ImageReader; when IMG is not None the
    reader object is appended to it.  The returned list of strings is
    'BI', a line describing the image, 'ID', the flate-compressed and
    ASCII-Base85 encoded pixel data in pieces of at most 60 characters,
    and finally 'EI'.

    Raises AssertionError if the amount of pixel data does not match
    the image dimensions and mode.
    """
    import zlib

    reader = ImageReader(filename)
    if IMG is not None:
        IMG.append(reader)

    width, height = reader.getSize()
    pixels = reader.getRGBData()

    # the mode is read only after the pixel data has been fetched,
    # exactly as the dictionary lookups below require
    mode = reader.mode
    # this describes what is in the image itself
    header = '/W %s /H %s /BPC 8 /CS /%s /F [/A85 /Fl]' % (width, height, _mode2cs[mode])

    expected_size = width * height * _mode2bpp[mode]
    assert len(pixels) == expected_size, "Wrong amount of data for image"

    # flate filter first (fast), then Ascii Base 85 (possibly slow)
    encoded = _AsciiBase85Encode(zlib.compress(pixels))

    code = ['BI', header, 'ID']
    # the data goes in as blocks of 60 characters
    _chunker(encoded, code)
    code.append('EI')
    return code
52
def cacheImageFile(filename, returnInMemory=0, IMG=None):
    """Processes image as if for encoding, saves to a file with .a85 extension.

    filename        image file, or an existing '.a85' cache file.
    returnInMemory  when true nothing is written; the encoded lines are
                    returned as a list of strings instead.
    IMG             passed through to makeA85Image.

    If filename already ends in '.a85' it is treated as the cache
    itself: its non-empty lines are returned when returnInMemory is
    true (otherwise nothing happens), and IOError is raised if the
    file does not exist.  In every case where lines are not returned
    the result is None.
    """
    cachedname = os.path.splitext(filename)[0] + '.a85'

    if filename == cachedname:
        if not cachedImageExists(filename):
            raise IOError('No such cached image %s' % filename)
        if returnInMemory:
            from reportlab.lib.utils import open_for_read
            f = open_for_read(cachedname)
            # close the handle even if reading fails
            try:
                data = f.read()
            finally:
                f.close()
            return [line for line in data.split(LINEEND) if line]
        return None

    code = makeA85Image(filename, IMG)
    if returnInMemory:
        return code

    #save it to a file
    f = open(cachedname, 'wb')
    # close the handle even if the write fails (e.g. disk full)
    try:
        f.write(LINEEND.join(code) + LINEEND)
    finally:
        f.close()
    if rl_config.verbose:
        print('cached image as %s' % cachedname)
73 74
def preProcessImages(spec):
    r"""Preprocesses one or more image files.

    Accepts either a filespec ('C:\mydir\*.jpg') or a list
    of image filenames, crunches them all to save time.  Run this
    to save huge amounts of time when repeatedly building image
    documents.

    Files for which cachedImageExists() is true are skipped; the
    others are handed to cacheImageFile().  Returns None.
    """
    import glob

    # Any string type counts as a filespec (str and unicode on Python 2,
    # str on Python 3).  An exact-type test would let a unicode filespec
    # fall through and be iterated one character at a time.
    try:
        string_types = basestring
    except NameError:
        string_types = str

    if isinstance(spec, string_types):
        filelist = glob.glob(spec)
    else:   #list or tuple OK
        filelist = spec

    for filename in filelist:
        if cachedImageExists(filename):
            if rl_config.verbose:
                print('cached version of %s already exists' % filename)
        else:
            cacheImageFile(filename)
96 97
def cachedImageExists(filename):
    """Determines if a cached image already exists for a given file.

    Returns 1 when a '.a85' file with the same base name exists and
    its modification time is equal to or newer than that of filename,
    otherwise 0.
    """
    cachedname = os.path.splitext(filename)[0] + '.a85'
    if not os.path.isfile(cachedname):
        return 0

    # item 8 of a stat result is the modification time
    source_mtime = os.stat(filename)[8]
    cache_mtime = os.stat(cachedname)[8]
    if source_mtime > cache_mtime:
        # the original changed after the cache was written
        return 0
    return 1
##############################################################
#
#            PDF Helper functions
#
##############################################################

# _escape comes from the _rl_accel module when it can be imported (first
# from the path, then from reportlab.lib); otherwise it is defined here.
try:
    from _rl_accel import escapePDF, _instanceEscapePDF
    _escape = escapePDF
except ImportError:
    try:
        from reportlab.lib._rl_accel import escapePDF, _instanceEscapePDF
        _escape = escapePDF
    except ImportError:
        _instanceEscapePDF = None
        if rl_config.sys_version >= '2.1':
            # one table entry for each of the 256 single-byte characters
            _ESCAPEDICT = {}
            for code in range(256):
                ch = chr(code)
                if ch in ('\\', '(', ')'):
                    # backslash and parentheses get a backslash prefix
                    _ESCAPEDICT[ch] = '\\' + ch
                elif 32 <= code < 127:
                    # other printable ASCII is passed through unchanged
                    _ESCAPEDICT[ch] = ch
                else:
                    # everything else becomes a three digit octal escape
                    _ESCAPEDICT[ch] = '\\%03o' % code
            del code, ch
            #Michael Hudson donated this
            def _escape(s):
                """Escapes s for PDF by looking every character up in _ESCAPEDICT."""
                return ''.join(map(_ESCAPEDICT.__getitem__, s))
        else:
            def _escape(s):
                """Escapes some PDF symbols (in fact, parenthesis).
                PDF escapes are almost like Python ones, but brackets
                need slashes before them too. Uses Python's repr function
                and chops off the quotes first."""
                unquoted = repr(s)[1:-1]
                return unquoted.replace('(', '\\(').replace(')', '\\)')
151
def _normalizeLineEnds(text,desired=LINEEND,unlikely='\000\001\002\003'):
    """Normalizes different line end character(s).

    Ensures all instances of CR, LF and CRLF end up as
    the specified one.

    text      the string to normalize.
    desired   the line end every CR, LF and CRLF is turned into.
    unlikely  placeholder used during the conversion; it must not
              occur in text.
    """
    # CRLF has to be handled before the lone CR and LF so that it is
    # counted as a single line end.  Everything is first mapped to the
    # placeholder so that characters contained in 'desired' are not
    # converted a second time.
    return (text
            .replace('\015\012', unlikely)
            .replace('\015', unlikely)
            .replace('\012', unlikely)
            .replace(unlikely, desired))
163 164
def _AsciiHexEncode(input):
    """Encodes input using ASCII-Hex coding.

    This is a verbose encoding used for binary data within
    a PDF file.  One byte binary becomes two bytes of ASCII.
    Helper function used by images.

    Returns the lower case hex digits followed by the '>' terminator.
    """
    hex_pairs = ['%02x' % ord(ch) for ch in input]
    hex_pairs.append('>')
    return ''.join(hex_pairs)
176 177
def _AsciiHexDecode(input):
    """Decodes input using ASCII-Hex coding.

    Not used except to provide a test of the inverse function.

    All whitespace is ignored.  An AssertionError is raised when the
    '>' terminator is missing or the number of hex digits is odd.
    """
    #strip out all whitespace
    digits = ''.join(input.split())
    assert digits[-1] == '>', 'Invalid terminator for Ascii Hex Stream'
    digits = digits[:-1]    #chop off terminator
    assert len(digits) % 2 == 0, 'Ascii Hex stream has odd number of bytes'

    decoded = []
    for pos in range(0, len(digits), 2):
        # every pair of hex digits is one output character
        decoded.append(chr(int(digits[pos:pos+2], 16)))
    return ''.join(decoded)
if 1: # for testing always define this
    def _AsciiBase85EncodePYTHON(input):
        """Encodes input using ASCII-Base85 coding.

        This is a compact encoding used for binary data within
        a PDF file.  Four bytes of binary data become five bytes of
        ASCII.  This is the default method used for encoding images.

        A group of four zero bytes is written as the single character
        'z'.  A final group of 1-3 bytes is padded with zero bytes and
        only its first (size + 1) characters are written.  The result
        always ends with the '~>' terminator.
        """
        # special rules apply if not a multiple of four bytes.
        full_groups, tail_len = divmod(len(input), 4)
        tail_start = 4 * full_groups

        pieces = []
        emit = pieces.append

        # this loop is the time-critical part
        for pos in range(0, tail_start, 4):
            b1, b2, b3, b4 = [ord(ch) for ch in input[pos:pos+4]]
            value = ((b1 * 256 + b2) * 256 + b3) * 256 + b4

            if value == 0:
                #special case
                emit('z')
                continue

            #solve for five base-85 numbers
            rest, d5 = divmod(value, 85)
            rest, d4 = divmod(rest, 85)
            rest, d3 = divmod(rest, 85)
            d1, d2 = divmod(rest, 85)
            assert (((d1 * 85 + d2) * 85 + d3) * 85 + d4) * 85 + d5 == value, 'dodgy code!'
            emit(chr(d1+33) + chr(d2+33) + chr(d3+33) + chr(d4+33) + chr(d5+33))

        # the final partial group is handled separately because it happens
        # only once, and the 'z' shortcut does not apply to it
        if tail_len > 0:
            padded = input[tail_start:] + '\000' * (4 - tail_len)
            b1, b2, b3, b4 = [ord(ch) for ch in padded]
            value = ((b1 * 256 + b2) * 256 + b3) * 256 + b4

            rest, d5 = divmod(value, 85)
            rest, d4 = divmod(rest, 85)
            rest, d3 = divmod(rest, 85)
            d1, d2 = divmod(rest, 85)

            group = chr(d1+33) + chr(d2+33) + chr(d3+33) + chr(d4+33) + chr(d5+33)
            #write out most of the bytes.
            emit(group[:tail_len + 1])

        #terminator code for ascii 85
        emit('~>')
        return ''.join(pieces)

    def _AsciiBase85DecodePYTHON(input):
        """Decodes input using ASCII-Base85 coding.

        This is not used - Acrobat Reader decodes for you
        - but a round trip is essential for testing.

        Whitespace is ignored.  An AssertionError is raised if the
        data does not end with the '~>' terminator.
        """
        #strip all whitespace
        text = ''.join(input.split())
        #check end
        assert text[-2:] == '~>', 'Invalid terminator for Ascii Base 85 Stream'
        # chop off the terminator, then expand every 'z' into the five
        # characters it abbreviates
        text = text[:-2].replace('z', '!!!!!')

        # special rules apply if not a multiple of five bytes.
        full_groups, tail_len = divmod(len(text), 5)
        tail_start = 5 * full_groups

        pieces = []
        emit = pieces.append

        for pos in range(0, tail_start, 5):
            c1, c2, c3, c4, c5 = [ord(ch) - 33 for ch in text[pos:pos+5]]
            value = ((85**4) * c1) + ((85**3) * c2) + ((85**2) * c3) + (85*c4) + c5

            rest, b4 = divmod(value, 256)
            rest, b3 = divmod(rest, 256)
            b1, b2 = divmod(rest, 256)

            assert value == 16777216 * b1 + 65536 * b2 + 256 * b3 + b4, 'dodgy code!'
            emit(chr(b1) + chr(b2) + chr(b3) + chr(b4))

        #decode however many bytes we have as usual
        if tail_len > 0:
            padded = text[tail_start:] + '!' * (5 - tail_len)
            c1, c2, c3, c4, c5 = [ord(ch) - 33 for ch in padded]
            # The encoder dropped the low-order characters of this group,
            # which rounds the number down; adding the largest value the
            # missing bytes could hold brings the leading bytes back.
            round_up = (0, 0, 0xFFFFFF, 0xFFFF, 0xFF)[tail_len]
            value = (((85*c1 + c2)*85 + c3)*85 + c4)*85 + (c5 + round_up)

            rest, b4 = divmod(value, 256)
            rest, b3 = divmod(rest, 256)
            b1, b2 = divmod(rest, 256)
            assert value == 16777216 * b1 + 65536 * b2 + 256 * b3 + b4, 'dodgy code!'

            # n leftover characters carry n-1 bytes; a single leftover
            # character carries none
            emit(''.join([chr(b) for b in (b1, b2, b3)[:tail_len - 1]]))

        return ''.join(pieces)
# Choose the implementations exported as _AsciiBase85Encode and
# _AsciiBase85Decode.  A version from the _rl_accel module is used when that
# module can be imported (presumably an optional compiled accelerator --
# TODO confirm); otherwise the pure Python functions defined above are used.
try:
    from _rl_accel import _AsciiBase85Encode    # builtin or on the path
except ImportError:
    try:
        from reportlab.lib._rl_accel import _AsciiBase85Encode  # where we think it should be
    except ImportError:
        _AsciiBase85Encode = _AsciiBase85EncodePYTHON

# The decoder is looked up separately, so it falls back on its own if only
# one of the two names can be imported.
try:
    from _rl_accel import _AsciiBase85Decode    # builtin or on the path
except ImportError:
    try:
        from reportlab.lib._rl_accel import _AsciiBase85Decode  # where we think it should be
    except ImportError:
        _AsciiBase85Decode = _AsciiBase85DecodePYTHON
def _wrap(input, columns=60):
    """Wraps input at a given column size by inserting LINEEND characters.

    input    the string to wrap.
    columns  maximum line length; must be at least 1.

    Returns the wrapped string; empty input gives an empty string.
    If the last line would consist of a single character, the last
    character of the line before it is moved down to join it.

    Raises ValueError if columns is less than 1.
    """
    if columns < 1:
        # a non-positive width can never consume the input
        raise ValueError('columns must be at least 1, got %r' % (columns,))

    output = [input[pos:pos+columns] for pos in range(0, len(input), columns)]

    #avoid HP printer problem
    # only possible when there is a previous line to borrow from; this
    # also keeps empty and one-character input from indexing past the
    # start of the list
    if len(output) > 1 and len(output[-1]) == 1:
        output[-2:] = [output[-2][:-1], output[-2][-1] + output[-1]]
    return LINEEND.join(output)
368 369 370 ######################################################################### 371 # 372 # JPEG processing code - contributed by Eric Johnson 373 # 374 ######################################################################### 375 376 # Read data from the JPEG file. We should probably be using PIL to 377 # get this information for us -- but this way is more fun! 378 # Returns (width, height, color components) as a triple 379 # This is based on Thomas Merz's code from GhostScript (viewjpeg.ps)
def readJPEGInfo(image):
    """Read width, height and number of components from open JPEG file.

    image is an open file-like object supporting read() and relative
    seek().  Marker segments are scanned until a supported start of
    frame marker is found; the result is the tuple
    (width, height, number of colour components).

    Raises PDFError if the frame does not use 8 bits per component or
    an unsupported marker is met.
    """
    import struct
    from pdfdoc import PDFError

    #Acceptable JPEG Markers:
    #  SOF0=baseline, SOF1=extended sequential or SOF2=progressive
    frame_markers = (0xC0, 0xC1, 0xC2)

    #JPEG markers without additional parameters
    bare_markers = (0xD0, 0xD1, 0xD2, 0xD3, 0xD4, 0xD5, 0xD6, 0xD7, 0xD8, 0x01)

    #Unsupported JPEG Markers
    rejected_markers = (0xC3, 0xC5, 0xC6, 0xC7, 0xC8, 0xC9, 0xCA, 0xCB,
                        0xCD, 0xCE, 0xCF)

    def read_byte():
        # next single byte as an integer
        return struct.unpack('B', image.read(1))[0]

    def read_word():
        # next two bytes as a big-endian 16 bit integer
        return struct.unpack('>H', image.read(2))[0]

    #read JPEG marker segments until we find SOFn marker
    while 1:
        if read_byte() != 0xFF:
            # not the start of a marker, keep scanning
            continue

        marker = read_byte()
        if marker in frame_markers:
            image.seek(2, 1)            #skip segment length
            if read_byte() != 8:        #data precision
                raise PDFError('JPEG must have 8 bits per component')
            height = read_word()
            width = read_word()
            color = read_byte()
            return width, height, color

        if marker in rejected_markers:
            raise PDFError('JPEG Unsupported JPEG marker: %0.2x' % marker)

        if marker not in bare_markers:
            #skip segments with parameters
            #the length field counts its own two bytes
            segment_length = read_word()
            image.seek(segment_length - 2, 1)
425
class _fusc:
    """Reversible scrambling of strings with a repeating key.

    encrypt() XORs the text with the key, ASCII-Base85 encodes the
    result and rotates the encoded string by n places; decrypt()
    performs the inverse steps in reverse order.
    """

    def __init__(self,k, n):
        """k is the non empty key string; n is the rotation (7 is used when int(n) is 0)."""
        assert k, 'Argument k should be a non empty string'
        self._k = k
        self._klen = len(k)
        self._n = int(n) or 7

    def encrypt(self,s):
        """Return the scrambled form of the string s."""
        mixed = self.__fusc([ord(ch) for ch in s])
        encoded = _AsciiBase85Encode(''.join([chr(v) for v in mixed]))
        return self.__rotate(encoded, self._n)

    def decrypt(self,s):
        """Return the original string for a value produced by encrypt()."""
        decoded = _AsciiBase85Decode(self.__rotate(s, -self._n))
        mixed = self.__fusc([ord(ch) for ch in decoded])
        return ''.join([chr(v) for v in mixed])

    def __rotate(self,s,n):
        """Rotate s to the right by n places (to the left for negative n)."""
        size = len(s)
        if n < 0:
            shift = size + n
        else:
            shift = n
        shift %= size
        if shift == 0:
            return s
        return s[-shift:] + s[:size - shift]

    def __fusc(self,s):
        """XOR the sequence of integers s with the key, repeated to the same length."""
        slen = len(s)
        # enough repetitions of the key to cover every item of s
        keystream = (slen // self._klen + 1) * self._k
        return [value ^ ord(keystream[i]) for i, value in enumerate(s)]
449