;;;; ;;;; Java 1.0 classfile parser/unparser. ;;;; ;;;; This is just the beginning; I'm letting it out now because ;;;; someone might have an actual use for it even as it is. ;;;; ;;;; Copyright 1997, 2002 by Darius Bacon ;;;; http://wry.me/~darius ;;;; ;;;; ;;;; This takes a classfile and parses its contents into a Scheme ;;;; datastructure with all the parts directly accessible; or conversely, ;;;; encodes such a structure as a classfile. ;;;; ;;;; The layouts are transcribed directly from the spec. We have a handler ;;;; for each primitive or compound datatype used; to parse or unparse a ;;;; structure, we interpret its layout using the handlers. ;;;; ;;;; 1 July 2002 update: ;;;; - added parse-attributes ;;;; - fixed a bug in constant-pool parsing ;;;; ;Types: ;u1 ;u2 ;u4 ;cx = u2 index into constant pool, with optional required-type list ;cp-array count-type element-type ;array count-type element-type ;record (name type)* ;union tag-type (name type)* ;also need: ; - converters for floats and doubles ; - new type: list of Type within byte array ; (or else just read it `from outside') ; - new type: maybe Type, else byte array ; - use the cx stuff in checking attributes, somehow ; consider attaching type data to all objects, at least logically... ; The readers and writers still have enough in common that ; further factoring may be worthwhile. ;; Unix specific: (define eof-object (call-with-input-file "/dev/null" read-char)) ;; UTS specific: ;(define error @error) (define (char->ascii c) (char->integer c)) (define (ascii->char a) (integer->char a)) ;; Or Scheme48/scsh: ; ,open ascii ;;; ;;; Utilities ;;; ;; Functional composition. (define (compose f g) (lambda (x) (f (g x)))) ;; Call (PROC k VEC[k]) for each k in VEC's domain in increasing order. (define (vector-for-each proc vec) (let loop ((i 0)) (cond ((< i (vector-length vec)) (proc i (vector-ref vec i)) (loop (+ i 1)))))) ;;; ;;; Primitive datatype converters. ;;; (define word32 (expt 2 32)) (define (u4->int u4) (if (< u4 (quotient word32 2)) u4 (- (- word32 u4)))) (define (int->u4 n) (if (<= 0 n) n (- word32 (- n)))) (define word64 (expt 2 64)) (define (u8-record->long u8-record) (let ((hi (record-ref u8-record 'high_bytes)) (lo (record-ref u8-record 'low_bytes))) (let ((u8 (+ (* word32 hi) lo))) (if (< u8 (quotient word64 2)) u8 (- (- word64 u8)))))) (define (long->u8-record n) (let ((u8 (if (<= 0 n) n (- word64 (- n))))) (let ((hi (quotient u8 word32)) (lo (remainder u8 word32))) (make-record 'high_bytes hi 'low_bytes lo)))) ;FIXME: doesn't even pretend to convert to a real number (define (u8-record->double u8-record) (let ((hi (record-ref u8-record 'high_bytes)) (lo (record-ref u8-record 'low_bytes))) (let ((u8 (+ (* word32 hi) lo))) (if (< u8 (quotient word64 2)) u8 (- (- word64 u8)))))) ;FIXME (define (double->u8-record n) (let ((u8 (if (<= 0 n) n (- word64 (- n))))) (let ((hi (quotient u8 word32)) (lo (remainder u8 word32))) (make-record 'high_bytes hi 'low_bytes lo)))) (define (utf8->string utf8) ; Actually this only works with all-ascii utf8's... (list->string (map ascii->char (vector->list utf8)))) (define (string->utf8 string) ; Ditto (list->vector (map char->ascii (string->list string)))) ;;; ;;; Fundamental I/O. ;;; ;; An input is a procedure of no arguments returning the next unsigned ;; byte, or eof. (define (port-input port) (lambda () (let ((c (read-char port))) (if (eof-object? c) c (char->ascii c))))) ;; An output is a procedure consuming an unsigned byte. (define (port-output port) (lambda (u1) (write-char (ascii->char u1) port))) (define (array-input byte-vector) (let ((i 0)) (lambda () (if (= i (vector-length byte-vector)) eof-object (let ((b (vector-ref byte-vector i))) (set! i (+ i 1)) b))))) ;;; ;;; Behavior for each type. ;;; ;;; Each datatype foo needs methods walk-foo, foo-reader, and foo-writer. ;;; In pseudo-ML notation, the types are: ;;; walk-foo: (type->type) * type -> type ;;; foo-reader: . args -> input -> parsed ;;; foo-writer: . args -> parsed * output -> unit ;;; ;;; (walk-foo fn type) replaces each type-slot T in TYPE with (FN T). ;;; (Nondestructively.) A type-slot of foo is a field of foo that is ;;; itself a type. ;;; ;;; foo-reader and foo-writer get called with an arg list consisting of ;;; the slots of a foo type, except that type-slots are replaced with ;;; reader/writer procedures, respectively. E.g. in foo-reader, a ;;; type-slot is replaced with an input->parsed procedure that builds ;;; an object of that type. ;;; (define (walk-atom fn type) type) ;; u1 (define walk-u1 walk-atom) (define (read-u1) (lambda (input) (input))) (define (write-u1) (lambda (u1 output) (output u1))) ;; u2 (define walk-u2 walk-atom) (define (read-u2) (lambda (input) (let* ((b1 (input)) (b0 (input))) (+ (* 256 b1) b0)))) (define (write-u2) (lambda (u2 output) (let ((b0 (remainder u2 256)) (b1 (quotient u2 256))) (output b1) (output b0)))) ;; u4 (define walk-u4 walk-atom) (define (read-u4) (lambda (input) (let* ((b3 (input)) (b2 (input)) (b1 (input)) (b0 (input))) (+ (* 256 (+ (* 256 (+ (* 256 b3) b2)) b1)) b0)))) (define (write-u4) (lambda (u4 output) (let* ((b0 (remainder u4 256)) (u3 (quotient u4 256)) (b1 (remainder u3 256)) (u2 (quotient u3 256)) (b2 (remainder u2 256)) (b3 (quotient u2 256))) (output b3) (output b2) (output b1) (output b0)))) ;; cx ;; Same as u2 index into the constant pool. ;; Later we'll probably support type restrictions. (define walk-cx walk-atom) (define (read-cx . args) (read-u2)) (define (write-cx . args) (write-u2)) ;; array (define (print x) (write x) (newline)) (define (read-vec read-count read-element input) (let loop ((count (read-count input))) (if (= 0 count) '() (let ((element (read-element input))) (cons element (loop (- count 1))))))) (define (write-vec write-count write-element array output) (write-count (vector-length array) output) (vector-for-each (lambda (index element) (write-element index element output)) array)) (define (walk-array fn array-type) `(array ,@(map fn (cdr array-type)))) (define (array-reader read-count read-element) (lambda (input) (list->vector (read-vec read-count read-element input)))) (define (array-writer write-count write-element) (lambda (array output) (write-vec write-count (lambda (index element output) (write-element element output)) array output))) ;; cp-array ;; ;; This is like `array', but with annoying little complications. The ;; name is for `constant pool array' since that's the only place these ;; complications turn up. They are: ;; - There is a 0th slot with no element in it. ;; - Certain elements take up 2 consecutive slots in the array. ;; - The count field gives the number of slots (not elements). ;; ;; The internal representation is as a vector with a place for each slot. ;; The 0th place has the special symbol *cp*. ;; The 2nd slot of a 2-slot element has the special value double-slot-tag. ;; In parsing, the 2-slot elements are noticed by a special-case hack. (define double-slot-tag (list 'two-slots)) (define (double-slot-value? element) (or (eqv? (union-tag element) Long) (eqv? (union-tag element) Double))) (define (walk-cp-array fn array-type) `(cp-array ,@(map fn (cdr array-type)))) (define (cp-array-reader read-count read-element) (lambda (input) (list->vector (cons '*cp* (let loop ((count (- (read-count input) 1))) (if (= 0 count) '() (let ((element (read-element input))) (if (double-slot-value? element) (cons (cadr element) (cons double-slot-tag (loop (- count 2)))) (cons element (loop (- count 1))))))))))) (define (cp-array-writer write-count write-element) (lambda (cp-array output) (write-count (vector-length cp-array) output) (vector-for-each (lambda (index element) (if (not (or (= index 0) (eq? element double-slot-tag))) (write-element element output))) cp-array))) ;; record (define (walk-record fn record-type) `(record ,@(map (lambda (field-spec) `(,(car field-spec) ,(fn (cadr field-spec)))) (cdr record-type)))) (define (record-reader . fields) (let ((names (map car fields)) (readers (map cadr fields))) (lambda (input) (let loop ((names names) (readers readers)) (if (null? names) '() (let ((element ((car readers) input))) (cons (list (car names) element) (loop (cdr names) (cdr readers))))))))) (define (record-writer . fields) (let ((names (map car fields)) (writers (map cadr fields))) (lambda (record output) (let loop ((names names) (writers writers)) (cond ((not (null? names)) ((car writers) (record-ref record (car names)) output) (loop (cdr names) (cdr writers)))))))) ;; Return the contents of the field named FIELD-NAME in RECORD. (define (record-ref record field-name) (cond ((assq field-name record) => cadr) (else (error "Bug: no such field" (list field-name record))))) ;; Return a new record from field-name/value pairs in property-list form. (define (make-record . pairs) (let loop ((pairs pairs)) (if (null? pairs) '() (cons (cons (car pairs) (cadr pairs)) (loop (cddr pairs)))))) ;; tagged union (define (walk-union fn union-type) `(union ,(fn (cadr union-type)) ,@(map (lambda (pair) `(,(car pair) ,(fn `(record ,@(cdr pair))))) (cddr union-type)))) (define (union-reader read-tag . alternatives) (lambda (input) (let ((tag (read-tag input))) (cond ((assv tag alternatives) => (lambda (pair) (cons (standard-tag-unparser tag) ((cadr pair) input)))) (else (error "Unknown tag in union-reader" tag)))))) (define (union-writer write-tag . alternatives) (lambda (union output) (let ((tag (car union)) (value (cdr union))) (write-tag tag output) (cond ((assv (standard-tag-parser tag) alternatives) => (lambda (pair) ((cadr pair) value output))) (else (error "Unknown tag in union-writer" tag)))))) (define (standard-tag-unparser x) x) (define (standard-tag-parser x) x) (define union-tag car) (define union-value cdr) (define make-union cons) ;; Fields wrapped with a parser/unparser pair (define (walk-wrap fn wrap-type) `(wrap ,(cadr wrap-type) ,(caddr wrap-type) ,(fn (cadddr wrap-type)))) (define (wrap-reader wrap unwrap read-type) (compose wrap read-type)) (define (wrap-writer wrap unwrap write-type) (lambda (wrapped output) (write-type (unwrap wrapped) output))) ;;; ;;; Generic operations ;;; (define type-dispatch (let ((u1 (vector walk-u1 read-u1 write-u1)) (u2 (vector walk-u2 read-u2 write-u2)) (u4 (vector walk-u4 read-u4 write-u4)) (cx (vector walk-cx read-cx write-cx)) (record (vector walk-record record-reader record-writer)) (union (vector walk-union union-reader union-writer)) (array (vector walk-array array-reader array-writer)) (cp-array (vector walk-cp-array cp-array-reader cp-array-writer)) (wrap (vector walk-wrap wrap-reader wrap-writer))) (lambda (type) (case type ((u1) u1) ((u2) u2) ((u4) u4) ((cx) cx) (else (case (car type) ((cx) cx) ((record) record) ((union) union) ((array) array) ((cp-array) cp-array) ((wrap) wrap) (else (error "Bug: unknown type" type)))))))) (define (dispatch-to-walker type) (vector-ref (type-dispatch type) 0)) (define (dispatch-to-reader type) (vector-ref (type-dispatch type) 1)) (define (dispatch-to-writer type) (vector-ref (type-dispatch type) 2)) (define (type-arguments type) (if (pair? type) (cdr type) '())) (define (walk-type fn type) ((dispatch-to-walker type) fn type)) (define margin 0) (define (indent delta) (do ((i margin (- i 1))) ((= 0 i)) (display #\space)) (set! margin (+ margin delta))) (define (report tag reader) (lambda (input) (indent 1) (print `(reading ,tag)) (let ((x (reader input))) (indent -1) (write tag) (write '===>) (write x) (newline) x))) (define (report tag reader) reader) (define (type-reader type) (report type (apply (dispatch-to-reader type) (type-arguments (walk-type type-reader type))))) (define (type-writer type) (apply (dispatch-to-writer type) (type-arguments (walk-type type-writer type)))) (define (read-thing type port) ;better name, please... ((type-reader type) (port-input port))) (define (write-thing thing type port) ((type-writer type) thing (port-output port))) ;;; ;;; Classfile structures ;;; (define Class 7) (define Fieldref 9) (define Methodref 10) (define InterfaceMethodref 11) (define String 8) (define Integer 3) (define Float 4) (define Long 5) (define Double 6) (define NameAndType 12) (define Utf8 1) (define attribute_info `(record (attribute_name_index (cx ,Utf8)) (info (array u4 u1)))) (define field_info `(record (access_flags u2) ;see classfile.c (name_index (cx ,Utf8)) (descriptor_index cx) ;Utf8 field descriptor (attributes (array u2 ,attribute_info)))) (define method_info `(record (access_flags u2) ;see classfile.c (name_index (cx ,Utf8)) (descriptor_index cx) ;Utf8 method descriptor (attributes (array u2 ,attribute_info)))) (define cp_info `(union u1 (,Class (name_index cx)) ;Utf8 classname (,Fieldref (class_index (cx ,Class)) (name_and_type_index (cx ,NameAndType))) (,Methodref (class_index (cx ,Class)) (name_and_type_index (cx ,NameAndType))) (,InterfaceMethodref (class_index (cx ,Class)) (name_and_type_index (cx ,NameAndType))) (,String (string_index (cx ,Utf8))) (,Integer (int (wrap ,u4->int ,int->u4 u4))) (,Float (bytes u4)) (,Long (long (wrap ,u8-record->long ,long->u8-record (record (high_bytes u4) (low_bytes u4))))) (,Double (double (wrap ,u8-record->double ,double->u8-record (record (high_bytes u4) (low_bytes u4))))) (,NameAndType (name_index (cx ,Utf8)) (descriptor_index cx)) ;Utf8 field/method descriptor (,Utf8 (string (wrap ,utf8->string ,string->utf8 (array u2 u1)))))) (define ClassFile `(record (magic u4) (minor_version u2) (major_version u2) (constant_pool (cp-array u2 ,cp_info)) (access_flags u2) ;see classfile.c (this_class cx) ;Class (super_class cx) ;Class or 0 (interfaces (array u2 u2)) (fields (array u2 ,field_info)) (methods (array u2 ,method_info)) (attributes (array u2 ,attribute_info)))) ;;; ;;; Attributes ;;; (define attribute-registry '()) (define (define-attribute-layout name layout) (set! attribute-registry (cons (cons name layout) attribute-registry))) (define (lookup-attribute name) (cond ((assoc name attribute-registry) => cdr) (else #f))) ; need a way to define constraints on type pointed to by cx... (define-attribute-layout "SourceFile" `(record (sourcefile_index (cx ,Utf8)))) (define-attribute-layout "ConstantValue" `(record (constantvalue_index cx))) ;type Entry Type ;long Long ;float Float ;double Double ;int,short,char,byte,boolean ; Integer ;String String (define-attribute-layout "Code" `(record (max_stack u2) (max_locals u2) (code (array u4 u1)) (exception_table (array u2 (record (start_pc u2) (end_pc u2) (handler_pc u2) (catch_type cx)))) ;Class, or 0 (attributes (array u2 ,attribute_info)))) (define-attribute-layout "Exceptions" `(record (exception_index_table (array u2 (cx ,Class))))) (define-attribute-layout "LineNumberTable" `(record (line_number_table (array u2 (record (start_pc u2) (line_number u2)))))) (define-attribute-layout "LocalVariableTable" `(record (local_variable_table (array u2 (record (start_pc u2) (length u2) (name_index (cx ,Utf8)) (descriptor_index cx) ;Utf8 var descriptor (index u2)))))) (define (get-constant-pool classfile) (cadr (assq 'constant_pool classfile))) (define (parse-attributes classfile) (let ((constant-pool (get-constant-pool classfile))) (define (index->string index) (let ((u (vector-ref constant-pool index))) (if (not (= Utf8 (union-tag u))) (error "Bad index for attribute name")) (record-ref (union-value u) 'string))) (define (parse-attribute attr) (let ((name (index->string (record-ref attr 'attribute_name_index))) (info (record-ref attr 'info))) (list name (cond ((lookup-attribute name) => (lambda (type) (walk ((type-reader type) (array-input info))))) (else info))))) (define (walk obj) (cond ((attribute-record? obj) (parse-attribute obj)) ((pair? obj) (map walk obj)) ((vector? obj) (vector-map walk obj)) (else obj))) (walk classfile))) (define (attribute-record? obj) (and (list? obj) (= 2 (length obj)) (pair? (car obj)) (pair? (cadr obj)) (eq? (caar obj) 'attribute_name_index) (eq? (caadr obj) 'info))) (define (vector-map fn vec) (let ((result (make-vector (vector-length vec) #f))) (do ((i (- (vector-length vec) 1) (- i 1))) ((< i 0) result) (vector-set! result i (fn (vector-ref vec i)))))) ;;; ;;; Classfile operations ;;; (define (read-classfile filename) (call-with-input-file filename (lambda (port) (read-thing ClassFile port)))) (define (write-classfile class-record filename) (call-with-output-file filename (lambda (port) (write-thing class-record ClassFile port)))) (define (copy-classfile infile outfile) (write-classfile (read-classfile infile) outfile))