Update of /project/movitz/cvsroot/movitz In directory clnet:/tmp/cvs-serv12893
Modified Files: asm-x86.lisp Log Message: Starting work on disassembler.
--- /project/movitz/cvsroot/movitz/asm-x86.lisp 2008/02/09 09:50:48 1.19 +++ /project/movitz/cvsroot/movitz/asm-x86.lisp 2008/02/13 21:46:51 1.20 @@ -6,7 +6,7 @@ ;;;; Author: Frode Vatvedt Fjeld frodef@acm.org ;;;; Distribution: See the accompanying file COPYING. ;;;; -;;;; $Id: asm-x86.lisp,v 1.19 2008/02/09 09:50:48 ffjeld Exp $ +;;;; $Id: asm-x86.lisp,v 1.20 2008/02/13 21:46:51 ffjeld Exp $ ;;;; ;;;;------------------------------------------------------------------
@@ -210,70 +210,161 @@ (length code))) code))))))
- -(defmacro define-operator (operator lambda-list &body body) +(defmacro define-operator (operator operator-mode lambda-list &body body) (check-type operator keyword) - (let ((defun-name (intern (format nil "~A-~A" 'instruction-encoder operator)))) - `(progn - (defun ,defun-name (operator legacy-prefixes ,@lambda-list) - (declare (ignorable operator legacy-prefixes)) - (let ((operator-mode nil) - (default-rex nil)) - (declare (ignorable operator-mode default-rex)) - (block operator - ,@body - (values nil 'fail)))) - (setf (gethash ',operator *instruction-encoders*) - ',defun-name) - ',operator))) + (labels ((find-forms (body) + (cond + ((atom body) + nil) + ((member (car body) '(reg-modrm)) + (list body)) + (t (mapcan #'find-forms body))))) + (let ((defun-name (intern (format nil "~A-~A" 'instruction-encoder operator)))) + `(progn + (defun ,defun-name (operator legacy-prefixes ,@lambda-list) + (declare (ignorable operator legacy-prefixes)) + (let ((operator-mode ',operator-mode) + (default-rex nil)) + (declare (ignorable operator-mode default-rex)) + (macrolet ((disassembler (&body body) + (declare (ignore body))) + (assembler (&body body) + `(progn ,@body))) + (block operator + ,@body + (values nil 'fail))))) + (setf (gethash ',operator *instruction-encoders*) + ',defun-name) + (macrolet ((disassembler (&body body) + `(progn ,@body)) + (assembler (&body body) + (declare (ignore body)))) + (let ((operator ',operator) + (operator-mode ',operator-mode)) + ,@(find-forms body))) + ',operator)))) + +(defmacro define-operator/none (name lambda-list &body body) + `(define-operator ,name nil ,lambda-list ,@body)) + +(deftype list-of (&rest elements) + (labels ((make-list-of (elements) + (if (null elements) + 'null + `(cons ,(car elements) + ,(make-list-of (cdr elements)))))) + (make-list-of elements))) + +(defparameter *opcode-disassemblers-16* + (make-array 256 :initial-element nil)) + +(defparameter *opcode-disassemblers-32* + (make-array 256 :initial-element nil)) + +(defparameter *opcode-disassemblers-64* + (make-array 256 :initial-element nil)) + +(deftype disassembly-decoder () + '(list-of keyword (or keyword nil) symbol)) + +(defun (setf opcode-disassembler) (decoder opcode operator-mode) + (check-type decoder disassembly-decoder) + (labels ((set-it (table pos) + (check-type pos (integer 0 *)) + (check-type table (simple-vector 256)) + (let ((bit-pos (* 8 (1- (ceiling (integer-length pos) 8))))) + (if (not (plusp bit-pos)) + (progn + (unless (or (eq nil decoder) + (eq nil (svref table pos)) + (equal decoder (svref table pos))) + (warn "Redefining disassembler for opcode #x~X from ~{~S ~}to ~{~S~^ ~}." + opcode (svref table pos) decoder)) + (setf (svref table pos) decoder)) + (set-it (or (svref table (ldb (byte 8 bit-pos) pos)) + (setf (svref table (ldb (byte 8 bit-pos) pos)) + (make-array 256 :initial-element nil))) + (ldb (byte bit-pos 0) pos)))))) + (ecase operator-mode + (:16-bit + (set-it *opcode-disassemblers-16* opcode)) + (:32-bit + (set-it *opcode-disassemblers-32* opcode)) + (:64-bit + (set-it *opcode-disassemblers-64* opcode)) + (:8-bit + (set-it *opcode-disassemblers-16* opcode) + (set-it *opcode-disassemblers-32* opcode) + (set-it *opcode-disassemblers-64* opcode))))) + +(defun disassemble-code (code) + (labels ((lookup-decoder (table opcode) + (let* ((datum (pop-code code)) + (opcode (logior (ash opcode 8) + datum)) + (decoder (svref table datum))) + (typecase decoder + ((simple-vector 256) + (lookup-decoder decoder opcode)) + ((list-of keyword (or keyword null) symbol) + (values decoder + opcode)) + (t (error "No disassembler registered for opcode #x~X." opcode)))))) + (destructuring-bind (operator operator-mode operand-decoder) + (lookup-decoder (ecase *cpu-mode* + (:16-bit *opcode-disassemblers-16*) + (:32-bit *opcode-disassemblers-32*) + (:64-bit *opcode-disassemblers-64*)) + 0) + (values (list* operator (code-call (funcall operand-decoder code operator-mode) code)) + code)))) + +(defmacro define-disassembler (opcode operands operator-mode) + `(disassembler + (setf (opcode-disassembler ,opcode ,operator-mode) (list operator ,operator-mode ',operands))))
(defmacro define-operator/8 (operator lambda-list &body body) - `(define-operator ,operator ,lambda-list - (let ((operator-mode :8-bit) - (default-rex nil)) - (declare (ignorable operator-mode default-rex)) + `(define-operator ,operator :8-bit ,lambda-list + (let ((default-rex nil)) + (declare (ignorable default-rex)) (macrolet ((yield (&rest args) `(return-from operator (encode (encoded-values :operand-size operator-mode ,@args))))) ,@body))))
(defmacro define-operator/16 (operator lambda-list &body body) - `(define-operator ,operator ,lambda-list - (let ((operator-mode :16-bit) - (default-rex nil)) - (declare (ignorable operator-mode default-rex)) + `(define-operator ,operator :16-bit ,lambda-list + (let ((default-rex nil)) + (declare (ignorable default-rex)) (macrolet ((yield (&rest args) `(return-from operator (encode (encoded-values :operand-size operator-mode ,@args))))) ,@body))))
(defmacro define-operator/32 (operator lambda-list &body body) - `(define-operator ,operator ,lambda-list - (let ((operator-mode :32-bit) - (default-rex nil)) - (declare (ignorable operator-mode default-rex)) + `(define-operator ,operator :32-bit ,lambda-list + (let ((default-rex nil)) + (declare (ignorable default-rex)) (macrolet ((yield (&rest args) `(return-from operator (encode (encoded-values :operand-size operator-mode ,@args))))) ,@body))))
(defmacro define-operator/64 (operator lambda-list &body body) - `(define-operator ,operator ,lambda-list - (let ((operator-mode :64-bit) - (default-rex '(:rex.w))) - (declare (ignorable operator-mode default-rex)) + `(define-operator ,operator :64-bit ,lambda-list + (let ((default-rex '(:rex.w))) + (declare (ignorable default-rex)) (macrolet ((yield (&rest args) `(return-from operator (encode (encoded-values :operand-size operator-mode ,@args))))) ,@body))))
(defmacro define-operator/64* (operator lambda-list &body body) - `(define-operator ,operator ,lambda-list - (let ((operator-mode :64-bit) - (default-rex (case *cpu-mode* + `(define-operator ,operator :64-bit ,lambda-list + (let ((default-rex (case *cpu-mode* (:64-bit nil) (t '(:rex.w))))) - (declare (ignorable operator-mode)) + (declare (ignorable default-rex)) ,@body)))
(defmacro define-operator* ((&key |16| |32| |64| dispatch) args &body body) @@ -377,18 +468,20 @@ nil (or reg-scale 1)))))
+(defun register-set-by-mode (mode) + (ecase mode + (:8-bit '(:al :cl :dl :bl :ah :ch :dh :bh)) + (:16-bit '(:ax :cx :dx :bx :sp :bp :si :di)) + (:32-bit '(:eax :ecx :edx :ebx :esp :ebp :esi :edi)) + (:64-bit '(:rax :rcx :rdx :rbx :rsp :rbp :rsi :rdi :r8 :r9 :r10 :r11 :r12 :13 :r14 :r15)) + (:mm '(:mm0 :mm1 :mm2 :mm3 :mm4 :mm5 :mm6 :mm7)) + (:xmm '(:xmm0 :xmm1 :xmm2 :xmm3 :xmm4 :xmm5 :xmm6 :xmm7))))
(defun encode-reg/mem (operand mode) (check-type mode (member nil :8-bit :16-bit :32-bit :64-bit :mm :xmm)) (if (and mode (keywordp operand)) (encoded-values :mod #b11 - :rm (or (position operand (ecase mode - (:8-bit '(:al :cl :dl :bl :ah :ch :dh :bh)) - (:16-bit '(:ax :cx :dx :bx :sp :bp :si :di)) - (:32-bit '(:eax :ecx :edx :ebx :esp :ebp :esi :edi)) - (:64-bit '(:rax :rcx :rdx :rbx :rsp :rbp :rsi :rdi :r8 :r9 :r10 :r11 :r12 :13 :r14 :r15)) - (:mm '(:mm0 :mm1 :mm2 :mm3 :mm4 :mm5 :mm6 :mm7)) - (:xmm '(:xmm0 :xmm1 :xmm2 :xmm3 :xmm4 :xmm5 :xmm6 :xmm7)))) + :rm (or (position operand (register-set-by-mode mode)) (error "Unknown ~(~D~) register ~S." mode operand))) (multiple-value-bind (reg offsets reg2 reg-scale) (parse-indirect-operand operand) @@ -633,9 +726,77 @@ :rm rm16 :address-size :16-bit :displacement (encode-integer offset '(xint 16)))) - (t (error "Huh? reg: ~S, reg2: ~S, scale: ~S, offset: ~S" reg reg2 reg-scale offset)) - ))))))))))) - + (t (error "Huh? reg: ~S, reg2: ~S, scale: ~S, offset: ~S" reg reg2 reg-scale offset))))))))))))) + +(defmacro pop-code (code-place &optional context) + `(let ((x (pop ,code-place))) + (check-type x (unsigned-byte 8) ,(format nil "an octet (context: ~A)" context)) + x)) + +(defmacro code-call (form &optional (code-place (cadr form))) + `(multiple-value-bind (value new-code) + ,form + (setf ,code-place new-code) + value)) + +(defun decode-integer (code type) + "Decode an integer of specified type." + (let* ((bit-size (cadr type)) + (unsigned-integer (loop for b from 0 below bit-size by 8 + sum (ash (pop-code code integer) b)))) + (values (if (or (not (member (car type) '(sint signed-byte))) + (not (logbitp (1- bit-size) unsigned-integer))) + unsigned-integer + (- (ldb (byte bit-size 0) + (1+ (lognot unsigned-integer))))) + code))) + +(defun decode-reg-modrm (code operator-mode) + (ecase *cpu-mode* + (:32-bit + (decode-reg-modrm-32 code operator-mode)))) + +(defun decode-reg-modrm-32 (code &optional (reg-mode :32-bit)) + "Return a list of the REG, and the MOD/RM operands." + (let* ((modrm (pop-code code mod/rm)) + (mod (ldb (byte 2 6) modrm)) + (reg (ldb (byte 3 3) modrm)) + (r/m (ldb (byte 3 0) modrm))) + (values (list (nth reg (register-set-by-mode reg-mode)) + (if (= mod #b11) + (nth r/m (register-set-by-mode reg-mode)) + (flet ((decode-sib () + (let* ((sib (pop-code code sib)) + (ss (ldb (byte 2 6) sib)) + (index (ldb (byte 3 3) sib)) + (base (ldb (byte 3 0) sib))) + (nconc (unless (= index #b100) + (let ((index-reg (nth index (register-set-by-mode :32-bit)))) + (if (= ss #b00) + (list index-reg) + (list (list index-reg (ash 2 ss)))))) + (if (/= base #b101) + (list (nth base (register-set-by-mode :32-bit))) + (ecase mod + (#b00 nil) + ((#b01 #b10) (list :ebp)))))))) + (ecase mod + (#b00 (case r/m + (#b100 (decode-sib)) + (#b101 (code-call (decode-integer code '(uint 32)))) + (t (list (nth r/m (register-set-by-mode :32-bit)))))) + (#b01 (case r/m + (#b100 (nconc(decode-sib) + (list (code-call (decode-integer code '(sint 8)))))) + (t (list (nth r/m (register-set-by-mode :32-bit)) + (code-call (decode-integer code '(sint 8))))))) + (#b10 (case r/m + (#b100 (nconc (decode-sib) + (list (code-call (decode-integer code '(uint 32)))))) + (t (list (nth r/m (register-set-by-mode :32-bit)) + (code-call (decode-integer code '(uint 32))))))))))) + code))) +
(defmacro return-when (form) `(let ((x ,form)) @@ -738,7 +899,12 @@ (encode-reg/mem op-modrm (or reg/mem-mode operator-mode)))))))
(defmacro reg-modrm (op-reg op-modrm opcode &optional reg/mem-mode &rest extras) - `(return-when (encode-reg-modrm operator legacy-prefixes ,op-reg ,op-modrm ,opcode operator-mode default-rex ,reg/mem-mode ,@extras))) + `(progn + (assembler + (return-when (encode-reg-modrm operator legacy-prefixes ,op-reg ,op-modrm ,opcode + operator-mode default-rex ,reg/mem-mode ,@extras))) + (disassembler + (define-disassembler ,opcode decode-reg-modrm operator-mode))))
(defun encode-reg-cr (operator legacy-prefixes op-reg op-cr opcode operator-mode default-rex &rest extras) (let* ((reg-map (ecase operator-mode @@ -858,7 +1024,7 @@
;;;;;;;;;;; Pseudo-instructions
-(define-operator :% (op &rest form) +(define-operator/none :% (op &rest form) (case op (:bytes (return-from operator @@ -994,16 +1160,16 @@ (when (eq operator-mode *cpu-mode*) (modrm dest #xff 2)))
-(define-operator :call-segment (dest) +(define-operator/none :call-segment (dest) (modrm dest #xff 3))
;;;;;;;;;;; CLC, CLD, CLI, CLTS, CMC
-(define-operator :clc () (opcode #xf8)) -(define-operator :cld () (opcode #xfc)) -(define-operator :cli () (opcode #xfa)) -(define-operator :clts () (opcode #x0f06)) -(define-operator :cmc () (opcode #xf5)) +(define-operator/none :clc () (opcode #xf8)) +(define-operator/none :cld () (opcode #xfc)) +(define-operator/none :cli () (opcode #xfa)) +(define-operator/none :clts () (opcode #x0f06)) +(define-operator/none :cmc () (opcode #xf5))
;;;;;;;;;;; CMOVcc
@@ -1125,7 +1291,7 @@
;;;;;;;;;;; CPUID
-(define-operator :cpuid () +(define-operator/none :cpuid () (opcode* #x0fa2))
;;;;;;;;;;; CWD, CDQ @@ -1171,7 +1337,7 @@
;;;;;;;;;;; HLT
-(define-operator :halt () +(define-operator/none :halt () (opcode #xf4))
;;;;;;;;;;; IDIV @@ -1245,18 +1411,18 @@
;;;;;;;;;;; INT
-(define-operator :break () +(define-operator/none :break () (opcode #xcc))
-(define-operator :int (vector) +(define-operator/none :int (vector) (imm vector #xcd (uint 8)))
-(define-operator :into () +(define-operator/none :into () (opcode #xce))
;;;;;;;;;;; INVLPG
-(define-operator :invlpg (address)
[145 lines skipped]