# Inspect OCaml values in GDB via Python scripting.
#
# http://ygrek.org.ua/p/code/mlvalues.py
# (c) 2011 ygrek
#
# Description
# -----------
#
# You will need GDB with python support (>=7.0?).
# This code reimplements Std.dump from extlib and is
# limited to information available in runtime representation
# of OCaml values. Not complete, doesn't handle cycles at all,
# tested only on x64.
#
# At GDB prompt input:
#    source mlvalues.py
# Then inspect any OCaml value:
#    ml_dump <value address or symbol>
# Or inspect the chunk of memory treating contents as values:
#    ml_dump <start address> <number of values>
# Show OCaml heap information:
#    ml_heap
#
# Changelog
# ---------
#
# 2012-04-11
#   Dump the array of OCaml values with ml_dump
#   Inspect closure blocks
#   Tweak formatting
#
# 2012-03-07
#   New command `ml_heap` - shows some general information about OCaml heap:
#     * GC parameters
#     * total heap size
#     * allocated chunks layout
#
# 2011-12-27
#   Show symbol and name for custom values
#   Catch gdb.MemoryError and continue printing
#   Correctly lookup types
#
# 2011-12-24
#   Initial


# This class represents gdb.Value as OCaml value.
# Analogue to stdlib Obj module.
#
# It probably contains more casts then strictly necessary,
# but after all who am I to understand python type system?
# Just a mere OCaml coder. And this is my first python program.
class OCamlValue:

  LAZY_TAG = 246
  CLOSURE_TAG = 247
  OBJECT_TAG = 248
  INFIX_TAG = 249
  FORWARD_TAG = 250
  NO_SCAN_TAG = 251
  ABSTRACT_TAG = 251
  STRING_TAG = 252
  DOUBLE_TAG = 253
  DOUBLE_ARRAY_TAG = 254
  CUSTOM_TAG = 255
  FINAL_TAG = 255
  INT_TAG = 1000
  OUT_OF_HEAP_TAG = 1001
  UNALIGNED_TAG = 1002

  def __init__(self,v):
    # do not lookup types at class level cause this script may be run before
    # the inferior image is loaded and gdb can't know sizeof(long) in advance
    self.intnat = gdb.lookup_type("long")
    self.t_charp = gdb.lookup_type("char").pointer()
    self.t_doublep = gdb.lookup_type("double").pointer()
    if type(v) == type(0):
      self.v = gdb.Value(v)
    else:
      self.v = v.cast(self.intnat)

  def is_int(self):
    return self.v & 1 != 0

  def is_block(self):
    return self.v & 1 == 0

  @staticmethod
  def of_int(x):
    return OCamlValue(gdb.Value((x<<1) + 1))

  @staticmethod
  def of_val(x):
    assert(x & 1 == 0)
    return OCamlValue(gdb.Value(x))

  @staticmethod
  def of_bool(x):
    return OCamlValue.of_int(x != 0)

  def int(self):
    assert self.is_int()
    return self.v >> 1

  def val(self):
    return self.v

  def string(self,enc='ascii'):
    assert self.tag() == OCamlValue.STRING_TAG
    # FIXME caml_string_length
    return self.v.reinterpret_cast(self.t_charp).string(enc)

  def float(self):
    assert self.tag() == OCamlValue.DOUBLE_TAG
    # FIXME test
    return self.v.reinterpret_cast(self.t_doublep).dereference()

  def __cmp__(self,other):
    if self.v == other.v:
      return 0
    else:
      if self.v > other.v:
        return 1
      else:
        return -1

  def __str__(self):
    if self.is_int():
      return ("%d" % self.int())
    else:
      return ("0x%X" % self.val())

  def __repr__(self):
    return "OCamlValue(%s)" % self.__str__()

  def hd(self):
    return (self.v - self.intnat.sizeof).reinterpret_cast(self.intnat.pointer()).dereference()

  def tag(self):
    if self.is_int():
      return OCamlValue.INT_TAG
    else:
      return self.hd() & 0xFF

  def unsafe_field(self,i):
    x = (self.v + i * self.intnat.sizeof).reinterpret_cast(self.intnat.pointer()).dereference()
    return OCamlValue(x)

  def field(self,i):
    assert self.is_block()
    assert self.tag () != OCamlValue.DOUBLE_ARRAY_TAG # FIXME not implemented
    n = self.size()
    if i < 0 or i >= n:
      raise IndexError("field %d size %d" % (i,n))
    return self.unsafe_field(i)
    #t = self.intnat.array(n).pointer()
    #return OCamlValue(self.v.reinterpret_cast(t).dereference()[i])

  def fields(self):
    assert self.is_block()
    assert self.tag () != OCamlValue.DOUBLE_ARRAY_TAG # FIXME not implemented
    a = []
    for i in range(self.size()):
      a.append(self.unsafe_field(i))
    return a

  def size(self):
    assert self.is_block()
    return self.hd() >> 10

  def bsize(self):
    return self.size() * self.intnat.sizeof

  def is_list(self):
    if self.is_int():
      return self.int() == 0
    else:
      return self.size() == 2 and self.tag() == 0 and self.field(1).is_list()

  def get_list(self):
    a = []
    l = self
    while l.is_block():
      a.append(l.field(0))
      l = l.field(1)
    return a

  def resolve(self):
    symbol = gdb.execute('info symbol ' + str(self.val()),False,True).split(' ',1)[0]
    if symbol == "No": # FIXME "No symbol matches"
      return "0x%x" % self.val()
    else:
      return "%s" % symbol

  def show_opaque(self,s):
    print "<%s at 0x%x>" % (s,self.val()),

  def show_all(self,seq,delim,raw=False):
    for i, x in enumerate(seq):
      if i:
        print delim,
      if raw:
        print x.resolve(),
      else:
        x.show()

  def show(self):
    try:
      if self.is_int():
        print "%d" % self.int(),
      elif self.is_list():
        print "[",
        self.show_all(self.get_list(), ';')
        print "]",
      else:
        t = self.tag()
        if t == 0:
          print "(",
          self.show_all(self.fields(), ',')
          print ")",
        elif t == OCamlValue.LAZY_TAG:
          self.show_opaque("lazy")
        elif t == OCamlValue.CLOSURE_TAG:
          print "Closure(",
          self.show_all(self.fields(), ',', raw=True)
          print ")",
        elif t == OCamlValue.OBJECT_TAG:
#	| x when x = Obj.object_tag ->
#		let fields = get_fields [] s in
#		let clasz, id, slots =
#			match fields with
#			| h::h'::t -> h, h', t 
#			| _ -> assert false
#		in
#		(* No information on decoding the class (first field).  So just print
#		* out the ID and the slots. *)
#		"Object #" ^ dump id ^ " (" ^ String.concat ", " (List.map dump slots) ^ ")"
# FIXME todo
          self.show_opaque("object")
        elif t == OCamlValue.INFIX_TAG:
          self.show_opaque("infix")
        elif t == OCamlValue.FORWARD_TAG:
          self.show_opaque("forward")
        elif t < OCamlValue.NO_SCAN_TAG:
          print "Tag%d(" % t,
          self.show_all(self.fields(), ',')
          print ")",
        elif t == OCamlValue.STRING_TAG:
          print self.string().__repr__(),
        elif t == OCamlValue.DOUBLE_TAG:
          print "%f" % self.float(),
        elif t == OCamlValue.ABSTRACT_TAG:
          self.show_opaque("abstract")
        elif t == OCamlValue.CUSTOM_TAG:
          # FIXME better handle builtin caml custom values : int32, int64, etc
          try:
            sym = self.field(0).resolve()
          except:
            sym = '?'
          try:
            name = self.field(0).val().cast(self.t_charp.pointer()).dereference().string()
          except:
            name = ''
            raise
          self.show_opaque("custom " + sym + ' "' + name + '"')
        elif t == OCamlValue.FINAL_TAG:
          self.show_opaque("final")
        elif t == OCamlValue.DOUBLE_ARRAY_TAG:
          print "<float array>",
#        return "[|%s|]" % "; ".join([x.dump() for x in self.fields()])
        else:
          self.show_opaque("unknown tag %d size %d" % (t,self.size()))
    except gdb.MemoryError as exn:
      print '<<gdb.MemoryError : %s>>' % exn,

#  nil = OCamlValue.of_int(0)
#  true = OCamlValue.of_int(1)
#  false = OCamlValue.of_int(0)

class DumpOCamlValue(gdb.Command):
  """Dump OCaml value: ml_dump <value>
     Dump the array of OCaml values: ml_dump <addr> <N>
 
Recursively dump runtime representation of OCaml value """

  def __init__(self):
    gdb.Command.__init__(self, "ml_dump", gdb.COMMAND_DATA, gdb.COMPLETE_SYMBOL, True)

  def invoke(self, arg, from_tty):
    arg_list = gdb.string_to_argv(arg)
    if len(arg_list) < 1:
      print "usage: ml_dump value"
      return
    size_t = gdb.lookup_type("size_t")
    addr = gdb.parse_and_eval(arg_list[0]).cast(size_t.pointer())
    if len(arg_list) > 1:
      for i in range(int(arg_list[1])):
        print "0x%x:" % (addr + i).cast(size_t),
        OCamlValue((addr + i).dereference()).show()
        print ""
    else:
      OCamlValue(addr).show()
      print ""

DumpOCamlValue()

class ShowOCamlHeap(gdb.Command):
  """Show some facts about OCaml heap: ml_heap """

  def __init__(self):
    gdb.Command.__init__(self, "ml_heap", gdb.COMMAND_NONE, gdb.COMPLETE_NONE, False)

  def e(self,x,t="int"):
    return gdb.parse_and_eval("*(("+str(t)+"*)&"+x+")")

  def malloced_size(self,x):
    size_t = gdb.lookup_type("size_t")
    # see caml_aligned_malloc, FIXME Page_size = 4K assumption
    return x + 4*size_t.sizeof + 4*1024

  def invoke(self, arg, from_tty):
    size_t = gdb.lookup_type("size_t")

    print "     major heap size =", self.e("caml_stat_heap_size"), "bytes"
    print " major heap top size =", self.e("caml_stat_top_heap_size"), "bytes"
    print "   total heap chunks =", self.e("caml_stat_heap_chunks")
    print "         gray values =", gdb.parse_and_eval("gray_vals_size") * size_t.sizeof, "bytes"
    print "extra heap resources =", self.e("caml_extra_heap_resources","double")
    print
    print "minor heap :"
    minor_size = self.e("caml_minor_heap_size")
    minor_base = self.e("caml_young_base","size_t")
    print "0x%x %d bytes (0x%x-0x%x)" % \
      (self.e("caml_young_start","size_t"), minor_size, minor_base, minor_base + self.malloced_size(minor_size))
    print
    print "major heap :"
    # the following casting is needed, otherwise gdb may sign-extend values without debug info
    v = gdb.parse_and_eval("*((size_t*)&caml_heap_start)")
    i = 0
    while v != 0:
      p = gdb.Value(v - 4 * size_t.sizeof)
# typedef struct {
#   void *block;           /* address of the malloced block this chunk live in */
#   asize_t alloc;         /* in bytes, used for compaction */
#   asize_t size;          /* in bytes */
#   char *next;
# } heap_chunk_head;
      p = p.cast(size_t.pointer())
      block = p.dereference()
      size = (p + 2).dereference()
      print "%2d) chunk 0x%x %d bytes (0x%x-0x%x)" % (i, v, size, block, block + self.malloced_size(size))
      i = i + 1
      v = (p + 3).dereference()

ShowOCamlHeap()