Theory IRStepObj

section ‹Control-flow Semantics›

theory IRStepObj
  imports
    TreeToGraph
    Graph.Class
begin

subsection ‹Object Heap›

text ‹
The heap model we introduce maps field references to object instances to runtime values.
We use the H[f][p] heap representation. See @{text "\\cite{heap-reps-2011}"}.

We also introduce the DynamicHeap type which allocates new object references sequentially
storing the next free object reference as 'Free'.
›
(* TODO: Record volatile fields?  Include class name of field? *)
text_raw ‹\Snip{heapdef}%›
type_synonym ('a, 'b) Heap = "'a  'b  Value"
type_synonym Free = "nat"
type_synonym ('a, 'b) DynamicHeap = "('a, 'b) Heap × Free"

fun h_load_field :: "'a  'b  ('a, 'b) DynamicHeap  Value" where
  "h_load_field f r (h, n) = h f r"

fun h_store_field :: "'a  'b  Value  ('a, 'b) DynamicHeap  ('a, 'b) DynamicHeap" where
  "h_store_field f r v (h, n) = (h(f := ((h f)(r := v))), n)"

(* Alternatively store classname in ObjRef ? *)
fun h_new_inst :: "(string, objref) DynamicHeap  string  (string, objref) DynamicHeap × Value" where
  "h_new_inst (h, n) className = (h_store_field ''class'' (Some n) (ObjStr className) (h,n+1), (ObjRef (Some n)))"

type_synonym FieldRefHeap = "(string, objref) DynamicHeap"
text_raw ‹\EndSnip›

definition new_heap :: "('a, 'b) DynamicHeap" where
  "new_heap =  ((λf. λp. UndefVal), 0)"

subsection ‹Intraprocedural Semantics›

(* Yoinked from https://www.isa-afp.org/browser_info/Isabelle2012/HOL/List-Index/List_Index.html*)
fun find_index :: "'a  'a list  nat" where
  "find_index _ [] = 0" |
  "find_index v (x # xs) = (if (x=v) then 0 else find_index v xs + 1)"

inductive indexof :: "'a list  nat  'a  bool" where
  "find_index x xs = i  indexof xs i x"

lemma indexof_det:
  "indexof xs i x  indexof xs i' x  i = i'"
  apply (induction rule: indexof.induct)
  by (simp add: indexof.simps)

code_pred (modes: i ⇒ o ⇒ i ⇒ bool) indexof .

notation (latex output)
  indexof ("_!_ = _")

fun phi_list :: "IRGraph  ID  ID list" where
  "phi_list g n = 
    (filter (λx.(is_PhiNode (kind g x)))
      (sorted_list_of_set (usages g n)))"

(* TODO this produces two parse trees after importing Class *)

fun set_phis :: "ID list  Value list  MapState  MapState" where
  "set_phis [] [] m = m" |
  "set_phis (n # ns) (v # vs) m = (set_phis ns vs (m(n := v)))" |
  "set_phis [] (v # vs) m = m" |
  "set_phis (x # ns) [] m = m"

definition
  fun_add :: "('a  'b)  ('a  'b)  ('a  'b)" (infixl "++f" 100) where
  "f1 ++f f2 = (λx. case f2 x of None  f1 x | Some y  y)"

definition upds :: "('a  'b)  'a list  'b list  ('a  'b)" ("_/'(_ [→] _/')" 900) where
  "upds m ns vs = m ++f (map_of (rev (zip ns vs)))"

lemma fun_add_empty:
  "xs ++f (map_of []) = xs"
  unfolding fun_add_def by simp

lemma upds_inc:
  "m(a#as [→] b#bs) = (m(a:=b))(as[→]bs)"
  unfolding upds_def fun_add_def apply simp sorry

lemma upds_compose:
  "a ++f map_of (rev (zip (n # ns) (v # vs))) = a(n := v) ++f map_of (rev (zip ns vs))"
  using upds_inc
  by (metis upds_def)

lemma "set_phis ns vs = (λm. upds m ns vs)"
proof (induction rule: set_phis.induct)
  case (1 m)
  then show ?case unfolding set_phis.simps upds_def
    by (metis Nil_eq_zip_iff Nil_is_rev_conv fun_add_empty)
next
  case (2 n xs v vs m)
  then show ?case unfolding set_phis.simps upds_def
    by (metis upds_compose)
next
  case (3 v vs m)
  then show ?case
    by (metis fun_add_empty rev.simps(1) upds_def set_phis.simps(3) zip_Nil)
next
  case (4 x xs m)
  then show ?case
    by (metis Nil_eq_zip_iff fun_add_empty rev.simps(1) upds_def set_phis.simps(4))
qed

fun is_PhiKind :: "IRGraph  ID  bool" where
  "is_PhiKind g nid = is_PhiNode (kind g nid)"

definition filter_phis :: "IRGraph  ID  ID list" where
  "filter_phis g merge = (filter (is_PhiKind g) (sorted_list_of_set (usages g merge)))"

definition phi_inputs :: "IRGraph  ID list  nat  ID list" where
  "phi_inputs g phis i = (map (λn. (inputs_of (kind g n))!(i + 1)) phis)"


text ‹
Intraprocedural semantics are given as a small-step semantics.

Within the context of a graph, the configuration triple, (ID, MethodState, Heap),
is related to the subsequent configuration.
›

inductive step :: "IRGraph  Params  (ID × MapState × FieldRefHeap)  (ID × MapState × FieldRefHeap)  bool"
  ("_, _  _  _" 55) for g p where

(* TODO this produces two parse trees after importing Class *)
  SequentialNode:
  "is_sequential_node (kind g nid);
    nid' = (successors_of (kind g nid))!0 
     g, p  (nid, m, h)  (nid', m, h)" |

(* TODO condition before FixedGuard is IsNullNode, so FixedGuard only proceeds if this is *false*.
        If any input to a FixedGuard evaluates to True when it's safe to proceed, this 
        implementation won't work *)
  FixedGuardNode:
   "(kind g nid) = (FixedGuardNode cond before next);
     [g, m, p]  cond  val;

     ¬(val_to_bool val) 
      g, p  (nid, m, h)  (next, m, h)" |

   BytecodeExceptionNode:
  "(kind g nid) = (BytecodeExceptionNode args st nid');
    exceptionType = stp_type (stamp g nid);
    (h', ref) = h_new_inst h exceptionType;
    m' = m(nid := ref)
     g, p  (nid, m, h)  (nid', m', h')" |

  IfNode:
  "kind g nid = (IfNode cond tb fb);
    [g, m, p]  cond  val;
    nid' = (if val_to_bool val then tb else fb)
     g, p  (nid, m, h)  (nid', m, h)" |  

  EndNodes:
  "is_AbstractEndNode (kind g nid);
    merge = any_usage g nid;
    is_AbstractMergeNode (kind g merge);

    indexof (inputs_of (kind g merge)) i nid;
    phis = filter_phis g merge;
    inps = phi_inputs g phis i;
    [g, m, p]  inps [↦] vs;

    m' = (m(phis[→]vs))
     g, p  (nid, m, h)  (merge, m', h)" |

  NewArrayNode:
    "kind g nid = (NewArrayNode len st nid');
      [g, m, p]  len  length';

      arrayType = stp_type (stamp g nid);
      (h', ref) = h_new_inst h arrayType;
      ref = ObjRef refNo;
      h'' = h_store_field '''' refNo (intval_new_array length' arrayType) h';

      m' = m(nid := ref)
     g, p  (nid, m, h)  (nid', m', h'')" |

  ArrayLengthNode:
    "kind g nid = (ArrayLengthNode x nid');
      [g, m, p]  x  ObjRef ref;

      h_load_field '''' ref h = arrayVal;
      length' = array_length (arrayVal);

      m' = m(nid := length')
     g, p  (nid, m, h)  (nid', m', h)" |

  LoadIndexedNode:
    "kind g nid = (LoadIndexedNode index guard array nid');
      [g, m, p]  index  indexVal;
      [g, m, p]  array  ObjRef ref;

      h_load_field '''' ref h = arrayVal;
      loaded = intval_load_index arrayVal indexVal;

      m' = m(nid := loaded)
     g, p  (nid, m, h)  (nid', m', h)" |

  StoreIndexedNode:
    "kind g nid = (StoreIndexedNode check val st index guard array nid');
      [g, m, p]  index  indexVal;
      [g, m, p]  array  ObjRef ref;
      [g, m, p]  val  value;

      h_load_field '''' ref h = arrayVal;
      updated = intval_store_index arrayVal indexVal value;
      h' = h_store_field '''' ref updated h;
      m' =  m(nid := updated)
     g, p  (nid, m, h)  (nid', m', h')" |

  NewInstanceNode:
    "kind g nid = (NewInstanceNode nid cname obj nid');
      (h', ref) = h_new_inst h cname; 
      m' = m(nid := ref) 
     g, p  (nid, m, h)  (nid', m', h')" |

  LoadFieldNode:
    "kind g nid = (LoadFieldNode nid f (Some obj) nid');
      [g, m, p]  obj  ObjRef ref;
      m' = m(nid := h_load_field f ref h) 
     g, p  (nid, m, h)  (nid', m', h)" |

  SignedDivNode:
    "kind g nid = (SignedDivNode nid x y zero sb next);
      [g, m, p]  x  v1;
      [g, m, p]  y  v2;
      m' =  m(nid := intval_div v1 v2) 
     g, p  (nid, m, h)  (next, m', h)" |

  SignedRemNode:
    "kind g nid = (SignedRemNode nid x y zero sb next);
      [g, m, p]  x  v1;
      [g, m, p]  y  v2;
      m' =  m(nid := intval_mod v1 v2) 
     g, p  (nid, m, h)  (next, m', h)" |

  StaticLoadFieldNode:
    "kind g nid = (LoadFieldNode nid f None nid');
      m' =  m(nid := h_load_field f None h) 
     g, p  (nid, m, h)  (nid', m', h)" |

  StoreFieldNode:
    "kind g nid = (StoreFieldNode nid f newval _ (Some obj) nid');
      [g, m, p]  newval  val;
      [g, m, p]  obj  ObjRef ref;
      h' = h_store_field f ref val h;
      m' =  m(nid := val) 
     g, p  (nid, m, h)  (nid', m', h')" |

  StaticStoreFieldNode:
    "kind g nid = (StoreFieldNode nid f newval _ None nid');
      [g, m, p]  newval  val;
      h' = h_store_field f None val h;
      m' =  m(nid := val) 
     g, p  (nid, m, h)  (nid', m', h')"

code_pred (modes: i ⇒ i ⇒ i * i * i ⇒ o * o * o ⇒ bool) step .

subsection ‹Interprocedural Semantics›

type_synonym Signature = "string"
type_synonym Program = "Signature  IRGraph"
type_synonym System = "Program × Classes"

(* Performs a dynamic look-up in the list of instantiated classes to retrieve the appropriate 
   IRGraph to run. Takes: 
    - A System containing the Program and list of classes.
    - The fully-qualified name (dynamic type) of the object which the method has been invoked on.
    - The fully-qualified name of the method invoked. 
    - The path from the object the method was invoked on to java.lang.Object.
 *)
function dynamic_lookup :: "System  string  string  string list  IRGraph option" where
  "dynamic_lookup (P,cl) cn mn path = (
      if (cn = ''None''  cn  set (Class.mapJVMFunc class_name cl)  path = [])
        then (P mn)
        else (
              
              let method_index = (find_index (get_simple_signature mn) (CLsimple_signatures cn cl)) in
              let parent = hd path in

          if (method_index = length (CLsimple_signatures cn cl))
            then (dynamic_lookup (P, cl) parent mn (tl path))
            else (P (nth (map method_unique_name (CLget_Methods cn cl)) method_index))
        )
      )
  "
  by auto
termination dynamic_lookup apply (relation "measure (λ(S,cn,mn,path). (length path))") by auto

inductive step_top :: "System  (IRGraph × ID × MapState × Params) list × FieldRefHeap  
                                 (IRGraph × ID × MapState × Params) list × FieldRefHeap  bool"
  ("_  _  _" 55) 
  for S where

  Lift:
  "g, p  (nid, m, h)  (nid', m', h') 
     (S)  ((g,nid,m,p)#stk, h)  ((g,nid',m',p)#stk, h')" |

 InvokeNodeStepStatic:
  "is_Invoke (kind g nid);
    callTarget = ir_callTarget (kind g nid);
    kind g callTarget = (MethodCallTargetNode targetMethod actuals invoke_kind);
    ¬(hasReceiver invoke_kind);
    Some targetGraph = (dynamic_lookup S ''None'' targetMethod []);
    [g, m, p]  actuals [↦] p'
     (S)  ((g,nid,m,p)#stk, h)  ((targetGraph,0,new_map_state,p')#(g,nid,m,p)#stk, h)" |

  InvokeNodeStep:
  "is_Invoke (kind g nid);
    callTarget = ir_callTarget (kind g nid);
    kind g callTarget = (MethodCallTargetNode targetMethod arguments invoke_kind);
    hasReceiver invoke_kind; 
    [g, m, p]  arguments [↦] p';
    ObjRef self = hd p';
    ObjStr cname = (h_load_field ''class'' self h);
    S = (P,cl);
    Some targetGraph = dynamic_lookup S cname targetMethod (class_parents (CLget_JVMClass cname cl))
     (S)  ((g,nid,m,p)#stk, h)  ((targetGraph,0,new_map_state,p')#(g,nid,m,p)#stk, h)" |

(* TODO this produces two parse trees after importing Class *)
  ReturnNode:
  "kind g nid = (ReturnNode (Some expr) _);
    [g, m, p]  expr  v;

    m'c = mc(nidc := v);
    nid'c = (successors_of (kind gc nidc))!0 
     (S)  ((g,nid,m,p)#(gc,nidc,mc,pc)#stk, h)  ((gc,nid'c,m'c,pc)#stk, h)" |

(* TODO this produces two parse trees after importing Class *)
  ReturnNodeVoid:
  "kind g nid = (ReturnNode None _);
    
    nid'c = (successors_of (kind gc nidc))!0 
     (S)  ((g,nid,m,p)#(gc,nidc,mc,pc)#stk, h)  ((gc,nid'c,mc,pc)#stk, h)" |

  UnwindNode:
  "kind g nid = (UnwindNode exception);

    [g, m, p]  exception  e;
     
    kind gc nidc = (InvokeWithExceptionNode _ _ _ _ _ _ exEdge);

    m'c = mc(nidc := e)
   (S)  ((g,nid,m,p)#(gc,nidc,mc,pc)#stk, h)  ((gc,exEdge,m'c,pc)#stk, h)"

code_pred (modes: i ⇒ i ⇒ o ⇒ bool) step_top .

subsection ‹Big-step Execution›

type_synonym Trace = "(IRGraph × ID × MapState × Params) list"

fun has_return :: "MapState  bool" where
  "has_return m = (m 0  UndefVal)"

inductive exec :: "System 
       (IRGraph × ID × MapState × Params) list × FieldRefHeap
       Trace 
       (IRGraph × ID × MapState × Params) list × FieldRefHeap
       Trace 
       bool"
  ("_  _ | _ ⟶* _ | _")
  for P where
  "P  (((g,nid,m,p)#xs),h)  (((g',nid',m',p')#ys),h');
    ¬(has_return m');

    l' = (l @ [(g,nid,m,p)]);

    exec P (((g',nid',m',p')#ys),h') l' next_state l'' 
     exec P (((g,nid,m,p)#xs),h) l next_state l''" 
(* TODO: refactor this stopping condition to be more abstract *)
  |
  "P  (((g,nid,m,p)#xs),h)  (((g',nid',m',p')#ys),h');
    has_return m';

    l' = (l @ [(g,nid,m,p)])
     exec P (((g,nid,m,p)#xs),h) l (((g',nid',m',p')#ys),h') l'"
code_pred (modes: i ⇒ i ⇒ i ⇒ o ⇒ o ⇒ bool as Exec) "exec" .

inductive exec_debug :: "System
      (IRGraph × ID × MapState × Params) list × FieldRefHeap
      nat
      (IRGraph × ID × MapState × Params) list × FieldRefHeap
      bool"
  ("__→*_* _")
  where
  "n > 0;
    p  s  s';
    exec_debug p s' (n - 1) s'' 
     exec_debug p s n s''" |

  "n = 0
     exec_debug p s n s"
code_pred (modes: i ⇒ i ⇒ i ⇒ o ⇒ bool) "exec_debug" .

subsubsection ‹Heap Testing›

definition p3:: Params where
  "p3 = [IntVal 32 3]"

fun graphToSystem :: "IRGraph  System" where
  "graphToSystem graph = ((λx. Some graph), JVMClasses [])"

(* Eg. call eg2_sq with [3] ⟶ 9 *)
values "{(prod.fst(prod.snd (prod.snd (hd (prod.fst res))))) 0 
        | res. (graphToSystem eg2_sq)  ([(eg2_sq,0,new_map_state,p3), (eg2_sq,0,new_map_state,p3)], new_heap) →*2* res}"

definition field_sq :: string where
  "field_sq = ''sq''"

definition eg3_sq :: IRGraph where
  "eg3_sq = irgraph [
    (0, StartNode None 4, VoidStamp),
    (1, ParameterNode 0, default_stamp),
    (3, MulNode 1 1, default_stamp),
    (4, StoreFieldNode 4 field_sq 3 None None 5, VoidStamp),
    (5, ReturnNode (Some 3) None, default_stamp)
   ]"

(* Eg. call eg2_sq with [3] ⟶ heap with object None={sq: 9} *)
values "{h_load_field field_sq None (prod.snd res)
        | res. (graphToSystem eg3_sq)  ([(eg3_sq, 0, new_map_state, p3), (eg3_sq, 0, new_map_state, p3)], new_heap) →*3* res}"

definition eg4_sq :: IRGraph where
  "eg4_sq = irgraph [
    (0, StartNode None 4, VoidStamp),
    (1, ParameterNode 0, default_stamp),
    (3, MulNode 1 1, default_stamp),
    (4, NewInstanceNode 4 ''obj_class'' None 5, ObjectStamp ''obj_class'' True True False),
    (5, StoreFieldNode 5 field_sq 3 None (Some 4) 6, VoidStamp),
    (6, ReturnNode (Some 3) None, default_stamp)
   ]"

(* Eg. call eg2_sq with [3] ⟶ heap with object 0={sq: 9} *)
values "{h_load_field field_sq (Some 0) (prod.snd res) 
        | res. (graphToSystem (eg4_sq))  ([(eg4_sq, 0, new_map_state, p3), (eg4_sq, 0, new_map_state, p3)], new_heap) →*3* res}"

end