inferno-experimental/src/Solver.ml

762 lines
24 KiB
OCaml

(******************************************************************************)
(* *)
(* Inferno *)
(* *)
(* François Pottier, Inria Paris *)
(* *)
(* Copyright Inria. All rights reserved. This file is distributed under the *)
(* terms of the MIT License, as described in the file LICENSE. *)
(* *)
(******************************************************************************)
open Signatures
module Make
(X : TEVAR)
(S : HSTRUCTURE)
(O : OUTPUT with type 'a structure = 'a S.structure)
= struct
(* -------------------------------------------------------------------------- *)
(* The type [tevar] of term variables is provided by [X]. *)
type tevar =
X.tevar
module TeVarMap =
Map.Make(struct include X type t = tevar end)
(* -------------------------------------------------------------------------- *)
(* The type variables that appear in constraints are immutable: they
are integer identifiers. *)
type variable =
int
let fresh : unit -> variable =
Utils.gensym()
module VarTable = Hashtbl
(* -------------------------------------------------------------------------- *)
(* The syntax of constraints. *)
type range =
Lexing.position * Lexing.position
type scheme =
O.tyvar list * O.ty
(**A constraint of type ['a co] is a constraint whose resolution, if
successful, produces a value of type ['a]. The ability for the
resolution process to produce a result can be used to express
elaboration in an elegant way. *)
type _ co =
| CRange : range * 'a co -> 'a co
(**The constraint [CRange (range, c)] is equivalent to the constraint [c],
and is annotated with [range], a range of positions in some piece of
source code. *)
| CTrue : unit co
(**The trivial constraint [CTrue] is always true. *)
| CMap : 'a co * ('a -> 'b) -> 'b co
(**The constraint [CMap (c, f)] is equivalent to the constraint [c]. The
transformation function [f] is used to postprocess the result of solving
this constraint. *)
| CPure : 'a -> 'a co
(**The constraint [CPure x] is always true, and produces the value [x].
It is equivalent to [CMap (CTrue, fun () -> x)]. *)
| CConj : 'a co * 'b co -> ('a * 'b) co
(**[CConj (c1, c2)] is the conjunction of the constraints [c1] and [c2]. *)
| CEq : variable * variable -> unit co
(**[CEq (v1, v2)] is an equality constraint between the type variables [v1]
and [v2]. *)
| CExist : variable * variable S.structure option * 'a co -> 'a co
(**An existential quantification [CExist (v, so, c)] binds the type
variable [v] in the constraint [c]. This type variable carries an
optional equality constraint [so] whose right-hand side is a shallow
term. *)
| CWitness : variable -> O.ty co
(**The constraint [CWitness v] is logically equivalent to
a [true] constraint, so it is always satisfied. Its result is
a decoded type which represents the value assigned to the
type variable [v] at the end of the resolution process. *)
| CInstance : tevar * variable -> O.ty list co
(**An instantiation constraint [CInstance (x, v)] requires the type
variable [v] to be an instance of the type scheme denoted by the
term variable [x]. Its result is a list of types that indicates
how the type scheme was instantiated. *)
| CDef : tevar * variable * 'a co -> 'a co
(**The constraint [CDef (x, v, c)] binds the term variable [x] to
the trivial (monomorphic) type scheme [v] in the constraint [c]. *)
| CLet :
variable list * (tevar * variable) list * 'a co * 'b co ->
(O.tyvar list * (tevar * scheme) list * 'a * 'b) co
(**[CLet rs (List.combine xs vs) c1 c2] first constructs a list
[cs] of constraint abstractions by abstracting the type
variables [vs] in the constraint [c1], under the scope of the
rigid variables [rs]. Then, it binds the term variables [xs] to
the constraint abstractions [cs] in the constraint [c2].
It produces a tuple of 4 results, namely:
- a list [ws] of (decoded) type variables that may appear in [a1], hence
should be universally bound by the client in [a1]. In a typical
scenario, [a1] is a System F term, and the client must build a type
abstraction [\Lambda vs.a1]. Note that [ws] must contain at least
the (decodings of) the rigid variables [rs].
- an association list that maps each term variable in [xs] to the type
scheme that has been assigned to it while solving [c2].
- the value [a1] produced by solving the constraint [c1].
- the value [a2] produced by solving the constraint [c2].
*)
(* -------------------------------------------------------------------------- *)
(* A pretty-printer for constraints, used while debugging. *)
module Printer = struct
[@@@warning "-4"] (* allow fragile pattern matching *)
open PPrint
let string_of_var v =
string_of_int v
let var v =
string (string_of_var v)
let tevar x =
string (X.to_string x)
let annot (x, v) =
tevar x ^^ string " = " ^^ var v
let shallow_equality s =
string " = " ^^ S.pprint var s
(* The grammar of constraints is organized in several layers: [binders],
[conj], [simple]. The [entry] layer is a synonym for [binders] and is
the entry point. *)
(* [CRange] and [CMap] constructors are ignored by this printer. *)
let rec entry : type a . a co -> document = fun c ->
binders c
and binders : type a . a co -> document = fun c ->
let next = conj in
let self = binders in
group @@ match c with
| CRange (_, c) ->
self c
| CMap (c, _) ->
self c
| CExist (v, so, c) ->
string "exi " ^^
var v ^^
optional shallow_equality so ^^
string " in" ^/^
self c
| CDef (x, v, c) ->
string "def " ^^
annot (x, v) ^^
string " in" ^/^
self c
| CLet (rs, xvs, c1, c2) ->
string "let" ^^
(if rs = [] then empty else brackets (separate_map space var rs)) ^^
separate_map (string " and ") annot xvs ^^
string " where" ^^ group (nest 2 (break 1 ^^
next c1 ^^
string " in")) ^/^
self c2
| _ ->
next c
and conj : type a . a co -> document = fun c ->
let self = conj in
let next = simple in
group @@ match c with
| CRange (_, c) ->
self c
| CMap (c, _) ->
self c
| CConj (c1, c2) ->
self c1 ^^ string " *" ^/^ self c2
| _ ->
next c
and simple : type a . a co -> document = fun c ->
let self = simple in
group @@ match c with
| CRange (_, c) ->
self c
| CMap (c, _) ->
self c
| CTrue | CPure _ ->
string "True"
| CWitness v ->
separate space [string "wit"; var v]
| CEq (v1, v2) ->
separate space [var v1; string "="; var v2]
| CInstance (x, v) ->
tevar x ^^ utf8string "" ^^ var v
| CExist _
| CDef _
| CLet _
| CConj _
->
(* Introduce parentheses. *)
surround 2 0 lparen (entry c) rparen
let print ppf c =
ToFormatter.pretty 0.9 80 ppf (entry c);
Format.pp_print_newline ppf ()
end (* Printer *)
let pprint, print =
Printer.(entry, print)
(* -------------------------------------------------------------------------- *)
(* Instantiate the generalization engine, which is parameterized over a
unification structure [S]. The generalization engine [G] contains the
unifier [U]. *)
(* TODO explain why we use [Structure.Option] *)
module OS = Structure.Option(S)
module G = Generalization.Make(OS)
module U = G.U
(* -------------------------------------------------------------------------- *)
(* The solver maintains an environment, an immutable mapping of term
variables to type schemes. *)
type env =
G.scheme TeVarMap.t
exception Unbound of range * tevar
module Env = struct
let empty : env =
TeVarMap.empty
let lookup (env : env) range x =
try
TeVarMap.find x env
with Not_found ->
raise (Unbound (range, x))
let bind x s (env : env) =
(* Shadowing allowed. *)
TeVarMap.add x s env
end (* Env *)
(* -------------------------------------------------------------------------- *)
(* The functions [uvar] and [ubind] look up and extend a mutable mapping of
immutable type variables to unifier variables. Every run of the solver
involves a fresh mutable mapping. [bind] creates a new unifier variable
and registers it with the generalization engine. *)
module UVar (X : sig
val state : G.state
end) = struct
let table =
VarTable.create 8
let uvar (v : variable) : U.variable =
match VarTable.find table v with
| None ->
(* This should never happen. *)
Printf.ksprintf failwith "Unbound variable %d" v
| Some uv ->
uv
let ubind (v : variable) rigidity so =
assert (not (VarTable.mem table v));
let uv = G.fresh X.state rigidity (OS.map uvar so) in
VarTable.add table v (Some uv);
uv
end (* UVar *)
(* -------------------------------------------------------------------------- *)
(* Decoding types. *)
let decode_variable (x : U.variable) : O.tyvar =
O.inject (G.Data.id (G.U.get x))
(* A type decoder is a function that transforms a unifier variable into an
output type. We choose to decode types in an eager manner; that is, we
take care of invoking the decoder, so that the client never needs to
perform this task. As a result, we do not even need to expose the decoder
to the client (although we could, if desired). *)
type 'v decoder =
'v -> O.ty
(* TODO *)
module D =
Decoder.Make (G.Data) (U) (struct
type ty = O.ty
let variable id =
O.variable (O.inject id)
let structure (s : O.ty U.structure) : O.ty =
O.structure (OS.project_nonleaf (G.Data.project s))
let mu id t =
O.mu (O.inject id) t
end)
(* Because [O.ty] is a nominal representation of types, a type is decoded in
the same way, regardless of how many type binders we have entered. This
makes it possible for a decoder to exploit memoization. Thanks to this
property, the type decoding process requires only linear time and space
(in the acyclic case), regardless of how many calls to the decoder are
performed. *)
(* The function [new_decoder] creates and returns a new decoder. A decoder has
internal state (a memoization table). If [rectypes] is on, a cyclic decoder
is created and returned; otherwise, an acyclic decoder is created and
returned. *)
let new_decoder ~rectypes : U.variable decoder =
if rectypes then D.new_cyclic_decoder() else D.new_acyclic_decoder()
(* The function [decode_scheme] is parameterized by a type decoder, [decode]. *)
let decode_scheme (decode : U.variable decoder) (s : G.scheme) : scheme =
List.map decode_variable (G.quantifiers s),
decode (G.body s)
(* -------------------------------------------------------------------------- *)
(* The exceptions [Unify] and [Cycle], raised by the unifier, must be caught
and re-raised in a slightly different format, as the unifier does not know
about ranges.
A cyclic decoder is used even if [rectypes] is [false]. Indeed, recursive
types can appear before the occurs check has been performed. *)
exception Unify of range * O.ty * O.ty
exception Cycle of range * O.ty
let unify range v1 v2 =
try
U.unify v1 v2
with U.Unify (v1, v2) ->
let decode = new_decoder ~rectypes:true in
raise (Unify (range, decode v1, decode v2))
let exit range ~rectypes state vs =
try
G.exit ~rectypes state vs
with G.Cycle v ->
let decode = new_decoder ~rectypes:true in
raise (Cycle (range, decode v))
(* -------------------------------------------------------------------------- *)
(* The toplevel constraint that is passed to the solver must have been
constructed by [let0], otherwise we risk encountering type variables that
we cannot register. Indeed, [G.register] must not be called unless
[G.enter] has been invoked first.
(If we accepted an arbitrary constraint from the user and silently
wrapped it in [let0], what would we do with the toplevel quantifiers?) *)
(* The function [ok] determines whether a constraint is a suitable argument
to the function [solve]. It is used only inside an assertion. *)
let rec ok : type a . a co -> bool =
function
| CRange (_, c) ->
ok c
| CTrue | CPure _ ->
true
| CMap (c, _) ->
ok c
| CLet (_rs, _xvs, _c1, c2) ->
(* The left-hand constraint [c1] does not need to be [ok], since it is
examined after a call to [G.enter]. *)
ok c2
| CConj (c1, c2) ->
ok c1 && ok c2
| CEq _
| CExist _
| CWitness _
| CInstance _
| CDef _ ->
(* These forms are not [ok], as they involve (free or binding
occurrences of) type variables. *)
false
(* -------------------------------------------------------------------------- *)
(* The solver is parameterized by a flag [rectypes], which indicates whether
equirecursive types are permitted. *)
(* [solve] expects a constraint of type [a co] and solves it, producing a
result of type [a]. It proceeds in two phases:
- Resolution: the constraint is traversed and solved;
- Elaboration: if the resolution was successful, then a global solution
exists, and a result can be computed (bottom-up) out of it.
The internal recursive solution [solve] implements the first phase.
It returns a closure of type ['a on_sol], which implements the second phase:
it describes how to compute a result once resolution is successful. *)
(* A value of type ['a on_sol] is a delayed computation that can be run
only after the solver has succeeded in finding a global solution. It
produces a result of type ['a]. Defunctionalizing the solver would
reveal that a value of type ['a on_sol] is in fact a tree of closures
and that its evaluation is performed bottom-up. *)
type 'a on_sol =
On_sol of (unit -> 'a) [@@unboxed]
let solve ~(rectypes : bool) (type a) (c : a co) : a =
(* Check that [c] is a well-formed toplevel constraint. If it is not,
then the user is at fault. *)
if not (ok c) then
invalid_arg "solve: ill-formed toplevel constraint";
(* Initialize the mutable state of the generalization engine. *)
let state = G.init() in
(* Initialize a mutable mapping of type variables to unifier variables. *)
let open UVar (struct let state = state end) in
(* Initialize a fresh type decoder. *)
let decode = new_decoder ~rectypes in
(* The internal recursive function [solve] is parameterized with an
environment (which maps term variables to type schemes) and with a
range (the range annotation that was most recently encountered on the
way down). *)
let rec solve : type a . env -> range -> a co -> a on_sol =
fun env range c -> match c with
| CRange (range, c) ->
solve env range c
| CTrue ->
On_sol (fun () -> ())
| CPure x ->
On_sol (fun () -> x)
| CMap (c, f) ->
let (On_sol r) = solve env range c in
On_sol (fun () -> f (r ()))
| CConj (c1, c2) ->
let (On_sol r1) = solve env range c1 in
let (On_sol r2) = solve env range c2 in
On_sol (fun () ->
(* Even though we recommend using semantic actions without side
effects, it seems wise to impose left-to-right evaluation of
the semantic actions. *)
let a1 = r1() in
let a2 = r2() in
a1, a2
)
| CEq (v, w) ->
unify range (uvar v) (uvar w);
On_sol (fun () -> ())
| CExist (v, s, c) ->
ignore (ubind v G.Flexible s);
solve env range c
| CWitness v ->
On_sol (fun () -> decode (uvar v))
| CInstance (x, w) ->
(* The environment provides a type scheme for [x]. *)
let s = Env.lookup env range x in
(* Instantiating this type scheme yields a variable [v], which we
unify with [w]. It also yields a list of witnesses, which we
record, as they will be useful during the decoding phase. *)
let witnesses, v = G.instantiate state s in
unify range v (uvar w);
On_sol (fun () -> List.map decode witnesses)
| CDef (x, v, c) ->
let env = Env.bind x (G.trivial (uvar v)) env in
solve env range c
| CLet (rs, xvs, c1, c2) ->
(* Warn the generalization engine that we are entering the left-hand
side of a [let] construct. *)
G.enter state;
(* Register the rigid prefix [rs] with th generalization engine. *)
let _ = List.map (fun v -> ubind v G.Rigid None) rs in
(* Register the variables [vs] with the generalization engine, just as
if they were existentially bound in [c1]. This is what they are,
basically, but they also serve as named entry points. *)
let vs = List.map (fun (_, v) -> ubind v G.Flexible None) xvs in
(* Solve the constraint [c1]. *)
let (On_sol r1) = solve env range c1 in
(* Ask the generalization engine to perform an occurs check, to adjust
the ranks of the type variables in the young generation (i.e., all
of the type variables that were registered since the call to
[G.enter] above), and to construct a list [ss] of type schemes for
our entry points. The generalization engine also produces a list
[generalizable] of the young variables that should be universally
quantified here. *)
let generalizable, ss = exit range ~rectypes state vs in
(* Extend the environment [env] and compute the type schemes [xss]. *)
let env, xss =
List.fold_right2 (fun (x, _) s (env, xss) ->
(Env.bind x s env, (x, s) :: xss)
) xvs ss (env, [])
in
(* Proceed to solve [c2] in the extended environment. *)
let (On_sol r2) = solve env range c2 in
On_sol (fun () ->
List.map decode_variable generalizable,
List.map (fun (x, s) -> (x, decode_scheme decode s)) xss,
r1 (),
r2 ())
in
let env = Env.empty
and range = Lexing.(dummy_pos, dummy_pos) in
(* Phase 1: solve the constraint. *)
let (On_sol r) = solve env range c in
(* Phase 2: elaborate. *)
r()
(* -------------------------------------------------------------------------- *)
(* Combinators for building constraints. *)
(* The type ['a co] forms an applicative functor. *)
let pure x =
CPure x
let (let+) c f = CMap(c, f)
let (and+) c1 c2 = CConj(c1, c2)
(* The type ['a co] does not form a monad. Indeed, there is no way of defining
a [bind] combinator. *)
(* -------------------------------------------------------------------------- *)
(* Existential quantification. *)
type ('a, 'r) binder =
('a -> 'r co) -> 'r co
let (let@) m f =
m f
let witness v =
CWitness v
let exist_aux t f =
(* Create a fresh constraint variable [v]. *)
let v = fresh () in
(* Pass [v] to the client, *)
let c = f v in
(* and wrap the resulting constraint [c] in an existential quantifier. *)
CExist (v, t, c)
let exist f =
exist_aux None f
let construct t f =
exist_aux (Some t) f
let lift f v1 t2 =
construct t2 (fun v2 ->
f v1 v2
)
(* -------------------------------------------------------------------------- *)
(* Deep types. *)
type deep_ty =
| DeepVar of variable
| DeepStructure of deep_ty S.structure
(* Conversion of deep types to shallow types. *)
(* Our API is so constrained that this seems extremely difficult to implement
from the outside. So, we provide it, for the user's convenience. In fact,
even here, inside the abstraction, implementing this conversion is slightly
tricky. *)
let build dty f =
(* Accumulate a list of the fresh variables that we create. *)
let vs = ref [] in
(* [convert] converts a deep type to a variable. *)
let rec convert dty =
match dty with
| DeepVar v ->
v
| DeepStructure s ->
(* First recursively convert our children, then allocate a fresh
variable [v] to stand for the root. Record its existence in the
list [vs]. *)
let v = fresh () in
let s =
(* This definition modifies [vs], so it should not be inlined away. *)
S.map convert s in
vs := (v, s) :: !vs;
v
in
(* Convert the deep type [dty] and pass the variable that stands for its
root to the user function [f]. *)
let c = f (convert dty) in
(* Then, create a bunch of existential quantifiers, in an arbitrary order. *)
List.fold_left (fun rc (v, s) -> CExist (v, Some s, rc)) c !vs
(* -------------------------------------------------------------------------- *)
(* Equations. *)
let (--) v1 v2 =
CEq (v1, v2)
let (---) v t =
lift (--) v t
(* If [construct] was not exposed, [lift] could also be defined (outside this
module) in terms of [exist] and [---], as follows. This definition seems
slower, though; its impact on the test suite is quite large. *)
let _other_lift f v1 t2 =
exist (fun v2 ->
let+ () = v2 --- t2
and+ v = f v1 v2
in v
)
(* -------------------------------------------------------------------------- *)
(* Instantiation constraints. *)
let instance x v =
CInstance (x, v)
(* [instance_ x v] is equivalent to [let+ _ = instance x v in ()]. *)
let instance_ x v =
CMap (instance x v, ignore)
(* -------------------------------------------------------------------------- *)
(* Constraint abstractions. *)
(* The [CDef] form is so trivial that it deserves its own syntax. Viewing it
as a special case of [CLet] would be more costly (by a constant factor). *)
let def x v c =
CDef (x, v, c)
(* The auxiliary function [single] asserts that its argument [xs] is a
singleton list, and extracts its unique element. *)
let single xs =
match xs with
| [ x ] ->
x
| _ ->
assert false
let letrn
: 'client_tyvar list
-> tevar list
-> (('client_tyvar * variable) list
-> (tevar * variable) list
-> 'a co)
-> 'b co
-> (O.tyvar list * (tevar * scheme) list * 'a * 'b) co
= fun alphas xs f1 c2 ->
let rs = List.map (fun _a -> fresh ()) alphas in
(* For each term variable [x], create a fresh type variable [v], as in
[CExist]. *)
let xvs = List.map (fun x ->
x, fresh ()
) xs in
let c1 = f1 (List.combine alphas rs) xvs in
let+ (generalizable, ss, v1, v2) =
CLet (rs, xvs, c1, c2)
in
(generalizable, ss, v1, v2)
(* [letr1] is a special case of [letrn], where only one term variable
is bound. *)
let letr1
: 'client_tyvar list
-> tevar
-> (('client_tyvar * variable) list -> variable -> 'a co)
-> 'b co
-> (O.tyvar list * 'a * 'b) co
= fun alphas x f1 c2 ->
let+ (generalizable, _ss, v1, v2) =
letrn alphas [x]
(fun rzs xzs ->
let (_,z) = single xzs in
f1 rzs z)
c2
in
(generalizable, v1, v2)
(* The general form of [CLet] involves two constraints, the left-hand side and
the right-hand side, yet it defines a *family* of constraint abstractions,
which become bound to the term variables [xs]. *)
let letn xs f1 c2 =
let+ (generalizable, xss, s1, s2) =
letrn [] xs (fun _ xvs -> f1 (List.map snd xvs)) c2 in
let ss = List.map snd xss in
(generalizable, ss, s1, s2)
(* [let1] is a special case of [letn], where only one term variable is bound. *)
let let1 x f1 c2 =
let+ (generalizable, ss, v1, v2) =
letn [ x ] (fun vs -> f1 (single vs)) c2
in (generalizable, single ss, v1, v2)
(* [let0] is a special case of [letn], where no term variable is bound, and
the right-hand side is [CTrue]. The constraint produced by [let0] is [ok]
by construction; in other words, it is a suitable argument to [solve]. *)
let let0 c1 =
let+ (generalizable, _, v1, ()) =
letn [] (fun _ -> c1) CTrue
in (generalizable, v1)
(* -------------------------------------------------------------------------- *)
(* Correlation with the source code. *)
let correlate range c =
CRange (range, c)
(* -------------------------------------------------------------------------- *)
end (* Make *)