1.5. Sorting Lists via Insertion Sort
File:
InsertSort.ml
The task of finding the minimal and the second-minimal element in a list can be made much simpler and fast, if the list is pre-processed, namely, sorted. Indeed, for a sorted list we can just take it first or a second element, knowing that it will be what we need. Below, we will see our first implementation of sorting a list.
1.5.1. Insertion sort implementation
The following OCaml code implements the sorting procedure:
let insert_sort ls =
let rec walk xs acc =
match xs with
| [] -> acc
| h :: t ->
let rec insert elem prefix =
match prefix with
| [] -> [elem]
| h :: t as l ->
if h < elem
then h :: (insert elem t)
else (elem :: l)
in
let acc' = insert h acc in
walk t acc'
in
walk ls []
Notice that there are two recursive auxiliary function in it: walk
and insert
. They play the following roles:
The outer
walk
traverses the entire lists and for each next element, inserts it at a correct position to the prefix viainsert
, which is already assumed ordered.The inner
insert
traverses the sorted prefix (calledprefix
) and inserts an elementelem
to a correct position.
1.5.2. Correctness of sorting
In order to reason about the correctness of sorting, we first need to say what its specification is, i.e., what is a correctly sorted list. This notion is described by the following definition:
let rec sorted ls =
match ls with
| [] -> true
| h :: t -> List.for_all (fun e -> e >= h) t && sorted t
A list res
is a correctly sorted version of a list ls
if
it’s (a) sorted and (b) has all the same elements as res
, which we
can define as follows:
let same_elems ls1 ls2 =
List.for_all (fun e ->
List.find_all (fun e' -> e = e') ls2 =
List.find_all (fun e' -> e = e') ls1
) ls1 &&
List.for_all (fun e ->
List.find_all (fun e' -> e = e') ls2 =
List.find_all (fun e' -> e = e') ls1
) ls2
let sorted_spec ls res =
same_elems ls res && sorted res
With the following functions we can now test insertion sort:
let sort_test sorter ls =
let res = sorter ls in
sorted_spec ls res;;
# insert_sort [];;
- : 'a list = []
# sort_test insert_sort [];;
- : bool = true
# insert_sort [5; 7; 8; 42; 3; 3; 1];;
- : int list = [1; 3; 3; 5; 7; 8; 42]
# sort_test insert_sort [5; 7; 8; 42; 3; 3; 1];;
- : bool = true
1.5.3. Sorting invariants
Let us now make the intuition about the correctness of sorting formal,
capturing it in the form of specifications for the two recursive
functions it uses, walk
and insert
.
Since walk
is tail-recursive, we can get away without its
postcondition, and just specify the precondition, which is also its
invariant:
let insert_sort_walk_inv ls t acc =
sorted acc &&
same_elems (acc @ t) ls
The invariant insert_sort_walk_inv
ensures that the prefix acc
processed so far is sorted, and also that the concatenation of the
tail t
to be processed has the same elements as the original list
ls
.
The recursive procedure insert
is, unfortunately, not
tail-recursive, hence we will have to provide both the pre- and the
postcondition:
let insert_sort_insert_pre elem prefix = sorted prefix
let insert_sort_insert_post res elem prefix =
sorted res &&
same_elems res (elem :: prefix)
That is, whenever insert is run on a prefix
, it expects it to be sorted.
Once it finishes, it returns a sorted list res
, which has all alements of
prefix
, and also the inserted elem
. Let us notice that the postcondition
of insert
implies the precondition of walk
, at each recursive iteration.
Furthermore, the invariant of walk
becomes the correcntess specification of
the top-level sorting function, once t
becomes empty, i.e., in its base
case. We can now check all of those sepcifications by annotating the code with
them:
let insert_sort_with_inv ls =
let rec walk xs acc =
match xs with
| [] ->
let res = acc in
(* walk's postcondition *)
assert (sorted_spec ls res);
res
| h :: t ->
let rec insert elem remaining =
match remaining with
| [] ->
(* insert's postcondition *)
assert (insert_sort_insert_post [elem] elem remaining);
[elem]
| h :: t as l ->
if h < elem
then (
(* insert's precondition *)
assert (insert_sort_insert_pre elem t);
let res = insert elem t in
(* insert's postcondition *)
(assert (insert_sort_insert_post (h :: res) elem remaining);
h :: res))
else
let res = elem :: l in
(* insert's postcondition *)
(assert (insert_sort_insert_post res elem remaining);
res)
in
let acc' = (
(* insert's precondition *)
assert (insert_sort_insert_pre h acc);
insert h acc) in
(* walk's precondition *)
assert (insert_sort_walk_inv ls t acc');
walk t acc'
in
assert (insert_sort_walk_inv ls ls []);
walk ls []