Qcert.Basic.Data.RGroupBy


Section RGroupBy.


  Context {fdata:foreign_data}.

  Fixpoint add_in_groups (key: data) (d: data) (l: list (data × (list data))) : list (data × (list data)) :=
    match l with
    | nil(key, (d :: nil)) :: nil
    | (key', group) :: l'
      if data_eq_dec key key'
      then
        (key', d::group) :: l'
      else
        let l'' := add_in_groups key d l' in
        (key', group) :: l''
    end.


  Definition group_by_iter_eval (get_key: data option data) (l: list data) : option (list (data × (list data))) :=
    fold_right
      (fun d acc
         match acc with
         | Some acc'lift (fun kadd_in_groups k d acc') (get_key d)
         | NoneNone
         end)
      (Some nil) l.

  Definition group_by_iter_eval_alt (l: list (data × data)) : list (data × (list data)) :=
    fold_right
      (fun (d:data×data) accadd_in_groups (fst d) (snd d) acc)
      nil l.




  Definition key_is_eq (eval_key: data option data) (d1 d2:data) : option bool :=
    olift2 (fun x yif data_eq_dec x y then Some true else Some false)
           (eval_key d1)
           (eval_key d2).

  Definition key_is_eq_r (eval_key: data option data) (d1 d2:data) : option bool :=
    olift2 (fun x yif data_eq_dec x y then Some true else Some false)
           (eval_key d1)
           (Some d2).

  Lemma key_is_eq_with_project_eq sl d l :
    key_is_eq_r
      (fun d0 : data
         match d0 with
         | dunitNone
         | dnat _None
         | dbool _None
         | dstring _None
         | dcoll _None
         | drec rSome (drec (rproject r sl))
         | dleft _None
         | dright _None
         | dbrand _ _None
         | dforeign _None
         end) (drec l) d =
    Some (if data_eq_dec (drec (rproject l sl)) d then true else false).


  Definition group_of_key (eval_key: data option data) (k:data) (l: list data) :=
    (lift_filter (fun dkey_is_eq_r eval_key d k) l).

  Definition group_by_nested_eval (eval_key: data option data) (l: list data) : option (list (data × (list data))) :=
    let dupkeys := rmap (fun deval_key d) l in
    let keys := lift bdistinct dupkeys in
    olift (rmap (fun kolift (fun groupSome (k, group)) (group_of_key eval_key k l))) keys.

  Definition to_kv (l: list (data × list data)) :=
    map (fun xdrec (("key"%string,(fst x))::("value"%string,dcoll (snd x)) :: nil)) l.

  Definition group_by_nested_eval_kv (eval_key:data option data) (l: list data) : option (list data) :=
    lift to_kv (group_by_nested_eval eval_key l).


  Definition group_to_partitions (g:string) (group: data × list data) : option data :=
    match (fst group) with
    | drec keys
      Some (drec (rec_sort ((g,(dcoll (snd group)))::keys)))
    | _None
    end.

  Definition to_partitions (g:string) (l: list (data × list data)) :=
    lift_map (group_to_partitions g) l.

  Definition group_by_nested_eval_keys_partition
             (g:string) (eval_keys:data option data) (l: list data) : option (list data) :=
    olift (to_partitions g) (group_by_nested_eval eval_keys l).

  Section tableform.
    Definition group_by_nested_eval_table
               (g:string) (sl:list string) (l:list data) : option (list data) :=
      group_by_nested_eval_keys_partition
        g
        (fun d
           match d with
           | drec rSome (drec (rproject r sl))
           | _None
           end) l.

    Lemma group_of_key_over_table_correct sl d incoll :
      olift (fun group : list dataSome (dcoll group))
            (group_of_key
               (fun d : data
                  match d with
                  | dunitNone
                  | dnat _None
                  | dbool _None
                  | dstring _None
                  | dcoll _None
                  | drec rSome (drec (rproject r sl))
                  | dleft _None
                  | dright _None
                  | dbrand _ _None
                  | dforeign _None
                  end) d incoll)
      =
(olift
              (fun d1 : data
               lift_oncoll
                 (fun l2 : list datalift dcoll (rflatten l2)) d1)
              (lift dcoll
                 (rmap
                    (fun d1 : data
                     olift
                       (fun d0 : data
                        match d0 with
                        | dunitNone
                        | dnat _None
                        | dbool trueSome (dcoll (d1 :: nil))
                        | dbool falseSome (dcoll nil)
                        | dstring _None
                        | dcoll _None
                        | drec _None
                        | dleft _None
                        | dright _None
                        | dbrand _ _None
                        | dforeign _None
                        end)
                       (olift2
                          (fun d0 d2 : data
                           unbdata
                             (fun x y : data
                              if data_eq_dec x y then true else false) d0
                             d2)
                          match d1 with
                          | dunitNone
                          | dnat _None
                          | dbool _None
                          | dstring _None
                          | dcoll _None
                          | drec rSome (drec (rproject r sl))
                          | dleft _None
                          | dright _None
                          | dbrand _ _None
                          | dforeign _None
                          end (Some d))) incoll))).

    Lemma group_of_key_destruct_drec_inv g sl d l0 l1 incoll:
      match d with
      | dunitNone
      | dnat _None
      | dbool _None
      | dstring _None
      | dcoll _None
      | drec r2
        Some
          (drec
             (insertion_sort_insert rec_field_lt_dec
                                    (g, dcoll l1) (rec_sort r2)))
      | dleft _None
      | dright _None
      | dbrand _ _None
      | dforeign _None
      end = None
      olift (to_partitions g)
            (lift (fun t' : list (data × list data) ⇒ (d, l1) :: t')
                  (rmap
                     (fun k : data
                        olift (fun group : list dataSome (k, group))
                              (group_of_key
                                 (fun d : data
                                    match d with
                                    | dunitNone
                                    | dnat _None
                      | dbool _None
                      | dstring _None
                      | dcoll _None
                      | drec rSome (drec (rproject r sl))
                      | dleft _None
                      | dright _None
                      | dbrand _ _None
                      | dforeign _None
                                    end) k incoll)) l0)) = None.

    Lemma test l0 g sl l1 l2 incoll :
      olift (to_partitions g)
            (lift (fun t' : list (data × list data) ⇒ (drec l2, l1) :: t')
                  (rmap
                     (fun k : data
                        olift (fun group : list dataSome (k, group))
                              (group_of_key
                                 (fun d : data
                                    match d with
                                    | dunitNone
                                    | dnat _None
                                    | dbool _None
                                    | dstring _None
                                    | dcoll _None
                                    | drec rSome (drec (rproject r sl))
                                    | dleft _None
                                    | dright _None
                                    | dbrand _ _None
                                    | dforeign _None
                                    end) k incoll)) l0))
      =
      lift
        (fun t' : list data
           drec
             (insertion_sort_insert rec_field_lt_dec
                                    (g, dcoll l1) (rec_sort l2)) :: t')
        (rmap
           (fun d1 : data
              olift2
                (fun d0 d2 : data
                   match d0 with
                   | dunitNone
                   | dnat _None
                   | dbool _None
                   | dstring _None
                   | dcoll _None
                   | drec r1
                     match d2 with
                     | dunitNone
                     | dnat _None
                     | dbool _None
                     | dstring _None
                     | dcoll _None
                     | drec r2Some (drec (rec_sort (r1 ++ r2)))
                     | dleft _None
                     | dright _None
                     | dbrand _ _None
                     | dforeign _None
                     end
                   | dleft _None
                   | dright _None
                   | dbrand _ _None
                   | dforeign _None
                   end)
                (olift (fun d0 : dataSome (drec ((g, d0) :: nil)))
                       (olift
                          (fun d0 : data
                             lift_oncoll
                               (fun l3 : list datalift dcoll (rflatten l3)) d0)
                          (lift dcoll
                                (rmap
                                   (fun d0 : data
                                      olift
                                        (fun d2 : data
                                           match d2 with
                                           | dunitNone
                                           | dnat _None
                                           | dbool trueSome (dcoll (d0 :: nil))
                                           | dbool falseSome (dcoll nil)
                                           | dstring _None
                                           | dcoll _None
                                           | drec _None
                                           | dleft _None
                                           | dright _None
                                           | dbrand _ _None
                                           | dforeign _None
                                           end)
                                        (olift2
                                           (fun d2 d3 : data
                                              unbdata
                                                (fun x y : data
                                                   if data_eq_dec x y then true else false)
                                                d2 d3)
                                           match d0 with
                                           | dunitNone
                                           | dnat _None
                                           | dbool _None
                                           | dstring _None
                                           | dcoll _None
                                           | drec rSome (drec (rproject r sl))
                                           | dleft _None
                                           | dright _None
                                           | dbrand _ _None
                                           | dforeign _None
                                           end (Some d1))) incoll))))
                (Some d1)) l0).

    Lemma group_by_table_correct
          (g:string) (sl:list string)
          (incoll outcoll:list data):
      group_by_nested_eval_table g sl incoll = Some outcoll
      match
        olift (fun d1 : datarondcoll bdistinct d1)
              (lift dcoll
                    (rmap
                       (fun d1 : data
                          match d1 with
                          | dunitNone
                          | dnat _None
                          | dbool _None
                          | dstring _None
                          | dcoll _None
                          | drec rSome (drec (rproject r sl))
                          | dleft _None
                          | dright _None
                          | dbrand _ _None
                          | dforeign _None
                          end) incoll))
      with
      | Some dunitNone
      | Some (dnat _) ⇒ None
      | Some (dbool _) ⇒ None
      | Some (dstring _) ⇒ None
      | Some (dcoll c1) ⇒
        lift dcoll
             (rmap
                (fun d1 : data
                   olift2
                     (fun d0 d2 : data
                        match d0 with
                        | dunitNone
                        | dnat _None
                        | dbool _None
                        | dstring _None
                        | dcoll _None
                        | drec r1
                          match d2 with
                          | dunitNone
                          | dnat _None
                          | dbool _None
                          | dstring _None
                          | dcoll _None
                          | drec r2Some (drec (rec_sort (r1 ++ r2)))
                          | dleft _None
                          | dright _None
                          | dbrand _ _None
                          | dforeign _None
                          end
                        | dleft _None
                        | dright _None
                        | dbrand _ _None
                        | dforeign _None
                        end)
                     (olift (fun d0 : dataSome (drec ((g, d0) :: nil)))
                            (olift
                               (fun d0 : data
                                  lift_oncoll
                                    (fun l : list datalift dcoll (rflatten l)) d0)
                               (lift dcoll
                                     (rmap
                                        (fun d0 : data
                                           olift
                                             (fun d2 : data
                                                match d2 with
                                                | dunitNone
                                                | dnat _None
                                                | dbool trueSome (dcoll (d0 :: nil))
                                                | dbool falseSome (dcoll nil)
                                                | dstring _None
                                                | dcoll _None
                                                | drec _None
                                                | dleft _None
                                                | dright _None
                                                | dbrand _ _None
                                                | dforeign _None
                                                end)
                                             (olift2
                                                (fun d2 d3 : data
                                                   unbdata
                                                     (fun x y : data
                                                        if data_eq_dec x y
                                                        then true
                                                        else false) d2 d3)
                                                match d0 with
                                                | dunitNone
                                                | dnat _None
                                                | dbool _None
                                                | dstring _None
                                                | dcoll _None
                                                | drec rSome (drec (rproject r sl))
                                                | dleft _None
                                                | dright _None
                                                | dbrand _ _None
                                                | dforeign _None
                                                end (Some d1))) incoll))))
                     (Some d1)) c1)
      | Some (drec _) ⇒ None
      | Some (dleft _) ⇒ None
      | Some (dright _) ⇒ None
      | Some (dbrand _ _) ⇒ None
      | Some (dforeign _) ⇒ None
      | NoneNone
      end = Some (dcoll outcoll).

  End tableform.

  Section normalized.
    Context (h:brand_relation_t).

    Lemma bdistinct_normalized l :
      Forall (data_normalized h) l
      Forall (data_normalized h) (bdistinct l).

    Lemma rmap_rproject_normalized l l0 o :
      Forall (data_normalized h) l0
      (rmap
         (fun d : data
            match d with
            | dunitNone
            | dnat _None
            | dbool _None
            | dstring _None
            | dcoll _None
            | drec rSome (drec (rproject r l))
            | dleft _None
            | dright _None
            | dbrand _ _None
            | dforeign _None
            end) l0) = Some o
      Forall (data_normalized h) o.

    Lemma group_of_key_normalized a l l1 l2 :
      Forall (data_normalized h) l1
      group_of_key
           (fun d : data
            match d with
            | dunitNone
            | dnat _None
            | dbool _None
            | dstring _None
            | dcoll _None
            | drec rSome (drec (rproject r l))
            | dleft _None
            | dright _None
            | dbrand _ _None
            | dforeign _None
            end) a l1 = Some l2
      Forall (data_normalized h) l2.

    Lemma group_by_nested_eval_normalized l0 l o :
      Forall (data_normalized h) l0
      (group_by_nested_eval
         (fun d : data
            match d with
            | dunitNone
            | dnat _None
            | dbool _None
            | dstring _None
            | dcoll _None
            | drec rSome (drec (rproject r l))
            | dleft _None
            | dright _None
            | dbrand _ _None
            | dforeign _None
            end) l0) = Some o
      Forall (fun dddata_normalized h (fst dd)
                         Forall (data_normalized h) (snd dd)) o.

    Lemma group_to_partitions_normalized s a d :
      data_normalized h (fst a)
      Forall (data_normalized h) (snd a)
      group_to_partitions s a = Some d
      data_normalized h d.

    Lemma group_by_nested_eval_keys_partition_normalized l0 s l o :
      data_normalized h (dcoll l0)
      lift dcoll
           (group_by_nested_eval_keys_partition
              s
              (fun d : data
                 match d with
                 | dunitNone
                 | dnat _None
                 | dbool _None
                 | dstring _None
                 | dcoll _None
                 | drec rSome (drec (rproject r l))
                 | dleft _None
                 | dright _None
                 | dbrand _ _None
                 | dforeign _None
                 end) l0) = Some o
       data_normalized h o.

  End normalized.

End RGroupBy.