Beautiful Code Programming Praxis.
-
(** [match_re re text] is [true] when the pattern [re] matches somewhere in
    [text], and [false] otherwise.

    Supported pattern syntax:
    - [c]   matches the literal character [c];
    - [.]   matches any single character;
    - [^]   as the first pattern character, anchors the match at the start
            of [text];
    - [$]   as the last pattern character, matches only at the end of [text];
    - [x*]  matches zero or more occurrences of the preceding character [x]
            (where [x] may be [.]).

    Both strings are walked by integer offsets ([o_re] into [re], [o_text]
    into [text]); no substrings are allocated. *)
let match_re re text =
  let re_len = String.length re in
  let text_len = String.length text in
  (* [re_has ch i] holds when offset [i] lies inside [re] and carries [ch]. *)
  let re_has ch i = i < re_len && re.[i] = ch in
  (* [match_re_star c o_re o_text]: try the rest of the pattern (from [o_re])
     after consuming zero, one, two, ... copies of [c] starting at [o_text].
     The shortest repetition is tried first. *)
  let rec match_re_star c o_re o_text =
    match_re_here o_re o_text
    || (o_text < text_len
        && (text.[o_text] = c || c = '.')
        && match_re_star c o_re (o_text + 1))
  (* [match_re_here o_re o_text]: does the pattern suffix starting at [o_re]
     match the text exactly at offset [o_text]? *)
  and match_re_here o_re o_text =
    if o_re = re_len then true (* pattern exhausted: success *)
    else if re_has '*' (o_re + 1) then
      (* Current character is starred; skip both it and the star. *)
      match_re_star re.[o_re] (o_re + 2) o_text
    else if re.[o_re] = '$' && o_re + 1 = re_len then
      (* A trailing '$' succeeds only at the end of the text. *)
      o_text = text_len
    else
      o_text < text_len
      && (re.[o_re] = '.' || re.[o_re] = text.[o_text])
      && match_re_here (o_re + 1) (o_text + 1)
  in
  (* Unanchored search: try every start offset, including the end of the
     text, so that patterns able to match the empty string still succeed. *)
  let rec match_re_main o_text =
    match_re_here 0 o_text
    || (o_text < text_len && match_re_main (o_text + 1))
  in
  if re_has '^' 0 then match_re_here 1 0 else match_re_main 0

(** [match_re_test ()] runs the regression cases for [match_re].
    Returns [()] when every case passes.
    @raise Assert_failure on the first case whose result is unexpected. *)
let match_re_test () =
  (* Single literal character, searched anywhere in the text. *)
  assert (match_re "a" "a");
  assert (not (match_re "a" "b"));
  assert (match_re "a" "ab");
  assert (match_re "a" "ba");
  (* Literal sequences. *)
  assert (match_re "ab" "ab");
  assert (not (match_re "ab" "ba"));
  assert (match_re "ab" "xab");
  assert (match_re "ab" "aab");
  (* '.' matches exactly one arbitrary character. *)
  assert (not (match_re "a.c" "ac"));
  assert (match_re "a.c" "abc");
  assert (not (match_re "a.c" "xac"));
  assert (match_re "a.c" "xabcx");
  (* '^' anchors at the start of the text. *)
  assert (match_re "^ab" "ab");
  assert (not (match_re "^ab" "ba"));
  assert (not (match_re "^ab" "aab"));
  assert (match_re "^ab" "abc");
  (* '$' anchors at the end of the text. *)
  assert (match_re "ab$" "ab");
  assert (not (match_re "ab$" "ba"));
  assert (match_re "ab$" "aab");
  assert (not (match_re "ab$" "abc"));
  (* Both anchors together require the whole text to match. *)
  assert (match_re "^ab$" "ab");
  assert (not (match_re "^ab$" "ba"));
  assert (not (match_re "^ab$" "abc"));
  assert (not (match_re "^ab$" "aba"));
  (* ".*" matches any run of characters, including the empty run. *)
  assert (match_re "a.*c" "ac");
  assert (match_re "a.*c" "abc");
  assert (match_re "a.*c" "abbc");
  assert (not (match_re "a.*c" "cbba"));
  (* A starred literal. *)
  assert (not (match_re "aa*" "x"));
  assert (match_re "aa*" "a");
  assert (match_re "aa*" "aa");
  assert (match_re "aa*" "ba");
  (* Consecutive starred items followed by a literal. *)
  assert (match_re "a*a*a" "a");
  assert (match_re "a*a*a" "aaa");
  assert (not (match_re "a*a*a" "xxxxx"))

(* Run the self-test when the program is loaded. [let ()] (rather than
   [let _]) makes the compiler check that the call really returns unit. *)
let () = match_re_test () (* no news is good news *)
This is an OCaml translation of the regex matching code written in C.
I tried to beautify the code, but it still looks quite dirty because of the string operations. It would be more beautiful if we used a list of characters instead of a native string, as the suggested Scheme solution does.

