rem Name : metaphone12.sql rem from : TechRepublic / Scott Stephens rem date : 9 December 2004 rem version: 1.2 rem desc : Based on Metaphone algorithm, public domain. rem usage : select meta.phone('Demo') from dual ; rem rem ================================================================== rem Changes by Roelof Schierbeek: rem rem 25-sep-05 RS: If no input length, return 'NO INPUT Value'. rem 26-sep-05 RS: changes in rules table: REM old : D > T, G > K, TH> 0, W > null rem new : D > D, G > G, TH> T, W > W rem Also removed all G* > J (G sounds like J in english, not in Dutch) rem And added rem add_rule(-1,'GH',null); versteegh=versteeg rem add_rule(-1,'DT',null); Smidt = Smid rem =================================================================== create or replace package meta is function phone(s varchar2) return varchar2; end; / show errors; create or replace package body meta is type rule_rec is record ( f_off integer, f_match varchar2(10), f_result varchar2(10) ); type rule_tab is table of rule_rec index by binary_integer; g_rules rule_tab; g_vowels constant varchar2(5) := 'AEIOU'; -- function cmp_rule ( p_rule rule_rec, p_off pls_integer, p_str varchar2 ) return boolean; -- function phone (s varchar2) return varchar2 is l_s varchar2(32767); l_r varchar2(32767); begin -- If no length, return IF s IS NULL THEN RETURN 'NO INPUT Value'; end if; -- get rid of non-alpha characters for i in 1..length(s) loop if upper(substr(s,i,1)) between 'A' and 'Z' then l_s := l_s || upper(substr(s,i,1)); end if; end loop; if l_s is null then return null; end if; -- clean up the word a bit if l_s like 'AE%' or l_s like 'PN%' or l_s like 'WR%' then l_s := substr(l_s,2); -- drop first letter elsif l_s like 'X%' then l_s := 'S' || substr(l_s,2); -- change to S elsif l_s like 'WH%' then l_s := 'W' || substr(l_s,2); -- change to W end if; -- remove all vowels defined in g_vowels for i in 1..length(l_s) loop if i > 1 and substr(l_s,i-1,1) = substr(l_s,i,1) and substr(l_s,i,1) != 'C' then -- drop the second of doubled letter except C null; elsif instr(g_vowels,substr(l_s,i,1)) > 0 then -- ignore all except initial vowels if i = 1 then l_r := l_r || substr(l_s,i,1); end if; elsif substr(l_s,i,2) = 'GH' and i < length(l_s)-2 and instr(g_vowels,substr(l_s,i+1,1)) = 0 then -- a difficult rule: -- silent if in "gh" and not at end or before a vowel null; else -- scan for word patterns for r in 1..g_rules.count loop if cmp_rule(g_rules(r),i,l_s) then l_r := l_r || g_rules(r).f_result; exit; end if; end loop; end if; end loop; return l_r; end phone; -- initialize the rules table procedure add_rule(p_off integer,p_match varchar2,p_result varchar2) is l_rule_rec rule_rec; begin l_rule_rec.f_off := p_off; l_rule_rec.f_match := p_match; l_rule_rec.f_result := p_result; g_rules(g_rules.count+1) := l_rule_rec; end add_rule; -- Compare rule to string. function cmp_rule ( p_rule rule_rec, p_off pls_integer, p_str varchar2 ) return Boolean is ch1 char; ch2 char; begin if p_rule.f_match like '%c%' or p_rule.f_match like '%v%' then for i in 1..length(p_rule.f_match) loop ch1 := substr(p_rule.f_match,i,1); ch2 := substr(p_str,p_off+p_rule.f_off+i,1); if ch2 is null then return false; end if; if ch1 = 'v' then if instr(g_vowels,ch2) = 0 then return false; end if; elsif ch1 = 'c' then if instr(g_vowels,ch2) != 0 then return false; end if; elsif ch1 != ch2 then return false; end if; end loop; return true; end if; if p_off <= -p_rule.f_off then return false; end if; return p_rule.f_match = substr(p_str,p_off+p_rule.f_off,length(p_rule.f_match)); end cmp_rule; begin -- initialize the rules table -- order is important add_rule(-1,'MB',null); add_rule(0,'B','B') ; add_rule(0,'CIA','X'); add_rule(0,'CH','X'); add_rule(0,'CI','S'); add_rule(0,'CE','S'); add_rule(0,'CY','S'); add_rule(0,'C','K') ; add_rule(-1,'DT',null); add_rule(0,'D','D') ; add_rule(0,'F','F') ; add_rule(-1,'GH',null); add_rule(0,'G','G') ; add_rule(-1,'vHc',null); add_rule(-1,'CH',null); add_rule(-1,'PH',null); add_rule(-1,'SH',null); add_rule(-1,'TH',null); add_rule(0,'H','H'); add_rule(0,'J','J'); add_rule(-1,'CK',null); add_rule(0,'K','K'); add_rule(0,'L','L'); add_rule(0,'M','M'); add_rule(0,'N','N'); add_rule(0,'PH','F'); add_rule(0,'P','P'); add_rule(0,'Q','K'); add_rule(0,'R','R'); add_rule(0,'SH','X'); add_rule(0,'SIO','X'); add_rule(0,'SIA','X'); add_rule(0,'S','S'); add_rule(0,'TIA','X'); add_rule(0,'TIO','X'); add_rule(0,'TH','T'); add_rule(0,'TCH',null); add_rule(0,'T','T'); add_rule(0,'V','F'); add_rule(0,'Wv','W'); add_rule(0,'W','W'); add_rule(0,'X','KS'); add_rule(0,'Yv','Y'); add_rule(0,'Y',null); add_rule(0,'Z','S'); end meta ; / show errors;