rem Name : Metaphone10.sql rem from : Scott Stephens rem date : 9 December 2004 rem version: 1.0 rem rem Note: rem This is the original source by Scott Stephens, published on TechRepublic. rem Article: Consider algorithms other than Soundex rem ========================================================================= rem Changes: rem 25-sep-05 R.Schierbeek: If no input length, return 'NO INPUT Value'. rem ========================================================================= create or replace package meta is function phone(s varchar2) return varchar2; end ; / show errors; create or replace package body meta is type rule_rec is record ( f_off integer, f_match varchar2(10), f_result varchar2(10) ); type rule_tab is table of rule_rec index by binary_integer; g_rules rule_tab; g_vowels constant varchar2(5) := 'AEIOU'; -- function cmp_rule ( p_rule rule_rec, p_off pls_integer, p_str varchar2 ) return boolean; -- function phone(s varchar2) return varchar2 is l_s varchar2(32767); l_r varchar2(32767); begin -- If no length, return IF s IS NULL THEN RETURN 'NO INPUT Value'; end if; -- get rid of non-alpha characters for i in 1..length(s) loop if upper(substr(s,i,1)) between 'A' and 'Z' then l_s := l_s || upper(substr(s,i,1)); end if; end loop; if l_s is null then return null; end if; -- clean up the word a bit if l_s like 'AE%' or l_s like 'GN%' or l_s like 'KN%' or l_s like 'PN%' or l_s like 'WR%' then l_s := substr(l_s,2); -- drop first letter elsif l_s like 'X%' then l_s := 'S' || substr(l_s,2); -- change to S elsif l_s like 'WH%' then l_s := 'W' || substr(l_s,2); -- change to W end if; for i in 1..length(l_s) loop if i > 1 and substr(l_s,i-1,1) = substr(l_s,i,1) and substr(l_s,i,1) != 'C' then -- drop the second of doubled letter except C null; elsif instr(g_vowels,substr(l_s,i,1)) > 0 then -- ignore all except initial vowels if i = 1 then l_r := l_r || substr(l_s,i,1); end if; elsif substr(l_s,i,2) = 'GH' and i < length(l_s)-2 and instr(g_vowels,substr(l_s,i+1,1)) = 0 then -- a difficult rule: -- silent if in "gh" and not at end or before a vowel null; else -- scan for word patterns for r in 1..g_rules.count loop if cmp_rule(g_rules(r),i,l_s) then l_r := l_r || g_rules(r).f_result; exit; end if; end loop; end if; end loop; return l_r; end phone; -- IN/OUT procedure add_rule(p_off integer,p_match varchar2,p_result varchar2) is l_rule_rec rule_rec; begin l_rule_rec.f_off := p_off; l_rule_rec.f_match := p_match; l_rule_rec.f_result := p_result; g_rules(g_rules.count+1) := l_rule_rec; end add_rule; -- IN/OUT function cmp_rule ( p_rule rule_rec, p_off pls_integer, p_str varchar2 ) return Boolean is ch1 char; ch2 char; begin if p_rule.f_match like '%c%' or p_rule.f_match like '%v%' then for i in 1..length(p_rule.f_match) loop ch1 := substr(p_rule.f_match,i,1); ch2 := substr(p_str,p_off+p_rule.f_off+i,1); if ch2 is null then return false; end if; if ch1 = 'v' then if instr(g_vowels,ch2) = 0 then return false; end if; elsif ch1 = 'c' then if instr(g_vowels,ch2) != 0 then return false; end if; elsif ch1 != ch2 then return false; end if; end loop; return true; end if; if p_off <= -p_rule.f_off then return false; end if; return p_rule.f_match = substr(p_str,p_off+p_rule.f_off,length(p_rule.f_match)); end cmp_rule; begin -- initialize the rules table -- order is important add_rule(-1,'MB',null); add_rule(0,'B','B'); add_rule(0,'CIA','X'); add_rule(0,'CH','X'); add_rule(0,'CI','S'); add_rule(0,'CE','S'); add_rule(0,'CY','S'); add_rule(0,'C','K'); add_rule(0,'DGE','J'); add_rule(0,'DGY','J'); add_rule(0,'DGI','J'); add_rule(0,'D','T'); add_rule(0,'F','F'); add_rule(0,'GN',null); add_rule(0,'GNED',null); add_rule(0,'GI','J'); add_rule(0,'GE','J'); add_rule(0,'GY','J'); add_rule(0,'G','K'); add_rule(-1,'vHc',null); add_rule(-1,'CH',null); add_rule(-1,'PH',null); add_rule(-1,'SH',null); add_rule(-1,'TH',null); add_rule(0,'H','H'); add_rule(0,'J','J'); add_rule(-1,'CK',null); add_rule(0,'K','K'); add_rule(0,'L','L'); add_rule(0,'M','M'); add_rule(0,'N','N'); add_rule(0,'PH','F'); add_rule(0,'P','P'); add_rule(0,'Q','K'); add_rule(0,'R','R'); add_rule(0,'SH','X'); add_rule(0,'SIO','X'); add_rule(0,'SIA','X'); add_rule(0,'S','S'); add_rule(0,'TIA','X'); add_rule(0,'TIO','X'); add_rule(0,'TH','0'); add_rule(0,'TCH',null); add_rule(0,'T','T'); add_rule(0,'V','F'); add_rule(0,'Wv','W'); add_rule(0,'W',null); add_rule(0,'X','KS'); add_rule(0,'Yv','Y'); add_rule(0,'Y',null); add_rule(0,'Z','S'); end meta; / show errors;