- markup (52)
- xml (7)
- xslt (21)
- atom (8)
- overlapping markup (2)
- schema (9)
- creole (4)
- xforms (1)
- pipelines (7)
- coding (2)
- dtll (1)
- genealogy (3)
- gtd (1)
- hardware (1)
- legislation (1)
- ontologies (2)
- unicode (1)
- web (24)
- google (3)
- rdf (6)
- rest (3)
- wikis (1)
- work (1)
- xpath (1)
- xquery (1)
- xtech2008 (3)
- life (26)
- children (5)
- equality (6)
- environment (4)
- gadgets (5)
- software (3)
- xlinq (2)
- conferences (7)
- xtech (6)
- blog (7)
- drupal (3)
Re: Levenshtein distance in XSLT 2.0
Oh well here’s the O(M*N) one, pretty similar to yours I think, and hits the same limits, I don’t think it should really as it’s tail recursive. Of course, normally I would have fully documented the code, but as I was so hurt by your remarks in the regex post, this is just unadorned xslt….
<xsl:stylesheet version="2.0" xmlns:xsl="http://www.w3.org/1999/XSL/Transform" xmlns:xs="http://www.w3.org/2001/XMLSchema" xmlns:d="data:,dpc"> <xsl:param name="debug"/> <xsl:template name="test"> <xsl:sequence select="d:levenshteintest('0123456789001234567890','01234567890ABCDEFGHIJ')"/> <xsl:sequence select="d:levenshteintest('GUMBO','GAMBOL')"/> <xsl:sequence select="d:levenshteintest('GUMBO','GAMBOL')"/> <xsl:sequence select="d:levenshteintest('GUMBO','')"/> <xsl:sequence select="d:levenshteintest('','GAMBOL')"/> </xsl:template> <xsl:function name="d:levenshteintest"> <xsl:param name="s" as="xs:string"/> <xsl:param name="t" as="xs:string"/> <xsl:text> distance( "</xsl:text> <xsl:value-of select="$s"/> <xsl:text>" , "</xsl:text> <xsl:value-of select="$t"/> <xsl:text>" ) = </xsl:text> <xsl:value-of select="d:levenshtein($s,$t)"/> </xsl:function> <xsl:function name="d:levenshtein"> <xsl:param name="s" as="xs:string"/> <xsl:param name="t" as="xs:string"/> <xsl:variable name="n" select="string-length($s)"/> <xsl:variable name="m" select="string-length($t)"/> <xsl:variable name="ss" select="string-to-codepoints($s)"/> <xsl:variable name="tt" select="string-to-codepoints($t)"/> <xsl:sequence select="if ($n=0) then $m else if ($m=0) then $n else d:levenshtein2( $m, $n, (for $i in (0 to $n) return $i, for $j in (1 to $m) return ($j, for $i in (1 to $n) return (if($ss[$i]=$tt[$j]) then 0 else 1))), $n+3) "/> </xsl:function> <xsl:function name="d:levenshtein2"> <xsl:param name="m" as="xs:integer"/> <xsl:param name="n" as="xs:integer"/> <xsl:param name="a" as="xs:integer+"/> <xsl:param name="p" as="xs:integer"/> <xsl:if test="$debug='yes'"> <xsl:value-of select="$m,$n"/> ====================================== <xsl:for-each select="0 to ($m)"> <xsl:text> </xsl:text> <xsl:value-of select="for $x in subsequence($a,1+ . * ($n+1),$n+1) return format-number($x,'00')"/> </xsl:for-each> ====================================== </xsl:if> <xsl:choose> <xsl:when test="$p=2+($n+1)*($m+1)"> <xsl:sequence select="$a[last()]"/> </xsl:when> <xsl:otherwise> <xsl:sequence select="d:levenshtein2($m,$n, ($a[position()<$p], min(( $a[position()=$p - ($n+1)]+1, $a[position()=$p - 1] + 1, $a[position()=$p - ($n+2)]+$a[position()=$p] )), $a[position()>$p]), if($p mod ($n+1) = 0) then $p+2 else $p+1) "/> </xsl:otherwise> </xsl:choose> </xsl:function> </xsl:stylesheet> $ saxon8 -it test levenshtein.xsl distance( "0123456789001234567890" , "01234567890ABCDEFGHIJ" ) = 11 distance( "GUMBO" , "GAMBOL" ) = 2 distance( "GUMBO" , "GAMBOL" ) = 2 distance( "GUMBO" , "" ) = 5 distance( "" , "GAMBOL" ) = 6David