{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "

Working with Haskell on Jupyter: Levenshtein

" ] }, { "cell_type": "code", "execution_count": 1, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "Hello" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "putStrLn \"Hello\"" ] }, { "cell_type": "code", "execution_count": 2, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "Failed to load interface for ‘Data.Vector.Unboxed’" ], "text/plain": [ "Failed to load interface for ‘Data.Vector.Unboxed’\n", "Use -v to see a list of the files searched for." ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "\n", "import System.Environment\n", "import qualified Data.Vector.Unboxed as V\n", "\n", "-- nextrow uses the previous row of the table to build the next row\n", "-- temp lets us access the cell to the left\n", "nextrow :: V.Vector Char -> V.Vector Char -> V.Vector Int -> Int -> V.Vector Int\n", "nextrow s t previousRow i =\n", " let\n", " -- i and j index into the table\n", " -- but s!0 aligns with row 1 down the side, \n", " -- and t!0 aligns with column 1 across the top\n", " cost j = if (s V.! (i-1)) == (t V.! (j-1)) then 0 else 1\n", " tlen = V.length t\n", " doEachCell cellToLeft j = \n", " if (j==0) then i \n", " else min (cellToLeft+1) (min (cost j + previousRow V.! (j-1)) (1+previousRow V.! j))\n", " in V.scanl (doEachCell) i (V.fromList [1..tlen])\n", "\n", "lastRow :: String -> String -> V.Vector Int\n", "lastRow s t =\n", " let\n", " svec = V.fromList s\n", " tvec = V.fromList t\n", " previousRow = V.fromList [0..length t]\n", " allRows = V.fromList [1..length s]\n", " in\n", " V.foldl (nextrow svec tvec) previousRow allRows\n", "\n", "main = do\n", " -- best to use only IO and let statements in main\n", "{- for testing, with hard-coded strings -}\n", "{-\n", " let s = \"rattle\"\n", " let t = \"scattered\"\n", " putStrLn (\"string \"++show(s)++\" is of length \"++show(length s))\n", " putStrLn (\"string \"++show(t)++\" is of length \"++show(length t))\n", " let answerRow = lastRow s t\n", " putStrLn (\"Levenshtein distance is last entry in \"++show(answerRow))\n", " putStrLn (\"Levenshtein distance is \"++show(V.last answerRow))\n", "\n", " let s2 = \"scattered\"\n", " let t2 = \"rattle\"\n", " putStrLn (\"string \"++show(s2)++\" is of length \"++show(length s2))\n", " putStrLn (\"string \"++show(t2)++\" is of length \"++show(length t2))\n", " let answerRow2 = lastRow s2 t2\n", " putStrLn (\"Levenshtein distance is last entry in \"++show(answerRow2))\n", " putStrLn (\"Levenshtein distance is \"++show(V.last answerRow2))\n", "-}\n", "\n", "{- for normal use, with two text files -}\n", " [f1,f2] <- getArgs\n", " s <- readFile f1\n", " putStrLn (\"file \"++show(f1)++\" is of length \"++show(length s))\n", " t <- readFile f2\n", " putStrLn (\"file \"++show(f2)++\" is of length \"++show(length t))\n", " let answer = V.last (lastRow s t)\n", " putStrLn (\"Levenshtein distance is \"++show(answer))" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": true }, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Haskell", "language": "haskell", "name": "haskell" }, "language_info": { "codemirror_mode": "ihaskell", "file_extension": ".hs", "name": "haskell", "version": "7.10.3" } }, "nbformat": 4, "nbformat_minor": 0 }