#----------------------------------------------------------------------- # index.py #----------------------------------------------------------------------- import sys import stdio from bst import OrderedSymbolTable # Accept integers minLength and minCount as command-line arguments. Read # words from standard input until end-of-file. Create an index # indicating where each word appears within standard input. Consider # only words that have at least minLength characters. Then write # the index to standard output. Write only words that occur at # least minCount times. minLength = int(sys.argv[1]) minCount = int(sys.argv[2]) words = stdio.readAllStrings() bst = OrderedSymbolTable() for i in range(len(words)): word = words[i] if len(word) >= minLength: if not word in bst: bst[word] = [] bst[word] += [i] for word in bst: occurrences = bst[word] if len(occurrences) >= minCount: stdio.write(word + ': ') for occurrence in occurrences: stdio.write(str(occurrence) + ' ') stdio.writeln() #----------------------------------------------------------------------- # python index.py 9 30 < tale.txt # confidence: 2794 23064 25031 34249 47907 48268 48577 ... # courtyard: 11885 12062 17303 17451 32404 32522 38663 ... # evremonde: 86211 90791 90798 90802 90814 90822 90856 ... # expression: 3777 5575 6574 7116 7195 8509 8928 15015 ... # gentleman: 2521 5290 5337 5698 6235 6301 6326 6338 ... # influence: 27809 36881 43141 43150 48308 54049 54067 ... # monseigneur: 85 90 36587 36590 36611 36636 36643 36680 ... # something: 3406 3765 9283 13234 13239 15245 20257 23642 ... # sometimes: 4514 4530 4548 6082 20731 33883 34239 34283 ... # vengeance: 56041 63943 67705 79351 79941 79945 80225 ... # python index.py 9 30 < mobydick.txt # carpenter: 109292 155782 162820 171660 171725 171861 171947 ... # completely: 8108 8372 8598 22809 28234 29930 37421 37854 ... # concerning: 7430 8432 21577 21854 25019 30606 37510 41442 ... # considering: 1650 14156 18786 18799 28664 28870 29013 78282 ... # creatures: 1247 13400 27886 30780 52474 71084 77843 81477 ... # especially: 101 3823 4358 6110 18798 21561 22148 24163 28175 ... # forecastle: 1275 1703 1751 36956 44449 46220 53057 55574 ... # gentlemen: 59297 61808 76405 90255 90323 90621 90932 91090 ... # greenland: 44300 48647 48673 48686 48782 48816 49899 49918 ... # harpooneer: 4677 4695 4931 4945 4975 5002 5643 5721 5740 ... # harpooneers: 4630 7218 7357 7675 11427 12418 34048 39082 ... # instances: 27550 53904 54261 65156 66402 66927 69646 69795 ... # intervals: 19909 29961 38130 64917 67511 71903 72680 77582 ... # leviathan: 2459 4028 27970 40852 48300 49118 49131 49265 ... # nantucket: 2298 2368 2410 2417 2441 2464 3126 13590 13759 ... # nevertheless: 3258 10720 13514 28881 38164 38865 42271 45272 ... # particular: 25 1474 2644 3115 10024 10676 19759 21725 24733 ... # something: 1052 1086 1900 2376 3869 3997 6359 7451 8332 ... # steelkilt: 90568 90936 91063 91088 91579 91622 91652 91654 ... # substance: 13699 21710 49813 77803 97580 97812 102060 107304... # themselves: 1069 1595 23815 37572 37880 40274 40296 40913 ... # therefore: 13216 13755 17592 28923 31122 32187 37859 39661 ...