# sketchn.py

Below is the syntax-highlighted version of sketchn.py from §A5 Appendix: NumPy.

```python
#-----------------------------------------------------------------------
# sketchn.py
#-----------------------------------------------------------------------

import sys
import stdio
import stdarray
import numpy

#-----------------------------------------------------------------------

class Sketch:

    # Construct a new Sketch object which is a profile of string
    # text. The profile is a vector of dimension d. Element i of the
    # vector is proportional to the number of counted k-grams (length-k
    # substrings of text) that hash to i, and the vector is scaled to
    # unit length. If no k-gram is counted (for example, text is empty
    # or no longer than k), the profile is left as the zero vector
    # rather than being divided by a zero norm, which would fill it
    # with NaN.
    #
    # NOTE: Python randomizes str hashes per process unless
    # PYTHONHASHSEED is set, so two Sketch objects are comparable only
    # when they were built in the same run.
    def __init__(self, text, k, d):
        freq = [0] * d
        # Start indices run from 0 to len(text)-k-1, so the k-gram that
        # starts at index len(text)-k is not counted. This bound is kept
        # as is: the sample run at the bottom of the file shows 19
        # counts for a 20-character line of input.
        for i in range(len(text) - k):
            kgram = text[i:i+k]
            # For positive d, Python's % result is in [0, d), even when
            # hash() returns a negative number.
            freq[hash(kgram) % d] += 1
        a = numpy.array(freq, float)
        norm = numpy.linalg.norm(a)
        if norm > 0:
            a = a / norm  # Unit vector
        self._sketch = a

    # Return the similarity measure between self and Sketch object
    # other: the dot product of the two profile vectors. Both vectors
    # have non-negative elements and length at most 1, so the result
    # lies between 0 and 1. 0 indicates that the objects are
    # dissimilar; 1 indicates that they are similar. Both objects must
    # have been constructed with the same d.
    def similarTo(self, other):
        return self._sketch.dot(other._sketch)

    # Return a string representation of self (the string form of the
    # underlying NumPy array).
    def __str__(self):
        return str(self._sketch)

#-----------------------------------------------------------------------

# For testing.
# Accept integers k and d as command-line arguments. Read text from
# standard input, and construct a Sketch object from that text, k, and
# d. Write the Sketch object to standard output.

def main():
    # k is the k-gram length and d is the dimension of the sketch
    # vector; both are required command-line arguments.
    k = int(sys.argv[1])
    d = int(sys.argv[2])
    # Read all of standard input as the text to profile. Without this
    # assignment the name text is undefined and the Sketch call below
    # raises NameError.
    text = sys.stdin.read()
    sketch = Sketch(text, k, d)
    stdio.writeln(sketch)

# Run the test client only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()

#-----------------------------------------------------------------------

# more genome20.txt
# ATAGATGCATAGCGCATAGC

# python sketch.py 2 16 < genome20.txt
# [0.37210420376762543, 0.37210420376762543, 0.49613893835683387,
# 0.0, 0.12403473458920847, 0.0,
# 0.0, 0.0, 0.0,
# 0.0, 0.24806946917841693, 0.0,
# 0.12403473458920847, 0.6201736729460423, 0.0, 0.0]

```