1
2
3
4
5
6 """
7 This module implements the Lowess function for nonparametric regression.
8
9 Functions:
10 lowess Fit a smooth nonparametric regression curve to a scatterplot.
11
12 For more information, see
13
14 William S. Cleveland: "Robust locally weighted regression and smoothing
15 scatterplots", Journal of the American Statistical Association, December 1979,
16 volume 74, number 368, pp. 829-836.
17
18 William S. Cleveland and Susan J. Devlin: "Locally weighted regression: An
19 approach to regression analysis by local fitting", Journal of the American
20 Statistical Association, September 1988, volume 83, number 403, pp. 596-610.
21 """
22
23 import numpy
24
25 try:
26 from Bio.Cluster import median
27
28
29 except ImportError, x:
30
31 from numpy import median
32
33 -def lowess(x, y, f=2./3., iter=3):
34 """lowess(x, y, f=2./3., iter=3) -> yest
35
36 Lowess smoother: Robust locally weighted regression.
37 The lowess function fits a nonparametric regression curve to a scatterplot.
38 The arrays x and y contain an equal number of elements; each pair
39 (x[i], y[i]) defines a data point in the scatterplot. The function returns
40 the estimated (smooth) values of y.
41
42 The smoothing span is given by f. A larger value for f will result in a
43 smoother curve. The number of robustifying iterations is given by iter. The
44 function will run faster with a smaller number of iterations.
45
46 x and y should be numpy float arrays of equal length. The return value is
47 also a numpy float array of that length.
48
49 e.g.
50 >>> import numpy
51 >>> x = numpy.array([4, 4, 7, 7, 8, 9, 10, 10, 10, 11, 11, 12, 12, 12,
52 ... 12, 13, 13, 13, 13, 14, 14, 14, 14, 15, 15, 15, 16, 16,
53 ... 17, 17, 17, 18, 18, 18, 18, 19, 19, 19, 20, 20, 20, 20,
54 ... 20, 22, 23, 24, 24, 24, 24, 25], numpy.float)
55 >>> y = numpy.array([2, 10, 4, 22, 16, 10, 18, 26, 34, 17, 28, 14, 20, 24,
56 ... 28, 26, 34, 34, 46, 26, 36, 60, 80, 20, 26, 54, 32, 40,
57 ... 32, 40, 50, 42, 56, 76, 84, 36, 46, 68, 32, 48, 52, 56,
58 ... 64, 66, 54, 70, 92, 93, 120, 85], numpy.float)
59 >>> result = lowess(x, y)
60 >>> len(result)
61 50
62 >>> print "[%0.2f, ..., %0.2f]" % (result[0], result[-1])
63 [4.85, ..., 84.98]
64 """
65 n = len(x)
66 r = int(numpy.ceil(f*n))
67 h = [numpy.sort(abs(x-x[i]))[r] for i in range(n)]
68 w = numpy.clip(abs(([x]-numpy.transpose([x]))/h),0.0,1.0)
69 w = 1-w*w*w
70 w = w*w*w
71 yest = numpy.zeros(n)
72 delta = numpy.ones(n)
73 for iteration in range(iter):
74 for i in xrange(n):
75 weights = delta * w[:,i]
76 weights_mul_x = weights * x
77 b1 = numpy.dot(weights,y)
78 b2 = numpy.dot(weights_mul_x,y)
79 A11 = sum(weights)
80 A12 = sum(weights_mul_x)
81 A21 = A12
82 A22 = numpy.dot(weights_mul_x,x)
83 determinant = A11*A22 - A12*A21
84 beta1 = (A22*b1-A12*b2) / determinant
85 beta2 = (A11*b2-A21*b1) / determinant
86 yest[i] = beta1 + beta2*x[i]
87 residuals = y-yest
88 s = median(abs(residuals))
89 delta[:] = numpy.clip(residuals/(6*s),-1,1)
90 delta[:] = 1-delta*delta
91 delta[:] = delta*delta
92 return yest
93
95 """Run the Bio.Statistics.lowess module's doctests."""
96 print "Running doctests..."
97 import doctest
98 doctest.testmod()
99 print "Done"
100
101 if __name__ == "__main__":
102 _test()
103