## How to plot fewer than 100 points for a bivariate spline in scipy.interpolate.spline - python

def smooth(points, num=100):
    """Resample a 2-D polyline as a smooth cubic-spline curve.

    Parameters
    ----------
    points : array-like of shape (n, 2)
        Sequence of (x, y) vertices; n must be >= 4 for a cubic spline,
        and consecutive points must be distinct (chord lengths > 0).
    num : int, optional
        Number of points on the returned curve (default 100).  Pass a
        smaller value to get fewer points on the same spline.

    Returns
    -------
    (x2, y2) : tuple of ndarrays, each of shape (num,)
        Coordinates of the smoothed curve.
    """
    points = np.asarray(points, dtype=float)
    x = points[:, 0]
    y = points[:, 1]
    # Parameterize by cumulative chord length so unevenly spaced input
    # points keep their relative spacing along the curve.
    t = np.zeros(x.shape)
    t[1:] = np.sqrt((x[1:] - x[:-1]) ** 2 + (y[1:] - y[:-1]) ** 2)
    t = np.cumsum(t)
    t /= t[-1]
    nt = np.linspace(0, 1, num)
    # scipy.interpolate.spline was deprecated and removed in SciPy 1.3;
    # make_interp_spline is the supported replacement (cubic by default,
    # matching the old call's behavior).
    x2 = scipy.interpolate.make_interp_spline(t, x)(nt)
    y2 = scipy.interpolate.make_interp_spline(t, y)(nt)
    return x2, y2
The above function returns 100 points forming a spline, but I require fewer than 100 points that give me the same spline.
Thanks in advance:)

## Related

### Python: heat density plot in a disk

My goal is to make a density heat map plot of sphere in 2D. The plotting code below the line works when I use rectangular domains. However, I am trying to use the code for a circular domain. The radius of sphere is 1. The code I have so far is: from pylab import * import numpy as np from matplotlib.colors import LightSource from numpy.polynomial.legendre import leggauss, legval xi = 0.0 xf = 1.0 numx = 500 yi = 0.0 yf = 1.0 numy = 500 def f(x): if 0 <= x <= 1: return 100 if -1 <= x <= 0: return 0 deg = 1000 xx, w = leggauss(deg) L = np.polynomial.legendre.legval(xx, np.identity(deg)) integral = (L * (f(x) * w)[None,:]).sum(axis = 1) c = (np.arange(1, 500) + 0.5) * integral[1:500] def r(x, y): return np.sqrt(x ** 2 + y ** 2) theta = np.arctan2(y, x) x, y = np.linspace(0, 1, 500000) def T(x, y): return (sum(r(x, y) ** l * c[:,None] * np.polynomial.legendre.legval(xx, identity(deg)) for l in range(1, 500))) T(x, y) should equal the sum of c the coefficients times the radius as a function of x and y to the l power times the legendre polynomial where the argument is of the legendre polynomial is cos(theta). In python: integrating a piecewise function, I learned how to use the Legendre polynomials in a summation but that method is slightly different, and for the plotting, I need a function T(x, y). This is the plotting code. 
densityinterpolation = 'bilinear' densitycolormap = cm.jet densityshadedflag = False densitybarflag = True gridflag = True plotfilename = 'laplacesphere.eps' x = arange(xi, xf, (xf - xi) / (numx - 1)) y = arange(yi, yf, (yf - yi) / (numy - 1)) X, Y = meshgrid(x, y) z = T(X, Y) if densityshadedflag: ls = LightSource(azdeg = 120, altdeg = 65) rgb = ls.shade(z, densitycolormap) im = imshow(rgb, extent = [xi, xf, yi, yf], cmap = densitycolormap) else: im = imshow(z, extent = [xi, xf, yi, yf], cmap = densitycolormap) im.set_interpolation(densityinterpolation) if densitybarflag: colorbar(im) grid(gridflag) show() I made the plot in Mathematica for reference of what my end goal is

If you set the values outside of the disk domain (or whichever domain you want) to float('nan'), those points will be ignored when plotting (leaving them in white color).

### Python how to interpolate large point clouds of 2D data

So let's say I have a point cloud of data in the form of Z = f(X, Y) The problem is that I have millions of points, with data that is extremely fine in some (X,Y) regions and extremely sparse in other regions. Ideally the interpolated solution needs to be continuous, and as smooth as possible. The application is for finite element analysis. I've tried: Instead of interpolating I use a KDTree to average nearest nodes. This works very well for points in the fine region but not so well at sparse regions, because discontinuities in the result may arise. scipy.interpolate.XXX - 2d functions run into memory error. scipy libraries aren't capable of interpolating large numbers of points. I'm thinking the best way is some sort of hacked up combination of KDTree average nearest nodes and then some sort of interpolation for far away points, but I'm thinking that interpolating millions of points ought to be solved problem... Anybody have any good ideas on what to do?

So to interpolate arbitrarily large point clouds I wrote a piece of code to partition data into smaller chunks. It's not the best piece of code but will be available for those too lazy to write their own. import scipy.interpolate from scipy.interpolate import griddata from scipy.spatial.qhull import QhullError class Interp2P(object): """ Reconstruction of griddata interpolation for 2d applications. Built for use for extremely large point clouds. Interpolation is partitioned into automatic control parameters px, py, pe, blockpts. The scipy implementation of interpolation functions has memory problems for large point clouds. This class divides the problem into several smaller partitions. arguments: ------- points : array shape (a, 3) table of point coordinates describing z = f(x,y) where - column 0 = x - column 1 = y - column 2 = z kind : str Interpolation method. Can be - 'nearest' - 'linear' - 'cubic' px : int or None Number of partitions in x-direction. If None, a default is calculated according to the number of blockpts py : int or None Number of partitions in y-direction. If None, a default is calculated according to the number of blockpts. pe : scalar Proportion of block length to overlap on other blocks. For example, if pe=0.25, the block will be extended 25% on both the left and right sides of px to overlap on successive blocks. blockpts : int Approximate number of interpolation points within each partition block. Defaults to 100*100. blockpts is used to automatically size either px or py if these are set to None. 
""" def __init__(self, points, kind='linear', px = None, py = None, pe = 0.25, blockpts = 100*100): points = np.array(points) self.x = points[:, 0] self.y = points[:, 1] self.z = points[:, 2] self.kind = kind self.px = px self.py = py self.pe = pe self.blockpts = blockpts self._set_partitions() return def _set_partitions(self): """ Calculate the number of partitions to use in data set""" ptnum = len(self.x) blockpts = self.blockpts blocknum = ptnum / blockpts + 1 if self.px is None: if self.py is None: self.px = int(np.sqrt(blocknum)) self.py = int(blocknum / self.px) else: self.px = int(blocknum / self.py) if self.py is None: self.py = int(blocknum / self.px) self.px = max(self.px, 1) self.py = max(self.py, 1) self.xmax = np.max(self.x) self.xmin = np.min(self.x) self.xlen = self.xmax - self.xmin self.xp = self.xlen / self.px self.xe = self.xp * self.pe self.ymax = np.max(self.y) self.ymin = np.min(self.y) self.ylen = self.ymax - self.ymin self.yp = self.ylen / self.py self.ye = self.yp * self.pe return def __call__(self, xi, yi): """Interpolate points xi and yi. arguments: ------- xi : array of shape (N,) x-coordinates of points to interpolate yi : array of shape (N,) y-coordinates of points to interpolate returns: ------ zi : array of shape (N, ...) output coordinates as zi = f(xi, yi) """ return self._interp(xi, yi) def _interp(self, xi, yi, xe = None, ye = None): if xe is None: xe = self.xe if ye is None: ye = self.ye xfudge = (self.xmax - self.xmin) / 1000. yfudge = (self.ymax - self.ymin) / 1000. 
xl = np.min(xi) xu = np.max(xi) + xfudge + self.xp yl = np.min(yi) yu = np.max(yi) + yfudge + self.yp xblocks = np.arange(xl, xu, self.xp) yblocks = np.arange(yl, yu, self.yp) xb1 = xblocks[0 : -1] xb2 = xblocks[1 :] yb1 = yblocks[0 : -1] yb2 = yblocks[1 :] z = np.zeros(len(yi)) for x1, x2 in zip(xb1, xb2): x1e = x1 - xe x2e = x2 + xe for y1, y2 in zip(yb1, yb2): y1e = y1 - ye y2e = y2 + ye #Set original data partition ix0 = np.logical_and(x1e <= self.x, self.x < x2e) iy0 = np.logical_and(y1e <= self.y, self.y < y2e) index1 = np.logical_and(ix0, iy0) x0 = self.x[index1] y0 = self.y[index1] z0 = self.z[index1] #Set new data partition ix = np.logical_and(x1 <= xi, xi <= x2) iy = np.logical_and(y1 <= yi, yi <= y2) index2 = np.logical_and(ix, iy) x = xi[index2] y = yi[index2] points = np.column_stack((x0, y0)) pointsnew = np.column_stack((x, y)) if len(pointsnew) > 0: try: zi = griddata(points, z0, pointsnew, method = self.kind) except (QhullError, ValueError): #Sometimes the parittion won't have enough points to #do a proper interpolation. Try to expand interpolation #Region in this case. zi = self._interp(x, y, xe=3*xe, ye=3*ye) #f = LinearNDInterpolator(points, z0, ) #zi = f(pointsnew) z[index2] = zi return z

### How to draw a curve in Cartesian coordinates in a semicircle tube in polar coordinates?

import numpy as np import matplotlib.pylab as plt def tube(): theta = np.linspace(0, np.pi/2, 30) x = np.cos(theta) y = np.sin(theta) z = x*0.8 w = y*0.8 plt.plot(z,w) plt.plot(x,y) plt.axis("equal") plt.show() print plt.figure(1);tube() def euler(): A, B, a = 40, 10, 2 t = 10 # time dt = 1e-3 # interval nbpt = int(t/dt) n = 1 s = 1. # sign of the derivative, initially chosen y = [0]*nbpt # result while n < nbpt: yp2 = B - A*y[n-1]**a if yp2 < 0: s = -s n -= 1 # recalculating the previous value else: y[n] = y[n-1] + dt*s*np.sqrt(yp2) n += 1 plt.plot(np.linspace(0,t,nbpt),y) plt.show() print plt.figure(2);euler() I want to draw the curve made with euler() in the tube made with tube(). I guess I must go from cartesian coordinates to polar coordinates but is there anyway to make the process easier with Python ?

There are many ways to do it since the question does not fully pin down what transformation you are looking for. However, assuming any transformation will do so long as the resultant curve oscillates between the boundary lines of the tube, you could use: def polarmap(x, y): # normalize x and y from 0 to 1 x = (x-x.min())/(x.max()-x.min()) y = (y-y.min())/(y.max()-y.min()) # make theta go from 0 to pi/2 theta = np.pi*x/2 # make r go from 0.8 to 1.0 (the min and max tube radius) r = 0.2*y + 0.8 # convert polar to cartesian x = r*np.cos(theta) y = r*np.sin(theta) plt.plot(x, y) For example, import numpy as np import matplotlib.pylab as plt def tube(): theta = np.linspace(0, np.pi/2, 30) x = np.cos(theta) y = np.sin(theta) z = x*0.8 w = y*0.8 plt.plot(z,w) plt.plot(x,y) def euler(): A, B, a = 40, 10, 2 t = 10 # time dt = 1e-3 # interval nbpt = int(t/dt) n = 1 s = 1. # sign of the derivative, initially chosen y = [0]*nbpt # result while n < nbpt: yp2 = B - A*y[n-1]**a if yp2 < 0: s = -s n -= 1 # recalculating the previous value else: y[n] = y[n-1] + dt*s*np.sqrt(yp2) n += 1 x = np.linspace(0,t,nbpt) y = np.array(y) return x, y def polarmap(x, y): # normalize x and y from 0 to 1 x = (x-x.min())/(x.max()-x.min()) y = (y-y.min())/(y.max()-y.min()) # make theta go from 0 to pi/2 theta = np.pi*x/2 # make r go from 0.8 to 1.0 (the min and max tube radius) r = 0.2*y + 0.8 # convert polar to cartesian x = r*np.cos(theta) y = r*np.sin(theta) plt.plot(x, y) fig, ax = plt.subplots() tube() x, y = euler() polarmap(x, y) plt.axis("equal") plt.show() which yields Notice that in polarmap the first step was to normalize both x and y so that they both ranged from 0 to 1. You can think of them as parameters on equal footing. If you swap the two parameters before passing them to polarmap, e.g.: x, y = euler() x, y = y, x # swap x and y polarmap(x, y) then you get

### Speed up Newtons Method using numpy arrays

I am using Newton's method to generate fractals that visualise the roots and the number of iterations taken to find the roots. I am not happy with the speed taken to complete the function. Is there are a way to speed up my code? def f(z): return z**4-1 def f_prime(z): '''Analytic derivative''' return 4*z**3 def newton_raphson(x, y, max_iter=20, eps = 1.0e-20): z = x + y * 1j iter = np.zeros((len(z), len(z))) for i in range(max_iter): z_old = z z = z-(f(z)/f_prime(z)) for k in range(len(z[:,0])): #this bit of the code is slow. Can I do this WITHOUT for loops? for j in range(len(z[:,1])): if iter[k,j] != 0: continue if z[k,j] == z_old[k,j]: iter[k,j] = i return np.angle(z), iter #return argument of root and iterations taken n_points = 1000; xmin = -1.5; xmax = 1.5 xs = np.linspace(xmin, xmax, n_points) X,Y = np.meshgrid(xs, xs) dat = newton_raphson(X, Y)

Possible optimisations in addition to vectorising the loops Save the result of z[mask] to avoid repeatedly using fancy indexing (~80% faster) Combine z - f(z) / f_prime(z) into one function (~25% in this case) Use lower precision (dtype=np.complex64) if the larger error is acceptable (~90%) Use broadcasting instead of meshgrid to create the z plane (negligible effect, but generally a good idea). def iterstep(z): return 0.75*z + 0.25 / z**3 def newton_raphson(Re, Im, max_iter): z = Re + 1j * Im[:, np.newaxis] itercount = np.zeros_like(z, dtype=np.int32) mask = np.ones_like(z, dtype=bool) for i in range(max_iter): z_old = z.copy() z[mask] = iterstep(z[mask]) mask = (z != z_old) itercount[mask] = i return np.angle(z), itercount axis = np.linspace(-1.5, 1.5, num=1000, dtype=np.complex64) angle, itercount = newton_raphson(Re=axis, Im=axis, max_iter=20) What it looks like

You can simply vectorize the loops for fairly large speed gains: def newton_raphson(x, y, max_iter=20, eps = 1.0e-20): z = x + y * 1j nz = len(z) iters = np.zeros((nz, nz)) for i in range(max_iter): z_old = z z = z-(f(z)/f_prime(z)) mask = (iters == 0) & (z == z_old) iters[mask] = i return np.angle(z), iters Your presented equations are fairly simple; however, I would assume that your f and f_prime functions are significantly more complex. Further speedups can likely be found in those equations rather than in the presented problem. I would also avoid the use of iter as a variable name as it is a built in python function.

This should be what you are looking for and be working for all shapes of numpy arrays. def newton_numpy(x, y, max_iter=1000, eps = 1e-5): z = x + y * 1j # deltas is to store the differences between two iterations deltas = np.ones_like(z) # this is to store how many iterations were used for each element in z needed_iterations = np.zeros_like(z, dtype=int) it = 0 # we loop until max_iter or every element converged while it < max_iter and np.any(deltas > eps): z_old = z.copy() # recalculate only for values that have not yet converged mask = deltas > eps z[mask] = z[mask] - (f(z[mask]) / f_prime(z[mask])) needed_iterations[mask] += 1 deltas[mask] = np.abs(z_old[mask] - z[mask]) it += 1 return np.angle(z), needed_iterations With this code: n_points = 25 xmin = -1.5 xmax = 1.5 xs = np.linspace(xmin, xmax, n_points) X, Y = np.meshgrid(xs, xs) ang, iters = newton_numpy(X, Y, eps=1e-5, max_iter=1000) It takes 0.09 seconds and a mean of 85 iterations per element.

### Code for line of best fit of a scatter plot in python

Below is my code for scatter plotting the data in my text file. The file I am opening contains two columns. The left column is x coordinates and the right column is y coordinates. the code creates a scatter plot of x vs. y. I need a code to overplot a line of best fit to the data in the scatter plot, and none of the built in pylab function have worked for me. from matplotlib import * from pylab import * with open('file.txt') as f: data = [line.split() for line in f.readlines()] out = [(float(x), float(y)) for x, y in data] for i in out: scatter(i[0],i[1]) xlabel('X') ylabel('Y') title('My Title') show()

A one-line version of this excellent answer to plot the line of best fit is: plt.plot(np.unique(x), np.poly1d(np.polyfit(x, y, 1))(np.unique(x))) Using np.unique(x) instead of x handles the case where x isn't sorted or has duplicate values.

You can use numpy's polyfit. I use the following (you can safely remove the bit about coefficient of determination and error bounds, I just think it looks nice): #!/usr/bin/python3 import numpy as np import matplotlib.pyplot as plt import csv with open("example.csv", "r") as f: data = [row for row in csv.reader(f)] xd = [float(row[0]) for row in data] yd = [float(row[1]) for row in data] # sort the data reorder = sorted(range(len(xd)), key = lambda ii: xd[ii]) xd = [xd[ii] for ii in reorder] yd = [yd[ii] for ii in reorder] # make the scatter plot plt.scatter(xd, yd, s=30, alpha=0.15, marker='o') # determine best fit line par = np.polyfit(xd, yd, 1, full=True) slope=par[0][0] intercept=par[0][1] xl = [min(xd), max(xd)] yl = [slope*xx + intercept for xx in xl] # coefficient of determination, plot text variance = np.var(yd) residuals = np.var([(slope*xx + intercept - yy) for xx,yy in zip(xd,yd)]) Rsqr = np.round(1-residuals/variance, decimals=2) plt.text(.9*max(xd)+.1*min(xd),.9*max(yd)+.1*min(yd),'$R^2 = %0.2f$'% Rsqr, fontsize=30) plt.xlabel("X Description") plt.ylabel("Y Description") # error bounds yerr = [abs(slope*xx + intercept - yy) for xx,yy in zip(xd,yd)] par = np.polyfit(xd, yerr, 2, full=True) yerrUpper = [(xx*slope+intercept)+(par[0][0]*xx**2 + par[0][1]*xx + par[0][2]) for xx,yy in zip(xd,yd)] yerrLower = [(xx*slope+intercept)-(par[0][0]*xx**2 + par[0][1]*xx + par[0][2]) for xx,yy in zip(xd,yd)] plt.plot(xl, yl, '-r') plt.plot(xd, yerrLower, '--r') plt.plot(xd, yerrUpper, '--r') plt.show()

Assuming line of best fit for a set of points is: y = a + b * x where: b = ( sum(xi * yi) - n * xbar * ybar ) / sum((xi - xbar)^2) a = ybar - b * xbar Code and plot # sample points X = [0, 5, 10, 15, 20] Y = [0, 7, 10, 13, 20] # solve for a and b def best_fit(X, Y): xbar = sum(X)/len(X) ybar = sum(Y)/len(Y) n = len(X) # or len(Y) numer = sum([xi*yi for xi,yi in zip(X, Y)]) - n * xbar * ybar denum = sum([xi**2 for xi in X]) - n * xbar**2 b = numer / denum a = ybar - b * xbar print('best fit line:\ny = {:.2f} + {:.2f}x'.format(a, b)) return a, b # solution a, b = best_fit(X, Y) #best fit line: #y = 0.80 + 0.92x # plot points and fit line import matplotlib.pyplot as plt plt.scatter(X, Y) yfit = [a + b * xi for xi in X] plt.plot(X, yfit) UPDATE: notebook version

Have implemented #Micah 's solution to generate a trendline with a few changes and thought I'd share: Coded as a function Option for a polynomial trendline (input order=2) Function can also just return the coefficient of determination (R^2, input Rval=True) More Numpy array optimisations Code: def trendline(xd, yd, order=1, c='r', alpha=1, Rval=False): """Make a line of best fit""" #Calculate trendline coeffs = np.polyfit(xd, yd, order) intercept = coeffs[-1] slope = coeffs[-2] power = coeffs[0] if order == 2 else 0 minxd = np.min(xd) maxxd = np.max(xd) xl = np.array([minxd, maxxd]) yl = power * xl ** 2 + slope * xl + intercept #Plot trendline plt.plot(xl, yl, c, alpha=alpha) #Calculate R Squared p = np.poly1d(coeffs) ybar = np.sum(yd) / len(yd) ssreg = np.sum((p(xd) - ybar) ** 2) sstot = np.sum((yd - ybar) ** 2) Rsqr = ssreg / sstot if not Rval: #Plot R^2 value plt.text(0.8 * maxxd + 0.2 * minxd, 0.8 * np.max(yd) + 0.2 * np.min(yd), '$R^2 = %0.2f$' % Rsqr) else: #Return the R^2 value: return Rsqr