How to plot less than 100 points for bivariate spline in scipy.interpolate.spline - python - python

def smooth(points):
points = np.array(points)
# get x and y vectors
x = points[:,0]
y = points[:,1]
t = np.arange(x.shape[0], dtype=float)
t /= t[-1]
nt = np.linspace(0, 1, 100)
t = np.zeros(x.shape)
t[1:] = np.sqrt((x[1:] - x[:-1])**2 + (y[1:] - y[:-1])**2)
t = np.cumsum(t)
t /= t[-1]
x2 = scipy.interpolate.spline(t, x, nt)
y2 = scipy.interpolate.spline(t, y, nt)
return np.array(x2),np.array(y2)
The above function returns 100 points forming a spline but i require less than 100 points giving me the same spline.
Thanks in advance:)

Related

Python: heat density plot in a disk

My goal is to make a density heat map plot of sphere in 2D. The plotting code below the line works when I use rectangular domains. However, I am trying to use the code for a circular domain. The radius of sphere is 1. The code I have so far is:
from pylab import *
import numpy as np
from matplotlib.colors import LightSource
from numpy.polynomial.legendre import leggauss, legval
xi = 0.0
xf = 1.0
numx = 500
yi = 0.0
yf = 1.0
numy = 500
def f(x):
if 0 <= x <= 1:
return 100
if -1 <= x <= 0:
return 0
deg = 1000
xx, w = leggauss(deg)
L = np.polynomial.legendre.legval(xx, np.identity(deg))
integral = (L * (f(x) * w)[None,:]).sum(axis = 1)
c = (np.arange(1, 500) + 0.5) * integral[1:500]
def r(x, y):
return np.sqrt(x ** 2 + y ** 2)
theta = np.arctan2(y, x)
x, y = np.linspace(0, 1, 500000)
def T(x, y):
return (sum(r(x, y) ** l * c[:,None] *
np.polynomial.legendre.legval(xx, identity(deg)) for l in range(1, 500)))
T(x, y) should equal the sum of c the coefficients times the radius as a function of x and y to the l power times the legendre polynomial where the argument is of the legendre polynomial is cos(theta).
In python: integrating a piecewise function, I learned how to use the Legendre polynomials in a summation but that method is slightly different, and for the plotting, I need a function T(x, y).
This is the plotting code.
densityinterpolation = 'bilinear'
densitycolormap = cm.jet
densityshadedflag = False
densitybarflag = True
gridflag = True
plotfilename = 'laplacesphere.eps'
x = arange(xi, xf, (xf - xi) / (numx - 1))
y = arange(yi, yf, (yf - yi) / (numy - 1))
X, Y = meshgrid(x, y)
z = T(X, Y)
if densityshadedflag:
ls = LightSource(azdeg = 120, altdeg = 65)
rgb = ls.shade(z, densitycolormap)
im = imshow(rgb, extent = [xi, xf, yi, yf], cmap = densitycolormap)
else:
im = imshow(z, extent = [xi, xf, yi, yf], cmap = densitycolormap)
im.set_interpolation(densityinterpolation)
if densitybarflag:
colorbar(im)
grid(gridflag)
show()
I made the plot in Mathematica for reference of what my end goal is
If you set the values outside of the disk domain (or whichever domain you want) to float('nan'), those points will be ignored when plotting (leaving them in white color).

Python how to interpolate large point clouds of 2D data

So let's say I have a point cloud of data in the form of
Z = f(X, Y)
The problem is that I have millions of points, with data that is extremely fine in some (X,Y) regions and extremely sparse in other regions.
Ideally the interpolated solution needs to be continuous, and as smooth as possible. The application is for finite element analysis.
I've tried:
Instead of interpolating I use a KDTree to average nearest nodes. This works very well for points in the fine region but not so well at sparse regions, because discontinuities in the result may arise.
scipy.interpolate.XXX - 2d functions run into memory error. scipy libraries aren't capable of interpolating large numbers of points.
I'm thinking the best way is some sort of hacked up combination of KDTree average nearest nodes and then some sort of interpolation for far away points, but I'm thinking that interpolating millions of points ought to be solved problem...
Anybody have any good ideas on what to do?
So to interpolate arbitrarily large point clouds I wrote a piece of code to partition data into smaller chunks. It's not the best piece of code but will be available for those too lazy to write their own.
import scipy.interpolate
from scipy.interpolate import griddata
from scipy.spatial.qhull import QhullError
class Interp2P(object):
"""
Reconstruction of griddata interpolation for 2d applications.
Built for use for extremely large point clouds. Interpolation
is partitioned into automatic control parameters px, py, pe, blockpts.
The scipy implementation of interpolation functions has memory problems
for large point clouds. This class divides the problem into several
smaller partitions.
arguments:
-------
points : array shape (a, 3)
table of point coordinates describing z = f(x,y) where
- column 0 = x
- column 1 = y
- column 2 = z
kind : str
Interpolation method. Can be
- 'nearest'
- 'linear'
- 'cubic'
px : int or None
Number of partitions in x-direction. If None, a default is calculated
according to the number of blockpts
py : int or None
Number of partitions in y-direction. If None, a default is calculated
according to the number of blockpts.
pe : scalar
Proportion of block length to overlap on other blocks.
For example, if pe=0.25, the block will be extended 25% on both the
left and right sides of px to overlap on successive blocks.
blockpts : int
Approximate number of interpolation points within each partition block.
Defaults to 100*100. blockpts is used to automatically size either
px or py if these are set to None.
"""
def __init__(self, points, kind='linear',
px = None, py = None, pe = 0.25, blockpts = 100*100):
points = np.array(points)
self.x = points[:, 0]
self.y = points[:, 1]
self.z = points[:, 2]
self.kind = kind
self.px = px
self.py = py
self.pe = pe
self.blockpts = blockpts
self._set_partitions()
return
def _set_partitions(self):
""" Calculate the number of partitions to use in data set"""
ptnum = len(self.x)
blockpts = self.blockpts
blocknum = ptnum / blockpts + 1
if self.px is None:
if self.py is None:
self.px = int(np.sqrt(blocknum))
self.py = int(blocknum / self.px)
else:
self.px = int(blocknum / self.py)
if self.py is None:
self.py = int(blocknum / self.px)
self.px = max(self.px, 1)
self.py = max(self.py, 1)
self.xmax = np.max(self.x)
self.xmin = np.min(self.x)
self.xlen = self.xmax - self.xmin
self.xp = self.xlen / self.px
self.xe = self.xp * self.pe
self.ymax = np.max(self.y)
self.ymin = np.min(self.y)
self.ylen = self.ymax - self.ymin
self.yp = self.ylen / self.py
self.ye = self.yp * self.pe
return
def __call__(self, xi, yi):
"""Interpolate points xi and yi.
arguments:
-------
xi : array of shape (N,)
x-coordinates of points to interpolate
yi : array of shape (N,)
y-coordinates of points to interpolate
returns:
------
zi : array of shape (N, ...)
output coordinates as zi = f(xi, yi)
"""
return self._interp(xi, yi)
def _interp(self, xi, yi, xe = None, ye = None):
if xe is None:
xe = self.xe
if ye is None:
ye = self.ye
xfudge = (self.xmax - self.xmin) / 1000.
yfudge = (self.ymax - self.ymin) / 1000.
xl = np.min(xi)
xu = np.max(xi) + xfudge + self.xp
yl = np.min(yi)
yu = np.max(yi) + yfudge + self.yp
xblocks = np.arange(xl, xu, self.xp)
yblocks = np.arange(yl, yu, self.yp)
xb1 = xblocks[0 : -1]
xb2 = xblocks[1 :]
yb1 = yblocks[0 : -1]
yb2 = yblocks[1 :]
z = np.zeros(len(yi))
for x1, x2 in zip(xb1, xb2):
x1e = x1 - xe
x2e = x2 + xe
for y1, y2 in zip(yb1, yb2):
y1e = y1 - ye
y2e = y2 + ye
#Set original data partition
ix0 = np.logical_and(x1e <= self.x, self.x < x2e)
iy0 = np.logical_and(y1e <= self.y, self.y < y2e)
index1 = np.logical_and(ix0, iy0)
x0 = self.x[index1]
y0 = self.y[index1]
z0 = self.z[index1]
#Set new data partition
ix = np.logical_and(x1 <= xi, xi <= x2)
iy = np.logical_and(y1 <= yi, yi <= y2)
index2 = np.logical_and(ix, iy)
x = xi[index2]
y = yi[index2]
points = np.column_stack((x0, y0))
pointsnew = np.column_stack((x, y))
if len(pointsnew) > 0:
try:
zi = griddata(points, z0, pointsnew, method = self.kind)
except (QhullError, ValueError):
#Sometimes the parittion won't have enough points to
#do a proper interpolation. Try to expand interpolation
#Region in this case.
zi = self._interp(x, y, xe=3*xe, ye=3*ye)
#f = LinearNDInterpolator(points, z0, )
#zi = f(pointsnew)
z[index2] = zi
return z

How to draw a curve in Cartesian coordinates in a semicircle tube in polar coordinates?

import numpy as np
import matplotlib.pylab as plt
def tube():
theta = np.linspace(0, np.pi/2, 30)
x = np.cos(theta)
y = np.sin(theta)
z = x*0.8
w = y*0.8
plt.plot(z,w)
plt.plot(x,y)
plt.axis("equal")
plt.show()
print plt.figure(1);tube()
def euler():
A, B, a = 40, 10, 2
t = 10 # time
dt = 1e-3 # interval
nbpt = int(t/dt)
n = 1
s = 1. # sign of the derivative, initially chosen
y = [0]*nbpt # result
while n < nbpt:
yp2 = B - A*y[n-1]**a
if yp2 < 0:
s = -s
n -= 1 # recalculating the previous value
else:
y[n] = y[n-1] + dt*s*np.sqrt(yp2)
n += 1
plt.plot(np.linspace(0,t,nbpt),y)
plt.show()
print plt.figure(2);euler()
I want to draw the curve made with euler() in the tube made with tube(). I guess I must go from cartesian coordinates to polar coordinates but is there anyway to make the process easier with Python ?
There are many ways to do it since the question does not fully pin down what transformation you are looking for. However, assuming any transformation will do so long as the resultant curve oscillates between the boundary lines of the tube, you could use:
def polarmap(x, y):
# normalize x and y from 0 to 1
x = (x-x.min())/(x.max()-x.min())
y = (y-y.min())/(y.max()-y.min())
# make theta go from 0 to pi/2
theta = np.pi*x/2
# make r go from 0.8 to 1.0 (the min and max tube radius)
r = 0.2*y + 0.8
# convert polar to cartesian
x = r*np.cos(theta)
y = r*np.sin(theta)
plt.plot(x, y)
For example,
import numpy as np
import matplotlib.pylab as plt
def tube():
theta = np.linspace(0, np.pi/2, 30)
x = np.cos(theta)
y = np.sin(theta)
z = x*0.8
w = y*0.8
plt.plot(z,w)
plt.plot(x,y)
def euler():
A, B, a = 40, 10, 2
t = 10 # time
dt = 1e-3 # interval
nbpt = int(t/dt)
n = 1
s = 1. # sign of the derivative, initially chosen
y = [0]*nbpt # result
while n < nbpt:
yp2 = B - A*y[n-1]**a
if yp2 < 0:
s = -s
n -= 1 # recalculating the previous value
else:
y[n] = y[n-1] + dt*s*np.sqrt(yp2)
n += 1
x = np.linspace(0,t,nbpt)
y = np.array(y)
return x, y
def polarmap(x, y):
# normalize x and y from 0 to 1
x = (x-x.min())/(x.max()-x.min())
y = (y-y.min())/(y.max()-y.min())
# make theta go from 0 to pi/2
theta = np.pi*x/2
# make r go from 0.8 to 1.0 (the min and max tube radius)
r = 0.2*y + 0.8
# convert polar to cartesian
x = r*np.cos(theta)
y = r*np.sin(theta)
plt.plot(x, y)
fig, ax = plt.subplots()
tube()
x, y = euler()
polarmap(x, y)
plt.axis("equal")
plt.show()
which yields
Notice that in polarmap the first step was to normalize both x and y so
that they both ranged from 0 to 1. You can think of them as parameters on equal
footing. If you swap the two parameters before passing them to polarmap, e.g.:
x, y = euler()
x, y = y, x # swap x and y
polarmap(x, y)
then you get

Speed up Newtons Method using numpy arrays

I am using Newton's method to generate fractals that visualise the roots and the number of iterations taken to find the roots.
I am not happy with the speed taken to complete the function. Is there are a way to speed up my code?
def f(z):
return z**4-1
def f_prime(z):
'''Analytic derivative'''
return 4*z**3
def newton_raphson(x, y, max_iter=20, eps = 1.0e-20):
z = x + y * 1j
iter = np.zeros((len(z), len(z)))
for i in range(max_iter):
z_old = z
z = z-(f(z)/f_prime(z))
for k in range(len(z[:,0])): #this bit of the code is slow. Can I do this WITHOUT for loops?
for j in range(len(z[:,1])):
if iter[k,j] != 0:
continue
if z[k,j] == z_old[k,j]:
iter[k,j] = i
return np.angle(z), iter #return argument of root and iterations taken
n_points = 1000; xmin = -1.5; xmax = 1.5
xs = np.linspace(xmin, xmax, n_points)
X,Y = np.meshgrid(xs, xs)
dat = newton_raphson(X, Y)
Possible optimisations in addition to vectorising the loops
Save the result of z[mask] to avoid repeatedly using fancy indexing (~80% faster)
Combine z - f(z) / f_prime(z) into one function (~25% in this case)
Use lower precision (dtype=np.complex64) if the larger error is acceptable (~90%)
Use broadcasting instead of meshgrid to create the z plane (negligible effect, but generally a good idea).
def iterstep(z):
return 0.75*z + 0.25 / z**3
def newton_raphson(Re, Im, max_iter):
z = Re + 1j * Im[:, np.newaxis]
itercount = np.zeros_like(z, dtype=np.int32)
mask = np.ones_like(z, dtype=np.bool)
for i in range(max_iter):
z_old = z.copy()
z[mask] = iterstep(z[mask])
mask = (z != z_old)
itercount[mask] = i
return np.angle(z), itercount
axis = np.linspace(-1.5, 1.5, num=1000, dtype=np.complex64)
angle, itercount = newton_raphson(Re=axis, Im=axis, max_iter=20)
What it looks like
You can simply vectorize the loops for fairly large speed gains:
def newton_raphson(x, y, max_iter=20, eps = 1.0e-20):
z = x + y * 1j
nz = len(z)
iters = np.zeros((nz, nz))
for i in range(max_iter):
z_old = z
z = z-(f(z)/f_prime(z))
mask = (iters == 0) & (z == z_old)
iters[mask] = i
return np.angle(z), items
Your presented equations are fairly simple; however, I would assume that your f and f_prime functions are significantly more complex. Further speedups can likely be found in those equations rather than in the presented problem.
I would also avoid the use of iter as a variable name as it is a built in python function.
This should be what you are looking for and be working for all shapes of numpy arrays.
def newton_numpy(x, y, max_iter=1000, eps = 1e-5):
z = x + y * 1j
# deltas is to store the differences between to iterations
deltas = np.ones_like(z)
# this is to store how many iterations were used for each element in z
needed_iterations = np.zeros_like(z, dtype=int)
it = 0
# we loop until max_iter or every element converged
while it < max_iter and np.any(deltas > eps):
z_old = z.copy()
# recalculate only for values that have not yet converged
mask = deltas > eps
z[mask] = z[mask] - (f(z[mask]) / f_prime(z[mask]))
needed_iterations[mask] += 1
deltas[mask] = np.abs(z_old[mask] - z[mask])
it += 1
return np.angle(z), needed_iterations
With this code:
n_points = 25
xmin = -1.5
xmax = 1.5
xs = np.linspace(xmin, xmax, n_points)
X, Y = np.meshgrid(xs, xs)
ang, iters = newton_numpy(X, Y, eps=1e-5, max_iters=1000)
It takes 0.09 seconds and a mean of 85 iterations per element.

Code for line of best fit of a scatter plot in python

Below is my code for scatter plotting the data in my text file. The file I am opening contains two columns. The left column is x coordinates and the right column is y coordinates. the code creates a scatter plot of x vs. y. I need a code to overplot a line of best fit to the data in the scatter plot, and none of the built in pylab function have worked for me.
from matplotlib import *
from pylab import *
with open('file.txt') as f:
data = [line.split() for line in f.readlines()]
out = [(float(x), float(y)) for x, y in data]
for i in out:
scatter(i[0],i[1])
xlabel('X')
ylabel('Y')
title('My Title')
show()
A one-line version of this excellent answer to plot the line of best fit is:
plt.plot(np.unique(x), np.poly1d(np.polyfit(x, y, 1))(np.unique(x)))
Using np.unique(x) instead of x handles the case where x isn't sorted or has duplicate values.
You can use numpy's polyfit. I use the following (you can safely remove the bit about coefficient of determination and error bounds, I just think it looks nice):
#!/usr/bin/python3
import numpy as np
import matplotlib.pyplot as plt
import csv
with open("example.csv", "r") as f:
data = [row for row in csv.reader(f)]
xd = [float(row[0]) for row in data]
yd = [float(row[1]) for row in data]
# sort the data
reorder = sorted(range(len(xd)), key = lambda ii: xd[ii])
xd = [xd[ii] for ii in reorder]
yd = [yd[ii] for ii in reorder]
# make the scatter plot
plt.scatter(xd, yd, s=30, alpha=0.15, marker='o')
# determine best fit line
par = np.polyfit(xd, yd, 1, full=True)
slope=par[0][0]
intercept=par[0][1]
xl = [min(xd), max(xd)]
yl = [slope*xx + intercept for xx in xl]
# coefficient of determination, plot text
variance = np.var(yd)
residuals = np.var([(slope*xx + intercept - yy) for xx,yy in zip(xd,yd)])
Rsqr = np.round(1-residuals/variance, decimals=2)
plt.text(.9*max(xd)+.1*min(xd),.9*max(yd)+.1*min(yd),'$R^2 = %0.2f$'% Rsqr, fontsize=30)
plt.xlabel("X Description")
plt.ylabel("Y Description")
# error bounds
yerr = [abs(slope*xx + intercept - yy) for xx,yy in zip(xd,yd)]
par = np.polyfit(xd, yerr, 2, full=True)
yerrUpper = [(xx*slope+intercept)+(par[0][0]*xx**2 + par[0][1]*xx + par[0][2]) for xx,yy in zip(xd,yd)]
yerrLower = [(xx*slope+intercept)-(par[0][0]*xx**2 + par[0][1]*xx + par[0][2]) for xx,yy in zip(xd,yd)]
plt.plot(xl, yl, '-r')
plt.plot(xd, yerrLower, '--r')
plt.plot(xd, yerrUpper, '--r')
plt.show()
Assuming line of best fit for a set of points is:
y = a + b * x
where:
b = ( sum(xi * yi) - n * xbar * ybar ) / sum((xi - xbar)^2)
a = ybar - b * xbar
Code and plot
# sample points
X = [0, 5, 10, 15, 20]
Y = [0, 7, 10, 13, 20]
# solve for a and b
def best_fit(X, Y):
xbar = sum(X)/len(X)
ybar = sum(Y)/len(Y)
n = len(X) # or len(Y)
numer = sum([xi*yi for xi,yi in zip(X, Y)]) - n * xbar * ybar
denum = sum([xi**2 for xi in X]) - n * xbar**2
b = numer / denum
a = ybar - b * xbar
print('best fit line:\ny = {:.2f} + {:.2f}x'.format(a, b))
return a, b
# solution
a, b = best_fit(X, Y)
#best fit line:
#y = 0.80 + 0.92x
# plot points and fit line
import matplotlib.pyplot as plt
plt.scatter(X, Y)
yfit = [a + b * xi for xi in X]
plt.plot(X, yfit)
UPDATE:
notebook version
Have implemented #Micah 's solution to generate a trendline with a few changes and thought I'd share:
Coded as a function
Option for a polynomial trendline (input order=2)
Function can also just return the coefficient of determination (R^2, input Rval=True)
More Numpy array optimisations
Code:
def trendline(xd, yd, order=1, c='r', alpha=1, Rval=False):
"""Make a line of best fit"""
#Calculate trendline
coeffs = np.polyfit(xd, yd, order)
intercept = coeffs[-1]
slope = coeffs[-2]
power = coeffs[0] if order == 2 else 0
minxd = np.min(xd)
maxxd = np.max(xd)
xl = np.array([minxd, maxxd])
yl = power * xl ** 2 + slope * xl + intercept
#Plot trendline
plt.plot(xl, yl, c, alpha=alpha)
#Calculate R Squared
p = np.poly1d(coeffs)
ybar = np.sum(yd) / len(yd)
ssreg = np.sum((p(xd) - ybar) ** 2)
sstot = np.sum((yd - ybar) ** 2)
Rsqr = ssreg / sstot
if not Rval:
#Plot R^2 value
plt.text(0.8 * maxxd + 0.2 * minxd, 0.8 * np.max(yd) + 0.2 * np.min(yd),
'$R^2 = %0.2f$' % Rsqr)
else:
#Return the R^2 value:
return Rsqr

Resources