Wilbert's website at SocSci

> Software> Python> Modules> correlation

python/correlation.php 2015-10-05


Everyone seems to have a different opinion on how cross correlation is to be normalized. This is mine:

#!/usr/bin/env python
# -*- coding: utf-8 -*-
import numpy as np
def correlate(a, v, mode='valid', ddof=0, norm=True):
    """Return the (optionally normalized) sliding cross correlation of a and v.

    The shorter of the two sequences is slid over the longer one; if ``a``
    is shorter than ``v`` the two are swapped first, so the result does not
    depend on argument order.

    Parameters
    ----------
    a, v : array_like
        Input sequences (anything ``np.array`` accepts and ``len`` works on).
    mode : {'valid', 'same', 'full'}
        'valid' returns ``len(a) - len(v) + 1`` values (no padding),
        'same' returns ``len(a)`` values and 'full' returns
        ``len(a) + len(v) - 1`` values; the latter two zero-pad the longer
        sequence (lengths taken after the swap described above).
    ddof : int
        Subtracted from ``len(v)`` in the normalization factor.
    norm : bool
        If true, both inputs have their global mean removed and every output
        sample is divided by
        ``std(window) * std(v) * (len(v) - ddof)``,
        where ``window`` is the slice of the (padded) longer sequence that is
        currently under ``v``.

    Returns
    -------
    numpy.ndarray
        The correlation values, one per window position.

    Raises
    ------
    ValueError
        If ``mode`` is not one of 'valid', 'same' or 'full'.

    Notes
    -----
    With ``norm=True`` a window (or ``v``) with zero standard deviation makes
    the divisor zero, so the corresponding outputs are ``nan``/``inf``.
    """
    # Reject an unknown mode up front instead of failing later on an
    # unbound result array.
    if mode not in ('full', 'same', 'valid'):
        raise ValueError("error, do not know mode: {}".format(mode))

    a = np.array(a)
    v = np.array(v)
    # Make sure `a` is the longer sequence; `v` is the one that slides.
    if len(a) < len(v):
        a, v = v, a

    if norm:
        a = a - a.mean()
        v = v - v.mean()

    n = len(v)

    # Zero padding of `a`; `c` is the return vector.
    if mode == 'full':  # len(a) + len(v) - 1 values
        c = np.empty(len(a) + n - 1)
        a = np.concatenate((np.zeros(n - 1), a, np.zeros(n - 1)))
    elif mode == 'same':  # len(a) values
        c = np.empty(len(a))
        left = (n - 1) // 2
        # Only n - 1 padding samples in total are ever read by the loop.
        a = np.concatenate((np.zeros(left), a, np.zeros(n - 1 - left)))
    else:  # 'valid': len(a) - len(v) + 1 values, no padding
        c = np.empty(len(a) - n + 1)

    # The std of `v` does not depend on the window position.
    if norm:
        v_std = np.std(v)

    for i in range(len(c)):
        window = a[i:i + n]
        c[i] = np.dot(window, v)
        if norm:
            c[i] /= np.std(window) * v_std * (n - ddof)
    return c