Friday, 25 October 2013

Ipython Notebook Playing with Probability

In [2]:
%pylab inline
from matplotlib import mpl,pyplot,colors
import numpy as np
import itertools
Populating the interactive namespace from numpy and matplotlib
Automatic calling is: Smart

The next cell imports the or and and operators. We then give these more convenient names for when we call them in the future.
In [3]:
import operator

OR = operator.__or__
AND =operator.__and__
In [4]:
# list
class Prob(object):
    def __init__(self, vals):
        self.vals = vals
    def values(self):
        return self.vals
    def size(self):
        return len(self.vals)
    def function(string):
        def f(x):
            return eval (string)
        return f

def combs(a,b):
    """returns all combinations of two probability objects"""
    return list(itertools.product(*[a.values(),b.values()]))   
class Dice(Prob):
    def __init__(self, sides = 6):
        Prob.__init__(self, arange(1,sides+1))
    # check if a value is odd
    def odd():
        def isOdd(n):
            return n % 2 == 1
        return isOdd
    # check if a value is even
    def even():
        def isEven(n):
            return n % 2 == 0
        return isEven
class Coin(Prob):
    vals = ['Heads', 'Tails']
    def __init__(self):
    # check if a coin came up 'Heads'
    def heads():
        def isHead(side):
            return side == 'Heads'
        return isHead
    # check if a coin came up 'Tails'
    def tails():
        def isTail(side):
            return side == 'Tails'
        return isTail

def evaluate(t1,t2, op):
    # get all combinations of the probability objects
    each = combs(t1[0], t2[0])
    # turns true or false into +/-5 for Colour Map
    def g(x):
        if x:
            return 5 
            return -5
    # run the rules on the elements of each and use op to combine them
    bools = [op(t1[1](element[0]) , t2[1](element[1])) for element in each]
    # use g to convert to number
    # convert list to numpy array so it can be reshaped
    zvals = np.array([g(boolean) for boolean in bools])
    return zvals.reshape(t1[0].size(), t2[0].size())
In [5]:
def show(a,b,op):
    zvals = evaluate(a,b,op)
    plot(a[0],b[0], zvals)
def compare(d1, d2, rule):
        c = combs(d1, d2) 
        print [(x,y, eval(rule)) for x,y in c]
        def g(x):
            if x:
                return 5 
                return -5
        zvals = np.array([g(eval(rule)) for x,y in c]).reshape(d1.size(), d2.size())
        plot(d1,d2, zvals)

def plot(p2,p1,zvals):
    fig, ax = pyplot.subplots()
    # make a color map of fixed colors
    cmap = mpl.colors.ListedColormap(['blue','red'])
    norm = mpl.colors.BoundaryNorm(bounds, cmap.N)

    # tell imshow about color map so that only set colors are used
    img = pyplot.pcolormesh(zvals, cmap = cmap,norm=norm)

    #pyplot.grid(b=True, which='major', color='black', linestyle='-')
    m = max(p1.size(), p2.size())

    px = p1.size()
    xlocations = np.array(range(px))
    xminorlocations = np.array(range(px))+0.5
    py = p2.size()
    ylocations = np.array(range(py))
    yminorlocations = np.array(range(py))+0.5
    pyplot.xticks(xminorlocations, p1.values(), rotation=0, size=15)
    ax.set_xticks(xlocations, minor=True)
    pyplot.yticks(yminorlocations, p2.values(), rotation=0, size=15)
    ax.set_yticks(ylocations, minor=True)
    grid(True, which='minor', linestyle='-')


Creating Our Probability Objects

A coin or dice can be created by calling Coin() or Dice() respectively. A different-sided dice can be created by pass ing a number to Dice(). Here we will create a 12-sided dice.
In [6]:
d = Dice()
d12 = Dice(12)
c = Coin()

Creating (Probability Object, Rule) 2-Tuples

IPython Notebook Playing With Functions

In [149]:
%pylab inline
from matplotlib import mpl,pyplot,colors
import numpy as np
from sympy import *
from sympy.interactive import printing
import functools
Populating the interactive namespace from numpy and matplotlib

WARNING: pylab import has clobbered these variables: ['prod', 'plotting', 'cosh', 'Circle', 'power', 'diag', 'sinh', 'trunc', 'binomial', 'plot', 'eye', 'det', 'tan', 'product', 'gamma', 'roots', 'vectorize', 'zeros', 'interactive', 'conjugate', 'take', 'solve', 'trace', 'beta', 'colors', 'ones', 'multinomial', 'transpose', 'cos', 'diff', 'invert', 'pi', 'tanh', 'Polygon', 'reshape', 'sqrt', 'source', 'add', 'test', 'poly', 'mod', 'sign', 'log', 'var', 'seterr', 'flatten', 'floor', 'nan', 'exp', 'sin']
`%pylab --no-import-all` prevents importing * from pylab and numpy

In [150]:
x = Symbol('x')

class Function(object):
    def __init__(self, coeff, start=-5, end = 5, points = 20):
        # more points = more accurate but take more time
        self.points =(end-start)* points
        # tuples in the form (coefficient, power of x)
        l = len(coeff)
        self.coeff = [(value, l - 1 - index) for index, value in enumerate(coeff)]
        self.X = np.linspace(start, end, self.points)
        # coefficients for the first and second derivatives
        self.D1 = [(a*b, b-1) for a,b in self.coeff[:-1]]
        self.D2 = [(a*b, b-1) for a,b in self.D1[:-1]]
        # set the Y values
        self.Y = np.zeros(self.points)
        for a,b in self.coeff:            
            self.Y = self.Y +a*(self.X**b) 
    def y(self,coeff, change = None):
        y = np.zeros(self.points)
        for a,b in coeff:
            if b == change:
                print "changing",change
                y = y -a*(self.X**b)
                y = y +a*(self.X**b)
        return y
    def values(self):
    def d1(self):
        return self.D1
    def d2(self):
        return self.D2
    # plot the function and its first derivative
    def plotD1(self):
    # plot the function and its first AND second derivatives
    def plotD1D2(self):
    def plot(self, coeff = None):
    def change(self,co):
        pyplot.plot(self.X,self.y(self.coeff, co))
    def changeWithDerivative(self,co):
        x1 = self.X
        #plt.subplot(2, 1, 1)
        plt.plot(x1, self.Y, 'b-')
        plt.plot(x1, self.y(self.D1), 'b-')
        plt.grid(which='major', color='k', linestyle='-')

        #plt.subplot(2, 1, 2)
        plt.plot(self.X,self.y(self.coeff, co), 'r-')
        plt.plot(self.X, self.y(self.D1,co-1), 'r-')
        plt.grid(which='major', color='k', linestyle='-')


def functionFromRoots(listOfRoots, start=-5, end = 5, points = 10):
    factors = [(x-a) for a in listOfRoots]
    print "Here are the factors:", factors
    def ffr(a,b):
        return expand(a*b)
    poly = functools.reduce(lambda a,b: ffr(a,b), factors)
    print "Here is the polynomial", latex(poly)
    poly = Poly(poly)
    return Function(poly.coeffs(), start, end, points)
roots = [1,-1,4]
ffr = functionFromRoots(roots)
l = [1,-1,-14,0]
a = Function(l)

Here are the factors: [x - 1, x + 1, x - 4]
Here is the polynomial x^{3} - 4 x^{2} - x + 4

In [151]:
ff = functionFromRoots([1,2,3])

z = Poly(expand((x+1)*(x+2)*(x+3)))

print z.coeffs()
Here are the factors: [x - 1, x - 2, x - 3]
Here is the polynomial x^{3} - 6 x^{2} + 11 x - 6
[1, 6, 11, 6]

Drawing a Polynomial From Roots

When working with larger degree polynomials we could just enter more numbers into the Function constructor. For example:
f = Function[1,5,-7,12])
The higher the degree the harder it is to intuitively get real roots in a narrow range. For this we can use functionFromRoots(listOfRoots) which takes a list of roots as its argument.
In [152]:
roots = [1,-1,4]
ffr = functionFromRoots(roots)
Here are the factors: [x - 1, x + 1, x - 4]
Here is the polynomial x^{3} - 4 x^{2} - x + 4

Using [1,-1,4] makes it hard to look at the key parts so I will also use 'start = -2' to bring the picture into focus
In [153]:
ffr = functionFromRoots(roots, start=-2)
Here are the factors: [x - 1, x + 1, x - 4]
Here is the polynomial x^{3} - 4 x^{2} - x + 4

Change The Sign Of a Coefficient

2010 Project Maths with Ipython Notebook (and NBConvert)

In [10]:
import numpy as np
import matplotlib.pyplot as plt
from scipy.stats import norm
%load_ext autoreload
%autoreload 1
A car rental company has been using Evertread tyres on their fleet of economy cars. All cars in this fleet are identical. The company manages the tyres on each car in such a way that the four tyres all wear out at the same time. The company keeps a record of the lifespan of each set of tyres. The records show that the lifespan of these sets of tyres is normally distributed with mean 45 000 km and standard deviation 8000 km.
The company is considering switching brands from Evertread tyres to SafeRun tyres, because they are cheaper. The distributors of SafeRun tyres claim that these tyres have the same mean lifespan as Evertread tyres. The car rental company wants to check this claim before they switch brands. They have enough data on Evertread tyres to regard these as a known population. They want to test a sample of SafeRun tyres against it.
The company selects 25 economy cars at random from the fleet and fits them with the new tyres. For these cars, it is found that the mean life span of the tyres is 43 850 km.
Test, at the 5% level of significance, the hypothesis that the mean lifespan of SafeRun tyres is the same as the known mean of Evertread tyres. State clearly what the company can conclude about the tyres.

State the Null and Alternative Hypothesese

\[H_0: \mu = 45000\]
\[H_1: \mu \neq 45000\]
Every out come in life is a mixture of true talent plus luck. If a footballer scores fifteen in goals there is a small chance that he is a true-fifteen-goal player who had even luck. There is a greater chance that he was a player who should have scored goals who was unlucky or a player who should have scored less goals but was unlucky. We can never know for sure but if the standard deviations for goals is 2 we but be much happier rejecting the null hypothesis that a player is a fifteen-goal player if he went out and scored 25 goals (5 standard deviations away) than if he scored 17 goals (1 standard deviation away)
One thing to note with standard deviations is how quickly things escalate:
In [11]:
numbers = [0.5 ,1.0 , 1.5 ,2.0 , 2.5, 3.0,4.0]

for x in numbers:
    # get the p-value
    print x, "standard deviation would be due to chance one out of", int(1.0/(norm.sf(x)*2)), "times"
0.5 standard deviation would be due to chance one out of 1 times
1.0 standard deviation would be due to chance one out of 3 times
1.5 standard deviation would be due to chance one out of 7 times
2.0 standard deviation would be due to chance one out of 21 times
2.5 standard deviation would be due to chance one out of 80 times
3.0 standard deviation would be due to chance one out of 370 times
4.0 standard deviation would be due to chance one out of 15787 times

Probability Distribution Function

We can see why this is by drawing a probability distribution and notice how numbers that are close to the mean are higher than those that are far away.
In [12]:
# Question Variables
mean = 45000
hypothetical_population_mean = mean
sd = 8000
sample_size = 25
confidence_interval = 1*sd  # The value from the log tables

curve_width = 4*sd # the number of standard deviations to show

# Get the range of numbers
# Use numpy's arrange(start,finish,increment)
range = np.arange(mean - curve_width, mean + curve_width , curve_width * 0.002)

# pdf is probability distribution function
y = norm.pdf(range,mean,sd)

# Show the graph
plt.plot(range, y, color="black")
[<matplotlib.lines.Line2D at 0x52a5fb0>]

How far is far enough away

We are interested in numbers in the red regions as those are far enough away that we can reject the idea that that the difference was due to chance.
In [13]:
plt.plot(range,y,color="black", ls='-')

left = mean - confidence_interval
right = mean + confidence_interval

# x < - 2
subx1 = [n for n in range if n < left] 

suby1 = y[:len(subx1)]

plt.fill_betweenx(suby1,subx1, left, color = "red")

# x < - 2
subx2 = [n for n in range if n > right] 

suby2 = y[-len(subx2):]

plt.fill_betweenx(suby2,subx2, right, color="red")
<matplotlib.collections.PolyCollection at 0x5ca5f10>

Transformation and Normalisation

Arrow Key Nav