Ticket #3726: sage-3726-part6.patch

File sage-3726-part6.patch, 34.1 kB (added by was, 5 months ago)
  • a/sage/stats/hmm/all.py

    old new  
     1############################################################################# 
     2#       Copyright (C) 2008 William Stein <wstein@gmail.com> 
     3#  Distributed under the terms of the GNU General Public License (GPL), 
     4#  version 2 or any later version at your option. 
     5#  The full text of the GPL is available at: 
     6#                  http://www.gnu.org/licenses/ 
     7############################################################################# 
     8 
    19from hmm  import DiscreteHiddenMarkovModel 
    210from chmm import GaussianHiddenMarkovModel 
  • a/sage/stats/hmm/chmm.pxd

    old new  
     1############################################################################# 
     2#       Copyright (C) 2008 William Stein <wstein@gmail.com> 
     3#  Distributed under the terms of the GNU General Public License (GPL), 
     4#  version 2 or any later version at your option. 
     5#  The full text of the GPL is available at: 
     6#                  http://www.gnu.org/licenses/ 
     7############################################################################# 
    18 
    29from hmm cimport HiddenMarkovModel 
    310 
     
    193200 
    194201 
    195202    int ghmm_cmodel_free (ghmm_cmodel ** smo) 
    196      
     203 
     204    # Normalizes initial and transition probabilities and mixture weights. 
     205    # 0 on success / -1 on error 
     206    int ghmm_cmodel_normalize(ghmm_cmodel *smo)     
    197207         
    198208cdef class ContinuousHiddenMarkovModel(HiddenMarkovModel): 
    199209    cdef ghmm_cmodel* m 
  • a/sage/stats/hmm/chmm.pyx

    old new  
     1""" 
     2Continuous Hidden Markov Models 
    13 
     4AUTHORS: 
     5    -- William Stein (2008) 
     6    -- The authors of GHMM http://ghmm.sourceforge.net/ 
     7""" 
     8 
     9############################################################################# 
     10#       Copyright (C) 2008 William Stein <wstein@gmail.com> 
     11#  Distributed under the terms of the GNU General Public License (GPL), 
     12#  version 2 or any later version at your option. 
     13#  The full text of the GPL is available at: 
     14#                  http://www.gnu.org/licenses/ 
     15############################################################################# 
    216 
    317include "../../ext/stdsage.pxi" 
    418 
    519include "misc.pxi" 
    620 
    721cdef class ContinuousHiddenMarkovModel(HiddenMarkovModel): 
    8     def __init__(self, A, B, pi, name=None): 
     22    """ 
     23    Abstract base class for continuous hidden Markov models. 
     24 
     25         
     26    INPUT: 
     27        A -- square matrix (or list of lists) 
     28        B -- list or matrix with numbers of rows equal to number of rows of A; 
     29             each row defines the emissions 
     30        pi -- list 
     31        name -- (default: None); a string 
     32 
     33    EXAMPLES: 
     34        sage: sage.stats.hmm.chmm.ContinuousHiddenMarkovModel([[1.0]], [(-0.0,10.0)], [1], "model") 
     35        <sage.stats.hmm.chmm.ContinuousHiddenMarkovModel object at ...> 
     36    """ 
     37    def __init__(self, A, B, pi, name): 
     38        """ 
     39        Constructor for continuous Hidden markov model abstract base class. 
     40 
     41        EXAMPLES: 
     42        This class is an abstract base class, so shouldn't ever be constructed by users.  
     43            sage: sage.stats.hmm.chmm.ContinuousHiddenMarkovModel([[1.0]], [(0.0,1.0)], [1], None) 
     44            <sage.stats.hmm.chmm.ContinuousHiddenMarkovModel object at ...> 
     45        """ 
    946        self.initialized = False 
    1047        HiddenMarkovModel.__init__(self, A, B, pi) 
    1148        self.m = <ghmm_cmodel*> safe_malloc(sizeof(ghmm_cmodel)) 
    1249        # Set number of states 
    1350        self.m.N = self.A.nrows() 
     51        # Assign model identifier (the name) if specified 
     52        if name is not None:  
     53            name = str(name) 
     54            self.m.name = <char*> safe_malloc(len(name)) 
     55            strcpy(self.m.name, name) 
     56        else: 
     57            self.m.name = NULL 
     58             
     59 
     60    def name(self): 
     61        """ 
     62        Return the name of this model. 
     63 
     64        OUTPUT: 
     65            string or None 
     66 
     67        EXAMPLES: 
     68            sage: m = hmm.GaussianHiddenMarkovModel([[0.4,0.6],[0.1,0.9]], [(0.0,1.0),(1,1)], [1,0], 'Test Model') 
     69            sage: m.name() 
     70            'Test Model' 
     71 
     72        If the model is not explicitly named then this function returns None: 
     73            sage: m = hmm.GaussianHiddenMarkovModel([[0.4,0.6],[0.1,0.9]], [(0.0,1.0),(1,1)], [1,0]) 
     74            sage: m.name() 
     75            sage: m.name() is None 
     76            True 
     77        """ 
     78        if self.m.name: 
     79            s = str(self.m.name) 
     80            return s 
     81        else: 
     82            return None 
     83             
    1484 
    1585cdef class GaussianHiddenMarkovModel(ContinuousHiddenMarkovModel): 
    16     """ 
     86    r""" 
    1787    Create a Gaussian Hidden Markov Model.  The probability 
    1888    distribution associated with each state is a Gaussian 
    1989    distribution. 
     
    2696              Gaussian distributions associated to each state 
    2797        pi -- list of floats that sums to 1.0; these are 
    2898              the initial probabilities of each hidden state 
    29         name -- (default: None);  
     99        name -- (default: None); a string 
    30100 
    31101    EXAMPLES: 
    32102    Define the transition matrix: 
     
    38108    The initial probabilities per state: 
    39109        sage: pi = [1,0,0] 
    40110 
    41     Create the continuous Gaussian hidden Markov model: 
    42         sage: m = hmm.GaussianHiddenMarkovModel(A, B, pi); m 
     111    We create the continuous Gaussian hidden Markov model defined by $A,B,\pi$: 
     112        sage: hmm.GaussianHiddenMarkovModel(A, B, pi) 
     113        Gaussian Hidden Markov Model with 3 States 
     114        Transition matrix: 
     115        [0.0 1.0 0.0] 
     116        [0.5 0.0 0.5] 
     117        [0.3 0.3 0.4] 
     118        Emission parameters: 
     119        [(0.0, 1.0), (-1.0, 0.5), (1.0, 0.20000000000000001)] 
     120        Initial probabilities: [1.0, 0.0, 0.0] 
    43121    """ 
    44122    def __init__(self, A, B, pi, name=None): 
    45         ContinuousHiddenMarkovModel.__init__(self, A, B, pi) 
     123        """ 
     124        EXAMPLES: 
     125        We make a very simple model: 
     126            sage: hmm.GaussianHiddenMarkovModel([[1]], [(0,1)], [1], 'simple') 
     127            Gaussian Hidden Markov Model simple with 1 States 
     128            Transition matrix: 
     129            [1.0] 
     130            Emission parameters: 
     131            [(0.0, 1.0)] 
     132            Initial probabilities: [1.0] 
     133 
     134        We test a degenerate case: 
     135            sage: hmm.GaussianHiddenMarkovModel([], [], [], 'simple') 
     136            Gaussian Hidden Markov Model simple with 0 States 
     137            Transition matrix: 
     138            [] 
     139            Emission parameters: 
     140            [] 
     141            Initial probabilities: [] 
     142        """ 
     143        ContinuousHiddenMarkovModel.__init__(self, A, B, pi, name=name) 
    46144 
    47145        # Set number of outputs.  This is 1 here because each 
    48146        # output is a single Gaussian distribution. 
     
    51149        # Set the model type to continuous 
    52150        self.m.model_type = GHMM_kContinuousHMM 
    53151 
    54         # Assign model identifier if specified 
    55         if name is not None:  
    56             name = str(name) 
    57             self.m.name = name 
    58         else: 
    59             self.m.name = NULL 
    60              
    61152        # 1 transition matrix 
    62153        self.m.cos   =  1 
    63154        # Set that no a prior model probabilities are set. 
     
    69160        cdef ghmm_cstate* states = <ghmm_cstate*> safe_malloc(sizeof(ghmm_cstate) * self.m.N) 
    70161        cdef ghmm_cstate* state 
    71162        cdef ghmm_c_emission* e 
     163        cdef Py_ssize_t i, j, k 
    72164 
    73165        for i in range(self.m.N): 
    74166            # Parameters of normal distribution 
     
    78170            state.M     = 1 
    79171            state.pi    = pi[i] 
    80172            state.desc  = NULL 
    81             state.out_states = 0 
    82             state.in_states = 0 
    83173            e = <ghmm_c_emission*> safe_malloc(sizeof(ghmm_c_emission)) 
    84174            e.type      = 0  # normal 
    85175            e.dimension = 1 
     
    91181            e.sigmainv  = NULL 
    92182            state.e     = e 
    93183            state.c     = to_double_array([1.0]) 
    94             state.in_a  = ighmm_cmatrix_alloc(1, self.m.N) 
    95             state.out_a = ighmm_cmatrix_alloc(1, self.m.N) 
     184 
     185            ######################################################### 
     186            # Initialize state transition data. 
     187            # NOTE: This code is similar to a block of code in hmm.pyx. 
     188             
     189            # Set "out" probabilities, i.e., the probabilities to 
     190            # transition to another hidden state from this state. 
     191            v = self.A[i] 
     192            k = self.m.N 
     193            state.out_states = k 
     194            state.out_id = <int*> safe_malloc(sizeof(int)*k) 
     195            state.out_a  = ighmm_cmatrix_alloc(1, k) 
     196            for j in range(k): 
     197                state.out_id[j] = j 
     198                state.out_a[0][j]  = v[j] 
     199 
     200            # Set "in" probabilities 
     201            v = self.A.column(i) 
     202            state.in_states = k 
     203            state.in_id = <int*> safe_malloc(sizeof(int)*k) 
     204            state.in_a  = ighmm_cmatrix_alloc(1, k) 
     205            for j in range(k): 
     206                state.in_id[j] = j 
     207                state.in_a[0][j]  = v[j] 
     208 
     209            #########################################################                 
     210 
    96211 
    97212        # Set states 
    98213        self.m.s = states 
     
    102217        self.initialized = True 
    103218 
    104219    def __dealloc__(self): 
     220        """ 
     221        Dealloc the memory used by this Gaussian HMM, but only if the 
     222        HMM was successfully initialized. 
     223 
     224        EXAMPLES:   
     225            sage: m = hmm.GaussianHiddenMarkovModel([[1.0]], [(0.0,1.0)], [1])   # implicit doctest 
     226            sage: del m 
     227        """ 
    105228        if self.initialized: 
    106229            ghmm_cmodel_free(&self.m) 
     230 
     231    def __copy__(self): 
     232        """ 
     233        Return a copy of this Gaussian HMM. 
     234         
     235        EXAMPLES: 
     236            sage: m = hmm.GaussianHiddenMarkovModel([[0.4,0.6],[0.1,0.9]], [(0.0,1.0),(1,1)], [1,0], 'NAME') 
     237            sage: copy(m) 
     238            Gaussian Hidden Markov Model NAME with 2 States 
     239            Transition matrix: 
     240            [0.4 0.6] 
     241            [0.1 0.9] 
     242            Emission parameters: 
     243            [(0.0, 1.0), (1.0, 1.0)] 
     244            Initial probabilities: [1.0, 0.0] 
     245        """ 
     246         
     247        return GaussianHiddenMarkovModel(self.transition_matrix(), self.emission_parameters(), 
     248                                         self.initial_probabilities(), self.name()) 
     249 
     250    def __cmp__(self, other): 
     251        """ 
     252        Compare two Gaussian HMM's. 
     253 
     254        INPUT: 
     255            self, other -- objects; if other is not a Gaussian HMM compare types. 
     256        OUTPUT: 
     257            -1,0,1 
     258 
     259        The transition matrices are compared, then the emission 
     260        parameters, and the initial probabilities.  The names are not 
     261        compared, so two GHMM's with the same parameters but different 
     262        names compare to be equal. 
     263 
     264        EXAMPLES: 
     265            sage: m = hmm.GaussianHiddenMarkovModel([[0.4,0.6],[0.1,0.9]], [(0.0,1.0),(1,1)], [1,2], "Test 1") 
     266            sage: m.__cmp__(m) 
     267            0 
     268 
     269        Note that the name doesn't matter: 
     270            sage: n = hmm.GaussianHiddenMarkovModel([[0.4,0.6],[0.1,0.9]], [(0.0,1.0),(1,1)], [1,2], "Test 2") 
     271            sage: m.__cmp__(n) 
     272            0 
     273 
     274        Normalizing fixes the initial probabilities, hence m and n are no longer equal. 
     275            sage: n.normalize() 
     276            sage: m.__cmp__(n) 
     277            1 
     278        """ 
     279        if not isinstance(other, GaussianHiddenMarkovModel): 
     280            return cmp(type(self), type(other)) 
     281 
     282        if self is other: return 0  # easy special case 
     283         
     284        cdef GaussianHiddenMarkovModel o = other 
     285        if self.m.N < o.m.N: 
     286            return -1 
     287        elif self.m.N > o.m.N: 
     288            return 1 
     289        cdef Py_ssize_t i, j 
     290 
     291        # The code below is somewhat long and tedious because I want it to be 
     292        # very fast.  All it does is explicitly loop through the transition 
     293        # matrix, emission parameters and initial state probabilities checking 
     294        # that they agree, and if not returning -1 or 1. 
     295        # Compare transition matrices 
     296        for i from 0 <= i < self.m.N: 
     297            for j from 0 <= j < self.m.s[i].out_states: 
     298                if self.m.s[i].out_a[0][j] < o.m.s[i].out_a[0][j]: 
     299                    return -1 
     300                elif self.m.s[i].out_a[0][j] > o.m.s[i].out_a[0][j]: 
     301                    return 1 
     302 
     303        # Compare emissions parameters 
     304        for i from 0 <= i < self.m.N: 
     305            if self.m.s[i].e.mean.val < o.m.s[i].e.mean.val: 
     306                return -1 
     307            elif self.m.s[i].e.mean.val > o.m.s[i].e.mean.val: 
     308                return 1 
     309            if self.m.s[i].e.variance.val < o.m.s[i].e.variance.val: 
     310                return -1 
     311            elif self.m.s[i].e.variance.val > o.m.s[i].e.variance.val: 
     312                return 1 
     313             
     314        # Compare initial state probabilities 
     315        for 0 <= i < self.m.N: 
     316            if self.m.s[i].pi < o.m.s[i].pi: 
     317                return -1 
     318            elif self.m.s[i].pi > o.m.s[i].pi: 
     319                return 1 
     320 
     321        return 0 
    107322 
    108323    def __repr__(self): 
    109324        """ 
     
    114329 
    115330        EXAMPLES: 
    116331            sage: m = hmm.GaussianHiddenMarkovModel([[0.0,1.0,0],[0.5,0.0,0.5],[0.3,0.3,0.4]], [(0.0,1.0), (-1.0,0.5), (1.0,0.2)], [1,0,0]) 
    117             sage: a.__repr__() 
    118             "Discrete Hidden Markov Model (2 states, 2 outputs)\nInitial probabilities: [0.5, 0.5]\nTransition matrix:\n[0.1 0.9]\n[0.1 0.9]\nEmission matrix:\n[0.9 0.1]\n[0.1 0.9]\nEmission symbols: [3/4, 'abc']" 
     332            sage: m.__repr__() 
     333            'Gaussian Hidden Markov Model with 3 States\nTransition matrix:\n[0.0 1.0 0.0]\n[0.5 0.0 0.5]\n[0.3 0.3 0.4]\nEmission parameters:\n[(0.0, 1.0), (-1.0, 0.5), (1.0, 0.20000000000000001)]\nInitial probabilities: [1.0, 0.0, 0.0]' 
    119334        """ 
    120         s = "Gaussian Hidden Markov Model%s (%s states, %s outputs)"%( 
     335        s = "Gaussian Hidden Markov Model%s with %s States"%( 
    121336            ' ' + self.m.name if self.m.name else '', 
    122             self.m.N, self.m.M) 
    123         s += '\nInitial probabilities: %s'%self.initial_probabilities() 
     337            self.m.N) 
    124338        s += '\nTransition matrix:\n%s'%self.transition_matrix() 
    125339        s += '\nEmission parameters:\n%s'%self.emission_parameters() 
     340        s += '\nInitial probabilities: %s'%self.initial_probabilities() 
    126341        return s 
    127342 
    128343    def initial_probabilities(self): 
     
    135350        EXAMPLES: 
    136351            sage: m = hmm.GaussianHiddenMarkovModel([[0.0,1.0,0],[0.5,0.0,0.5],[0.3,0.3,0.4]], [(0.0,1.0), (-1.0,0.5), (1.0,0.2)], [0.4,0.3,0.3]) 
    137352            sage: m.initial_probabilities() 
    138             [0.4, 0.3, 0.3
     353            [0.40000000000000002, 0.29999999999999999, 0.29999999999999999
    139354        """ 
    140355        cdef Py_ssize_t i 
    141356        return [self.m.s[i].pi for i in range(self.m.N)] 
    142357     
    143     def transition_matrix(self, list_only=True): 
     358    def transition_matrix(self): 
    144359        """ 
    145360        Return the hidden state transition matrix.  
     361 
     362        OUTPUT: 
     363            matrix whose rows give the transition probabilities of the 
     364            Hidden Markov Model states. 
    146365         
    147366        EXAMPLES: 
    148367            sage: m = hmm.GaussianHiddenMarkovModel([[0.0,1.0,0],[0.5,0.0,0.5],[0.3,0.3,0.4]], [(0.0,1.0), (-1.0,0.5), (1.0,0.2)], [1,0,0]) 
    149368            sage: m.transition_matrix() 
    150             [0.9 0.1] 
    151             [0.9 0.1] 
     369            [0.0 1.0 0.0] 
     370            [0.5 0.0 0.5] 
     371            [0.3 0.3 0.4] 
    152372        """ 
    153373        cdef Py_ssize_t i, j 
     374        # Update the state of the "immutable" matrix A, then return a reference to it. 
    154375        for i from 0 <= i < self.m.N: 
    155376            for j from 0 <= j < self.m.s[i].out_states: 
    156377                self.A.set_unsafe_double(i,j,self.m.s[i].out_a[0][j]) 
     
    158379 
    159380    def emission_parameters(self): 
    160381        """ 
    161         Return the emission probability matrix. 
     382        Return the emission parameters list. 
     383 
     384        OUTPUT: 
     385            list of tuples (mu, sigma) that define Gaussian distributions associated to each state. 
    162386         
    163387        EXAMPLES: 
    164             sage: m = hmm.GaussianHiddenMarkovModel([[0.0,1.0,0],[0.5,0.0,0.5],[0.3,0.3,0.4]], [(0.0,1.0), (-1.0,0.5), (1.0,0.2)], [0.1,0.4,0.5]
     388            sage: m = hmm.GaussianHiddenMarkovModel([[0.4,0.6],[0.1,0.9]], [(1.5,2),(-1,3)], [1,0], 'NAME'
    165389            sage: m.emission_parameters() 
    166             [(0.0, 1.0), (-1.0, 0.5), (1.0, 0.20000...)] 
     390            [(1.5, 2.0), (-1.0, 3.0)] 
    167391        """ 
    168         cdef Py_ssize_t i, j 
    169         v = [] 
    170         for i from 0 <= i < self.m.N: 
    171             v.append((self.m.s[i].e.mean.val, self.m.s[i].e.variance.val)) 
    172         return v 
     392        cdef Py_ssize_t i         
     393        return [(self.m.s[i].e.mean.val, self.m.s[i].e.variance.val) for i in range(self.m.N)] 
     394 
     395    def normalize(self): 
     396        """ 
     397        Normalize the transition and emission probabilities, if 
     398        applicable.  This changes self in place. 
     399 
     400        EXAMPLES: 
     401            sage: m = hmm.GaussianHiddenMarkovModel([[1.0,1.2],[0,0.1]], [(0.0,1.0),(1,1)], [1,2]) 
     402            sage: m.normalize() 
     403            sage: m 
     404            Gaussian Hidden Markov Model with 2 States 
     405            Transition matrix: 
     406            [0.454545454545 0.545454545455] 
     407            [           0.0            1.0] 
     408            Emission parameters: 
     409            [(0.0, 1.0), (1.0, 1.0)] 
     410            Initial probabilities: [0.33333333333333331, 0.66666666666666663] 
     411        """ 
     412        if ghmm_cmodel_normalize(self.m): 
     413            raise RuntimeError, "error normalizing model (note that model may still have been partly changed)" 
  • a/sage/stats/hmm/hmm.pxd

    old new  
     1############################################################################# 
     2#       Copyright (C) 2008 William Stein <wstein@gmail.com> 
     3#  Distributed under the terms of the GNU General Public License (GPL), 
     4#  version 2 or any later version at your option. 
     5#  The full text of the GPL is available at: 
     6#                  http://www.gnu.org/licenses/ 
     7############################################################################# 
     8 
    19from sage.matrix.matrix_real_double_dense cimport Matrix_real_double_dense 
    210 
    311cdef extern from "ghmm/ghmm.h": 
  • a/sage/stats/hmm/hmm.pyx

    old new  
    1111   * continuous hmm's 
    1212""" 
    1313 
     14############################################################################# 
     15#       Copyright (C) 2008 William Stein <wstein@gmail.com> 
     16#  Distributed under the terms of the GNU General Public License (GPL), 
     17#  version 2 or any later version at your option. 
     18#  The full text of the GPL is available at: 
     19#                  http://www.gnu.org/licenses/ 
     20############################################################################# 
     21 
    1422import math 
    1523 
    1624from sage.matrix.all import is_Matrix, matrix 
     
    2230include "misc.pxi" 
    2331 
    2432cdef class HiddenMarkovModel: 
     33    """ 
     34    Abstract base class for all Hidden Markov Models. 
     35 
     36    INPUT: 
     37        A -- matrix or list 
     38        B -- matrix or list 
     39        pi -- list of floats 
     40 
     41    EXAMPLES: 
     42    One shouldn't directly call this constructor since this class is an abstract 
     43    base class.   
     44        sage: sage.stats.hmm.hmm.HiddenMarkovModel([[0.4,0.6],[1,0]], [[1,0],[0.5,0.5]], [0.5,0.5]) 
     45        <sage.stats.hmm.hmm.HiddenMarkovModel object at ...> 
     46    """ 
    2547    def __init__(self, A, B, pi): 
     48        """ 
     49        INPUT: 
     50            A -- matrix or list 
     51            B -- matrix or list 
     52            pi -- list of floats 
     53 
     54        EXAMPLES: 
     55            sage: hmm.DiscreteHiddenMarkovModel([[1]], [[0.0,1.0]], [1]) 
     56            Discrete Hidden Markov Model with 1 States and 2 Emissions 
     57            Transition matrix: 
     58            [1.0] 
     59            Emission matrix: 
     60            [0.0 1.0] 
     61            Initial probabilities: [1.0] 
     62        """ 
     63        # Convert A to a matrix 
    2664        if not is_Matrix(A): 
    27             A = matrix(RDF, len(A), len(A[0]), A) 
     65            n = len(A) 
     66            A = matrix(RDF, n, len(A[0]) if n > 0 else 0, A) 
     67 
     68        # Convert B to a matrix             
    2869        if not is_Matrix(B): 
    29             B = matrix(RDF, len(B), len(B[0]), B) 
     70            n = len(B) 
     71            B = matrix(RDF, n, len(B[0]) if n > 0 else 0, B) 
     72 
     73        # Some consistency checks 
    3074        if not A.is_square(): 
     75            print A.parent() 
    3176            raise ValueError, "A must be square" 
    3277        if A.nrows() != B.nrows(): 
    3378            raise ValueError, "number of rows of A and B must be the same" 
     79 
     80        # Make sure A and B are over RDF. 
    3481        if A.base_ring() != RDF: 
    3582            A = A.change_ring(RDF) 
    3683        if B.base_ring() != RDF: 
    3784            B = B.change_ring(RDF) 
    3885 
     86        # Make sure the initial probabilities are all floats.  
    3987        self.pi = [float(x) for x in pi] 
    4088        if len(self.pi) != A.nrows(): 
    4189            raise ValueError, "length of pi must equal number of rows of A" 
    4290 
     91        # Record the now validated matrices A and B as attributes. 
     92        # They get used later as attributes in the constructors for 
     93        # derived classes. 
    4394        self.A = A 
    4495        self.B = B 
    4596 
     
    56107            name -- (optional) name of the model 
    57108 
    58109        EXAMPLES: 
    59         We create a discrete HMM with 2 internal states on an alphabet of 
    60         size 2. 
    61          
     110        We create a discrete HMM with 2 internal states on an alphabet of size 2. 
    62111            sage: a = hmm.DiscreteHiddenMarkovModel([[0.2,0.8],[0.5,0.5]], [[1,0],[0,1]], [0,1]) 
    63              
    64112        """ 
    65         self.initialized = False 
     113        # memory has not all been setup yet. 
     114        self.initialized = False   
     115 
     116        # This sets self.A, self.B and pi after doing appropriate coercions, etc. 
    66117        HiddenMarkovModel.__init__(self, A, B, pi) 
    67118 
    68         cdef Py_ssize_t i, j, k 
    69119        self.set_emission_symbols(emission_symbols) 
    70120 
    71121        self.m = <ghmm_dmodel*> safe_malloc(sizeof(ghmm_dmodel)) 
     
    101151        silent_states = [] 
    102152        tmp_order     = [] 
    103153         
    104         for i in range(self.m.N): 
     154        cdef Py_ssize_t i, j, k 
     155         
     156        for i from 0 <= i < self.m.N: 
    105157            v = self.B[i] 
    106158 
    107159            # Get a reference to the i-th state for convenience of the notation below. 
     
    127179 
    128180            silent_states.append( 1 if sum(v) == 0 else 0) 
    129181 
    130             # Set out probabilities, i.e., the probabilities that each 
    131             # symbol will be emitted from this state.  
     182            # Set "out" probabilities, i.e., the probabilities to 
     183            # transition to another hidden state from this state. 
    132184            v = self.A[i] 
    133             nz = v.nonzero_positions() 
    134             k = len(nz) 
     185            k = self.m.N 
    135186            state.out_states = k 
    136187            state.out_id = <int*> safe_malloc(sizeof(int)*k) 
    137188            state.out_a  = <double*> safe_malloc(sizeof(double)*k) 
    138             for j in range(k)
    139                 state.out_id[j] = nz[j] 
    140                 state.out_a[j]  = v[nz[j]
     189            for j from 0 <= j < k
     190                state.out_id[j] = j 
     191                state.out_a[j]  = v[j
    141192 
    142193            # Set "in" probabilities 
    143194            v = self.A.column(i) 
    144             nz = v.nonzero_positions() 
    145             k = len(nz) 
    146195            state.in_states = k 
    147196            state.in_id = <int*> safe_malloc(sizeof(int)*k) 
    148197            state.in_a  = <double*> safe_malloc(sizeof(double)*k) 
    149             for j in range(k)
    150                 state.in_id[j] = nz[j] 
    151                 state.in_a[j]  = v[nz[j]
     198            for j from 0 <= j < k
     199                state.in_id[j] = j 
     200                state.in_a[j]  = v[j
    152201                 
    153202            state.fix = 0 
    154203                 
    155204        self.m.s = states 
    156205        self.initialized = True 
    157206 
    158 ##         if sum(silent_states) > 0: 
    159 ##             self.m.model_type |= GHMM_kSilentStates 
    160 ##             self.m.silent = to_int_array(silent_states) 
    161 ##         self.m.maxorder = max(tmp_order) 
    162 ##         if self.m.maxorder > 0: 
    163 ##             self.m.model_type |= GHMM_kHigherOrderEmissions 
    164 ##             self.m.order = to_int_array(tmp_order) 
    165 ##         # Initialize lookup table for powers of the alphabet size, 
    166 ##         # which speeds up models with higher order states. 
    167 ##         powLookUp = [1] * (self.m.maxorder+2) 
    168 ##         for i in range(1,len(powLookUp)): 
    169 ##             powLookUp[i] = powLookUp[i-1] * self.m.M 
    170 ##         self.m.pow_lookup = to_int_array(powLookUp) 
    171 ##         self.initialized = True 
     207    def __cmp__(self, other): 
     208        """ 
     209        Compare two Discrete HMM's. 
     210 
     211        INPUT: 
     212            self, other -- objects; if other is not a Discrete HMM compare types. 
     213        OUTPUT: 
     214            -1,0,1 
     215 
     216        The transition matrices are compared, then the emission 
     217        parameters, and the initial probabilities.  The names are not 
     218        compared, so two GHMM's with the same parameters but different 
     219        names compare to be equal. 
     220 
     221        EXAMPLES: 
     222            sage: m = hmm.DiscreteHiddenMarkovModel([[0.4,0.6],[0.1,0.9]], [[0.0,1.0],[1,1]], [1,2]) 
     223            sage: m.__cmp__(m) 
     224            0 
     225 
     226        Note that the name doesn't matter: 
     227            sage: n = hmm.DiscreteHiddenMarkovModel([[0.4,0.6],[0.1,0.9]], [[0.0,1.0],[1,1]], [1,2]) 
     228            sage: m.__cmp__(n) 
     229            0 
     230 
     231        Normalizing fixes the initial probabilities, hence m and n are no longer equal. 
     232            sage: n.normalize() 
     233            sage: m.__cmp__(n) 
     234            1 
     235        """ 
     236        if not isinstance(other, DiscreteHiddenMarkovModel): 
     237            return cmp(type(self), type(other)) 
     238 
     239        if self is other: return 0  # easy special case 
    172240         
     241        cdef DiscreteHiddenMarkovModel o = other 
     242        if self.m.N < o.m.N: 
     243            return -1 
     244        elif self.m.N > o.m.N: 
     245            return 1 
     246        cdef Py_ssize_t i, j 
     247 
     248        # This code is similar to code in chmm.pyx, but with several small differences. 
     249         
     250        # Compare transition matrices 
     251        for i from 0 <= i < self.m.N: 
     252            for j from 0 <= j < self.m.s[i].out_states: 
     253                if self.m.s[i].out_a[j] < o.m.s[i].out_a[j]: 
     254                    return -1 
     255                elif self.m.s[i].out_a[j] > o.m.s[i].out_a[j]: 
     256                    return 1 
     257 
     258        # Compare emission matrices 
     259        for i from 0 <= i < self.m.N: 
     260            for j from 0 <= j < self.B._ncols: 
     261                if self.m.s[i].b[j] < o.m.s[i].b[j]: 
     262                    return -1 
     263                elif self.m.s[i].b[j] > o.m.s[i].b[j]: 
     264                    return 1 
     265                 
     266        # Compare initial state probabilities 
     267        for 0 <= i < self.m.N: 
     268            if self.m.s[i].pi < o.m.s[i].pi: 
     269                return -1 
     270            elif self.m.s[i].pi > o.m.s[i].pi: 
     271                return 1 
     272 
     273        return 0 
     274 
    173275    def __dealloc__(self): 
     276        """ 
     277        Deallocate memory allocated by the HMM. 
     278 
     279        EXAMPLES: 
     280            sage: a = hmm.DiscreteHiddenMarkovModel([[0.2,0.8],[0.5,0.5]], [[1,0],[0,1]], [0,1])  # indirect doctest 
     281            sage: del a 
     282        """ 
    174283        if self.initialized: 
    175284            ghmm_dmodel_free(&self.m) 
    176285 
     
    184293        EXAMPLES: 
    185294            sage: a = hmm.DiscreteHiddenMarkovModel([[0.1,0.9],[0.1,0.9]], [[0.9,0.1],[0.1,0.9]], [0.5,0.5], [3/4, 'abc']) 
    186295            sage: a.__repr__() 
    187             "Discrete Hidden Markov Model (2 states, 2 outputs)\nInitial probabilities: [0.5, 0.5]\nTransition matrix:\n[0.1 0.9]\n[0.1 0.9]\nEmission matrix:\n[0.9 0.1]\n[0.1 0.9]\nEmission symbols: [3/4, 'abc']" 
     296            "Discrete Hidden Markov Model with 2 States and 2 Emissions\nTransition matrix:\n[0.1 0.9]\n[0.1 0.9]\nEmission matrix:\n[0.9 0.1]\n[0.1 0.9]\nInitial probabilities: [0.5, 0.5]\nEmission symbols: [3/4, 'abc']" 
    188297        """ 
    189         s = "Discrete Hidden Markov Model%s (%s states, %s outputs)"%( 
     298        s = "Discrete Hidden Markov Model%s with %s States and %s Emissions"%( 
    190299            ' ' + self.m.name if self.m.name else '', 
    191300            self.m.N, self.m.M) 
    192         s += '\nInitial probabilities: %s'%self.initial_probabilities() 
    193301        s += '\nTransition matrix:\n%s'%self.transition_matrix() 
    194302        s += '\nEmission matrix:\n%s'%self.emission_matrix() 
     303        s += '\nInitial probabilities: %s'%self.initial_probabilities() 
    195304        if self._emission_symbols_dict: 
    196305            s += '\nEmission symbols: %s'%self._emission_symbols 
    197306        return s 
     
    251360            sage: a = hmm.DiscreteHiddenMarkovModel([[0.5,1],[1.2,0.9]], [[1,0.5],[0.5,1]], [0.1,1.2]) 
    252361            sage: a.normalize() 
    253362            sage: a 
    254             Discrete Hidden Markov Model (2 states, 2 outputs) 
    255             Initial probabilities: [0.076923076923076927, 0.92307692307692302] 
     363            Discrete Hidden Markov Model with 2 States and 2 Emissions 
    256364            Transition matrix: 
    257365            [0.333333333333 0.666666666667] 
    258366            [0.571428571429 0.428571428571] 
    259367            Emission matrix: 
    260368            [0.666666666667 0.333333333333] 
    261369            [0.333333333333 0.666666666667] 
     370            Initial probabilities: [0.076923076923076927, 0.92307692307692302] 
    262371        """ 
    263372        ghmm_dmodel_normalize(self.m) 
    264373 
     
    491600        Now the model's emission matrix changes since it is much 
    492601        more likely to emit 1 than 0.   
    493602            sage: a 
    494             Discrete Hidden Markov Model (2 states, 2 outputs) 
    495             Initial probabilities: [0.5, 0.5] 
     603            Discrete Hidden Markov Model with 2 States and 2 Emissions 
    496604            Transition matrix: 
    497605            [0.5 0.5] 
    498606            [0.5 0.5] 
    499607            Emission matrix: 
    500608            [0.166666666667 0.833333333333] 
    501609            [0.166666666667 0.833333333333] 
     610            Initial probabilities: [0.5, 0.5] 
    502611 
    503612        Note that 1/6 = 1.666...: 
    504613            sage: 1.0/6 
     
    509618            sage: a = hmm.DiscreteHiddenMarkovModel([[0.5,0.5],[0.5,0.5]], [[0.5,0.5],[0.5,0.5]], [0.5,0.5], ['up','down']) 
    510619            sage: a.baum_welch([['up','up'], ['down','up']]) 
    511620            sage: a 
    512             Discrete Hidden Markov Model (2 states, 2 outputs) 
    513             Initial probabilities: [0.5, 0.5] 
     621            Discrete Hidden Markov Model with 2 States and 2 Emissions 
    514622            Transition matrix: 
    515623            [0.5 0.5] 
    516624            [0.5 0.5] 
    517625            Emission matrix: 
    518626            [0.75 0.25] 
    519627            [0.75 0.25] 
    520             Emission symbols: ['up', 'down']         
     628            Initial probabilities: [0.5, 0.5] 
     629            Emission symbols: ['up', 'down'] 
    521630 
    522631        NOTE: Training for models including silent states is not yet supported. 
    523632 
     
    544653################################################################################## 
    545654 
    546655cdef ghmm_dseq* malloc_ghmm_dseq(seqs) except NULL: 
     656    """ 
     657    Allocate a discrete sequence of samples. 
     658 
     659    INPUT: 
     660        seqs -- a list of sequences 
     661 
     662    OUTPUT: 
     663        C pointer to ghmm_dseq 
     664    """ 
    547665    cdef ghmm_dseq* d = ghmm_dseq_calloc(len(seqs)) 
    548666    if d == NULL: 
    549667        raise MemoryError 
  • a/sage/stats/hmm/misc.pxi

    old new  
     1############################################################################# 
     2#       Copyright (C) 2008 William Stein <wstein@gmail.com> 
     3#  Distributed under the terms of the GNU General Public License (GPL), 
     4#  version 2 or any later version at your option. 
     5#  The full text of the GPL is available at: 
     6#                  http://www.gnu.org/licenses/ 
     7############################################################################# 
     8 
     9cdef extern from "string.h": 
     10    char *strcpy(char *s1, char *s2) 
    111 
    212cdef double* to_double_array(v) except NULL: 
     13    """ 
     14    Transform a Python list of floats to a C array of doubles.  The caller is 
     15    responsible for deallocating the resulting memory. 
     16     
     17    INPUT: 
     18        v -- a list of objects coercible to floats 
     19    OUTPUT: 
     20        a newly allocated C array of doubles 
     21    """ 
    322    cdef double x 
    423    cdef double* w = <double*> safe_malloc(sizeof(double)*len(v)) 
    524    cdef Py_ssize_t i = 0 
     
    928    return w 
    1029 
    1130cdef int* to_int_array(v) except NULL: 
     31    """ 
     32    Transform a Python list of ints to a C array of ints.  The caller is 
     33    responsible for deallocating the resulting memory. 
     34     
     35    INPUT: 
     36        v -- a list of objects coercible to ints 
     37    OUTPUT: 
     38        a newly allocated C array of ints 
     39    """ 
    1240    cdef int x 
    1341    cdef int* w = <int*> safe_malloc(sizeof(int)*len(v)) 
    1442    cdef Py_ssize_t i = 0 
     
    2149    """ 
    2250    malloc the given bytes of memory and check that the malloc 
    2351    succeeds -- if not raise a MemoryError. 
     52 
     53    INPUT: 
     54        bytes -- a nonnegatie integer 
     55         
     56    OUTPUT: 
     57        void pointer or raise a MemoryError.  
    2458    """ 
    2559    cdef void* t = sage_malloc(bytes) 
    2660    if not t: