Ticket #3726: sage-3726-part2.patch

File sage-3726-part2.patch, 12.0 kB (added by was, 5 months ago)
  • a/sage/stats/hmm/hmm.pyx

    old new  
    1111 
    1212from sage.matrix.all import is_Matrix, matrix 
    1313from sage.rings.all  import RDF 
     14from sage.misc.randstate import random 
    1415 
    1516from sage.matrix.matrix_real_double_dense cimport Matrix_real_double_dense 
    1617 
     
    5758    ctypedef struct ghmm_alphabet: 
    5859        pass 
    5960     
     61    # Discrete HMM's. 
    6062    ctypedef struct ghmm_dmodel: 
    6163        # Number of states 
    6264        int N    
     
    133135 
    134136    int ghmm_dmodel_normalize(ghmm_dmodel *m) 
    135137    int ghmm_dmodel_free(ghmm_dmodel **m) 
     138    int ghmm_dmodel_logp(ghmm_dmodel *m, int *O, int len, double *log_p) 
    136139 
     140 
     141    # Discrete sequences 
     142    ctypedef struct ghmm_dseq: 
     143        # sequence array. sequence[i] [j] = j-th symbol of i-th seq 
     144        int **seq 
     145        # matrix of state ids, can be used to save the viterbi path during sequence generation. 
     146        # ATTENTION: is NOT allocated by ghmm_dseq_calloc  
     147        int **states 
     148        # array of sequence length 
     149        int *seq_len 
     150        # array of state path lengths 
     151        int *states_len 
     152        # array of sequence IDs 
     153        double *seq_id 
     154        # positive sequence weights.  default is 1 = no weight 
     155        double *seq_w 
     156        # total number of sequences  
     157        long seq_number 
     158        # reserved space for sequences is always >= seq_number 
     159        long capacity 
     160        # sum of sequence weights 
     161        double total_w 
     162        # matrix of state labels corresponding to seq 
     163        int **state_labels 
     164        # number of labels for each sequence 
     165        int *state_labels_len 
     166        # internal flags 
     167        unsigned int flags 
     168 
     169    ghmm_dseq *ghmm_dmodel_label_generate_sequences( 
     170        ghmm_dmodel * mo, int seed, int global_len, long seq_number, int Tmax) 
     171    ghmm_dseq *ghmm_dmodel_generate_sequences(ghmm_dmodel* mo, int seed, int global_len, 
     172                                          long seq_number, int Tmax) 
    137173 
    138174cdef class HiddenMarkovModel: 
    139175    pass 
     
    142178cdef class DiscreteHiddenMarkovModel: 
    143179    cdef ghmm_dmodel* m 
    144180    cdef bint initialized 
    145     cdef object emission_symbols 
     181    cdef object _emission_symbols 
     182    cdef object _emission_symbols_special     
    146183    cdef Matrix_real_double_dense A, B 
    147184     
    148185    def __init__(self, A, B, pi, emission_symbols=None, name=None): 
     
    178215 
    179216        cdef Py_ssize_t i, j, k 
    180217 
    181         if emission_symbols is None: 
    182             self.emission_symbols = range(B.ncols()) 
    183         else: 
    184             self.emission_symbols = list(emission_symbols) 
     218        self.A = A 
     219        self.B = B 
     220        self.set_emission_symbols(emission_symbols) 
    185221 
    186         self.m = <ghmm_dmodel*> sage_malloc(sizeof(ghmm_dmodel)) 
    187         if not self.m: raise MemoryError 
     222        self.m = <ghmm_dmodel*> safe_malloc(sizeof(ghmm_dmodel)) 
     223 
     224        self.m.label = to_int_array(range(len(self._emission_symbols))) 
    188225         
    189226        # Set all pointers to NULL 
    190227        self.m.s = NULL; self.m.name = NULL; self.m.silent = NULL 
    191228        self.m.tied_to = NULL; self.m.order = NULL; self.m.background_id = NULL 
    192229        self.m.bp = NULL; self.m.topo_order = NULL; self.m.pow_lookup = NULL; 
    193         self.m.label = NULL; self.m.label_alphabet = NULL; self.m.alphabet = NULL 
     230        self.m.label_alphabet = NULL; self.m.alphabet = NULL 
    194231 
    195232        # Set number of states and number of outputs 
    196233        self.m.N = A.nrows() 
    197         self.m.M = len(self.emission_symbols) 
     234        self.m.M = len(self._emission_symbols) 
    198235        # Set the model type to discrete 
    199236        self.m.model_type = GHMM_kDiscreteHMM 
    200  
    201         self.A = A 
    202         self.B = B 
    203237 
    204238        # Set that no a prior model probabilities are set. 
    205239        self.m.prior = -1 
     
    289323 
    290324        self.initialized = True 
    291325         
     326    def __dealloc__(self): 
     327        if self.initialized: 
     328            ghmm_dmodel_free(&self.m) 
    292329 
    293330    def __repr__(self): 
    294331        s = "Discrete Hidden Markov Model%s (%s states, %s outputs)\n"%( 
     
    300337        return s 
    301338 
    302339    def initial_probabilities(self): 
     340        """ 
     341        Return the list of initial state probabilities. 
     342         
     343        EXAMPLES: 
     344            sage: a = hmm.DiscreteHiddenMarkovModel([[0.9,0.1],[0.9,0.1]], [[0.5,0.5,0,0],[0,0,.5,.5]], [0.5,0.5], [1,-1,3,-3]) 
     345            sage: a.initial_probabilities() 
     346            [0.5, 0.5] 
     347        """ 
    303348        cdef Py_ssize_t i 
    304349        return [self.m.s[i].pi for i in range(self.m.N)] 
    305350 
    306351    def transition_matrix(self, list_only=True): 
     352        """ 
     353        Return the hidden state transition matrix.  
     354         
     355        EXAMPLES: 
     356            sage: a = hmm.DiscreteHiddenMarkovModel([[0.9,0.1],[0.9,0.1]], [[0.5,0.5,0,0],[0,0,.5,.5]], [0.5,0.5], [1,-1,3,-3]) 
     357            sage: a.transition_matrix() 
     358            [0.9 0.1] 
     359            [0.9 0.1] 
     360        """ 
    307361        cdef Py_ssize_t i, j 
    308362        for i from 0 <= i < self.m.N: 
    309363            for j from 0 <= j < self.m.s[i].out_states: 
     
    311365        return self.A 
    312366 
    313367    def emission_matrix(self, list_only=True): 
     368        """ 
     369        Return the emission probability matrix. 
     370         
     371        EXAMPLES: 
     372            sage: a = hmm.DiscreteHiddenMarkovModel([[0.9,0.1],[0.9,0.1]], [[0.5,0.5,0,0],[0,0,.5,.5]], [0.5,0.5], [1,-1,3,-3]) 
     373            sage: a.emission_matrix() 
     374            [0.5 0.5 0.0 0.0] 
     375            [0.0 0.0 0.5 0.5] 
     376        """ 
    314377        cdef Py_ssize_t i, j 
    315378        for i from 0 <= i < self.m.N: 
    316379            for j from 0 <= j < self.B._ncols: 
     
    320383    def normalize(self): 
    321384        """ 
    322385        Normalize the transition and emission probabilities, if applicable. 
     386 
     387        EXAMPLES: 
     388            sage: a = hmm.DiscreteHiddenMarkovModel([[0.5,1],[1.2,0.9]], [[1,0.5],[0.5,1]], [0.1,1.2]) 
     389            sage: a.normalize() 
     390            sage: a 
     391            Discrete Hidden Markov Model (2 states, 2 outputs) 
     392            Initial probabilities: [0.076923076923076927, 0.92307692307692302] 
     393            Transition matrix: 
     394            [0.333333333333 0.666666666667] 
     395            [0.571428571429 0.428571428571] 
     396            Emission matrix: 
     397            [0.666666666667 0.333333333333] 
     398            [0.333333333333 0.666666666667] 
    323399        """ 
    324400        ghmm_dmodel_normalize(self.m) 
    325401 
    326     def __dealloc__(self): 
    327         if self.initialized: 
    328             ghmm_dmodel_free(&self.m) 
     402    def sample_single(self, long length): 
     403        """ 
     404        Return a single sample computed using this Hidden Markov Model. 
     405         
     406        EXAMPLE: 
     407            sage: set_random_seed(0) 
     408            sage: a = hmm.DiscreteHiddenMarkovModel([[0.1,0.9],[0.1,0.9]], [[1,0],[0,1]], [0,1]) 
     409            sage: a.sample_single(20) 
     410            [1, 0, 1, 1, 0, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1] 
     411            sage: a.sample_single(1000).count(0) 
     412            113 
     413 
     414        If the emission symbols are set 
     415            sage: set_random_seed(0) 
     416            sage: a = hmm.DiscreteHiddenMarkovModel([[0.5,0.5],[0.1,0.9]], [[1,0],[0,1]], [0,1], ['up', 'down']) 
     417            sage: print a.sample_single(10) 
     418            ['down', 'up', 'down', 'down', 'up', 'down', 'down', 'up', 'down', 'up'] 
     419             
     420        """ 
     421        seed = random() 
     422        cdef ghmm_dseq *d = ghmm_dmodel_generate_sequences(self.m, seed, length, 1, length) 
     423        cdef Py_ssize_t i 
     424        v = [d.seq[0][i] for i in range(length)] 
     425        if self._emission_symbols_special: 
     426            return v 
     427        else: 
     428            w = self._emission_symbols 
     429            return [w[i] for i in v] 
     430 
     431    def sample(self, long length, long number): 
     432        """ 
     433        Return number samples from this HMM of given length. 
     434 
     435        EXAMPLES: 
     436            sage: set_random_seed(0) 
     437            sage: a = hmm.DiscreteHiddenMarkovModel([[0.1,0.9],[0.1,0.9]], [[1,0],[0,1]], [0,1]) 
     438            sage: print a.sample(10, 3) 
     439            [[1, 0, 1, 1, 0, 1, 1, 0, 1, 0], [1, 1, 1, 1, 1, 1, 1, 1, 1, 0], [1, 1, 1, 1, 1, 1, 1, 1, 1, 1]] 
     440        """ 
     441        seed = random() 
     442        cdef ghmm_dseq *d = ghmm_dmodel_generate_sequences(self.m, seed, length, number, length) 
     443        cdef Py_ssize_t i, j 
     444        v = [[d.seq[j][i] for i in range(length)] for j in range(number)] 
     445        if self._emission_symbols_special: 
     446            return v 
     447        else: 
     448            w = self._emission_symbols 
     449            return [[w[i] for i in z] for z in v] 
     450 
     451    def emission_symbols(self): 
     452        """ 
     453        Return a copy of the list of emission symbols of self. 
     454 
     455        Use set_emission_symbols to set the list of emission symbols. 
     456 
     457        EXAMPLES: 
     458            sage: a = hmm.DiscreteHiddenMarkovModel([[0.5,0.5],[0.1,0.9]], [[1,0],[0,1]], [0,1], ['up', -3/179]) 
     459            sage: a.emission_symbols() 
     460            ['up', -3/179] 
     461        """ 
     462        return list(self._emission_symbols) 
     463 
     464    def set_emission_symbols(self, emission_symbols): 
     465        """ 
     466        Set the list of emission symbols. 
     467 
     468        EXAMPLES: 
     469            sage: set_random_seed(0) 
     470            sage: a = hmm.DiscreteHiddenMarkovModel([[0.5,0.5],[0.1,0.9]], [[1,0],[0,1]], [0,1], ['up', 'down']) 
     471            sage: a.set_emission_symbols([3,5]) 
     472            sage: a.emission_symbols() 
     473            [3, 5] 
     474            sage: a.sample_single(10) 
     475            [5, 3, 5, 5, 3, 5, 5, 3, 5, 3] 
     476            sage: a.set_emission_symbols([pi,5/9+e]) 
     477            sage: a.sample_single(10) 
     478            [e + 5/9, e + 5/9, e + 5/9, e + 5/9, e + 5/9, e + 5/9, pi, pi, e + 5/9, pi] 
     479        """ 
     480        if emission_symbols is None: 
     481            self._emission_symbols = range(self.B.ncols()) 
     482        else: 
     483            self._emission_symbols = list(emission_symbols) 
     484        self._emission_symbols_special = (self._emission_symbols == range(self.B.ncols())) 
     485 
     486    cpdef double log_likelihood(self, seq): 
     487        r""" 
     488        Return $\log ( P[seq | model] )$, the log of the probability of seeing 
     489        the given sequence given this model, using the forward algorithm and 
     490        assuming independance of the sequence seq. 
     491 
     492        WARNING: By convention we return 1.0 for 0 probability events.  
     493         
     494        EXAMPLES: 
     495            sage: a = hmm.DiscreteHiddenMarkovModel([[0.1,0.9],[0.1,0.9]], [[1,0],[0,1]], [0,1]) 
     496            sage: a.log_likelihood([1,1]) 
     497            -0.10536051565782635 
     498            sage: a.log_likelihood([1,0]) 
     499            -2.3025850929940459 
     500 
     501        Notice that any sequence starting with 0 can't occur, since 
     502        the above model always starts in a state that produces 1 with 
     503        probability 1.  By convention log(probability 0) is 1. 
     504            sage: a.log_likelihood([0,0]) 
     505            1.0 
     506 
     507        Here's a special case where each sequence is equally probable. 
     508            sage: a = hmm.DiscreteHiddenMarkovModel([[0.5,0.5],[0.5,0.5]], [[1,0],[0,1]], [0.5,0.5]) 
     509            sage: a.log_likelihood([0,0]) 
     510            -1.3862943611198906 
     511            sage: log(0.25) 
     512            -1.38629436111989 
     513            sage: a.log_likelihood([0,1]) 
     514            -1.3862943611198906 
     515            sage: a.log_likelihood([1,0]) 
     516            -1.3862943611198906 
     517            sage: a.log_likelihood([1,1]) 
     518            -1.3862943611198906 
     519        """ 
     520        cdef double log_p 
     521        cdef int* O = to_int_array(seq) 
     522        cdef int ret = ghmm_dmodel_logp(self.m, O, len(seq), &log_p) 
     523        sage_free(O) 
     524        if ret == -1: 
     525            # forward returned -1: sequence can't be built 
     526            return -float('Inf') 
     527        return log_p 
     528 
    329529     
    330  
    331  
    332530 
    333531################################################################################## 
    334532# Helper Functions