Ticket #3726: sage-3726-part1.patch

File sage-3726-part1.patch, 15.7 kB (added by was, 5 months ago)
  • a/sage/matrix/matrix_real_double_dense.pxd

    old new  
    1414    cdef int _signum 
    1515    cdef int _LU_valid 
    1616    cdef _c_compute_LU(self) 
    17 #    cdef ModuleElement _add_c_impl(self, ModuleElement right )  
     17    cdef set_unsafe_double(self, Py_ssize_t i, Py_ssize_t j, double value) 
  • a/sage/matrix/matrix_real_double_dense.pyx

    old new  
    224224 
    225225    cdef get_unsafe(self, Py_ssize_t i, Py_ssize_t j): 
    226226        return RealDoubleElement(gsl_matrix_get(self._matrix,i,j)) 
     227 
     228    cdef set_unsafe_double(self, Py_ssize_t i, Py_ssize_t j, double value): 
     229        gsl_matrix_set(self._matrix,i,j,value) 
    227230     
    228231 
    229232    ######################################################################## 
  • a/sage/stats/all.py

    old new  
    11from r import (ttest) 
     2 
     3import hmm.all as hmm 
  • /dev/null

    old new  
     1from hmm import DiscreteHiddenMarkovModel 
  • /dev/null

    old new  
     1r""" 
     2Hidden Markov Models 
     3 
     4AUTHOR: William Stein 
     5 
     6EXAMPLES: 
     7 
     8""" 
     9 
     10import math 
     11 
     12from sage.matrix.all import is_Matrix, matrix 
     13from sage.rings.all  import RDF 
     14 
     15from sage.matrix.matrix_real_double_dense cimport Matrix_real_double_dense 
     16 
     17include "../../ext/stdsage.pxi" 
     18 
     19cdef extern from "ghmm/ghmm.h": 
     20    cdef int GHMM_kSilentStates 
     21    cdef int GHMM_kHigherOrderEmissions 
     22    cdef int GHMM_kDiscreteHMM 
     23 
     24cdef extern from "ghmm/model.h": 
     25    ctypedef struct ghmm_dstate: 
     26        # Initial probability 
     27        double pi 
     28        # Output probability 
     29        double *b 
     30 
     31        # ID's of the following states 
     32        int *out_id 
     33        # ID's of the previous states 
     34        int *in_id 
     35 
     36        # Transition probabilities to successor states. 
     37        double *out_a 
     38        # Transition probabilities from predecessor states. 
     39        double *in_a 
     40 
     41        # Number of successor states 
     42        int out_states 
     43        # Number of precursor states 
     44        int in_states 
     45        # if fix == 1 then output probabilities b stay fixed during the training 
     46        int fix 
     47        # Contains a description of the state (null terminated utf-8) 
     48        char * desc 
     49        # x coordinate position for graph representation plotting 
     50        int xPosition 
     51        # y coordinate position for graph representation plotting 
     52        int yPosition 
     53 
     54    ctypedef struct ghmm_dbackground: 
     55        pass 
     56 
     57    ctypedef struct ghmm_alphabet: 
     58        pass 
     59     
     60    ctypedef struct ghmm_dmodel: 
     61        # Number of states 
     62        int N    
     63        # Number of outputs 
     64        int M    
     65        # Vector of states 
     66        ghmm_dstate *s   
     67        # Contains bit flags for varios model extensions such as 
     68        # kSilentStates, kTiedEmissions (see ghmm.h for a complete list) 
     69        int model_type 
     70        # The a priori probability for the model. 
     71        # A value of -1 indicates that no prior is defined.  
     72        # Note: this is not to be confused with priors on emission distributions. 
     73        double prior 
     74        # An arbitrary name for the model (null terminated utf-8) 
     75        char * name 
     76 
     77        # Flag variables for each state indicating whether it is emitting or not.  
     78        # Note: silent != NULL iff (model_type & kSilentStates) == 1  
     79        int *silent 
     80         
     81        # Int variable for the maximum level of higher order emissions. 
     82        int maxorder 
     83         
     84        # saves the history of emissions as int,  
     85        # the nth-last emission is (emission_history * |alphabet|^n+1) % |alphabet| 
     86        int emission_history 
     87 
     88        # Flag variables for each state indicating whether the states emissions 
     89        # are tied to another state. Groups of tied states are represented 
     90        # by their tie group leader (the lowest numbered member of the group). 
     91        # tied_to[s] == kUntied  : s is not a tied state 
     92        # tied_to[s] == s        : s is a tie group leader 
     93        # tied_to[t] == s        : t is tied to state s (t>s) 
     94        # Note: tied_to != NULL iff (model_type & kTiedEmissions) != 0  */ 
     95        int *tied_to 
     96 
     97        # Note: State store order information of the emissions. 
     98        # Classical HMMS have emission order 0; that is, the emission probability 
     99        # is conditioned only on the state emitting the symbol. 
     100        # For higher order emissions, the emission are conditioned on the state s 
     101        # as well as the previous emission_order observed symbols. 
     102        # The emissions are stored in the state's usual double* b.  
     103        # Note: order != NULL iff (model_type & kHigherOrderEmissions) != 0  */ 
     104        int * order 
     105         
     106        # ghmm_dbackground is a pointer to a 
     107        # ghmm_dbackground structure, which holds (essentially) an 
     108        # array of background distributions (which are just vectors of floating 
     109        # point numbers like state.b). 
     110        # For each state the array background_id indicates which of the background 
     111        # distributions to use in parameter estimation. A value of kNoBackgroundDistribution 
     112        # indicates that none should be used. 
     113        # Note: background_id != NULL iff (model_type & kHasBackgroundDistributions) != 0  
     114        int *background_id 
     115        ghmm_dbackground *bp 
     116 
     117        # Topological ordering of silent states  
     118        #  Condition: topo_order != NULL iff (model_type & kSilentStates) != 0  
     119        int *topo_order 
     120        int topo_order_length 
     121 
     122        # pow_lookup is a array of precomputed powers 
     123        # It contains in the i-th entry M (alphabet size) to the power of i 
     124        # The last entry is maxorder+1. 
     125        int *pow_lookup 
     126 
     127        # Store for each state a class label. Limits the possibly state sequence 
     128        # Note: label != NULL iff (model_type & kLabeledStates) != 0  
     129        int* label 
     130        ghmm_alphabet* label_alphabet 
     131        ghmm_alphabet* alphabet 
     132 
     133 
     134    int ghmm_dmodel_normalize(ghmm_dmodel *m) 
     135    int ghmm_dmodel_free(ghmm_dmodel **m) 
     136 
     137 
     138cdef class HiddenMarkovModel: 
     139    pass 
     140 
     141 
     142cdef class DiscreteHiddenMarkovModel: 
     143    cdef ghmm_dmodel* m 
     144    cdef bint initialized 
     145    cdef object emission_symbols 
     146    cdef Matrix_real_double_dense A, B 
     147     
     148    def __init__(self, A, B, pi, emission_symbols=None, name=None): 
     149        """ 
     150        INPUTS: 
     151            A  -- square matrix of doubles; the state change probabilities 
     152            B  -- matrix of doubles; emission probabilities 
     153            pi -- list of doubles; probabilities for each initial state 
     154            emission_state -- list of B.ncols() symbols (just used for printing) 
     155            name -- (optional) name of the model 
     156 
     157        EXAMPLES: 
     158        We create a discrete HMM with 2 internal states on an alphabet of 
     159        size 2. 
     160         
     161            sage: a = hmm.DiscreteHiddenMarkovModel([[0.2,0.8],[0.5,0.5]], [[1,0],[0,1]], [0,1]) 
     162             
     163        """ 
     164        if not is_Matrix(A): 
     165            A = matrix(RDF, len(A), len(A[0]), A) 
     166        if not is_Matrix(B): 
     167            B = matrix(RDF, len(B), len(B[0]), B) 
     168        if not A.is_square(): 
     169            raise ValueError, "A must be square" 
     170        if A.nrows() != B.nrows(): 
     171            raise ValueError, "number of rows of A and B must be the same" 
     172        if A.base_ring() != RDF: 
     173            A = A.change_ring(RDF) 
     174        if B.base_ring() != RDF: 
     175            B = B.change_ring(RDF) 
     176        if len(pi) != A.nrows(): 
     177            raise ValueError, "length of pi must equal number of rows of A" 
     178 
     179        cdef Py_ssize_t i, j, k 
     180 
     181        if emission_symbols is None: 
     182            self.emission_symbols = range(B.ncols()) 
     183        else: 
     184            self.emission_symbols = list(emission_symbols) 
     185 
     186        self.m = <ghmm_dmodel*> sage_malloc(sizeof(ghmm_dmodel)) 
     187        if not self.m: raise MemoryError 
     188         
     189        # Set all pointers to NULL 
     190        self.m.s = NULL; self.m.name = NULL; self.m.silent = NULL 
     191        self.m.tied_to = NULL; self.m.order = NULL; self.m.background_id = NULL 
     192        self.m.bp = NULL; self.m.topo_order = NULL; self.m.pow_lookup = NULL; 
     193        self.m.label = NULL; self.m.label_alphabet = NULL; self.m.alphabet = NULL 
     194 
     195        # Set number of states and number of outputs 
     196        self.m.N = A.nrows() 
     197        self.m.M = len(self.emission_symbols) 
     198        # Set the model type to discrete 
     199        self.m.model_type = GHMM_kDiscreteHMM 
     200 
     201        self.A = A 
     202        self.B = B 
     203 
     204        # Set that no a prior model probabilities are set. 
     205        self.m.prior = -1 
     206        # Assign model identifier if specified 
     207        if name is not None:  
     208            name = str(name) 
     209            self.m.name = name 
     210        else: 
     211            self.m.name = NULL 
     212 
     213        # Allocate and initialize states 
     214        cdef ghmm_dstate* states = <ghmm_dstate*> safe_malloc(sizeof(ghmm_dstate) * self.m.N) 
     215        cdef ghmm_dstate* state 
     216 
     217        silent_states = [] 
     218        tmp_order     = [] 
     219         
     220        for i in range(self.m.N): 
     221            v = B[i] 
     222 
     223            # Get a reference to the i-th state for convenience of the notation below. 
     224            state = &(states[i]) 
     225            state.desc = NULL 
     226 
     227            # Compute state order 
     228            if self.m.M > 1: 
     229                order = math.log( len(v), self.m.M ) - 1 
     230            else: 
     231                order = len(v) - 1 
     232 
     233            # Check for valid number of emission parameters 
     234            order = int(order) 
     235            if self.m.M**(order+1) == len(v): 
     236                tmp_order.append(order) 
     237            else: 
     238                raise ValueError, "number of columns (= %s) of B must be a power of the number of emission symbols (= %s)"%( 
     239                    B.ncols(), len(emission_symbols)) 
     240 
     241            state.b = to_double_array(v) 
     242            state.pi = pi[i] 
     243 
     244            silent_states.append( 1 if sum(v) == 0 else 0) 
     245 
     246            # Set out probabilities, i.e., the probabilities that each 
     247            # symbol will be emitted from this state.  
     248            v = A[i] 
     249            nz = v.nonzero_positions() 
     250            k = len(nz) 
     251            state.out_states = k 
     252            state.out_id = <int*> safe_malloc(sizeof(int)*k) 
     253            state.out_a  = <double*> safe_malloc(sizeof(double)*k) 
     254            for j in range(k): 
     255                state.out_id[j] = nz[j] 
     256                state.out_a[j]  = v[nz[j]] 
     257 
     258            # Set "in" probabilities 
     259            v = A.column(i) 
     260            nz = v.nonzero_positions() 
     261            k = len(nz) 
     262            state.in_states = k 
     263            state.in_id = <int*> safe_malloc(sizeof(int)*k) 
     264            state.in_a  = <double*> safe_malloc(sizeof(double)*k) 
     265            for j in range(k): 
     266                state.in_id[j] = nz[j] 
     267                state.in_a[j]  = v[nz[j]] 
     268                 
     269            state.fix = 0 
     270                 
     271        self.m.s = states 
     272        self.initialized = True; return 
     273 
     274        if sum(silent_states) > 0: 
     275            self.m.model_type |= GHMM_kSilentStates 
     276            self.m.silent = to_int_array(silent_states) 
     277 
     278        self.m.maxorder = max(tmp_order) 
     279        if self.m.maxorder > 0: 
     280            self.m.model_type |= GHMM_kHigherOrderEmissions 
     281            self.m.order = to_int_array(tmp_order) 
     282 
     283        # Initialize lookup table for powers of the alphabet size, 
     284        # which speeds up models with higher order states. 
     285        powLookUp = [1] * (self.m.maxorder+2) 
     286        for i in range(1,len(powLookUp)): 
     287            powLookUp[i] = powLookUp[i-1] * self.m.M 
     288        self.m.pow_lookup = to_int_array(powLookUp) 
     289 
     290        self.initialized = True 
     291         
     292 
     293    def __repr__(self): 
     294        s = "Discrete Hidden Markov Model%s (%s states, %s outputs)\n"%( 
     295            ' ' + self.m.name if self.m.name else '', 
     296            self.m.N, self.m.M) 
     297        s += 'Initial probabilities: %s\n'%self.initial_probabilities() 
     298        s += 'Transition matrix:\n%s\n'%self.transition_matrix() 
     299        s += 'Emission matrix:\n%s\n'%self.emission_matrix() 
     300        return s 
     301 
     302    def initial_probabilities(self): 
     303        cdef Py_ssize_t i 
     304        return [self.m.s[i].pi for i in range(self.m.N)] 
     305 
     306    def transition_matrix(self, list_only=True): 
     307        cdef Py_ssize_t i, j 
     308        for i from 0 <= i < self.m.N: 
     309            for j from 0 <= j < self.m.s[i].out_states: 
     310                self.A.set_unsafe_double(i,j,self.m.s[i].out_a[j]) 
     311        return self.A 
     312 
     313    def emission_matrix(self, list_only=True): 
     314        cdef Py_ssize_t i, j 
     315        for i from 0 <= i < self.m.N: 
     316            for j from 0 <= j < self.B._ncols: 
     317                self.B.set_unsafe_double(i,j,self.m.s[i].b[j]) 
     318        return self.B 
     319 
     320    def normalize(self): 
     321        """ 
     322        Normalize the transition and emission probabilities, if applicable. 
     323        """ 
     324        ghmm_dmodel_normalize(self.m) 
     325 
     326    def __dealloc__(self): 
     327        if self.initialized: 
     328            ghmm_dmodel_free(&self.m) 
     329     
     330 
     331 
     332 
     333################################################################################## 
     334# Helper Functions 
     335################################################################################## 
     336 
     337cdef double* to_double_array(v) except NULL: 
     338    cdef double x 
     339    cdef double* w = <double*> safe_malloc(sizeof(double)*len(v)) 
     340    cdef Py_ssize_t i = 0 
     341    for x in v: 
     342        w[i] = x 
     343        i += 1 
     344    return w 
     345 
     346cdef int* to_int_array(v) except NULL: 
     347    cdef int x 
     348    cdef int* w = <int*> safe_malloc(sizeof(int)*len(v)) 
     349    cdef Py_ssize_t i = 0 
     350    for x in v: 
     351        w[i] = x 
     352        i += 1 
     353    return w 
     354 
     355cdef void* safe_malloc(int bytes) except NULL: 
     356    """ 
     357    malloc the given bytes of memory and check that the malloc 
     358    succeeds -- if not raise a MemoryError. 
     359    """ 
     360    cdef void* t = sage_malloc(bytes) 
     361    if not t: 
     362        raise MemoryError, "error allocating memory for Hidden Markov Model" 
     363    return t 
     364         
  • a/setup.py

    old new  
    481481 
    482482finance_fractal = Extension('sage.finance.fractal', ['sage/finance/fractal.pyx']) 
    483483 
     484hmm = Extension('sage.stats.hmm.hmm', ['sage/stats/hmm/hmm.pyx'], 
     485                libraries = ['ghmm']) 
     486 
    484487##################################################### 
    485488 
    486489ext_modules = [ \ 
     
    597600     
    598601    markov_multifractal, 
    599602    finance_fractal, 
     603 
     604    hmm, 
    600605 
    601606    Extension('sage.media.channels', 
    602607              sources = ['sage/media/channels.pyx']), \ 
     
    13551360 
    13561361                     'sage.finance', 
    13571362 
     1363                      
    13581364                     'sage.functions', 
    13591365 
    13601366                     'sage.geometry', 
     
    14271433                      
    14281434                     'sage.stats',  
    14291435 
     1436                     'sage.stats.hmm', 
     1437 
    14301438                     'sage.parallel',  
    14311439                      
    14321440                     'sage.schemes',