Ticket #3773: sage-3773-3726-referee_response.patch

File sage-3773-3726-referee_response.patch, 20.6 kB (added by was, 5 months ago)

this addresses all josh's comments on #3773 and also #3726

  • a/sage/matrix/matrix_real_double_dense.pxd

    old new  
    1515    cdef int _LU_valid 
    1616    cdef _c_compute_LU(self) 
    1717    cdef set_unsafe_double(self, Py_ssize_t i, Py_ssize_t j, double value) 
     18    cdef double get_unsafe_double(self, Py_ssize_t i, Py_ssize_t j) 
  • a/sage/matrix/matrix_real_double_dense.pyx

    old new  
    227227 
    228228    cdef set_unsafe_double(self, Py_ssize_t i, Py_ssize_t j, double value): 
    229229        gsl_matrix_set(self._matrix,i,j,value) 
    230      
     230 
     231    cdef double get_unsafe_double(self, Py_ssize_t i, Py_ssize_t j): 
     232        return gsl_matrix_get(self._matrix,i,j) 
    231233 
    232234    ######################################################################## 
    233235    # LEVEL 2 functionality 
  • a/sage/stats/hmm/chmm.pyx

    old new  
    106106        pi -- list of floats that sums to 1.0; these are 
    107107              the initial probabilities of each hidden state 
    108108        name -- (default: None); a string 
     109        normalize -- (optional; default=True) whether or not to 
     110                     normalize the model so the probabilities add to 1 
    109111 
    110112    EXAMPLES: 
    111113    Define the transition matrix: 
     
    128130        [(0.0, 1.0), (-1.0, 0.5), (1.0, 0.20000000000000001)] 
    129131        Initial probabilities: [1.0, 0.0, 0.0] 
    130132    """ 
    131     def __init__(self, A, B, pi=None, name=None): 
     133    def __init__(self, A, B, pi=None, name=None, normalize=True): 
    132134        """ 
    133135        EXAMPLES: 
    134136        We make a very simple model: 
     
    148150            Emission parameters: 
    149151            [] 
    150152            Initial probabilities: [] 
     153 
     154        TESTS: 
     155        We test normalize=False: 
     156            sage: hmm.GaussianHiddenMarkovModel([[1,100],[0,1]], [(0,1),(0,2)], [1/2,1/2], normalize=False).transition_matrix() 
     157            [  1.0 100.0] 
     158            [  0.0   1.0]             
    151159        """ 
    152160        ContinuousHiddenMarkovModel.__init__(self, A, B, pi, name=name) 
    153161 
     
    164172        self._initialize_state(pi) 
    165173        self.m.class_change = NULL 
    166174        self.initialized = True 
     175        if normalize: 
     176            self.normalize() 
    167177 
    168178    def _initialize_state(self, pi): 
    169179        """ 
     
    293303        names compare to be equal. 
    294304 
    295305        EXAMPLES: 
    296             sage: m = hmm.GaussianHiddenMarkovModel([[0.4,0.6],[0.1,0.9]], [(0.0,1.0),(1,1)], [1,2], "Test 1"
     306            sage: m = hmm.GaussianHiddenMarkovModel([[0.4,0.6],[0.1,0.9]], [(0.0,1.0),(1,1)], [1,2], "Test 1", normalize=False
    297307            sage: m.__cmp__(m) 
    298308            0 
    299309 
    300310        Note that the name doesn't matter: 
    301             sage: n = hmm.GaussianHiddenMarkovModel([[0.4,0.6],[0.1,0.9]], [(0.0,1.0),(1,1)], [1,2], "Test 2"
     311            sage: n = hmm.GaussianHiddenMarkovModel([[0.4,0.6],[0.1,0.9]], [(0.0,1.0),(1,1)], [1,2], "Test 2", normalize=False
    302312            sage: m.__cmp__(n) 
    303313            0 
    304314 
     
    485495        if ghmm_cmodel_normalize(self.m): 
    486496            raise RuntimeError, "error normalizing model (note that model may still have been partly changed)" 
    487497 
    488     def sample(self, long length, long number): 
     498    def sample(self, long length, number=None): 
    489499        """ 
    490500        Return number samples from this HMM of given length. 
    491501 
     
    493503            length -- positive integer 
    494504             
    495505        OUTPUT: 
    496             a list of number TimeSeries each of the given length  
     506            if number is not given, return a single TimeSeries. 
     507            if number is given, return a list of TimeSeries. 
    497508 
    498509        EXAMPLES: 
    499510            sage: m = hmm.GaussianHiddenMarkovModel([[1]], [(0,1)], [1]) 
     
    501512            sage: m.sample(5,2) 
    502513            [[-2.2808, -0.0699, 0.1858, 1.3624, 1.8252], 
    503514             [0.0080, 0.1244, 0.5098, 0.9961, 0.4710]] 
     515 
     516            sage: m = hmm.GaussianHiddenMarkovModel([[1]], [(0,1)], [1]) 
     517            sage: set_random_seed(0) 
     518            sage: m.sample(5) 
     519            [-2.2808, -0.0699, 0.1858, 1.3624, 1.8252] 
    504520        """ 
     521        if number is None: 
     522            single = True 
     523            number = 1 
     524        else: 
     525            single = False 
    505526        seed = random() 
    506527        cdef ghmm_cseq *d = ghmm_cmodel_generate_sequences(self.m, seed, length, number, length) 
    507528        cdef Py_ssize_t i, j 
     
    512533            for i from 0 <= i < length: 
    513534                T._values[i] = d.seq[j][i] 
    514535            ans.append(T) 
     536        if single: 
     537            return ans[0] 
    515538        return ans 
    516539        # The line below would replace the above by something that returns a list of lists. 
    517540        #return [[d.seq[j][i] for i in range(length)] for j in range(number)] 
    518    
    519     def sample_single(self, long length): 
    520         """ 
    521         Return a single sample computed using this Gaussian Hidden Markov Model. 
    522  
    523         INPUT: 
    524             length -- positive integer 
    525         OUTPUT: 
    526             a TimeSeries 
    527  
    528         EXAMPLES: 
    529             sage: m = hmm.GaussianHiddenMarkovModel([[1]], [(0,1)], [1]) 
    530             sage: set_random_seed(0) 
    531             sage: m.sample_single(5) 
    532             [-2.2808, -0.0699, 0.1858, 1.3624, 1.8252] 
    533         """ 
    534         return self.sample(length,1)[0] 
    535541 
    536542 
    537543             
     
    657663        Baum-Welch algorithm to increase the probability of observing O. 
    658664 
    659665        INPUT: 
    660             training_seqs -- a list of lists of emission symbols; all sequences of length 0 are ignored. 
     666            training_seqs -- a list of lists of emission symbols; all sequences of 
     667                      length 0 are ignored. 
    661668            max_iter -- integer or None (default: 10000) maximum number 
    662669                      of Baum-Welch steps to take 
    663670            log_likehood_cutoff -- positive float or None (default: 0.00001); 
     
    684691            [(1.0, 0.01)] 
    685692            Initial probabilities: [1.0] 
    686693 
     694        We train using a list of lists: 
     695            sage: m = hmm.GaussianHiddenMarkovModel([[1,0],[0,1]], [(0,1),(0,2)], [1/2,1/2]) 
     696            sage: m.baum_welch([[1,2,], [3,2]]) 
     697            sage: m 
     698            Gaussian Hidden Markov Model with 2 States 
     699            Transition matrix: 
     700            [1.0 0.0] 
     701            [0.0 1.0] 
     702            Emission parameters: 
     703            [(1.946539535984342, 0.70508296299241024), (2.0208156913293394, 0.70680033099099593)] 
     704            Initial probabilities: [0.28024729110782109, 0.71975270889217891] 
     705 
     706        We train the same model, but waiting one of the lists more than the other. 
     707            sage: m = hmm.GaussianHiddenMarkovModel([[1,0],[0,1]], [(0,1),(0,2)], [1/2,1/2]) 
     708            sage: m.baum_welch([([1,2,],10), ([3,2],1)]) 
     709            sage: m 
     710            Gaussian Hidden Markov Model with 2 States 
     711            Transition matrix: 
     712            [1.0 0.0] 
     713            [0.0 1.0] 
     714            Emission parameters: 
     715            [(1.5851786151779879, 0.57264580740105153), (1.5945035666064733, 0.57928632238916189)] 
     716            Initial probabilities: [0.38546857052811945, 0.61453142947188055] 
     717         
     718 
    687719        Training sequences of length 0 are gracefully ignored: 
    688720            sage: m.baum_welch([]) 
    689721            sage: m.baum_welch([([],1)]) 
     
    713745    sequences a ValueError is raised, since GHMM doesn't treat 
    714746    this degenerate case well. 
    715747    """ 
    716     if isinstance(seq, list) and len(seq) > 0 and not isinstance(seq[0], tuple): 
     748    if isinstance(seq, list) and len(seq) > 0 and not isinstance(seq[0], (list, tuple)): 
    717749        seq = TimeSeries(seq) 
    718750    if isinstance(seq, TimeSeries): 
    719751        seq = [(seq,float(1))] 
     
    787819        pi -- list of floats that sums to 1.0; these are 
    788820              the initial probabilities of each hidden state 
    789821        name -- (default: None); a string 
     822        normalize -- (optional; default=True) whether or not to normalize 
     823                     the model so the probabilities add to 1 
    790824 
    791825    EXAMPLES: 
    792826        sage: A  = [[0.5,0.5],[0.5,0.5]] 
     
    798832        [0.5 0.5] 
    799833        [0.5 0.5] 
    800834        Emission parameters: 
    801         [[(0.5, (0.0, 1.0)), (0.10000000000000001, (1.0, 10000.0))], [(1.0, (1.0, 1.0)), (0.0, (0.0, 0.10000000000000001))]] 
     835        [[(1.0, (0.0, 1.0)), (0.10000000000000001, (1.0, 10000.0))], [(1.0, (1.0, 1.0)), (0.0, (0.0, 0.10000000000000001))]] 
    802836        Initial probabilities: [1.0, 0.0] 
     837         
    803838 
    804839    TESTS: 
    805840    We test that standard deviations must be positive: 
     
    814849        ... 
    815850        ValueError: number of Gaussian mixtures must be positive 
    816851    """ 
    817     def __init__(self, A, B, pi=None, name=None): 
     852    def __init__(self, A, B, pi=None, name=None, normalize=True): 
    818853        """ 
    819854        EXAMPLES: 
    820855            sage: hmm.GaussianMixtureHiddenMarkovModel([[1]], [[(1,(0,1))]], [1], name='simple') 
     
    922957 
    923958        OUTPUT: 
    924959           list of lists of tuples (weight, (mu, sigma)) 
    925 
     960 
    926961        EXAMPLES: 
    927962            sage: m = hmm.GaussianMixtureHiddenMarkovModel([[0.5,0.5],[0.5,0.5]], [[(0.5,(0.0,1.0)), (0.1,(1,10000))],[(1,(1,1))]], [1,0]) 
    928963            sage: m.emission_parameters() 
    929             [[(0.5, (0.0, 1.0)), (0.10000000000000001, (1.0, 10000.0))], 
    930              [(1.0, (1.0, 1.0)), (0.0, (0.0, 0.0))]] 
     964            [[(1.0, (0.0, 1.0)), (0.10000000000000001, (1.0, 10000.0))], [(1.0, (1.0, 1.0)), (0.0, (0.0, 0.0))]] 
    931965        """ 
    932966        cdef Py_ssize_t i,j 
    933967         
  • a/sage/stats/hmm/hmm.pyx

    old new  
    5353            [0.0 1.0] 
    5454            Initial probabilities: [1.0] 
    5555        """ 
     56        cdef Py_ssize_t n 
     57         
    5658        # Convert A to a matrix 
    5759        if not is_Matrix(A): 
    5860            n = len(A) 
     
    9395        self.A = A 
    9496        self.B = B 
    9597 
     98        # Check that all entries of A are nonnegative and raise a 
     99        # ValueError otherwise. Note that we don't check B since it 
     100        # has entries that are potentially negative in the continuous 
     101        # case.  But GHMM prints clear warnings when the emission 
     102        # probabilities are negative, i.e., it does not silently give 
     103        # wrong results like it does for negative transition 
     104        # probabilities. 
     105        cdef Py_ssize_t i, j 
     106        for i from 0 <= i < self.A._nrows: 
     107            for j from 0 <= j < self.A._ncols: 
     108                if self.A.get_unsafe_double(i,j) < 0: 
     109                    raise ValueError, "each transition probability must be nonnegative" 
     110                 
     111 
    96112 
    97113cdef class DiscreteHiddenMarkovModel(HiddenMarkovModel): 
    98      
    99     def __init__(self, A, B, pi=None, emission_symbols=None, name=None): 
     114    """ 
     115    Create a discrete hidden Markov model. 
     116 
     117    hmm.DiscreteHiddenMarkovModel(A, B, pi=None, emission_symbols=None, name=None, normalize=True) 
     118n     
     119    INPUTS: 
     120        A  -- square matrix of doubles; the state change probabilities 
     121        B  -- matrix of doubles; emission probabilities 
     122        pi -- list of floats; probabilities for each initial state 
     123        emission_state -- list of B.ncols() symbols (just used for printing) 
     124        name -- (optional) name of the model 
     125        normalize -- (optional; default=True) whether or not to normalize 
     126                     the model so the probabilities add to 1 
     127 
     128    EXAMPLES: 
     129    We create a discrete HMM with 2 internal states on an alphabet of size 2. 
     130        sage: hmm.DiscreteHiddenMarkovModel([[0.2,0.8],[0.5,0.5]], [[1,0],[0,1]], [0,1]) 
     131        Discrete Hidden Markov Model with 2 States and 2 Emissions 
     132        Transition matrix: 
     133        [0.2 0.8] 
     134        [0.5 0.5] 
     135        Emission matrix: 
     136        [1.0 0.0] 
     137        [0.0 1.0] 
     138        Initial probabilities: [0.0, 1.0] 
     139 
     140    The transition probabilities must be nonnegative: 
     141        sage: hmm.DiscreteHiddenMarkovModel([[-0.2,0.8],[0.5,0.5]], [[1,0],[0,1]], [0,1]) 
     142        Traceback (most recent call last): 
     143        ... 
     144        ValueError: each transition probability must be nonnegative 
     145 
     146    The transition probabilities are by default automatically normalized: 
     147        sage: a = hmm.DiscreteHiddenMarkovModel([[0.2,0.3],[0.5,0.5]], [[1,0],[0,1]], [0,1]) 
     148        sage: a.transition_matrix() 
     149        [0.4 0.6] 
     150        [0.5 0.5] 
     151    """ 
     152    def __init__(self, A, B, pi=None, emission_symbols=None, name=None, normalize=True): 
    100153        """ 
    101         INPUTS: 
    102             A  -- square matrix of doubles; the state change probabilities 
    103             B  -- matrix of doubles; emission probabilities 
    104             pi -- list of floats; probabilities for each initial state 
    105             emission_state -- list of B.ncols() symbols (just used for printing) 
    106             name -- (optional) name of the model 
    107  
    108154        EXAMPLES: 
    109         We create a discrete HMM with 2 internal states on an alphabet of size 2. 
    110             sage: a = hmm.DiscreteHiddenMarkovModel([[0.2,0.8],[0.5,0.5]], [[1,0],[0,1]], [0,1]) 
     155            sage: hmm.DiscreteHiddenMarkovModel([[1]], [[0.0,1.0]], [1]) 
     156            Discrete Hidden Markov Model with 1 States and 2 Emissions 
     157            Transition matrix: 
     158            [1.0] 
     159            Emission matrix: 
     160            [0.0 1.0] 
     161            Initial probabilities: [1.0] 
    111162        """ 
    112163        # memory has not all been setup yet. 
    113164        self.initialized = False   
     
    203254                 
    204255        self.m.s = states 
    205256        self.initialized = True 
     257        if normalize: 
     258            self.normalize() 
    206259 
    207260    def __cmp__(self, other): 
    208261        """ 
     
    219272        names compare to be equal. 
    220273 
    221274        EXAMPLES: 
    222             sage: m = hmm.DiscreteHiddenMarkovModel([[0.4,0.6],[0.1,0.9]], [[0.0,1.0],[1,1]], [1,2]
     275            sage: m = hmm.DiscreteHiddenMarkovModel([[0.4,0.6],[0.1,0.9]], [[0.0,1.0],[1,1]], [1,2], normalize=False
    223276            sage: m.__cmp__(m) 
    224277            0 
    225278 
    226279        Note that the name doesn't matter: 
    227             sage: n = hmm.DiscreteHiddenMarkovModel([[0.4,0.6],[0.1,0.9]], [[0.0,1.0],[1,1]], [1,2]
     280            sage: n = hmm.DiscreteHiddenMarkovModel([[0.4,0.6],[0.1,0.9]], [[0.0,1.0],[1,1]], [1,2], normalize=False
    228281            sage: m.__cmp__(n) 
    229282            0 
    230283 
     
    460513        """ 
    461514        ghmm_dmodel_normalize(self.m) 
    462515 
    463     def sample_single(self, long length): 
    464         """ 
    465         Return a single sample computed using this Hidden Markov Model. 
    466          
    467         EXAMPLE: 
    468             sage: set_random_seed(0) 
    469             sage: a = hmm.DiscreteHiddenMarkovModel([[0.1,0.9],[0.1,0.9]], [[1,0],[0,1]], [0,1]) 
    470             sage: a.sample_single(20) 
    471             [1, 0, 1, 1, 0, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1] 
    472             sage: a.sample_single(1000).count(0) 
    473             113 
    474  
    475         If the emission symbols are set 
    476             sage: set_random_seed(0) 
    477             sage: a = hmm.DiscreteHiddenMarkovModel([[0.5,0.5],[0.1,0.9]], [[1,0],[0,1]], [0,1], ['up', 'down']) 
    478             sage: print a.sample_single(10) 
    479             ['down', 'up', 'down', 'down', 'up', 'down', 'down', 'up', 'down', 'up'] 
    480              
    481         """ 
    482         seed = random() 
    483         cdef ghmm_dseq *d = ghmm_dmodel_generate_sequences(self.m, seed, length, 1, length) 
    484         cdef Py_ssize_t i 
    485         v = [d.seq[0][i] for i in range(length)] 
    486         ghmm_dseq_free(&d) 
    487         if self._emission_symbols_dict: 
    488             w = self._emission_symbols 
    489             return [w[i] for i in v] 
    490         else: 
    491             return v 
    492  
    493     def sample(self, long length, long number): 
     516    def sample(self, long length, number=None): 
    494517        """ 
    495518        Return number samples from this HMM of given length. 
     519 
     520        INPUT: 
     521            length -- positive integer 
     522            number -- (default: None) if given, compute list of this many sample sequences 
     523 
     524        OUTPUT: 
     525            if number is not given, return a single TimeSeries. 
     526            if number is given, return a list of TimeSeries. 
    496527 
    497528        EXAMPLES: 
    498529            sage: set_random_seed(0) 
    499530            sage: a = hmm.DiscreteHiddenMarkovModel([[0.1,0.9],[0.1,0.9]], [[1,0],[0,1]], [0,1]) 
    500531            sage: print a.sample(10, 3) 
    501532            [[1, 0, 1, 1, 0, 1, 1, 0, 1, 0], [1, 1, 1, 1, 1, 1, 1, 1, 1, 0], [1, 1, 1, 1, 1, 1, 1, 1, 1, 1]] 
     533            sage: a.sample(15) 
     534            [1, 1, 1, 1, 1, 1, 0, 1, 1, 0, 1, 0, 1, 1, 1] 
     535            sage: list(a.sample(1000)).count(0) 
     536            95 
     537 
     538        If the emission symbols are set 
     539            sage: set_random_seed(0) 
     540            sage: a = hmm.DiscreteHiddenMarkovModel([[0.5,0.5],[0.1,0.9]], [[1,0],[0,1]], [0,1], ['up', 'down']) 
     541            sage: a.sample(10) 
     542            ['down', 'up', 'down', 'down', 'up', 'down', 'down', 'up', 'down', 'up'] 
    502543        """ 
    503544        seed = random() 
     545        if number is None: 
     546            number = 1 
     547            single = True 
     548        else: 
     549            single = False 
    504550        cdef ghmm_dseq *d = ghmm_dmodel_generate_sequences(self.m, seed, length, number, length) 
    505551        cdef Py_ssize_t i, j 
    506552        v = [[d.seq[j][i] for i in range(length)] for j in range(number)] 
    507553        if self._emission_symbols_dict: 
    508554            w = self._emission_symbols 
    509             return [[w[i] for i in z] for z in v] 
    510         else: 
    511             return v 
     555            v = [[w[i] for i in z] for z in v] 
     556        if single: 
     557            return v[0] 
     558        return v 
    512559 
    513560    def emission_symbols(self): 
    514561        """ 
     
    533580            sage: a.set_emission_symbols([3,5]) 
    534581            sage: a.emission_symbols() 
    535582            [3, 5] 
    536             sage: a.sample_single(10) 
     583            sage: a.sample(10) 
    537584            [5, 3, 5, 5, 3, 5, 5, 3, 5, 3] 
    538585            sage: a.set_emission_symbols([pi,5/9+e]) 
    539             sage: a.sample_single(10) 
     586            sage: a.sample(10) 
    540587            [e + 5/9, e + 5/9, e + 5/9, e + 5/9, e + 5/9, e + 5/9, pi, pi, e + 5/9, pi] 
    541588        """ 
    542589        if emission_symbols is None: 
     
    707754            sage: 1.0/6 
    708755            0.166666666666667 
    709756 
     757        We run Baum-Welch on a single sequence: 
     758            sage: a = hmm.DiscreteHiddenMarkovModel([[0.5,0.5],[0.5,0.5]], [[0.5,0.5],[0.5,0.5]], [0.5,0.5]) 
     759            sage: a.baum_welch([1,0,1]*10) 
     760            sage: a 
     761            Discrete Hidden Markov Model with 2 States and 2 Emissions 
     762            Transition matrix: 
     763            [0.5 0.5] 
     764            [0.5 0.5] 
     765            Emission matrix: 
     766            [0.333333333333 0.666666666667] 
     767            [0.333333333333 0.666666666667] 
     768            Initial probabilities: [0.5, 0.5] 
     769 
    710770        TESTS: 
    711771        We test training with non-default string symbols: 
    712772            sage: a = hmm.DiscreteHiddenMarkovModel([[0.5,0.5],[0.5,0.5]], [[0.5,0.5],[0.5,0.5]], [0.5,0.5], ['up','down']) 
     
    783843def unpickle_discrete_hmm_v0(A, B, pi, emission_symbols,name): 
    784844    """ 
    785845    TESTS: 
    786         sage: m = hmm.DiscreteHiddenMarkovModel([[0.4,0.6],[0.1,0.9]], [[0.0,1.0],[1,1]], [1,0], name='test model') 
     846        sage: m = hmm.DiscreteHiddenMarkovModel([[0.4,0.6],[0.1,0.9]], [[0.0,1.0],[0.5,0.5]], [1,0], name='test model') 
    787847        sage: loads(dumps(m)) == m 
    788848        True 
    789849        sage: loads(dumps(m)).name() 
     
    795855        [0.1 0.9] 
    796856        Emission matrix: 
    797857        [0.0 1.0] 
    798         [1.0 1.0
     858        [0.5 0.5
    799859        Initial probabilities: [1.0, 0.0] 
    800860        Emission symbols: ['a', 'b'] 
    801861    """