ó
r
\c           @   sB   d  d l  Z  d  d l Z d d l m Z d e f d     YZ d S(   i˙˙˙˙Ni   (   t   ProbingStatet   CharSetProberc           B   s   e  Z d  Z d
 d  Z d   Z e d    Z d   Z e d    Z	 d   Z
 e d    Z e d    Z e d	    Z RS(   gffffffî?c         C   s(   d  |  _ | |  _ t j t  |  _ d  S(   N(   t   Nonet   _statet   lang_filtert   loggingt	   getLoggert   __name__t   logger(   t   selfR   (    (    sJ   /data/av2000/b2b/venv/lib/python2.7/site-packages/chardet/charsetprober.pyt   __init__'   s    		c         C   s   t  j |  _ d  S(   N(   R    t	   DETECTINGR   (   R	   (    (    sJ   /data/av2000/b2b/venv/lib/python2.7/site-packages/chardet/charsetprober.pyt   reset,   s    c         C   s   d  S(   N(   R   (   R	   (    (    sJ   /data/av2000/b2b/venv/lib/python2.7/site-packages/chardet/charsetprober.pyt   charset_name/   s    c         C   s   d  S(   N(    (   R	   t   buf(    (    sJ   /data/av2000/b2b/venv/lib/python2.7/site-packages/chardet/charsetprober.pyt   feed3   s    c         C   s   |  j  S(   N(   R   (   R	   (    (    sJ   /data/av2000/b2b/venv/lib/python2.7/site-packages/chardet/charsetprober.pyt   state6   s    c         C   s   d S(   Ng        (    (   R	   (    (    sJ   /data/av2000/b2b/venv/lib/python2.7/site-packages/chardet/charsetprober.pyt   get_confidence:   s    c         C   s   t  j d d |   }  |  S(   Ns   ([ -])+t    (   t   ret   sub(   R   (    (    sJ   /data/av2000/b2b/venv/lib/python2.7/site-packages/chardet/charsetprober.pyt   filter_high_byte_only=   s    c         C   sz   t    } t j d |   } xX | D]P } | j | d   | d } | j   re | d k  re d } n  | j |  q" W| S(   s5  
        We define three types of bytes:
        alphabet: english alphabets [a-zA-Z]
        international: international characters [-˙]
        marker: everything else [^a-zA-Z-˙]

        The input buffer can be thought to contain a series of words delimited
        by markers. This function works to filter all words that contain at
        least one international character. All contiguous sequences of markers
        are replaced by a single space ascii character.

        This filter applies to all scripts which do not use English characters.
        s%   [a-zA-Z]*[-˙]+[a-zA-Z]*[^a-zA-Z-˙]?i˙˙˙˙s   R   (   t	   bytearrayR   t   findallt   extendt   isalpha(   R   t   filteredt   wordst   wordt	   last_char(    (    sJ   /data/av2000/b2b/venv/lib/python2.7/site-packages/chardet/charsetprober.pyt   filter_international_wordsB   s    			
	c         C   sč   t    } t } d } x˛ t t |    D] } |  | | d !} | d k rT t } n | d k ri t } n  | d k  r( | j   r( | | k rš | rš | j |  | | ! | j d  n  | d } q( q( W| sä | j |  |  n  | S(   sČ  
        Returns a copy of ``buf`` that retains only the sequences of English
        alphabet and high byte characters that are not between <> characters.
        Also retains English alphabet and high byte characters immediately
        before occurrences of >.

        This filter can be applied to all scripts which contain both English
        characters and extended ASCII characters, but is currently only used by
        ``Latin1Prober``.
        i    i   t   >t   <s   R   (   R   t   Falset   ranget   lent   TrueR   R   (   R   R   t   in_tagt   prevt   currt   buf_char(    (    sJ   /data/av2000/b2b/venv/lib/python2.7/site-packages/chardet/charsetprober.pyt   filter_with_english_lettersg   s"    			N(   R   t
   __module__t   SHORTCUT_THRESHOLDR   R
   R   t   propertyR   R   R   R   t   staticmethodR   R   R)   (    (    (    sJ   /data/av2000/b2b/venv/lib/python2.7/site-packages/chardet/charsetprober.pyR   #   s   			%(   R   R   t   enumsR    t   objectR   (    (    (    sJ   /data/av2000/b2b/venv/lib/python2.7/site-packages/chardet/charsetprober.pyt   <module>   s   