
r
\c           @   s   d  Z  d d l Z d d l Z d d l Z d d l m Z d d l m Z m Z m	 Z	 d d l
 m Z d d l m Z d d l m Z d d	 l m Z d
 e f d     YZ d S(   s  
Module containing the UniversalDetector detector class, which is the primary
class a user of ``chardet`` should use.

:author: Mark Pilgrim (initial port to Python)
:author: Shy Shalom (original C code)
:author: Dan Blanchard (major refactoring for 3.0)
:author: Ian Cordasco
iNi   (   t   CharSetGroupProber(   t
   InputStatet   LanguageFiltert   ProbingState(   t   EscCharSetProber(   t   Latin1Prober(   t   MBCSGroupProber(   t   SBCSGroupProbert   UniversalDetectorc           B   s   e  Z d  Z d Z e j d  Z e j d  Z e j d  Z i d d 6d d 6d	 d
 6d d 6d d 6d d 6d d 6d d 6Z	 e
 j d  Z d   Z d   Z d   Z RS(   sq  
    The ``UniversalDetector`` class underlies the ``chardet.detect`` function
    and coordinates all of the different charset probers.

    To get a ``dict`` containing an encoding and its confidence, you can simply
    run:

    .. code::

            u = UniversalDetector()
            u.feed(some_bytes)
            u.close()
            detected = u.result

    g?s   [-]s   (|~{)s   [-]s   Windows-1252s
   iso-8859-1s   Windows-1250s
   iso-8859-2s   Windows-1251s
   iso-8859-5s   Windows-1256s
   iso-8859-6s   Windows-1253s
   iso-8859-7s   Windows-1255s
   iso-8859-8s   Windows-1254s
   iso-8859-9s   Windows-1257s   iso-8859-13c         C   sq   d  |  _ g  |  _ d  |  _ d  |  _ d  |  _ d  |  _ d  |  _ | |  _ t	 j
 t  |  _ d  |  _ |  j   d  S(   N(   t   Nonet   _esc_charset_probert   _charset_proberst   resultt   donet	   _got_datat   _input_statet
   _last_chart   lang_filtert   loggingt	   getLoggert   __name__t   loggert   _has_win_bytest   reset(   t   selfR   (    (    sN   /data/av2000/b2b/venv/lib/python2.7/site-packages/chardet/universaldetector.pyt   __init__Q   s    									c         C   s   i d d 6d d 6d d 6|  _ t |  _ t |  _ t |  _ t j |  _ d |  _	 |  j
 rg |  j
 j   n  x |  j D] } | j   qq Wd S(   s   
        Reset the UniversalDetector and all of its probers back to their
        initial states.  This is called by ``__init__``, so you only need to
        call this directly in between analyses of different documents.
        t   encodingg        t
   confidencet   languaget    N(   R	   R   t   FalseR   R   R   R   t
   PURE_ASCIIR   R   R
   R   R   (   R   t   prober(    (    sN   /data/av2000/b2b/venv/lib/python2.7/site-packages/chardet/universaldetector.pyR   ^   s    					c         C   sy  |  j  r d St |  s d St | t  s; t |  } n  |  j s{| j t j  rw i d d 6d d 6d d 6|  _ n | j t j	 t j
 f  r i d d 6d d 6d d 6|  _ n | j d	  r i d
 d 6d d 6d d 6|  _ nl | j d  ri d d 6d d 6d d 6|  _ n< | j t j t j f  rOi d d 6d d 6d d 6|  _ n  t |  _ |  j d d k	 r{t |  _  d Sn  |  j t j k r|  j j |  rt j |  _ q|  j t j k r|  j j |  j |  rt j |  _ qn  | d |  _ |  j t j k r|  j s(t |  j  |  _ n  |  j j |  t j k rui |  j j d 6|  j j   d 6|  j j  d 6|  _ t |  _  qun |  j t j k ru|  j! st" |  j  g |  _! |  j t# j$ @r|  j! j% t&    n  |  j! j% t'    n  x` |  j! D]U } | j |  t j k ri | j d 6| j   d 6| j  d 6|  _ t |  _  PqqW|  j( j |  rut |  _) qun  d S(   s  
        Takes a chunk of a document and feeds it through all of the relevant
        charset probers.

        After calling ``feed``, you can check the value of the ``done``
        attribute to see if you need to continue feeding the
        ``UniversalDetector`` more data, or if it has made a prediction
        (in the ``result`` attribute).

        .. note::
           You should always call ``close`` when you're done feeding in your
           document if ``done`` is not already ``True``.
        Ns	   UTF-8-SIGR   g      ?R   R   R   s   UTF-32s     s   X-ISO-10646-UCS-4-3412t     s   X-ISO-10646-UCS-4-2143s   UTF-16i(*   R   t   lent
   isinstancet	   bytearrayR   t
   startswitht   codecst   BOM_UTF8R   t   BOM_UTF32_LEt   BOM_UTF32_BEt   BOM_LEt   BOM_BEt   TrueR	   R   R   R   t   HIGH_BYTE_DETECTORt   searcht	   HIGH_BYTEt   ESC_DETECTORR   t	   ESC_ASCIIR
   R   R   t   feedR   t   FOUND_ITt   charset_namet   get_confidenceR   R   R   R   t   NON_CJKt   appendR   R   t   WIN_BYTE_DETECTORR   (   R   t   byte_strR    (    (    sN   /data/av2000/b2b/venv/lib/python2.7/site-packages/chardet/universaldetector.pyR2   o   s~    		




					c   	      C   s>  |  j  r |  j St |  _  |  j s5 |  j j d  n1|  j t j k rh i d d 6d d 6d d 6|  _ n |  j t j	 k rfd } d } d } xD |  j D]9 } | s q n  | j   } | | k r | } | } q q W| rf| |  j k rf| j } | j j   } | j   } | j d	  r?|  j r?|  j j | |  } q?n  i | d 6| d 6| j d 6|  _ qfn  |  j j   t j k r7|  j d d k r7|  j j d
  x |  j D] } | sqn  t | t  rx^ | j D]+ } |  j j d | j | j | j    qWq|  j j d | j | j | j    qWq7n  |  j S(   s   
        Stop analyzing the current document and come up with a final
        prediction.

        :returns:  The ``result`` attribute, a ``dict`` with the keys
                   `encoding`, `confidence`, and `language`.
        s   no data received!t   asciiR   g      ?R   R   R   g        s   iso-8859s    no probers hit minimum thresholds   %s %s confidence = %sN(   R   R   R,   R   R   t   debugR   R   R   R/   R	   R   R5   t   MINIMUM_THRESHOLDR4   t   lowerR%   R   t   ISO_WIN_MAPt   getR   t   getEffectiveLevelR   t   DEBUGR#   R    t   probers(	   R   t   prober_confidencet   max_prober_confidencet
   max_proberR    R4   t   lower_charset_nameR   t   group_prober(    (    sN   /data/av2000/b2b/venv/lib/python2.7/site-packages/chardet/universaldetector.pyt   close   s`    				
		
(   R   t
   __module__t   __doc__R<   t   ret   compileR-   R0   R8   R>   R   t   ALLR   R   R2   RH   (    (    (    sN   /data/av2000/b2b/venv/lib/python2.7/site-packages/chardet/universaldetector.pyR   3   s"   

		m(   RJ   R&   R   RK   t   charsetgroupproberR    t   enumsR   R   R   t	   escproberR   t   latin1proberR   t   mbcsgroupproberR   t   sbcsgroupproberR   t   objectR   (    (    (    sN   /data/av2000/b2b/venv/lib/python2.7/site-packages/chardet/universaldetector.pyt   <module>$   s   