| Home | Trees | Indices | Help |
|
|---|
|
|
1 import threading
2 import re
3 import urlparse
4 import copy
5 from lxml import etree
6 from lxml.html import defs
7 from lxml import cssselect
8 from lxml.html.setmixin import SetMixin
9 try:
10 from UserDict import DictMixin
11 except ImportError:
12 # DictMixin was introduced in Python 2.4
13 from lxml.html._dictmixin import DictMixin
14 import sets
15
# Public API of the module.  'open_in_browser' used to be listed twice;
# each name now appears exactly once.
__all__ = [
    'document_fromstring', 'fragment_fromstring', 'fragments_fromstring',
    'fromstring', 'tostring', 'Element', 'defs', 'open_in_browser',
    'submit_form', 'find_rel_links', 'find_class', 'make_links_absolute',
    'resolve_base_href', 'iterlinks', 'rewrite_links']
21
22 _rel_links_xpath = etree.XPath("descendant-or-self::a[@rel]")
23 #_class_xpath = etree.XPath(r"descendant-or-self::*[regexp:match(@class, concat('\b', $class_name, '\b'))]", {'regexp': 'http://exslt.org/regular-expressions'})
24 _class_xpath = etree.XPath("descendant-or-self::*[@class and contains(concat(' ', normalize-space(@class), ' '), concat(' ', $class_name, ' '))]")
25 _id_xpath = etree.XPath("descendant-or-self::*[@id=$id]")
26 _collect_string_content = etree.XPath("string()")
27 _css_url_re = re.compile(r'url\((.*?)\)', re.I)
28 _css_import_re = re.compile(r'@import "(.*?)"')
29 _label_xpath = etree.XPath("//label[@for=$id]")
30
32
34 """
35 Returns the base URL, given when the page was parsed.
36
37 Use with ``urlparse.urljoin(el.base_url, href)`` to get
38 absolute URLs.
39 """
40 return self.getroottree().docinfo.URL
41 base_url = property(base_url, doc=base_url.__doc__)
42
48 forms = property(forms, doc=forms.__doc__)
49
51 """
52 Return the <body> element. Can be called from a child element
53 to get the document's head.
54 """
55 return self.xpath('//body')[0]
56 body = property(body, doc=body.__doc__)
57
59 """
60 Returns the <head> element. Can be called from a child
61 element to get the document's head.
62 """
63 return self.xpath('//head')[0]
64 head = property(head, doc=head.__doc__)
65
67 """
68 Get or set any <label> element associated with this element.
69 """
70 id = self.get('id')
71 if not id:
72 return None
73 result = _label_xpath(self, id=id)
74 if not result:
75 return None
76 else:
77 return result[0]
79 id = self.get('id')
80 if not id:
81 raise TypeError(
82 "You cannot set a label for an element (%r) that has no id"
83 % self)
84 if not label.tag == 'label':
85 raise TypeError(
86 "You can only assign label to a label element (not %r)"
87 % label)
88 label.set('for', id)
93 label = property(label__get, label__set, label__del, doc=label__get.__doc__)
94
# NOTE(review): the ``def`` line is missing from this listing; the name and
# signature are reconstructed from the body and from the ``.drop_tree()``
# call sites elsewhere in the file -- confirm against the real module.
def drop_tree(self):
    """
    Removes this element from the tree, including its children and
    text.  The tail text is joined to the previous element or
    parent.

    The element must have a parent (this is asserted).
    """
    parent = self.getparent()
    assert parent is not None
    if self.tail:
        # The tail belongs to the surrounding text flow, so keep it by
        # attaching it to whatever precedes this element.
        previous = self.getprevious()
        if previous is None:
            parent.text = (parent.text or '') + self.tail
        else:
            previous.tail = (previous.tail or '') + self.tail
    parent.remove(self)
110
# NOTE(review): the ``def`` line is missing from this listing; the name and
# signature are reconstructed from the body and the docstring example --
# confirm against the real module.
def drop_tag(self):
    """
    Remove the tag, but not its children or text.  The children and text
    are merged into the parent.

    The element must have a parent (this is asserted).

    Example::

        >>> h = fragment_fromstring('<div>Hello <b>World!</b></div>')
        >>> h.find('.//b').drop_tag()
        >>> print(tostring(h))
        <div>Hello World!</div>
    """
    parent = self.getparent()
    assert parent is not None
    previous = self.getprevious()
    if self.text and isinstance(self.tag, basestring):
        # not a Comment, etc.
        if previous is None:
            parent.text = (parent.text or '') + self.text
        else:
            previous.tail = (previous.tail or '') + self.text
    if self.tail:
        if len(self):
            # The children are about to be spliced into the parent, so
            # the tail must follow the last of them.
            last = self[-1]
            last.tail = (last.tail or '') + self.tail
        elif previous is None:
            parent.text = (parent.text or '') + self.tail
        else:
            previous.tail = (previous.tail or '') + self.tail
    # Replace this element, in place, with its own children.
    index = parent.index(self)
    parent[index:index+1] = self[:]
142
144 """
145 Find any links like ``<a rel="{rel}">...</a>``; returns a list of elements.
146 """
147 rel = rel.lower()
148 return [el for el in _rel_links_xpath(self)
149 if el.get('rel').lower() == rel]
150
152 """
153 Find any elements with the given class name.
154 """
155 return _class_xpath(self, class_name=class_name)
156
# NOTE(review): the ``def`` line is missing from this listing; the signature
# is reconstructed from the body (``default`` is indexed like a tuple of
# optional positional arguments) -- confirm against the real module.
def get_element_by_id(self, id, *default):
    """
    Get the first element in a document with the given id.  If none is
    found, return the default argument if provided or raise KeyError
    otherwise.

    Note that there can be more than one element with the same id,
    and this isn't uncommon in HTML documents found in the wild.
    Browsers return only the first match, and this function does
    the same.
    """
    try:
        # FIXME: should this check for multiple matches?
        # browsers just return the first one
        return _id_xpath(self, id=id)[0]
    except IndexError:
        if default:
            return default[0]
        # Call syntax instead of ``raise KeyError, id`` so the statement
        # is valid on both Python 2 and Python 3.
        raise KeyError(id)
177
179 """
180 Return the text content of the tag (and the text in any children).
181 """
182 return _collect_string_content(self)
183
185 """
186 Run the CSS expression on this element and its children,
187 returning a list of the results.
188
189 Equivalent to lxml.cssselect.CSSSelect(expr)(self) -- note
190 that pre-compiling the expression can provide a substantial
191 speedup.
192 """
193 return cssselect.CSSSelect(expr)(self)
194
195 ########################################
196 ## Link functions
197 ########################################
198
200 """
201 Make all links in the document absolute, given the
202 ``base_url`` for the document (the full URL where the document
203 came from), or if no ``base_url`` is given, then the ``.base_url`` of the document.
204
205 If ``resolve_base_href`` is true, then any ``<base href>``
206 tags in the document are used *and* removed from the document.
207 If it is false then any such tag is ignored.
208 """
209 if base_url is None:
210 base_url = self.base_url
211 if base_url is None:
212 raise TypeError(
213 "No base_url given, and the document has no base_url")
214 if resolve_base_href:
215 self.resolve_base_href()
216 def link_repl(href):
217 return urlparse.urljoin(base_url, href)
218 self.rewrite_links(link_repl)
219
221 """
222 Find any ``<base href>`` tag in the document, and apply its
223 values to all links found in the document. Also remove the
224 tag once it has been applied.
225 """
226 base_href = None
227 basetags = self.xpath('//base[@href]')
228 for b in basetags:
229 base_href = b.get('href')
230 b.drop_tree()
231 if not base_href:
232 return
233 self.make_links_absolute(base_href, resolve_base_href=False)
234
236 """
237 Yield (element, attribute, link, pos), where attribute may be None
238 (indicating the link is in the text). ``pos`` is the position
239 where the link occurs; often 0, but sometimes something else in
240 the case of links in stylesheets or style tags.
241
242 Note: <base href> is *not* taken into account in any way. The
243 link you get is exactly the link in the document.
244 """
245 link_attrs = defs.link_attrs
246 for el in self.getiterator():
247 attribs = el.attrib
248 for attrib in link_attrs:
249 if attrib in attribs:
250 yield (el, attrib, attribs[attrib], 0)
251 if el.tag == 'style' and el.text:
252 for match in _css_url_re.finditer(el.text):
253 yield (el, None, match.group(1), match.start(1))
254 for match in _css_import_re.finditer(el.text):
255 yield (el, None, match.group(1), match.start(1))
256 if 'style' in attribs:
257 for match in _css_url_re.finditer(attribs['style']):
258 yield (el, 'style', match.group(1), match.start(1))
259
262 """
263 Rewrite all the links in the document. For each link
264 ``link_repl_func(link)`` will be called, and the return value
265 will replace the old link.
266
267 Note that links may not be absolute (unless you first called
268 ``make_links_absolute()``), and may be internal (e.g.,
269 ``'#anchor'``). They can also be values like
270 ``'mailto:email'`` or ``'javascript:expr'``.
271
272 If you give ``base_href`` then all links passed to
273 ``link_repl_func()`` will take that into account.
274
275 If the ``link_repl_func`` returns None, the attribute or
276 tag text will be removed completely.
277 """
278 if base_href is not None:
279 # FIXME: this can be done in one pass with a wrapper
280 # around link_repl_func
281 self.make_links_absolute(base_href, resolve_base_href=resolve_base_href)
282 elif resolve_base_href:
283 self.resolve_base_href()
284 for el, attrib, link, pos in self.iterlinks():
285 new_link = link_repl_func(link)
286 if new_link == link:
287 continue
288 if new_link is None:
289 # Remove the attribute or element content
290 if attrib is None:
291 el.text = ''
292 else:
293 del el.attrib[attrib]
294 continue
295 if attrib is None:
296 new = el.text[:pos] + new_link + el.text[pos+len(link):]
297 el.text = new
298 else:
299 cur = el.attrib[attrib]
300 if not pos and len(cur) == len(link):
301 # Most common case
302 el.attrib[attrib] = new_link
303 else:
304 new = cur[:pos] + new_link + cur[pos+len(link):]
305 el.attrib[attrib] = new
306
307
# NOTE(review): the ``class`` and ``def`` lines are missing from this
# listing.  The class name comes from the ``_MethodFunc(...)`` calls right
# below; the ``source_class`` default is presumably the mixin class that
# defines the wrapped methods -- confirm against the real module.
class _MethodFunc(object):
    """
    An object that represents a method on an element as a function;
    the function takes either an element or an HTML string.  It
    returns whatever the function normally returns, or if the function
    works in-place (and so returns None) it returns a serialized form
    of the resulting document.
    """

    def __init__(self, name, copy=False, source_class=HtmlMixin):
        # ``copy`` is the default for the per-call ``copy`` keyword.
        self.name = name
        self.copy = copy
        self.__doc__ = getattr(source_class, self.name).__doc__

    def __call__(self, doc, *args, **kw):
        """
        Call the wrapped method on ``doc``.

        ``doc`` may be an HTML string (it is parsed with ``fromstring``)
        or an element.  For element input the keyword ``copy`` overrides
        the instance default and controls whether the method runs on a
        deep copy; passing ``copy`` with string input raises TypeError.
        """
        if isinstance(doc, basestring):
            if 'copy' in kw:
                raise TypeError(
                    "The keyword 'copy' can only be used with element inputs to %s, not a string input" % self.name)
            return_string = True
            doc = fromstring(doc, **kw)
        else:
            # The flag must not be called ``copy``: that would shadow the
            # ``copy`` module and make ``copy.deepcopy`` fail on a bool.
            if 'copy' in kw:
                make_a_copy = kw.pop('copy')
            else:
                make_a_copy = self.copy
            return_string = False
            if make_a_copy:
                doc = copy.deepcopy(doc)
        meth = getattr(doc, self.name)
        result = meth(*args, **kw)
        # FIXME: this None test is a bit sloppy
        if result is None:
            # Then return what we got in
            if return_string:
                return tostring(doc)
            else:
                return doc
        else:
            return result
346
347 find_rel_links = _MethodFunc('find_rel_links', copy=False)
348 find_class = _MethodFunc('find_class', copy=False)
349 make_links_absolute = _MethodFunc('make_links_absolute', copy=True)
350 resolve_base_href = _MethodFunc('resolve_base_href', copy=True)
351 iterlinks = _MethodFunc('iterlinks', copy=False)
352 rewrite_links = _MethodFunc('rewrite_links', copy=True)
353
356
359
362
365
366
368 """A lookup scheme for HTML Element classes.
369
370 To create a lookup instance with different Element classes, pass a tag
371 name mapping of Element classes in the ``classes`` keyword argument and/or
372 a tag name mapping of Mixin classes in the ``mixins`` keyword argument.
373 The special key '*' denotes a Mixin class that should be mixed into all
374 Element classes.
375 """
376 _default_element_classes = {}
377
# NOTE(review): the ``def`` line is missing from this listing; the signature
# is reconstructed from the body (both arguments are tested for
# None/emptiness) -- confirm against the real module.
def __init__(self, classes=None, mixins=None):
    """
    Build the tag-name -> Element class table used by the lookup.

    ``classes`` maps tag names to Element classes; when omitted a copy
    of ``_default_element_classes`` is used.  ``mixins`` is either a
    mapping or a sequence of ``(tag name, mixin class)`` pairs; the
    name ``'*'`` applies the mixin to every tag present in ``classes``.
    """
    etree.CustomElementClassLookup.__init__(self)
    if classes is None:
        classes = self._default_element_classes.copy()
    if mixins:
        # The class documentation describes ``mixins`` as a mapping, but
        # iterating a dict yields only its keys; accept both forms.
        if hasattr(mixins, 'items'):
            mixins = mixins.items()
        mixers = {}
        for name, value in mixins:
            if name == '*':
                for n in classes.keys():
                    mixers.setdefault(n, []).append(value)
            else:
                mixers.setdefault(name, []).append(value)
        for name, mix_bases in mixers.items():
            # Mixins go first in the bases so they override the element class.
            cur = classes.get(name, HtmlElement)
            bases = tuple(mix_bases + [cur])
            classes[name] = type(cur.__name__, bases, {})
    self._element_classes = classes
395
397 if node_type == 'element':
398 return self._element_classes.get(name.lower(), HtmlElement)
399 elif node_type == 'comment':
400 return HtmlComment
401 elif node_type == 'PI':
402 return HtmlProcessingInstruction
403 elif node_type == 'entity':
404 return HtmlEntity
405 # Otherwise normal lookup
406 return None
407
408 ################################################################################
409 # parsing
410 ################################################################################
411
413 value = etree.HTML(html, html_parser, **kw)
414 if value is None:
415 raise etree.ParserError(
416 "Document is empty")
417 return value
418
420 """
421 Parses several HTML elements, returning a list of elements.
422
423 The first item in the list may be a string (though leading
424 whitespace is removed). If no_leading_text is true, then it will
425 be an error if there is leading text, and it will always be a list
426 of only elements.
427 """
428 # FIXME: check what happens when you give html with a body, head, etc.
429 start = html[:20].lstrip().lower()
430 if not start.startswith('<html') and not start.startswith('<!doctype'):
431 html = '<html><body>%s</body></html>' % html
432 doc = document_fromstring(html, **kw)
433 assert doc.tag == 'html'
434 bodies = [e for e in doc if e.tag == 'body']
435 assert len(bodies) == 1, ("too many bodies: %r in %r" % (bodies, html))
436 body = bodies[0]
437 elements = []
438 if no_leading_text and body.text and body.text.strip():
439 raise etree.ParserError(
440 "There is leading text: %r" % body.text)
441 if body.text and body.text.strip():
442 elements.append(body.text)
443 elements.extend(body)
444 # FIXME: removing the reference to the parent artificial document
445 # would be nice
446 return elements
447
# NOTE(review): the ``def`` line is missing from this listing; the signature
# is reconstructed from the body and docstring, and the ``create_parent``
# default is assumed to be false -- confirm against the real module.
def fragment_fromstring(html, create_parent=False, **kw):
    """
    Parses a single HTML element; it is an error if there is more than
    one element, or if anything but whitespace precedes or follows the
    element.

    If create_parent is true (or is a tag name) then a parent node
    will be created to encapsulate the HTML in a single element.

    Extra keyword arguments are passed on to ``fragments_fromstring``.
    Raises ``etree.ParserError`` if no element, several elements, or
    trailing text is found.
    """
    if create_parent:
        if not isinstance(create_parent, basestring):
            create_parent = 'div'
        return fragment_fromstring('<%s>%s</%s>' % (
            create_parent, html, create_parent), **kw)
    # Forward **kw here too; it used to be dropped on this path while the
    # create_parent path above passed it along.
    elements = fragments_fromstring(html, no_leading_text=True, **kw)
    if not elements:
        raise etree.ParserError(
            "No elements found")
    if len(elements) > 1:
        raise etree.ParserError(
            "Multiple elements found (%s)"
            % ', '.join([_element_name(e) for e in elements]))
    el = elements[0]
    if el.tail and el.tail.strip():
        raise etree.ParserError(
            "Element followed by text: %r" % el.tail)
    el.tail = None
    return el
476
# NOTE(review): the ``def`` line is missing from this listing; the signature
# is reconstructed from the body -- confirm against the real module.
def fromstring(html, **kw):
    """
    Parse the html, returning a single element/document.

    This tries to minimally parse the chunk of text, without knowing if it
    is a fragment or a document.

    Extra keyword arguments are passed on to ``document_fromstring``.
    """
    start = html[:10].lstrip().lower()
    if start.startswith('<html') or start.startswith('<!doctype'):
        # Looks like a full HTML document
        return document_fromstring(html, **kw)
    # otherwise, lets parse it out...
    doc = document_fromstring(html, **kw)
    bodies = doc.findall('body')
    if bodies:
        body = bodies[0]
        if len(bodies) > 1:
            # Somehow there are multiple bodies, which is bad, but just
            # smash them into one body
            for other_body in bodies[1:]:
                if other_body.text:
                    if len(body):
                        body[-1].tail = (body[-1].tail or '') + other_body.text
                    else:
                        body.text = (body.text or '') + other_body.text
                body.extend(other_body)
                # We'll ignore tail
                # I guess we are ignoring attributes too
                other_body.drop_tree()
    else:
        body = None
    heads = doc.findall('head')
    if heads:
        # Well, we have some sort of structure, so lets keep it all
        head = heads[0]
        if len(heads) > 1:
            for other_head in heads[1:]:
                head.extend(other_head)
                # We don't care about text or tail in a head
                other_head.drop_tree()
        return doc
    if body is None:
        # Neither <head> nor <body>: there is nothing to unwrap, and
        # len(None) below would raise TypeError, so return the document.
        return doc
    if (len(body) == 1 and (not body.text or not body.text.strip())
        and (not body[-1].tail or not body[-1].tail.strip())):
        # The body has just one element, so it was probably a single
        # element passed in
        return body[0]
    # Now we have a body which represents a bunch of tags which have the
    # content that was passed in.  We will create a fake container, which
    # is the body tag, except <body> implies too much structure.
    if _contains_block_level_tag(body):
        body.tag = 'div'
    else:
        body.tag = 'span'
    return body
531
533 """
534 Parse a filename, URL, or file-like object into an HTML document.
535
536 You may pass the keyword argument ``base_url='http://...'`` to set
537 the base URL.
538 """
539 if parser is None:
540 parser = html_parser
541 return etree.parse(filename, parser, **kw)
542
544 # FIXME: I could do this with XPath, but would that just be
545 # unnecessarily slow?
546 for el in el.getiterator():
547 if el.tag in defs.block_tags:
548 return True
549 return False
550
552 if isinstance(el, etree.CommentBase):
553 return 'comment'
554 elif isinstance(el, basestring):
555 return 'string'
556 else:
557 return el.tag
558
559 ################################################################################
560 # form handling
561 ################################################################################
562
564 """
565 Represents a <form> element.
566 """
567
569 """
570 Returns an accessor for all the input elements in the form.
571
572 See `InputGetter` for more information about the object.
573 """
574 return InputGetter(self)
575 inputs = property(inputs, doc=inputs.__doc__)
576
578 """
579 Dictionary-like object that represents all the fields in this
580 form. You can set values in this dictionary to effect the
581 form.
582 """
583 return FieldsDict(self.inputs)
585 prev_keys = self.fields.keys()
586 for key, value in value.iteritems():
587 if key in prev_keys:
588 prev_keys.remove(key)
589 self.fields[key] = value
590 for key in prev_keys:
591 if key is None:
592 # Case of an unnamed input; these aren't really
593 # expressed in form_values() anyway.
594 continue
595 self.fields[key] = None
596
597 fields = property(fields__get, fields__set, doc=fields__get.__doc__)
598
600 if self.get('name'):
601 return self.get('name')
602 elif self.get('id'):
603 return '#' + self.get('id')
604 return str(self.body.findall('form').index(self))
605
# NOTE(review): the ``def`` line is missing from this listing; the name and
# signature are reconstructed from the body and the ``form.form_values()``
# call elsewhere in the file -- confirm against the real module.
def form_values(self):
    """
    Return a list of tuples of the field values for the form.
    This is suitable to be passed to ``urllib.urlencode()``.

    Skipped are: unnamed fields, unchecked checkboxes and radios,
    submit/image/reset inputs, and fields whose value is None.  A
    multi-select contributes one tuple per selected value.
    """
    results = []
    for el in self.inputs:
        name = el.name
        if not name:
            continue
        if el.tag == 'textarea':
            results.append((name, el.value))
        elif el.tag == 'select':
            value = el.value
            if el.multiple:
                for v in value:
                    results.append((name, v))
            elif value is not None:
                results.append((name, value))
        else:
            assert el.tag == 'input', (
                "Unexpected tag: %r" % el)
            if el.checkable and not el.checked:
                continue
            if el.type in ('submit', 'image', 'reset'):
                continue
            # Read the value once and reuse it for the test and the result.
            value = el.value
            if value is not None:
                results.append((name, value))
    return results
636
638 """
639 Get/set the form's ``action`` attribute.
640 """
641 base_url = self.base_url
642 action = self.get('action')
643 if base_url and action is not None:
644 return urlparse.urljoin(base_url, action)
645 else:
646 return action
652 action = property(action__get, action__set, action__del, doc=action__get.__doc__)
653
655 """
656 Get/set the form's method. Always returns a capitalized
657 string, and defaults to ``'GET'``
658 """
659 return self.get('method', 'GET').upper()
662 method = property(method__get, method__set, doc=method__get.__doc__)
663
664 HtmlElementClassLookup._default_element_classes['form'] = FormElement
665
# NOTE(review): the ``def`` line is missing from this listing; the signature
# is reconstructed from the body (both optional arguments are tested for
# emptiness/None) -- confirm against the real module.
def submit_form(form, extra_values=None, open_http=None):
    """
    Helper function to submit a form.  Returns a file-like object, as from
    ``urllib.urlopen()``.  This object also has a ``.geturl()`` function,
    which shows the URL if there were any redirects.

    You can use this like::

        >>> form = doc.forms[0]
        >>> form.inputs['foo'].value = 'bar' # etc
        >>> response = submit_form(form)
        >>> doc = parse(response)
        >>> doc.make_links_absolute(response.geturl())

    ``extra_values`` may be a mapping or a sequence of ``(name, value)``
    tuples; its items are appended to the form's own values.

    To change the HTTP requester, pass a function as ``open_http`` keyword
    argument that opens the URL for you.  The function must have the following
    signature::

        open_http(method, URL, values)

    The method is one of 'GET' or 'POST', the URL is the target URL as a
    string, and the values are a sequence of ``(name, value)`` tuples with the
    form data.
    """
    values = form.form_values()
    if extra_values:
        if hasattr(extra_values, 'items'):
            extra_values = extra_values.items()
        values.extend(extra_values)
    if open_http is None:
        open_http = open_http_urllib
    return open_http(form.method, form.action, values)
698
700 import urllib
701 ## FIXME: should test that it's not a relative URL or something
702 if method == 'GET':
703 if '?' in url:
704 url += '&'
705 else:
706 url += '?'
707 url += urllib.urlencode(values)
708 data = None
709 else:
710 data = urllib.urlencode(values)
711 return urllib.urlopen(url, data)
712
733
735
736 """
737 An accessor that represents all the input fields in a form.
738
739 You can get fields by name from this, with
740 ``form.inputs['field_name']``. If there are a set of checkboxes
741 with the same name, they are returned as a list (a `CheckboxGroup`
742 which also allows value setting). Radio inputs are handled
743 similarly.
744
745 You can also iterate over this to get all input elements. This
746 won't return the same thing as if you get all the names, as
747 checkboxes and radio elements are returned individually.
748 """
749
750 _name_xpath = etree.XPath(".//*[@name = $name and (name(.) = 'select' or name(.) = 'input' or name(.) = 'textarea')]")
751 _all_xpath = etree.XPath(".//*[name() = 'select' or name() = 'input' or name() = 'textarea']")
752
755
760
761 ## FIXME: there should be more methods, and it's unclear if this is
762 ## a dictionary-like object or list-like object
763
765 results = self._name_xpath(self.form, name=name)
766 if results:
767 type = results[0].get('type')
768 if type == 'radio' and len(results) > 1:
769 group = RadioGroup(results)
770 group.name = name
771 return group
772 elif type == 'checkbox' and len(results) > 1:
773 group = CheckboxGroup(results)
774 group.name = name
775 return group
776 else:
777 # I don't like throwing away elements like this
778 return results[0]
779 else:
780 raise KeyError(
781 "No input element with the name %r" % name)
782
786
792
794 ## FIXME: kind of dumb to turn a list into an iterator, only
795 ## to have it likely turned back into a list again :(
796 return iter(self._all_xpath(self.form))
797
799
800 """
801 Mix-in for all input elements (input, select, and textarea)
802 """
803
804
815 name = property(name__get, name__set, name__del, doc=name__get.__doc__)
816
825
827 """
828 ``<textarea>`` element. You can get the name with ``.name`` and
829 get/set the value with ``.value``
830 """
831
833 """
834 Get/set the value (which is the contents of this element)
835 """
836 return self.text or ''
840 self.text = ''
841 value = property(value__get, value__set, value__del, doc=value__get.__doc__)
842
843 HtmlElementClassLookup._default_element_classes['textarea'] = TextareaElement
844
846 """
847 ``<select>`` element. You can get the name with ``.name``.
848
849 ``.value`` will be the value of the selected option, unless this
850 is a multi-select element (``<select multiple>``), in which case
851 it will be a set-like object. In either case ``.value_options``
852 gives the possible values.
853
854 The boolean attribute ``.multiple`` shows if this is a
855 multi-select.
856 """
857
859 """
860 Get/set the value of this select (the selected option).
861
862 If this is a multi-select, this is a set-like object that
863 represents all the selected options.
864 """
865 if self.multiple:
866 return MultipleSelectOptions(self)
867 for el in self.getiterator('option'):
868 if 'selected' in el.attrib:
869 value = el.get('value')
870 # FIXME: If value is None, what to return?, get_text()?
871 return value
872 return None
873
875 if self.multiple:
876 if isinstance(value, basestring):
877 raise TypeError(
878 "You must pass in a sequence")
879 self.value.clear()
880 self.value.update(value)
881 return
882 if value is not None:
883 for el in self.getiterator('option'):
884 # FIXME: also if el.get('value') is None?
885 if el.get('value') == value:
886 checked_option = el
887 break
888 else:
889 raise ValueError(
890 "There is no option with the value of %r" % value)
891 for el in self.getiterator('option'):
892 if 'selected' in el.attrib:
893 del el.attrib['selected']
894 if value is not None:
895 checked_option.set('selected', '')
896
898 # FIXME: should del be allowed at all?
899 if self.multiple:
900 self.value.clear()
901 else:
902 self.value = None
903
904 value = property(value__get, value__set, value__del, doc=value__get.__doc__)
905
907 """
908 All the possible values this select can have (the ``value``
909 attribute of all the ``<option>`` elements.
910 """
911 return [el.get('value') for el in self.getiterator('option')]
912 value_options = property(value_options, doc=value_options.__doc__)
913
915 """
916 Boolean attribute: is there a ``multiple`` attribute on this element.
917 """
918 return 'multiple' in self.attrib
920 if value:
921 self.set('multiple', '')
922 elif 'multiple' in self.attrib:
923 del self.attrib['multiple']
924 multiple = property(multiple__get, multiple__set, doc=multiple__get.__doc__)
925
926 HtmlElementClassLookup._default_element_classes['select'] = SelectElement
927
929 """
930 Represents all the selected options in a ``<select multiple>`` element.
931
932 You can add to this set-like option to select an option, or remove
933 to unselect the option.
934 """
935
938
940 """
941 Iterator of all the ``<option>`` elements.
942 """
943 return self.select.getiterator('option')
944 options = property(options)
945
949
# NOTE(review): the ``def`` line is missing from this listing; the name and
# signature are reconstructed from the body and the class docstring ("add
# to this set-like option to select an option") -- confirm.
def add(self, item):
    """
    Select the first option whose ``value`` attribute equals ``item``.

    Raises ValueError if no option has that value.
    """
    for option in self.options:
        if option.get('value') == item:
            option.set('selected', '')
            break
    else:
        # The loop finished without a break: no option matched.
        raise ValueError(
            "There is no option with the value %r" % item)
958
# NOTE(review): the ``def`` line is missing from this listing; the name and
# signature are reconstructed from the body and the class docstring
# ("remove to unselect the option") -- confirm.
def remove(self, item):
    """
    Unselect the first option whose ``value`` attribute equals ``item``.

    Raises ValueError if that option is not currently selected, or if
    no option has that value.
    """
    for option in self.options:
        if option.get('value') == item:
            if 'selected' in option.attrib:
                del option.attrib['selected']
            else:
                raise ValueError(
                    "The option %r is not currently selected" % item)
            break
    else:
        # The loop finished without a break: no option matched.
        raise ValueError(
            "There is no option with the value %r" % item)
971
977
979 """
980 This object represents several ``<input type=radio>`` elements
981 that have the same name.
982
983 You can use this like a list, but also use the property
984 ``.value`` to check/uncheck inputs. Also you can use
985 ``.value_options`` to get the possible values.
986 """
987
989 """
990 Get/set the value, which checks the radio with that value (and
991 unchecks any other value).
992 """
993 for el in self:
994 if 'checked' in el.attrib:
995 return el.get('value')
996 return None
997
999 if value is not None:
1000 for el in self:
1001 if el.get('value') == value:
1002 checked_option = el
1003 break
1004 else:
1005 raise ValueError(
1006 "There is no radio input with the value %r" % value)
1007 for el in self:
1008 if 'checked' in el.attrib:
1009 del el.attrib['checked']
1010 if value is not None:
1011 checked_option.set('checked', '')
1012
1014 self.value = None
1015
1016 value = property(value__get, value__set, value__del, doc=value__get.__doc__)
1017
1019 """
1020 Returns a list of all the possible values.
1021 """
1022 return [el.get('value') for el in self]
1023 value_options = property(value_options, doc=value_options.__doc__)
1024
1029
1031 """
1032 Represents a group of checkboxes (``<input type=checkbox>``) that
1033 have the same name.
1034
1035 In addition to using this like a list, the ``.value`` attribute
1036 returns a set-like object that you can add to or remove from to
1037 check and uncheck checkboxes. You can also use ``.value_options``
1038 to get the possible values.
1039 """
1040
1042 """
1043 Return a set-like object that can be modified to check or
1044 uncheck individual checkboxes according to their value.
1045 """
1046 return CheckboxValues(self)
# NOTE(review): the ``def`` line is missing from this listing; the name and
# signature are reconstructed from the body and the ``property(value__get,
# value__set, ...)`` line that follows -- confirm.
def value__set(self, value):
    """
    Replace the set of checked checkboxes with those named in ``value``.

    ``value`` must be iterable; otherwise ValueError is raised and the
    current selection is left untouched.
    """
    # Validate before clearing, so a bad value cannot leave the group
    # with every checkbox unchecked.
    if not hasattr(value, '__iter__'):
        raise ValueError(
            "A CheckboxGroup (name=%r) must be set to a sequence (not %r)"
            % (self[0].name, value))
    self.value.clear()
    self.value.update(value)
1056 value = property(value__get, value__set, value__del, doc=value__get.__doc__)
1057
1061
1063
1064 """
1065 Represents the values of the checked checkboxes in a group of
1066 checkboxes with the same name.
1067 """
1068
1071
1077
1079 for el in self.group:
1080 if el.get('value') == value:
1081 el.set('checked', '')
1082 break
1083 else:
1084 raise KeyError("No checkbox with value %r" % value)
1085
1087 for el in self.group:
1088 if el.get('value') == value:
1089 if 'checked' in el.attrib:
1090 del el.attrib['checked']
1091 else:
1092 raise KeyError(
1093 "The checkbox with value %r was already unchecked" % value)
1094 break
1095 else:
1096 raise KeyError(
1097 "No checkbox with value %r" % value)
1098
1104
1106 """
1107 Represents an ``<input>`` element.
1108
1109 You can get the type with ``.type`` (which is lower-cased and
1110 defaults to ``'text'``).
1111
1112 Also you can get and set the value with ``.value``
1113
1114 Checkboxes and radios have the attribute ``input.checkable ==
1115 True`` (for all others it is false) and a boolean attribute
1116 ``.checked``.
1117
1118 """
1119
1120 ## FIXME: I'm a little uncomfortable with the use of .checked
1122 """
1123 Get/set the value of this element, using the ``value`` attribute.
1124
1125 Also, if this is a checkbox and it has no value, this defaults
1126 to ``'on'``. If it is a checkbox or radio that is not
1127 checked, this returns None.
1128 """
1129 if self.checkable:
1130 if self.checked:
1131 return self.get('value') or 'on'
1132 else:
1133 return None
1134 return self.get('value')
1136 if self.checkable:
1137 if not value:
1138 self.checked = False
1139 else:
1140 self.checked = True
1141 if isinstance(value, basestring):
1142 self.set('value', value)
1143 else:
1144 self.set('value', value)
1146 if self.checkable:
1147 self.checked = False
1148 else:
1149 if 'value' in self.attrib:
1150 del self.attrib['value']
1151 value = property(value__get, value__set, value__del, doc=value__get.__doc__)
1152
1154 """
1155 Return the type of this element (using the type attribute).
1156 """
1157 return self.get('type', 'text').lower()
1160 type = property(type__get, type__set, doc=type__get.__doc__)
1161
1163 """
1164 Boolean: can this element be checked?
1165 """
1166 return self.type in ['checkbox', 'radio']
1167 checkable = property(checkable__get, doc=checkable__get.__doc__)
1168
1170 """
1171 Boolean attribute to get/set the presence of the ``checked``
1172 attribute.
1173
1174 You can only use this on checkable input types.
1175 """
1176 if not self.checkable:
1177 raise AttributeError('Not a checkable input type')
1178 return 'checked' in self.attrib
1180 if not self.checkable:
1181 raise AttributeError('Not a checkable input type')
1182 if value:
1183 self.set('checked', '')
1184 else:
1185 if 'checked' in self.attrib:
1186 del self.attrib['checked']
1187 checked = property(checked__get, checked__set, doc=checked__get.__doc__)
1188
1189 HtmlElementClassLookup._default_element_classes['input'] = InputElement
1190
1192 """
1193 Represents a ``<label>`` element.
1194
1195 Label elements are linked to other elements with their ``for``
1196 attribute. You can access this element with ``label.for_element``.
1197 """
1198
1200 """
1201 Get/set the element this label points to. Return None if it
1202 can't be found.
1203 """
1204 id = self.get('for')
1205 if not id:
1206 return None
1207 return self.body.get_element_by_id(id)
1209 id = other.get('id')
1210 if not id:
1211 raise TypeError(
1212 "Element %r has no id attribute" % other)
1213 self.set('for', id)
1217 for_element = property(for_element__get, for_element__set, for_element__del,
1218 doc=for_element__get.__doc__)
1219
1220 HtmlElementClassLookup._default_element_classes['label'] = LabelElement
1221
1222 ############################################################
1223 ## Serialization
1224 ############################################################
1225
1226 # This isn't a general match, but it's a match for what libxml2
1227 # specifically serialises:
1228 __replace_meta_content_type = re.compile(
1229 r'<meta http-equiv="Content-Type".*?>').sub
1230
1232 """
1233 return HTML string representation of the document given
1234
1235 note: this will create a meta http-equiv="Content" tag in the head
1236 and may replace any that are present
1237 """
1238 assert doc is not None
1239 html = etree.tostring(doc, method="html", pretty_print=pretty_print)
1240 if not include_meta_content_type:
1241 html = __replace_meta_content_type('', html)
1242 return html
1243
# NOTE(review): the ``def`` line is missing from this listing; the name and
# signature are reconstructed from the body and the ``__all__`` entry --
# confirm against the real module.
def open_in_browser(doc):
    """
    Open the HTML document in a web browser (saving it to a temporary
    file to open it).

    ``doc`` may be anything with a ``write`` method, or an element, in
    which case it is wrapped in an ``etree.ElementTree``.  The temporary
    file is not removed afterwards.
    """
    import os
    import tempfile
    import webbrowser
    try:
        write_doc = doc.write
    except AttributeError:
        write_doc = etree.ElementTree(element=doc).write
    # mkstemp creates the file atomically; os.tempnam() only returned a
    # name, which is race-prone, and it no longer exists in Python 3.
    fd, fn = tempfile.mkstemp(suffix='.html')
    os.close(fd)
    write_doc(fn, method="html")
    url = 'file://' + fn.replace(os.path.sep, '/')
    # Call form prints the same text on Python 2 and Python 3.
    print(url)
    webbrowser.open(url)
1260
1261 ################################################################################
1262 # configure Element class lookup
1263 ################################################################################
1264
1267 super(HTMLParser, self).__init__(**kwargs)
1268 self.setElementClassLookup(HtmlElementClassLookup())
1269
1273
1274 html_parser = HTMLParser()
1275
| Home | Trees | Indices | Help |
|
|---|
| Generated by Epydoc 3.0beta1 on Sun Oct 7 07:08:41 2007 | http://epydoc.sourceforge.net |