| Home | Trees | Indices | Help |
|
|---|
|
|
1 import threading
2 import re
3 import urlparse
4 import copy
5 from lxml import etree
6 from lxml.html import defs
7 from lxml import cssselect
8 from lxml.html.setmixin import SetMixin
9 try:
10 from UserDict import DictMixin
11 except ImportError:
12 # DictMixin was introduced in Python 2.4
13 from lxml.html._dictmixin import DictMixin
14 import sets
15
16 __all__ = ['document_fromstring', 'tostring', 'Element', 'defs',
17 'find_rel_links', 'find_class', 'make_links_absolute',
18 'resolve_base_href', 'iterlinks', 'rewrite_links', 'open_in_browser']
19
# Pre-compiled XPath expressions and regular expressions shared by the
# element helper methods below.

# <a> elements carrying a ``rel`` attribute, at or below the context node.
_rel_links_xpath = etree.XPath("descendant-or-self::a[@rel]")
# Elements whose whitespace-separated ``class`` list contains $class_name.
# The value is normalised and padded with spaces so only whole tokens match.
#_class_xpath = etree.XPath(r"descendant-or-self::*[regexp:match(@class, concat('\b', $class_name, '\b'))]", {'regexp': 'http://exslt.org/regular-expressions'})
_class_xpath = etree.XPath("descendant-or-self::*[@class and contains(concat(' ', normalize-space(@class), ' '), concat(' ', $class_name, ' '))]")
# Elements at or below the context node whose ``id`` equals $id.
_id_xpath = etree.XPath("descendant-or-self::*[@id=$id]")
# XPath string value of the context node.
_collect_string_content = etree.XPath("string()")
# ``url(...)`` references in CSS (case-insensitive); group 1 is the raw text
# between the parentheses.
_css_url_re = re.compile(r'url\((.*?)\)', re.I)
# ``@import "..."`` in CSS; only the double-quoted form is matched.
_css_import_re = re.compile(r'@import "(.*?)"')
# <label> elements anywhere in the document whose ``for`` equals $id.
_label_xpath = etree.XPath("//label[@for=$id]")

28
30
32 """
33 Returns the base URL, given when the page was parsed.
34
35 Use with ``urlparse.urljoin(el.base_url, href)`` to get
36 absolute URLs.
37 """
38 return self.getroottree().docinfo.URL
39 base_url = property(base_url, doc=base_url.__doc__)
40
46 forms = property(forms, doc=forms.__doc__)
47
49 """
50 Return the <body> element. Can be called from a child element
51 to get the document's body.
52 """
53 return self.xpath('//body')[0]
54 body = property(body, doc=body.__doc__)
55
57 """
58 Returns the <head> element. Can be called from a child
59 element to get the document's head.
60 """
61 return self.xpath('//head')[0]
62 head = property(head, doc=head.__doc__)
63
65 """
66 Get or set any <label> element associated with this element.
67 """
68 id = self.get('id')
69 if not id:
70 return None
71 result = _label_xpath(self, id=id)
72 if not result:
73 return None
74 else:
75 return result[0]
77 id = self.get('id')
78 if not id:
79 raise TypeError(
80 "You cannot set a label for an element (%r) that has no id"
81 % self)
82 if not label.tag == 'label':
83 raise TypeError(
84 "You can only assign label to a label element (not %r)"
85 % label)
86 label.set('for', id)
91 label = property(label__get, label__set, label__del, doc=label__get.__doc__)
92
94 """
95 Removes this element from the tree, including its children and
96 text. The tail text is joined to the previous element or
97 parent.
98 """
99 parent = self.getparent()
100 assert parent is not None
101 if self.tail:
102 previous = self.getprevious()
103 if previous is None:
104 parent.text = (parent.text or '') + self.tail
105 else:
106 previous.tail = (previous.tail or '') + self.tail
107 parent.remove(self)
108
110 """
111 Remove the tag, but not its children or text. The children and text
112 are merged into the parent.
113
114 Example::
115
116 >>> h = fragment_fromstring('<div>Hello <b>World!</b></div>')
117 >>> h.find('//b').drop_tag()
118 >>> print tostring(h)
119 <div>Hello World!</div>
120 """
121 parent = self.getparent()
122 assert parent is not None
123 previous = self.getprevious()
124 if self.text and isinstance(self.tag, basestring):
125 # not a Comment, etc.
126 if previous is None:
127 parent.text = (parent.text or '') + self.text
128 else:
129 previous.tail = (previous.tail or '') + self.text
130 if self.tail:
131 if len(self):
132 last = self[-1]
133 last.tail = (last.tail or '') + self.tail
134 elif previous is None:
135 parent.text = (parent.text or '') + self.tail
136 else:
137 previous.tail = (previous.tail or '') + self.tail
138 index = parent.index(self)
139 parent[index:index+1] = self[:]
140
def find_rel_links(self, rel):
    """
    Return a list of the ``<a rel="{rel}">...</a>`` elements found at or
    below this element.  The ``rel`` value is compared case-insensitively.
    """
    wanted = rel.lower()
    matches = []
    for anchor in _rel_links_xpath(self):
        if anchor.get('rel').lower() == wanted:
            matches.append(anchor)
    return matches
148
150 """
151 Find any elements with the given class name.
152 """
153 return _class_xpath(self, class_name=class_name)
154
def get_element_by_id(self, id, *default):
    """
    Get the first element in a document with the given id.  If none is
    found, return the default argument if provided or raise KeyError
    otherwise.

    Note that there can be more than one element with the same id,
    and this isn't uncommon in HTML documents found in the wild.
    Browsers return only the first match, and this function does
    the same.
    """
    # FIXME: should this check for multiple matches?
    # browsers just return the first one
    matches = _id_xpath(self, id=id)
    if matches:
        return matches[0]
    # ``default`` is the tuple of extra positional arguments, so an
    # explicitly passed ``None`` still counts as a default.
    if default:
        return default[0]
    # Call form of raise: valid on Python 2 and 3 (``raise E, v`` is not).
    raise KeyError(id)
175
177 """
178 Return the text content of the tag (and the text in any children).
179 """
180 return _collect_string_content(self)
181
183 """
184 Run the CSS expression on this element and its children,
185 returning a list of the results.
186
187 Equivalent to lxml.cssselect.CSSSelect(expr)(self) -- note
188 that pre-compiling the expression can provide a substantial
189 speedup.
190 """
191 return cssselect.CSSSelect(expr)(self)
192
193 ########################################
194 ## Link functions
195 ########################################
196
198 """
199 Make all links in the document absolute, given the
200 ``base_url`` for the document (the full URL where the document
201 came from), or if no ``base_url`` is given, then the ``.base_url`` of the document.
202
203 If ``resolve_base_href`` is true, then any ``<base href>``
204 tags in the document are used *and* removed from the document.
205 If it is false then any such tag is ignored.
206 """
207 if base_url is None:
208 base_url = self.base_url
209 if base_url is None:
210 raise TypeError(
211 "No base_url given, and the document has no base_url")
212 if resolve_base_href:
213 self.resolve_base_href()
214 def link_repl(href):
215 return urlparse.urljoin(base_url, href)
216 self.rewrite_links(link_repl)
217
def resolve_base_href(self):
    """
    Apply the document's ``<base href>`` to every link and remove the tag.

    All ``<base href>`` tags are dropped from the tree; if several are
    present, the href of the last one is the one applied.  Nothing else
    happens when no non-empty href is found.
    """
    href = None
    for base_tag in self.xpath('//base[@href]'):
        href = base_tag.get('href')
        base_tag.drop_tree()
    if href:
        # The tags are already gone, so don't try to resolve them again.
        self.make_links_absolute(href, resolve_base_href=False)
232
234 """
235 Yield (element, attribute, link, pos), where attribute may be None
236 (indicating the link is in the text). ``pos`` is the position
237 where the link occurs; often 0, but sometimes something else in
238 the case of links in stylesheets or style tags.
239
240 Note: <base href> is *not* taken into account in any way. The
241 link you get is exactly the link in the document.
242 """
243 link_attrs = defs.link_attrs
244 for el in self.getiterator():
245 attribs = el.attrib
246 for attrib in link_attrs:
247 if attrib in attribs:
248 yield (el, attrib, attribs[attrib], 0)
249 if el.tag == 'style' and el.text:
250 for match in _css_url_re.finditer(el.text):
251 yield (el, None, match.group(1), match.start(1))
252 for match in _css_import_re.finditer(el.text):
253 yield (el, None, match.group(1), match.start(1))
254 if 'style' in attribs:
255 for match in _css_url_re.finditer(attribs['style']):
256 yield (el, 'style', match.group(1), match.start(1))
257
260 """
261 Rewrite all the links in the document. For each link
262 ``link_repl_func(link)`` will be called, and the return value
263 will replace the old link.
264
265 Note that links may not be absolute (unless you first called
266 ``make_links_absolute()``), and may be internal (e.g.,
267 ``'#anchor'``). They can also be values like
268 ``'mailto:email'`` or ``'javascript:expr'``.
269
270 If you give ``base_href`` then all links passed to
271 ``link_repl_func()`` will take that into account.
272
273 If the ``link_repl_func`` returns None, the attribute or
274 tag text will be removed completely.
275 """
276 if base_href is not None:
277 # FIXME: this can be done in one pass with a wrapper
278 # around link_repl_func
279 self.make_links_absolute(base_href, resolve_base_href=resolve_base_href)
280 elif resolve_base_href:
281 self.resolve_base_href()
282 for el, attrib, link, pos in self.iterlinks():
283 new_link = link_repl_func(link)
284 if new_link == link:
285 continue
286 if new_link is None:
287 # Remove the attribute or element content
288 if attrib is None:
289 el.text = ''
290 else:
291 del el.attrib[attrib]
292 continue
293 if attrib is None:
294 new = el.text[:pos] + new_link + el.text[pos+len(link):]
295 el.text = new
296 else:
297 cur = el.attrib[attrib]
298 if not pos and len(cur) == len(link):
299 # Most common case
300 el.attrib[attrib] = new_link
301 else:
302 new = cur[:pos] + new_link + cur[pos+len(link):]
303 el.attrib[attrib] = new
304
305
307 """
308 An object that represents a method on an element as a function;
309 the function takes either an element or an HTML string. It
310 returns whatever the function normally returns, or if the function
311 works in-place (and so returns None) it returns a serialized form
312 of the resulting document.
313 """
315 self.name = name
316 self.copy = copy
317 self.__doc__ = getattr(source_class, self.name).__doc__
def __call__(self, doc, *args, **kw):
    """
    Call the wrapped method on ``doc``.

    ``doc`` may be an HTML string or an element.  A string is parsed with
    ``fromstring()``; the ``copy`` keyword is rejected for string input
    (TypeError).  For an element, the ``copy`` keyword (defaulting to the
    value given at construction) decides whether the method runs on a deep
    copy instead of the element itself.

    Returns the method's result.  If that is None (an in-place method),
    returns the serialized document for string input, or the (possibly
    copied) element otherwise.
    """
    if isinstance(doc, basestring):
        if 'copy' in kw:
            raise TypeError(
                "The keyword 'copy' can only be used with element inputs to %s, not a string input" % self.name)
        return_string = True
        # A freshly parsed document is private to this call; no copy needed.
        doc = fromstring(doc, **kw)
    else:
        return_string = False
        # Named ``make_a_copy`` so the flag does not shadow the ``copy``
        # module, which is needed for deepcopy() just below.
        if 'copy' in kw:
            make_a_copy = kw.pop('copy')
        else:
            make_a_copy = self.copy
        if make_a_copy:
            doc = copy.deepcopy(doc)
    meth = getattr(doc, self.name)
    result = meth(*args, **kw)
    # FIXME: this None test is a bit sloppy
    if result is None:
        # Then return what we got in
        if return_string:
            return tostring(doc)
        else:
            return doc
    else:
        return result
344
345 find_rel_links = _MethodFunc('find_rel_links', copy=False)
346 find_class = _MethodFunc('find_class', copy=False)
347 make_links_absolute = _MethodFunc('make_links_absolute', copy=True)
348 resolve_base_href = _MethodFunc('resolve_base_href', copy=True)
349 iterlinks = _MethodFunc('iterlinks', copy=False)
350 rewrite_links = _MethodFunc('rewrite_links', copy=True)
351
354
357
360
363
364
366 """A lookup scheme for HTML Element classes.
367
368 To create a lookup instance with different Element classes, pass a tag
369 name mapping of Element classes in the ``classes`` keyword argument and/or
370 a tag name mapping of Mixin classes in the ``mixins`` keyword argument.
371 The special key '*' denotes a Mixin class that should be mixed into all
372 Element classes.
373 """
374 _default_element_classes = {}
375
377 etree.CustomElementClassLookup.__init__(self)
378 if classes is None:
379 classes = self._default_element_classes.copy()
380 if mixins:
381 mixers = {}
382 for name, value in mixins:
383 if name == '*':
384 for n in classes.keys():
385 mixers.setdefault(n, []).append(value)
386 else:
387 mixers.setdefault(name, []).append(value)
388 for name, mix_bases in mixers.items():
389 cur = classes.get(name, HtmlElement)
390 bases = tuple(mix_bases + [cur])
391 classes[name] = type(cur.__name__, bases, {})
392 self._element_classes = classes
393
395 if node_type == 'element':
396 return self._element_classes.get(name.lower(), HtmlElement)
397 elif node_type == 'comment':
398 return HtmlComment
399 elif node_type == 'PI':
400 return HtmlProcessingInstruction
401 elif node_type == 'entity':
402 return HtmlEntity
403 # Otherwise normal lookup
404 return None
405
406
407 html_parser = etree.HTMLParser()
408
410 value = etree.HTML(html, html_parser, **kw)
411 if value is None:
412 raise etree.ParserError(
413 "Document is empty")
414 return value
415
417 """
418 Parses several HTML elements, returning a list of elements.
419
420 The first item in the list may be a string (though leading
421 whitespace is removed). If no_leading_text is true, then it will
422 be an error if there is leading text, and it will always be a list
423 of only elements.
424 """
425 # FIXME: check what happens when you give html with a body, head, etc.
426 start = html[:20].lstrip().lower()
427 if not start.startswith('<html') and not start.startswith('<!doctype'):
428 html = '<html><body>%s</body></html>' % html
429 doc = document_fromstring(html, **kw)
430 assert doc.tag == 'html'
431 bodies = [e for e in doc if e.tag == 'body']
432 assert len(bodies) == 1, ("too many bodies: %r in %r" % (bodies, html))
433 body = bodies[0]
434 elements = []
435 if no_leading_text and body.text and body.text.strip():
436 raise etree.ParserError(
437 "There is leading text: %r" % body.text)
438 if body.text and body.text.strip():
439 elements.append(body.text)
440 elements.extend(body)
441 # FIXME: removing the reference to the parent artificial document
442 # would be nice
443 return elements
444
def fragment_fromstring(html, create_parent=False, **kw):
    """
    Parses a single HTML element; it is an error if there is more than
    one element, or if anything but whitespace precedes or follows the
    element.

    If create_parent is true (or is a tag name) then a parent node
    will be created to encapsulate the HTML in a single element.

    Extra keyword arguments are passed on to the parser.  Raises
    ``etree.ParserError`` if no element, several elements, or stray
    text is found.
    """
    if create_parent:
        if not isinstance(create_parent, basestring):
            create_parent = 'div'
        return fragment_fromstring('<%s>%s</%s>' % (
            create_parent, html, create_parent), **kw)
    # Forward **kw here as well, so parser options are honoured on the
    # direct path and not only on the create_parent path.
    elements = fragments_fromstring(html, no_leading_text=True, **kw)
    if not elements:
        raise etree.ParserError(
            "No elements found")
    if len(elements) > 1:
        raise etree.ParserError(
            "Multiple elements found (%s)"
            % ', '.join([_element_name(e) for e in elements]))
    el = elements[0]
    if el.tail and el.tail.strip():
        raise etree.ParserError(
            "Element followed by text: %r" % el.tail)
    # Only whitespace can be left in the tail at this point; discard it.
    el.tail = None
    return el
473
def fromstring(html, **kw):
    """
    Parse the html, returning a single element/document.

    This tries to minimally parse the chunk of text, without knowing if it
    is a fragment or a document.

    Returns the whole document when the input looks like a full page
    (starts with ``<html`` or a doctype), when it has a ``<head>``, or when
    it has neither ``<head>`` nor ``<body>``.  Returns the lone child of
    ``<body>`` when the body holds exactly one element and no significant
    text.  Otherwise returns the body itself, retagged as ``div`` or
    ``span``.  Keyword arguments go to ``document_fromstring()``.
    """
    start = html[:10].lstrip().lower()
    if start.startswith('<html') or start.startswith('<!doctype'):
        # Looks like a full HTML document
        return document_fromstring(html, **kw)
    # otherwise, let's parse it out...
    doc = document_fromstring(html, **kw)
    bodies = doc.findall('body')
    if bodies:
        body = bodies[0]
        if len(bodies) > 1:
            # Somehow there are multiple bodies, which is bad, but just
            # smash them into one body
            for other_body in bodies[1:]:
                if other_body.text:
                    if len(body):
                        body[-1].tail = (body[-1].tail or '') + other_body.text
                    else:
                        body.text = (body.text or '') + other_body.text
                body.extend(other_body)
                # We'll ignore tail
                # I guess we are ignoring attributes too
                other_body.drop_tree()
    else:
        body = None
    heads = doc.findall('head')
    if heads:
        # Well, we have some sort of structure, so let's keep it all
        head = heads[0]
        if len(heads) > 1:
            for other_head in heads[1:]:
                head.extend(other_head)
                # We don't care about text or tail in a head
                other_head.drop_tree()
        return doc
    if body is None:
        # Neither <head> nor <body>: there is nothing to unwrap, so hand
        # back the document instead of failing on len(None) below.
        return doc
    if (len(body) == 1 and (not body.text or not body.text.strip())
        and (not body[-1].tail or not body[-1].tail.strip())):
        # The body has just one element, so it was probably a single
        # element passed in
        return body[0]
    # Now we have a body which represents a bunch of tags which have the
    # content that was passed in.  We will create a fake container, which
    # is the body tag, except <body> implies too much structure.
    if _contains_block_level_tag(body):
        body.tag = 'div'
    else:
        body.tag = 'span'
    return body
528
530 """
531 Parse a filename, URL, or file-like object into an HTML document.
532
533 You may pass the keyword argument ``base_url='http://...'`` to set
534 the base URL.
535 """
536 return etree.parse(filename, html_parser, **kw)
537
539 # FIXME: I could do this with XPath, but would that just be
540 # unnecessarily slow?
541 for el in el.getiterator():
542 if el.tag in defs.block_tags:
543 return True
544 return False
545
547 if isinstance(el, etree.CommentBase):
548 return 'comment'
549 elif isinstance(el, basestring):
550 return 'string'
551 else:
552 return el.tag
553
557
559 """
560 Represents a <form> element.
561 """
562
564 """
565 Returns an accessor for all the input elements in the form.
566
567 See `InputGetter` for more information about the object.
568 """
569 return InputGetter(self)
570 inputs = property(inputs, doc=inputs.__doc__)
571
573 """
574 Dictionary-like object that represents all the fields in this
575 form. You can set values in this dictionary to affect the
576 form.
577 """
578 return FieldsDict(self.inputs)
580 prev_keys = self.fields.keys()
581 for key, value in value.iteritems():
582 if key in prev_keys:
583 prev_keys.remove(key)
584 self.fields[key] = value
585 for key in prev_keys:
586 # FIXME: but right now I don't even allow
587 # deleting, and I'm not sure what it would
588 # mean if I did.
589 del self.fields[key]
590
591 fields = property(fields__get, fields__set, doc=fields__get.__doc__)
592
594 if self.get('name'):
595 return self.get('name')
596 elif self.get('id'):
597 return '#' + self.get('id')
598 return str(self.body.findall('form').index(self))
599
601 """
602 Return a list of tuples of the field values for the form.
603 This is suitable to be passed to ``urllib.urlencode()``.
604 """
605 results = []
606 for el in self.inputs:
607 name = el.name
608 if not name:
609 continue
610 if el.tag == 'textarea':
611 results.append((name, el.value))
612 elif el.tag == 'select':
613 value = el.value
614 if el.multiple:
615 for v in value:
616 results.append((name, v))
617 elif value is not None:
618 results.append((name, el.value))
619 else:
620 assert el.tag == 'input', (
621 "Unexpected tag: %r" % el)
622 if el.checkable and not el.checked:
623 continue
624 if el.type in ('submit', 'image', 'reset'):
625 continue
626 value = el.value
627 if value is not None:
628 results.append((name, el.value))
629 return results
630
632 """
633 Get/set the form's ``action`` attribute.
634 """
635 base_url = self.base_url
636 action = self.get('action')
637 if base_url and action is not None:
638 return urlparse.urljoin(base_url, action)
639 else:
640 return action
646 action = property(action__get, action__set, action__del, doc=action__get.__doc__)
647
649 """
650 Get/set the form's method. Always returns an upper-case
651 string, and defaults to ``'GET'``.
652 """
653 return self.get('method', 'GET').upper()
656 method = property(method__get, method__set, doc=method__get.__doc__)
657
658 HtmlElementClassLookup._default_element_classes['form'] = FormElement
659
661 """
662 Helper function to submit a form. Returns a file-like object, as from
663 ``urllib.urlopen()``. This object also has a ``.geturl()`` function,
664 which shows the URL if there were any redirects.
665
666 You can use this like::
667
668 >>> form = doc.forms[0]
669 >>> form.inputs['foo'].value = 'bar' # etc
670 >>> response = form.submit()
671 >>> doc = parse(response)
672 >>> doc.make_links_absolute(response.geturl())
673
674 To change the HTTP requester, pass a function as ``open_http`` keyword
675 argument that opens the URL for you. The function must have the following
676 signature::
677
678 open_http(method, URL, values)
679
680 The method is one of 'GET' or 'POST', the URL is the target URL as a
681 string, and the values are a sequence of ``(name, value)`` tuples with the
682 form data.
683 """
684 values = form.form_values()
685 if extra_values:
686 if hasattr(extra_values, 'items'):
687 extra_values = extra_values.items()
688 values.extend(extra_values)
689 if open_http is None:
690 open_http = open_http_urllib
691 return open_http(form.method, form.action, values)
692
def open_http_urllib(method, url, values):
    """
    Default HTTP opener for form submission, built on ``urllib``.

    For ``'GET'`` the url-encoded ``values`` are appended to ``url`` as a
    query string (after ``&`` if the URL already contains ``?``, otherwise
    after ``?``).  For any other method they are sent as the request body.

    Returns whatever ``urllib.urlopen()`` returns.  Raises ValueError if
    ``url`` is empty or None.
    """
    if not url:
        # Fail clearly instead of with a TypeError from ``'?' in None``.
        raise ValueError("cannot submit, no URL provided")
    import urllib
    ## FIXME: should test that it's not a relative URL or something
    if method == 'GET':
        if '?' in url:
            url += '&'
        else:
            url += '?'
        url += urllib.urlencode(values)
        data = None
    else:
        data = urllib.urlencode(values)
    return urllib.urlopen(url, data)
706
727
729
730 """
731 An accessor that represents all the input fields in a form.
732
733 You can get fields by name from this, with
734 ``form.inputs['field_name']``. If there are a set of checkboxes
735 with the same name, they are returned as a list (a `CheckboxGroup`
736 which also allows value setting). Radio inputs are handled
737 similarly.
738
739 You can also iterate over this to get all input elements. This
740 won't return the same thing as if you get all the names, as
741 checkboxes and radio elements are returned individually.
742 """
743
744 _name_xpath = etree.XPath(".//*[@name = $name and (name(.) = 'select' or name(.) = 'input' or name(.) = 'textarea')]")
745 _all_xpath = etree.XPath(".//*[name() = 'select' or name() = 'input' or name() = 'textarea']")
746
749
754
755 ## FIXME: there should be more methods, and it's unclear if this is
756 ## a dictionary-like object or list-like object
757
759 results = self._name_xpath(self.form, name=name)
760 if results:
761 type = results[0].get('type')
762 if type == 'radio' and len(results) > 1:
763 group = RadioGroup(results)
764 group.name = name
765 return group
766 elif type == 'checkbox' and len(results) > 1:
767 group = CheckboxGroup(results)
768 group.name = name
769 return group
770 else:
771 # I don't like throwing away elements like this
772 return results[0]
773 else:
774 raise KeyError(
775 "No input element with the name %r" % name)
776
780
786
788 ## FIXME: kind of dumb to turn a list into an iterator, only
789 ## to have it likely turned back into a list again :(
790 return iter(self._all_xpath(self.form))
791
793
794 """
795 Mix-in for all input elements (input, select, and textarea)
796 """
797
798
809 name = property(name__get, name__set, name__del, doc=name__get.__doc__)
810
819
821 """
822 ``<textarea>`` element. You can get the name with ``.name`` and
823 get/set the value with ``.value``
824 """
825
827 """
828 Get/set the value (which is the contents of this element)
829 """
830 return self.text or ''
834 self.text = ''
835 value = property(value__get, value__set, value__del, doc=value__get.__doc__)
836
837 HtmlElementClassLookup._default_element_classes['textarea'] = TextareaElement
838
840 """
841 ``<select>`` element. You can get the name with ``.name``.
842
843 ``.value`` will be the value of the selected option, unless this
844 is a multi-select element (``<select multiple>``), in which case
845 it will be a set-like object. In either case ``.value_options``
846 gives the possible values.
847
848 The boolean attribute ``.multiple`` shows if this is a
849 multi-select.
850 """
851
853 """
854 Get/set the value of this select (the selected option).
855
856 If this is a multi-select, this is a set-like object that
857 represents all the selected options.
858 """
859 if self.multiple:
860 return MultipleSelectOptions(self)
861 for el in self.getiterator('option'):
862 if 'selected' in el.attrib:
863 value = el.get('value')
864 # FIXME: If value is None, what to return?, get_text()?
865 return value
866 return None
867
869 if self.multiple:
870 if isinstance(value, basestring):
871 raise TypeError(
872 "You must pass in a sequence")
873 self.value.clear()
874 self.value.update(value)
875 return
876 if value is not None:
877 for el in self.getiterator('option'):
878 # FIXME: also if el.get('value') is None?
879 if el.get('value') == value:
880 checked_option = el
881 break
882 else:
883 raise ValueError(
884 "There is no option with the value of %r" % value)
885 for el in self.getiterator('option'):
886 if 'selected' in el.attrib:
887 del el.attrib['selected']
888 if value is not None:
889 checked_option.set('selected', '')
890
892 # FIXME: should del be allowed at all?
893 if self.multiple:
894 self.value.clear()
895 else:
896 self.value = None
897
898 value = property(value__get, value__set, value__del, doc=value__get.__doc__)
899
901 """
902 All the possible values this select can have (the ``value``
903 attribute of all the ``<option>`` elements).
904 """
905 return [el.get('value') for el in self.getiterator('option')]
906 value_options = property(value_options, doc=value_options.__doc__)
907
909 """
910 Boolean attribute: is there a ``multiple`` attribute on this element.
911 """
912 return 'multiple' in self.attrib
914 if value:
915 self.set('multiple', '')
916 elif 'multiple' in self.attrib:
917 del self.attrib['multiple']
918 multiple = property(multiple__get, multiple__set, doc=multiple__get.__doc__)
919
920 HtmlElementClassLookup._default_element_classes['select'] = SelectElement
921
923 """
924 Represents all the selected options in a ``<select multiple>`` element.
925
926 You can add to this set-like object to select an option, or remove
927 to unselect the option.
928 """
929
932
934 """
935 Iterator of all the ``<option>`` elements.
936 """
937 return self.select.getiterator('option')
938 options = property(options)
939
943
945 for option in self.options:
946 if option.get('value') == item:
947 option.set('selected', '')
948 break
949 else:
950 raise ValueError(
951 "There is no option with the value %r" % item)
952
954 for option in self.options:
955 if option.get('value') == item:
956 if 'selected' in option.attrib:
957 del option.attrib['selected']
958 else:
959 raise ValueError(
960 "The option %r is not currently selected" % item)
961 break
962 else:
963 raise ValueError(
964 "There is not option with the value %r" % item)
965
971
973 """
974 This object represents several ``<input type=radio>`` elements
975 that have the same name.
976
977 You can use this like a list, but also use the property
978 ``.value`` to check/uncheck inputs. Also you can use
979 ``.value_options`` to get the possible values.
980 """
981
983 """
984 Get/set the value, which checks the radio with that value (and
985 unchecks any other value).
986 """
987 for el in self:
988 if 'checked' in el.attrib:
989 return el.get('value')
990 return None
991
993 if value is not None:
994 for el in self:
995 if el.get('value') == value:
996 checked_option = el
997 break
998 else:
999 raise ValueError(
1000 "There is no radio input with the value %r" % value)
1001 for el in self:
1002 if 'checked' in el.attrib:
1003 del el.attrib['checked']
1004 if value is not None:
1005 checked_option.set('checked', '')
1006
1008 self.value = None
1009
1010 value = property(value__get, value__set, value__del, doc=value__get.__doc__)
1011
1013 """
1014 Returns a list of all the possible values.
1015 """
1016 return [el.get('value') for el in self]
1017 value_options = property(value_options, doc=value_options.__doc__)
1018
1023
1025 """
1026 Represents a group of checkboxes (``<input type=checkbox>``) that
1027 have the same name.
1028
1029 In addition to using this like a list, the ``.value`` attribute
1030 returns a set-like object that you can add to or remove from to
1031 check and uncheck checkboxes. You can also use ``.value_options``
1032 to get the possible values.
1033 """
1034
1036 """
1037 Return a set-like object that can be modified to check or
1038 uncheck individual checkboxes according to their value.
1039 """
1040 return CheckboxValues(self)
1046 value = property(value__get, value__set, value__del, doc=value__get.__doc__)
1047
1051
1053
1054 """
1055 Represents the values of the checked checkboxes in a group of
1056 checkboxes with the same name.
1057 """
1058
1061
1067
1069 for el in self.group:
1070 if el.get('value') == value:
1071 el.set('checked', '')
1072 break
1073 else:
1074 raise KeyError("No checkbox with value %r" % value)
1075
1077 for el in self.group:
1078 if el.get('value') == value:
1079 if 'checked' in el.attrib:
1080 del el.attrib['checked']
1081 else:
1082 raise KeyError(
1083 "The checkbox with value %r was already unchecked" % value)
1084 break
1085 else:
1086 raise KeyError(
1087 "No checkbox with value %r" % value)
1088
1094
1096 """
1097 Represents an ``<input>`` element.
1098
1099 You can get the type with ``.type`` (which is lower-cased and
1100 defaults to ``'text'``).
1101
1102 Also you can get and set the value with ``.value``
1103
1104 Checkboxes and radios have the attribute ``input.checkable ==
1105 True`` (for all others it is false) and a boolean attribute
1106 ``.checked``.
1107
1108 """
1109
1110 ## FIXME: I'm a little uncomfortable with the use of .checked
1112 """
1113 Get/set the value of this element, using the ``value`` attribute.
1114
1115 Also, if this is a checkbox and it has no value, this defaults
1116 to ``'on'``. If it is a checkbox or radio that is not
1117 checked, this returns None.
1118 """
1119 if self.checkable:
1120 if self.checked:
1121 return self.get('value') or 'on'
1122 else:
1123 return None
1124 return self.get('value')
1126 if self.checkable:
1127 if not value:
1128 self.checked = False
1129 else:
1130 self.checked = True
1131 if isinstance(value, basestring):
1132 self.set('value', value)
1133 else:
1134 self.set('value', value)
1136 if self.checkable:
1137 self.checked = False
1138 else:
1139 if 'value' in self.attrib:
1140 del self.attrib['value']
1141 value = property(value__get, value__set, value__del, doc=value__get.__doc__)
1142
1144 """
1145 Return the type of this element (using the type attribute).
1146 """
1147 return self.get('type', 'text').lower()
1150 type = property(type__get, type__set, doc=type__get.__doc__)
1151
1153 """
1154 Boolean: can this element be checked?
1155 """
1156 return self.type in ['checkbox', 'radio']
1157 checkable = property(checkable__get, doc=checkable__get.__doc__)
1158
1160 """
1161 Boolean attribute to get/set the presence of the ``checked``
1162 attribute.
1163
1164 You can only use this on checkable input types.
1165 """
1166 if not self.checkable:
1167 raise AttributeError('Not a checkable input type')
1168 return 'checked' in self.attrib
1170 if not self.checkable:
1171 raise AttributeError('Not a checkable input type')
1172 if value:
1173 self.set('checked', '')
1174 else:
1175 if 'checked' in self.attrib:
1176 del self.attrib['checked']
1177 checked = property(checked__get, checked__set, doc=checked__get.__doc__)
1178
1179 HtmlElementClassLookup._default_element_classes['input'] = InputElement
1180
1182 """
1183 Represents a ``<label>`` element.
1184
1185 Label elements are linked to other elements with their ``for``
1186 attribute. You can access this element with ``label.for_element``.
1187 """
1188
1190 """
1191 Get/set the element this label points to. Return None if it
1192 can't be found.
1193 """
1194 id = self.get('for')
1195 if not id:
1196 return None
1197 return self.body.get_element_by_id(id)
1199 id = other.get('id')
1200 if not id:
1201 raise TypeError(
1202 "Element %r has no id attribute" % other)
1203 self.set('for', id)
1207 for_element = property(for_element__get, for_element__set, for_element__del,
1208 doc=for_element__get.__doc__)
1209
1210 HtmlElementClassLookup._default_element_classes['label'] = LabelElement
1211
1212 ############################################################
1213 ## Serialization
1214 ############################################################
1215
1216 _html_xsl = """\
1217 <xsl:transform xmlns:xsl="http://www.w3.org/1999/XSL/Transform">
1218 <xsl:output method="html" encoding="UTF-8" />
1219 <xsl:template match="/">
1220 <xsl:copy-of select="."/>
1221 </xsl:template>
1222 </xsl:transform>
1223 """
1224
1225 _pretty_html_xsl = """\
1226 <xsl:transform xmlns:xsl="http://www.w3.org/1999/XSL/Transform">
1227 <xsl:output method="html" encoding="UTF-8" indent="yes" />
1228 <xsl:template match="/">
1229 <xsl:copy-of select="."/>
1230 </xsl:template>
1231 </xsl:transform>
1232 """
1233
1234 _local_transforms = threading.local()
1235 # FIXME: should we just lazily compile these?
1236 _local_transforms.html_transform = etree.XSLT(etree.XML(_html_xsl))
1237 _local_transforms.pretty_html_transform = etree.XSLT(etree.XML(_pretty_html_xsl))
1238
1239 # This isn't a general match, but it's a match for what XSLT specifically creates:
1240 __replace_meta_content_type = re.compile(
1241 r'<meta http-equiv="Content-Type".*?>').sub
1242
1244 """
1245 return HTML string representation of the document given
1246
1247 note: this will create a meta http-equiv="Content-Type" tag in the head
1248 and may replace any that are present
1249 """
1250 assert doc is not None
1251 if pretty:
1252 try:
1253 pretty_html_transform = _local_transforms.pretty_html_transform
1254 except AttributeError:
1255 pretty_html_transform = _local_transforms.pretty_html_transform = etree.XSLT(etree.XML(_pretty_html_xsl))
1256 html = str(pretty_html_transform(doc))
1257 else:
1258 try:
1259 html_transform = _local_transforms.html_transform
1260 except AttributeError:
1261 html_transform = _local_transforms.html_transform = etree.XSLT(etree.XML(_html_xsl))
1262 html = str(html_transform(doc))
1263 if not include_meta_content_type:
1264 html = __replace_meta_content_type('', html)
1265 return html
1266
def open_in_browser(doc):
    """
    Open the HTML document in a web browser (saving it to a temporary
    file to open it).

    The serialized document, including its Content-Type meta tag, is
    written to a new ``.html`` temporary file.  The file's ``file://`` URL
    is printed and handed to ``webbrowser.open()``.  The temporary file is
    not deleted afterwards.
    """
    import os
    import tempfile
    import webbrowser
    # mkstemp() creates the file atomically; os.tempnam() only returned a
    # name, leaving a window for another process to claim it.
    handle, fn = tempfile.mkstemp(suffix='.html')
    f = os.fdopen(handle, 'wb')
    try:
        f.write(tostring(doc, include_meta_content_type=True))
    finally:
        # Close the handle even if serialization or the write fails.
        f.close()
    url = 'file://' + fn.replace(os.path.sep, '/')
    print(url)
    webbrowser.open(url)
1281
1282 ################################################################################
1283 # configure Element class lookup
1284
1285 html_parser.setElementClassLookup(HtmlElementClassLookup())
1286
| Home | Trees | Indices | Help |
|
|---|
| Generated by Epydoc 3.0beta1 on Sun Sep 16 00:12:56 2007 | http://epydoc.sourceforge.net |