mirror of
https://github.com/csd4ni3l/browser.git
synced 2025-11-05 04:57:57 +01:00
350 lines
11 KiB
Python
350 lines
11 KiB
Python
from utils.constants import SELF_CLOSING_TAGS, HEAD_TAGS, INHERITED_PROPERTIES
|
|
import html.entities
|
|
class Element:
    """An element node of the parsed HTML tree.

    Attributes:
        tag: tag name of the element.
        attributes: mapping of attribute name to attribute value.
        children: child nodes, in document order (starts empty).
        parent: the enclosing node, or None for the root.
    """

    def __init__(self, tag, attributes, parent):
        self.tag = tag
        self.attributes = attributes
        self.children = []
        self.parent = parent

    def __repr__(self):
        # f-string formatting keeps repr() from raising when an attribute
        # value is not a str (e.g. a number coming from deserialized data).
        attr_str = "".join(
            f' {key}="{value}"' for key, value in self.attributes.items()
        )
        return f"<{self.tag}{attr_str}>"
|
|
|
|
class Text:
    """A text node of the parsed HTML tree.

    It carries a ``children`` list (always created empty) so tree walkers can
    treat text and element nodes uniformly.
    """

    def __init__(self, text, parent):
        self.parent = parent
        self.children = []
        self.text = text

    def __repr__(self):
        # Delegate to the repr of the wrapped text.
        return f"{self.text!r}"
|
|
|
|
class HTML():
    """Small, forgiving HTML parser that builds a tree of Element/Text nodes.

    Elements that are still open are kept on ``self.unfinished`` (outermost
    first); ``finish`` folds that stack into a single root node.
    """

    def __init__(self, raw_html):
        self.raw_html = raw_html
        self.unfinished = []

        # The root returned by this initial parse is not stored here; call
        # parse() to obtain the tree.
        self.parse()

    def parse(self):
        """Scan ``raw_html`` character by character and return the root node."""
        buffer = []
        in_tag = False
        for c in self.raw_html:
            if c == "<":
                in_tag = True
                # Start of a new tag: everything buffered so far was text.
                if buffer:
                    self.add_text("".join(buffer))
                buffer = []
            elif c == ">" and in_tag:
                in_tag = False
                # End of a tag: everything buffered is the tag's content.
                self.add_tag("".join(buffer))
                buffer = []
            else:
                # Includes a stray ">" outside of a tag, which is plain text.
                buffer.append(c)

        if not in_tag and buffer:
            self.add_text("".join(buffer))

        return self.finish()

    def add_text(self, text):
        """Append a Text node to the innermost open element.

        Whitespace-only runs are dropped.
        """
        if text.isspace():
            return
        self.implicit_tags(None)
        parent = self.unfinished[-1]
        node = Text(text, parent)
        parent.children.append(node)

    @staticmethod
    def _split_tag(text):
        """Split tag text on whitespace, keeping quoted attribute values whole."""
        parts = []
        current = []
        quote = None
        for ch in text:
            if quote:
                current.append(ch)
                if ch == quote:
                    quote = None
            elif ch in "'\"" and current and current[-1] == "=":
                # A quote only opens a value when it directly follows "=".
                quote = ch
                current.append(ch)
            elif ch.isspace():
                if current:
                    parts.append("".join(current))
                    current = []
            else:
                current.append(ch)
        if current:
            parts.append("".join(current))
        return parts

    def get_attributes(self, text):
        """Split the inside of a tag into ``(tag_name, attributes)``.

        The tag name and attribute names are case-folded. Attributes without
        ``=`` map to the empty string. Surrounding quotes are removed from
        values. Empty or whitespace-only input yields ``("", {})``.
        """
        parts = self._split_tag(text)
        if not parts:
            return "", {}

        tag = parts[0].casefold()
        attributes = {}

        for attrpair in parts[1:]:
            key, sep, value = attrpair.partition("=")
            if not sep:
                attributes[attrpair.casefold()] = ""
                continue
            if value[:1] in ("'", '"'):
                if len(value) >= 2 and value.endswith(value[0]):
                    value = value[1:-1]
                else:
                    # Unterminated quote: drop only the opening quote.
                    value = value[1:]
            attributes[key.casefold()] = value

        return tag, attributes

    def add_tag(self, tag):
        """Handle one tag: open an element, close the current one, or add a void element."""
        tag, attributes = self.get_attributes(tag)

        # Empty tags ("<>") and declarations/comments ("<!...") are ignored.
        if not tag or tag.startswith("!"):
            return

        self.implicit_tags(tag)

        if tag.startswith("/"):
            # Never pop the last remaining node here; finish() returns it.
            if len(self.unfinished) == 1:
                return
            node = self.unfinished.pop()
            parent = self.unfinished[-1]
            parent.children.append(node)
        elif tag in SELF_CLOSING_TAGS:
            parent = self.unfinished[-1]
            node = Element(tag, attributes, parent)
            parent.children.append(node)
        else:
            parent = self.unfinished[-1] if self.unfinished else None
            node = Element(tag, attributes, parent)
            self.unfinished.append(node)

    def implicit_tags(self, tag):
        """Insert missing <html>, <head>/<body> and </head> tags before ``tag``.

        ``tag`` is the tag about to be added, or None when text is about to
        be added.
        """
        while True:
            open_tags = [node.tag for node in self.unfinished]
            if open_tags == [] and tag != "html":
                self.add_tag("html")
            elif open_tags == ["html"] and tag not in ["head", "body", "/html"]:
                if tag in HEAD_TAGS:
                    self.add_tag("head")
                else:
                    self.add_tag("body")
            elif open_tags == ["html", "head"] and tag not in ["/head"] + HEAD_TAGS:
                self.add_tag("/head")
            else:
                break

    def finish(self):
        """Close every element that is still open and return the root node."""
        if not self.unfinished:
            self.implicit_tags(None)

        while len(self.unfinished) > 1:
            node = self.unfinished.pop()
            parent = self.unfinished[-1]
            parent.children.append(node)
        return self.unfinished.pop()

    @staticmethod
    def print_tree(node, indent=0):
        """Print ``node`` and its descendants, indenting two spaces per level."""
        print(" " * indent, node)
        for child in node.children:
            HTML.print_tree(child, indent + 2)

    @staticmethod
    def to_json(tree: "Element | Text"):
        """Convert a node tree into nested lists.

        Text nodes become ``["text", text, children]`` and elements become
        ``["element", tag, attributes, children]``.
        """
        if isinstance(tree, Text):
            return ["text", tree.text, [HTML.to_json(child) for child in tree.children]]
        elif isinstance(tree, Element):
            return ["element", tree.tag, tree.attributes, [HTML.to_json(child) for child in tree.children]]

    @staticmethod
    def from_json(json_list, parent=None):
        """Rebuild a node tree from the nested-list form produced by ``to_json``."""
        if json_list[0] == "text":
            text = Text(json_list[1], parent)
            text.children = [HTML.from_json(child, text) for child in json_list[2]]
            return text
        elif json_list[0] == "element":
            element = Element(json_list[1], json_list[2], parent)
            element.children = [HTML.from_json(child, element) for child in json_list[3]]
            return element
|
|
|
|
class TagSelector:
    """CSS selector that matches elements by tag name; its priority is 1."""

    def __init__(self, tag):
        self.priority = 1
        self.tag = tag

    def matches(self, node):
        """Return True when ``node`` is an Element whose tag equals this selector's tag."""
        if not isinstance(node, Element):
            return False
        return self.tag == node.tag
|
|
|
|
class DescendantSelector:
    """CSS descendant selector (``ancestor descendant``).

    Its priority is the sum of the priorities of both parts.
    """

    def __init__(self, ancestor, descendant):
        self.ancestor = ancestor
        self.descendant = descendant
        self.priority = ancestor.priority + descendant.priority

    def matches(self, node):
        """Return True when ``node`` matches the descendant part and some
        node on its parent chain matches the ancestor part."""
        if not self.descendant.matches(node):
            return False

        current = node.parent
        while current:
            if self.ancestor.matches(current):
                return True
            current = current.parent
        return False
|
|
|
|
def cascade_priority(rule):
    """Return the priority of the selector in a ``(selector, body)`` rule."""
    selector, _declarations = rule
    return selector.priority
|
|
|
|
def get_inline_styles(node):
    """Collect the CSS rules of every ``<style>`` element below ``node``.

    The tree is walked depth-first. For each ``style`` element, the text of
    its first child is parsed with ``CSSParser``.

    Returns:
        A list of the rules produced by ``CSSParser.parse``, in document
        order.
    """
    all_rules = []

    for child in node.children:
        if isinstance(child, Element) and child.tag == "style":
            # An empty <style></style> has no children, and the first child
            # is not guaranteed to be a text node; skip both cases instead
            # of raising.
            first = child.children[0] if child.children else None
            if isinstance(first, Text):
                all_rules.extend(CSSParser(first.text).parse())

        all_rules.extend(get_inline_styles(child))

    return all_rules
|
|
|
|
class CSSParser:
    """Hand-written parser for a small CSS subset.

    It understands tag selectors, descendant selectors and
    ``property: value;`` declarations whose value is a single word.
    ``self.s`` is the text being parsed and ``self.i`` the current position.
    Malformed input is skipped rather than reported.
    """

    def __init__(self, s):
        self.s = s
        self.i = 0

    def whitespace(self):
        """Advance past any whitespace at the current position."""
        while self.i < len(self.s) and self.s[self.i].isspace():
            self.i += 1

    def literal(self, literal):
        """Consume the single character ``literal``.

        Raises:
            ValueError: if the next character is not ``literal``.
        """
        if not (self.i < len(self.s) and self.s[self.i] == literal):
            raise ValueError("Parsing error")
        self.i += 1

    def word(self):
        """Consume and return a run of alphanumerics and ``#-.%`` characters.

        Raises:
            ValueError: if no such character is found at the current position.
        """
        start = self.i
        while self.i < len(self.s):
            ch = self.s[self.i]
            if not (ch.isalnum() or ch in "#-.%"):
                break
            self.i += 1
        if self.i == start:
            raise ValueError("Parsing error")
        return self.s[start:self.i]

    def pair(self):
        """Parse ``property: value`` and return ``(case-folded property, value)``."""
        prop = self.word()

        self.whitespace()
        self.literal(":")
        self.whitespace()

        val = self.word()

        return prop.casefold(), val

    def ignore_until(self, chars):
        """Skip forward to the first character contained in ``chars``.

        Returns that character without consuming it, or None when the end
        of the input is reached first.
        """
        while self.i < len(self.s):
            if self.s[self.i] in chars:
                return self.s[self.i]
            self.i += 1
        return None

    def body(self):
        """Parse declarations up to (not including) ``}`` or the end of input.

        A declaration that fails to parse is skipped up to the next ``;``.
        Parsing stops at ``}`` or at the end of input.

        Returns:
            dict mapping property names to values.
        """
        pairs = {}
        while self.i < len(self.s) and self.s[self.i] != "}":
            try:
                prop, val = self.pair()
                # Stored before the ";" check, so a final declaration without
                # a trailing semicolon is still kept.
                pairs[prop] = val

                self.whitespace()
                self.literal(";")
                self.whitespace()
            except ValueError:
                why = self.ignore_until([";", "}"])
                if why == ";":
                    self.literal(";")
                    self.whitespace()
                else:
                    break

        return pairs

    def selector(self):
        """Parse a tag selector, optionally followed by descendant tags."""
        out = TagSelector(self.word().casefold())
        self.whitespace()
        while self.i < len(self.s) and self.s[self.i] != "{":
            tag = self.word()
            descendant = TagSelector(tag.casefold())
            out = DescendantSelector(out, descendant)
            self.whitespace()
        return out

    def parse(self):
        """Parse the whole input and return a list of ``(selector, body)`` rules.

        A rule that fails to parse is skipped up to and including the next
        ``}``.
        """
        rules = []
        while self.i < len(self.s):
            try:
                self.whitespace()
                selector = self.selector()
                self.literal("{")
                self.whitespace()
                body = self.body()
                self.literal("}")
                rules.append((selector, body))
            except ValueError:
                why = self.ignore_until(["}"])
                if why == "}":
                    self.literal("}")
                    self.whitespace()
                else:
                    break
        return rules

    @classmethod
    def convert_selector_to_json(cls, selector):
        """Convert a selector into nested lists.

        Tag selectors become ``["tag", tag, priority]``; descendant selectors
        become ``["descendant", ancestor, descendant]``.
        """
        if isinstance(selector, TagSelector):
            return ["tag", selector.tag, selector.priority]
        elif isinstance(selector, DescendantSelector):
            return [
                "descendant",
                cls.convert_selector_to_json(selector.ancestor),
                cls.convert_selector_to_json(selector.descendant),
            ]

    @classmethod
    def get_selector_from_json(cls, selector_list):
        """Rebuild a selector from the form produced by ``convert_selector_to_json``."""
        if selector_list[0] == "tag":
            selector = TagSelector(selector_list[1])
            selector.priority = selector_list[2]
            return selector
        elif selector_list[0] == "descendant":
            return DescendantSelector(
                cls.get_selector_from_json(selector_list[1]),
                cls.get_selector_from_json(selector_list[2]),
            )

    @classmethod
    def to_json(cls, rules_list: "list[tuple[TagSelector | DescendantSelector, dict[str, str]]]"):
        """Convert ``(selector, body)`` rules into ``[selector_json, body]`` lists."""
        return [[cls.convert_selector_to_json(rule[0]), rule[1]] for rule in rules_list]

    @classmethod
    def from_json(cls, rules_list):
        """Rebuild ``(selector, body)`` rules from the form produced by ``to_json``."""
        return [(cls.get_selector_from_json(rule[0]), rule[1]) for rule in rules_list]
|
|
|
|
def style(node, rules):
    """Compute ``node.style`` for ``node`` and, recursively, its descendants.

    Values are layered in this order, later layers overriding earlier ones:
    inherited properties (from the parent's style, or the defaults in
    ``INHERITED_PROPERTIES`` for the root), then every matching rule in the
    order given in ``rules`` (this function does not sort them), then the
    element's own ``style`` attribute. A percentage ``font-size`` is finally
    resolved against the parent's font size.

    Args:
        node: tree node to style; a ``style`` dict is set on it.
        rules: iterable of ``(selector, body)`` pairs.
    """
    node.style = {}

    for prop, default_value in INHERITED_PROPERTIES.items():
        if node.parent:
            node.style[prop] = node.parent.style[prop]
        else:
            node.style[prop] = default_value

    for selector, body in rules:
        if not selector.matches(node):
            continue
        for prop, value in body.items():
            node.style[prop] = value

    if isinstance(node, Element) and "style" in node.attributes:
        pairs = CSSParser(node.attributes["style"]).body()
        for prop, value in pairs.items():
            node.style[prop] = value

    if node.style["font-size"].endswith("%"):
        if node.parent:
            parent_font_size = node.parent.style["font-size"]
        else:
            parent_font_size = INHERITED_PROPERTIES["font-size"]

        try:
            node_pct = float(node.style["font-size"][:-1]) / 100
            # Strips a two-character unit suffix; assumes the parent size
            # is expressed in px.
            parent_px = float(parent_font_size[:-2])
        except ValueError:
            # Malformed percentage (e.g. "abc%") or a parent size that is
            # not "<number>px": fall back to the parent's value rather than
            # aborting styling of the whole tree.
            node.style["font-size"] = parent_font_size
        else:
            node.style["font-size"] = str(node_pct * parent_px) + "px"

    for child in node.children:
        style(child, rules)
|
|
|
|
def tree_to_list(tree, list):
    """Append ``tree`` and all its descendants to ``list`` in pre-order.

    Returns the same ``list`` object that was passed in.
    """
    pending = [tree]
    while pending:
        current = pending.pop()
        list.append(current)
        # Push children reversed so the first child is visited next.
        pending.extend(reversed(current.children))
    return list
|
|
|
|
def replace_symbols(text):
    """Return ``text`` with HTML character references decoded.

    Named references (``&mdash;``) and numeric references (``&#39;``) are
    converted in a single pass by ``html.unescape``, so already-decoded
    output is never decoded a second time (``&amp;lt;`` becomes ``&lt;``,
    not ``<``).
    """
    return html.unescape(text)