[+] Python analysis

This commit is contained in:
2026-03-13 21:11:33 -04:00
parent b0035728bd
commit 7d83b8a583
2 changed files with 163 additions and 24 deletions
+1
View File
@@ -13,5 +13,6 @@ dependencies = [
"python-telegram-bot>=22.6", "python-telegram-bot>=22.6",
"requests>=2.32.5", "requests>=2.32.5",
"starlette>=0.52.1", "starlette>=0.52.1",
"tqdm>=4.67.3",
"uvicorn>=0.41.0", "uvicorn>=0.41.0",
] ]
+162 -24
View File
@@ -6,20 +6,63 @@ import db
from bot import channel_html from bot import channel_html
def exp1(): def totals():
pop = [] total_channels = db.Channel.select().where(db.Channel.hidden == False).count()
r = re.compile(r"([\d ]+) subscribers") print(f'总频道数量: {total_channels}')
for channel in tqdm(db.Channel.select()):
html = channel_html(channel.username)
m = r.search(html)
pop.append((channel.username, int(m.group(1).replace(" ", "")) if m else 0))
pop.sort(key=lambda x: x[1], reverse=True)
for channel, subscribers in pop:
print(f"{channel} - {subscribers}")
def get_tallest():
    """Print every non-hidden channel that shares the maximum height.

    Looks up the non-hidden channel with the largest ``height`` and then
    prints one line for each non-hidden channel whose height equals that
    maximum, so ties are all reported. Prints nothing when the first query
    yields no row.
    """
    # `== False` is intentional: this builds a query condition rather than
    # testing a Python bool (presumably a peewee field expression; confirm).
    tallest = (
        db.Channel.select()
        .where(db.Channel.hidden == False)  # noqa: E712
        .order_by(db.Channel.height.desc())
        .first()
    )
    if tallest is None:
        # No visible channel at all: without this guard `tallest.height`
        # below would raise AttributeError on an empty table.
        return

    equally_tall = db.Channel.select().where(
        (db.Channel.height == tallest.height) & (db.Channel.hidden == False)  # noqa: E712
    )
    for ch in equally_tall:
        print(f'高度最高: {ch.username} - {ch.height}')
def get_most_subscribed():
    """Classify non-hidden entities by their page text and print the leaders.

    For every non-hidden ``db.Channel`` row the text returned by
    ``channel_html(username)`` is inspected, first match wins:

    * ``"<n> subscribers"`` -> channel, with ``n`` as its size
    * ``"<n> members"``     -> group, with ``n`` as its size
    * ``"Start Bot"`` and a username ending in ``"bot"`` -> bot (size 0)
    * ``"Send Message"``    -> personal account (size 0)

    Rows matching none of these are ignored. Each kept row is stored as
    ``(username, size, db.get_votes(username), name)``. The function then
    prints the top channel by size, by votes, by username length and by
    name length, the group count with the top group by size and by votes,
    and the bot and personal-account counts. A category with no rows simply
    skips its "top" lines instead of failing.
    """
    # Require a leading digit: the previous `[\d ]+` could capture a run of
    # bare spaces, which made int("") raise ValueError.
    r_chan = re.compile(r"(\d[\d ]*) subscribers")
    r_grp = re.compile(r"(\d[\d ]*) members")

    chans = []
    groups = []
    bots = []
    people = []

    # `== False` is intentional: it builds a query condition rather than
    # testing a Python bool (presumably a peewee field expression; confirm).
    for entity in tqdm(db.Channel.select().where(db.Channel.hidden == False)):  # noqa: E712
        html = channel_html(entity.username)
        if m := r_chan.search(html):
            bucket, size = chans, int(m.group(1).replace(" ", ""))
        elif m := r_grp.search(html):
            bucket, size = groups, int(m.group(1).replace(" ", ""))
        elif "Start Bot" in html and entity.username.endswith("bot"):
            bucket, size = bots, 0
        elif "Send Message" in html:
            bucket, size = people, 0
        else:
            # Unrecognised page: skip it without paying for a votes lookup.
            continue
        bucket.append((entity.username, size, db.get_votes(entity.username), entity.name))

    # Guard every "[0]" access: an empty category used to raise IndexError
    # and abort the whole report.
    if chans:
        chans.sort(key=lambda row: row[1], reverse=True)
        top = chans[0]
        print(f'订阅者最多: {top[0]} - {top[1]}')

        chans.sort(key=lambda row: row[2], reverse=True)
        top = chans[0]
        print(f'水最多: {top[0]} - {top[2]}')

        chans.sort(key=lambda row: len(row[0]), reverse=True)
        top = chans[0]
        print(f'最长频道: {top[0]} - {len(top[0])} characters')

        # A missing name is measured as length 0 rather than crashing len()
        # (assumes `name` may be None for some rows; confirm against schema).
        chans.sort(key=lambda row: len(row[3] or ""), reverse=True)
        top = chans[0]
        print(f'最长名字: {top[0]} ({top[3]}) - {len(top[3] or "")} characters')

    print(f'总群数量: {len(groups)}')
    if groups:
        groups.sort(key=lambda row: row[1], reverse=True)
        top = groups[0]
        print(f'群成员最多: {top[0]} - {top[1]}')

        groups.sort(key=lambda row: row[2], reverse=True)
        top = groups[0]
        print(f'群水最多: {top[0]} - {top[2]}')

    print(f'总机器人数量: {len(bots)}')
    print(f'总个人账户数量: {len(people)}')
def leaf_and_non_leaf_count(name):
# Count leaf and nodes in children (leaf is a channel without children) # Count leaf and nodes in children (leaf is a channel without children)
xl = db.channel_info(name) xl = db.channel_info(name)
leaf_count = 0 leaf_count = 0
@@ -32,16 +75,24 @@ def exp2(name):
print(f"Leaf: {leaf_count}, Node: {node_count}") print(f"Leaf: {leaf_count}, Node: {node_count}")
def exp3(): def get_most_leafs():
# Find the channel with the most leafs and the channel with the most non-leafs # Find the channel with the most leafs and the channel with the most non-leafs
most_leafs = None most_leafs = None
most_non_leafs = None most_non_leafs = None
most_leafs_count = 0 most_leafs_count = 0
most_non_leafs_count = 0 most_non_leafs_count = 0
total_leaf_count = 0
total_non_leaf_count = 0
for channel in tqdm(db.Channel.select()): for channel in tqdm(db.Channel.select().where(db.Channel.hidden == False)):
if channel.height == 0: if channel.height == 0:
continue continue
if channel.children:
total_leaf_count += 1
else:
total_non_leaf_count += 1
leaf_count = 0 leaf_count = 0
non_leaf_count = 0 non_leaf_count = 0
for child in channel.children: for child in channel.children:
@@ -58,17 +109,104 @@ def exp3():
most_non_leafs = channel most_non_leafs = channel
most_non_leafs_count = non_leaf_count most_non_leafs_count = non_leaf_count
print(f"Most Leafs: {most_leafs.username} - {most_leafs_count}") print(f"最多树叶: {most_leafs.username}")
print(f"Most Non Leafs: {most_non_leafs.username} - {most_non_leafs_count}") leaf_and_non_leaf_count(most_leafs.username)
print(f"最多树枝: {most_non_leafs.username}")
leaf_and_non_leaf_count(most_non_leafs.username)
print(f"总树叶数量: {total_leaf_count}")
print(f"总树枝数量: {total_non_leaf_count}")
def _bfs_tree(adj, start):
    """Walk ``adj`` breadth-first from ``start``.

    Returns ``(order, parents, dist)``: the nodes in visit order, each
    node's BFS parent (``None`` for ``start``) and its hop count from
    ``start``.
    """
    parents = {start: None}
    dist = {start: 0}
    order = [start]
    head = 0
    # `order` doubles as the FIFO queue; advancing an index keeps each
    # dequeue O(1), unlike list.pop(0).
    while head < len(order):
        curr = order[head]
        head += 1
        for nxt in adj[curr]:
            if nxt not in parents:
                parents[nxt] = curr
                dist[nxt] = dist[curr] + 1
                order.append(nxt)
    return order, parents, dist


def _closeness_scores(adj):
    """Return ``(node, mean hop distance to its reachable nodes)`` pairs."""
    scores = []
    for start in tqdm(adj, desc="Closeness Centrality"):
        order, _, dist = _bfs_tree(adj, start)
        # Isolated nodes reach nobody and have no defined average.
        if len(order) > 1:
            scores.append((start, sum(dist.values()) / (len(order) - 1)))
    return scores


def _count_connected_pairs(adj):
    """Return the number of unordered node pairs that share a component."""
    seen = set()
    pairs = 0
    for start in adj:
        if start in seen:
            continue
        order, _, _ = _bfs_tree(adj, start)
        seen.update(order)
        pairs += len(order) * (len(order) - 1) // 2
    return pairs


def _betweenness_scores(adj):
    """Return ``{node: number of unordered pairs whose BFS-tree path crosses it}``."""
    betweenness = dict.fromkeys(adj, 0)
    for start in tqdm(adj, desc="Betweenness Centrality"):
        order, parents, _ = _bfs_tree(adj, start)
        subtree_size = dict.fromkeys(order, 1)
        # Reverse visit order processes children before their parents, so a
        # node's subtree size is final by the time it is added to its parent.
        for u in reversed(order):
            p = parents[u]
            if p is None:
                continue
            subtree_size[p] += subtree_size[u]
            if p != start:
                # Every node under `u` is reached from `start` through `p`.
                betweenness[p] += subtree_size[u]
    # Each unordered pair was counted once from either endpoint.
    return {u: total // 2 for u, total in betweenness.items()}


def rank_by_centrality(mode="closeness"):
    """Print the ten most central non-hidden channels of the parent/child graph.

    The graph has one node per non-hidden ``db.Channel`` row, keyed by
    ``username``, and an undirected edge between a row and its
    ``parent_id`` whenever that parent is itself a non-hidden node.

    Args:
        mode: ``"closeness"`` ranks by mean hop distance to all reachable
            nodes (ascending, isolated nodes excluded); ``"betweenness"``
            ranks by how many node pairs are connected through the node
            (descending) and also prints that count as a percentage of all
            connected pairs.

    Raises:
        ValueError: if ``mode`` is neither ``"closeness"`` nor
            ``"betweenness"``. Previously an unknown mode silently did
            nothing.
    """
    if mode not in ("closeness", "betweenness"):
        raise ValueError(
            f"unknown centrality mode {mode!r}; expected 'closeness' or 'betweenness'"
        )

    # `== False` is intentional: it builds a query condition rather than
    # testing a Python bool (presumably a peewee field expression; confirm).
    nodes = list(db.Channel.select().where(db.Channel.hidden == False))  # noqa: E712
    adj = {n.username: [] for n in nodes}
    for n in nodes:
        # Parents that are hidden or missing are not nodes, so no edge.
        if n.parent_id and n.parent_id in adj:
            adj[n.username].append(n.parent_id)
            adj[n.parent_id].append(n.username)

    if mode == "closeness":
        # NOTE(review): the average only covers reachable nodes, so members
        # of very small components can outrank hubs of the main component.
        scores = _closeness_scores(adj)
        scores.sort(key=lambda item: item[1])
        print("\n--- Top Closeness Centrality (smaller better) ---")
        for rank, (u, score) in enumerate(scores[:10], start=1):
            print(f"{rank}. {u}: {score:.4f}")
    else:
        # The subtree counting equals true betweenness only when every
        # component is a tree (assumes parent links never form a cycle;
        # not checked here).
        total_paths = _count_connected_pairs(adj)
        scores = sorted(
            _betweenness_scores(adj).items(),
            key=lambda item: item[1],
            reverse=True,
        )
        print("\n--- Top Betweenness Centrality (larger better) ---")
        for rank, (u, score) in enumerate(scores[:10], start=1):
            pct = (score / total_paths * 100) if total_paths > 0 else 0
            print(f"{rank}. {u}: {score} ({pct:.3f}%)")
if __name__ == '__main__': if __name__ == '__main__':
# exp1() totals()
get_tallest()
# exp2("XLDFDZ") get_most_subscribed()
# exp2("Billchenla") get_most_leafs()
exp3()
rank_by_centrality("closeness")
rank_by_centrality("betweenness")