I was recently helping a friend look into a group of sites built on a CMS and found that the CMS had no sitemap feature. Since Python 3 was already installed on the server, I put together a quick script to generate one; I'm recording it here.

Script content:

#!/usr/bin/env python

import datetime
import mysql.connector
import xml.etree.cElementTree as ET
from lxml import etree

# MySQL connection settings for the sitemap job (read-only account).
config = {
    'host': '10.80.0.3',          # For example: '192.168.1.100'
    'user': 'wnote_r',
    'password': 'Wnote#Pss2024',  # NOTE(review): plaintext credential in source — consider an env var
    'database': 'wnote',
    'raise_on_warnings': True,
}

# Web root; each site's sitemap is written under sdir + <domain> + '/'.
sdir = '/opt/wwwroot/'

def mselect(site, n1, n2, out_path=None):
    """Build a sitemap XML for *site* and write it to disk.

    Fetches the newest *n1* article ids and *n2* book ids from the
    database, turns them into /article/<id>.html and /tags/<id>.html
    URLs, and writes a sitemaps.org-style <urlset> document.

    Args:
        site: domain name used to build the absolute URLs.
        n1: number of article rows to include (SQL LIMIT value).
        n2: number of book/tag rows to include (SQL LIMIT value).
        out_path: destination file path; defaults to the module-level
            ``spfile`` set by the driver loop, which keeps the original
            global-based call ``mselect(site, n1, n2)`` working.
    """
    # xml.etree.cElementTree (imported at the top of this file) was
    # removed in Python 3.9; import the plain ElementTree module locally
    # so this function also runs on modern interpreters.
    from xml.etree import ElementTree as ET

    if out_path is None:
        out_path = spfile  # legacy: global assigned by the driver loop

    # Connect to the database.
    cnx = mysql.connector.connect(**config)
    try:
        cursor = cnx.cursor()

        # Parameterized LIMIT values: the originals interpolated strings
        # from sites.list straight into the SQL via str.format.
        cursor.execute(
            "select id from article order by newstime desc limit %s",
            (int(n1),))
        article_ids = [row[0] for row in cursor]

        cursor.execute(
            "select id from phome_ecms_book order by newstime desc limit %s",
            (int(n2),))
        tag_ids = [row[0] for row in cursor]

        cursor.close()
    finally:
        # Always release the connection, even if a query fails.
        cnx.close()

    # Full URL list: article pages first, then tag pages.
    urls = [f"https://{site}/article/{i}.html" for i in article_ids]
    urls += [f"https://{site}/tags/{i}.html" for i in tag_ids]

    # Root <urlset> carrying the sitemaps.org and Baidu mobile namespaces.
    root = ET.Element('urlset', {
        'xmlns': 'http://www.sitemaps.org/schemas/sitemap/0.9',
        'xmlns:xsi': 'http://www.w3.org/2001/XMLSchema-instance',
        'xmlns:mobile': 'http://www.baidu.com/schemas/sitemap-mobile/1/',
        'xsi:schemaLocation': 'http://www.sitemaps.org/schemas/sitemap/0.9 http://www.sitemaps.org/schemas/sitemap/0.9/sitemap.xsd'})

    # BUG FIX: the original called datetime.now() on the *module* object
    # (the file does `import datetime`), which raises AttributeError;
    # datetime.datetime.now() is correct. Hoisted out of the loop since
    # one timestamp per run is enough.
    lastmod = datetime.datetime.now().strftime('%Y-%m-%dT%H:%M:%S+08:00')

    # One <url> entry per page.
    for url in urls:
        s_url = ET.SubElement(root, 'url')
        ET.SubElement(s_url, 'loc').text = url
        ET.SubElement(s_url, 'lastmod').text = lastmod
        ET.SubElement(s_url, 'changefreq').text = 'always'
        ET.SubElement(s_url, 'priority').text = '0.95'

    # Serialize the tree and save it to the sitemap file.
    with open(out_path, "w", encoding="utf-8") as f:
        f.write(ET.tostring(root, encoding='unicode'))

# Driver: read one site definition per line from sites.list, format:
#   <domain> <sitemap filename> <article limit> <tag limit>
# and generate a sitemap for each.
with open('sites.list', 'r') as files:
    for tmp in files:
        # Split once instead of re-splitting the line four times.
        fields = tmp.split()
        if len(fields) < 4:
            continue  # skip blank or malformed lines instead of crashing
        site, fname, n1, n2 = fields[:4]
        spfile = sdir + site + '/' + fname
        print(site, spfile)
        mselect(site, n1, n2)

The content of sites.list is (one site per line: domain, sitemap filename, number of articles, number of tags — the script reads all four fields):

www.xxxxx.com sitemap.xml 100 120
www.xxxxx.com sitemap2.xml 200 300

Summary

In fact, Python scripts are more convenient to write. If you have performance requirements, it is better to use Golang.