2
0
mirror of https://github.com/offen/website.git synced 2025-01-12 13:20:20 +01:00

update robots.txt, sm links, sitemap plugin

This commit is contained in:
Hendrik Niefeld 2019-10-19 20:46:44 +02:00
parent eceb284302
commit 3d06e78368
8 changed files with 389 additions and 12 deletions
build
homepage
content/pages
pelicanconf.py
plugins/sitemap
theme
static/css
templates

View File

@ -1,3 +1,5 @@
User-agent: *
Disallow: /vault/
Disallow: /auditorium/
Disallow: /legal-notice/
Disallow: /404.html

View File

@ -24,6 +24,4 @@ We are happy to work with [NLnet Foundation][nlnet-foundation]{: target="_blank"
*Feel free to contact us with any kind of feedback.* From criticism and praise to contributions or support, everything is welcome. Get in touch.
[hioffen@posteo.de][hioffen@posteo.de]
[hioffen@posteo.de]: mailto:hioffen@posteo.de
[hioffen@posteo.de](mailto:hioffen@posteo.de) [[GPG Key]](/theme/74B041E23DB29D552644CEB1B18C633D6967FE3F.asc){: target="_blank"}

View File

@ -26,9 +26,6 @@ THEME = './theme'
# Delete the output directory before generating new files.
DELETE_OUTPUT_DIRECTORY = True
PLUGIN_PATHS = ['./plugins']
PLUGINS = ['assets']
# don't create the following standard pages
AUTHORS_SAVE_AS = None
ARCHIVES_SAVE_AS = None
@ -38,9 +35,29 @@ TAGS_SAVE_AS = None
# keep this for access to page variable
DIRECT_TEMPLATES = []
PLUGIN_PATHS = ['./plugins']
PLUGINS = ['assets','sitemap']
# Sitemap plugin configuration: write an XML sitemap in which index pages get
# the highest priority (1) and articles and pages get 0.5. The change
# frequencies are hints for search engines.
SITEMAP = {
'format': 'xml',
'priorities': {
'indexes': 1,
'articles': 0.5,
'pages': 0.5
},
'changefreqs': {
'articles': 'monthly',
'indexes': 'daily',
'pages': 'monthly'
}
}
GITHUB_ORG = 'https://github.com/offen'
CONTACT_EMAIL = 'hioffen@posteo.de'
GPG_KEY_FILE = '74B041E23DB29D552644CEB1B18C633D6967FE3F.asc'
PATREON_URL = 'https://www.patreon.com/bePatron?u=21484999'
PATREON_URL = 'https://www.patreon.com/offen'
LINKEDIN_URL = 'https://www.linkedin.com/company/hioffen'
TWITTER_URL = 'https://twitter.com/hioffen'
OFFEN_ACCOUNT_ID = '9b63c4d8-65c0-438c-9d30-cc4b01173393'

View File

@ -0,0 +1,79 @@
Sitemap
-------
This plugin generates plain-text or XML sitemaps. You can use the ``SITEMAP``
variable in your settings file to configure the behavior of the plugin.
The ``SITEMAP`` variable must be a Python dictionary and can contain these keys:
- ``format``, which sets the output format of the plugin (``xml`` or ``txt``)
- ``priorities``, which is a dictionary with three keys:
- ``articles``, the priority for the URLs of the articles and their
translations
- ``pages``, the priority for the URLs of the static pages
- ``indexes``, the priority for the URLs of the index pages, such as tags,
author pages, categories indexes, archives, etc...
All the values of this dictionary must be decimal numbers between ``0`` and ``1``.
- ``changefreqs``, which is a dictionary with three items:
- ``articles``, the update frequency of the articles
- ``pages``, the update frequency of the pages
- ``indexes``, the update frequency of the index pages
Valid frequency values are ``always``, ``hourly``, ``daily``, ``weekly``, ``monthly``,
``yearly`` and ``never``.
You can exclude URLs from the sitemap via regular expressions. Each pattern is
matched against the beginning of the page URL (``re.match``). For example, to
exclude all URLs starting with ``tag/`` or ``category/`` you can use the
following ``SITEMAP`` setting.
.. code-block:: python
SITEMAP = {
'exclude': ['tag/', 'category/']
}
If a key is missing or a value is incorrect, it will be replaced with the
default value.
You can also exclude an individual page by setting ``private`` to ``True``
in its metadata.
The sitemap is saved in ``<output_path>/sitemap.<format>``.
.. note::
``priorities`` and ``changefreqs`` are information for search engines.
They are only used in the XML sitemaps.
For more information: <http://www.sitemaps.org/protocol.html#xmlTagDefinitions>
**Example**
Here is an example configuration (these are also the default settings):
.. code-block:: python
# Where your plug-ins reside
PLUGIN_PATHS = ['/where/you/cloned/it/pelican-plugins/', ]
PLUGINS=['sitemap',]
SITEMAP = {
'format': 'xml',
'priorities': {
'articles': 0.5,
'indexes': 0.5,
'pages': 0.5
},
'changefreqs': {
'articles': 'monthly',
'indexes': 'daily',
'pages': 'monthly'
}
}

View File

@ -0,0 +1 @@
from .sitemap import *

View File

@ -0,0 +1,271 @@
# -*- coding: utf-8 -*-
'''
Sitemap
-------
The sitemap plugin generates plain-text or XML sitemaps.
'''
from __future__ import unicode_literals
import re
import collections
import os.path
from datetime import datetime
from logging import warning, info
from codecs import open
from pytz import timezone
from pelican import signals, contents
from pelican.utils import get_date
# Standard index URLs written at the top of a plain-text sitemap.
# ``{0}`` is filled with the site URL.
TXT_HEADER = """{0}/index.html
{0}/archives.html
{0}/tags.html
{0}/categories.html
"""
# Opening of an XML sitemap: XML declaration plus the <urlset> root element.
XML_HEADER = """<?xml version="1.0" encoding="utf-8"?>
<urlset xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://www.sitemaps.org/schemas/sitemap/0.9 http://www.sitemaps.org/schemas/sitemap/0.9/sitemap.xsd"
xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">
"""
# One <url> entry. Placeholders, in order: {0} site URL, {1} page URL,
# {2} last-modification date, {3} change frequency, {4} priority.
XML_URL = """
<url>
<loc>{0}/{1}</loc>
<lastmod>{2}</lastmod>
<changefreq>{3}</changefreq>
<priority>{4}</priority>
</url>
"""
# Closes the <urlset> root element opened by XML_HEADER.
XML_FOOTER = """
</urlset>
"""
def format_date(date):
    """Return *date* formatted as ``YYYY-MM-DDThh:mm:ss`` plus a UTC offset.

    The offset is rendered as ``+hh:mm`` / ``-hh:mm``. When no offset is
    available (a naive datetime, or a tzinfo whose ``utcoffset()`` is
    ``None``) the placeholder ``-00:00`` is appended instead.
    """
    # strftime('%z') yields '' whenever no UTC offset is known, which also
    # covers tzinfo objects that return None from utcoffset().
    offset = date.strftime('%z')
    if offset:
        # '+HHMM' -> '+HH:MM'. Slicing from the front keeps the result valid
        # even if the platform appends seconds to the offset ('+HHMMSS').
        tz = offset[:3] + ':' + offset[3:5]
    else:
        tz = "-00:00"
    return date.strftime("%Y-%m-%dT%H:%M:%S") + tz
class SitemapGenerator(object):
    """Pelican generator that writes ``sitemap.xml`` or ``sitemap.txt``.

    Behaviour is configured through the ``SITEMAP`` settings dict (keys
    ``format``, ``priorities``, ``changefreqs`` and ``exclude``). Missing or
    invalid values fall back to the defaults set in ``__init__``.
    """

    def __init__(self, context, settings, path, theme, output_path, *null):
        """Read the ``SITEMAP`` setting and store the validated configuration.

        ``path``, ``theme`` and any extra positional arguments are accepted
        for signature compatibility and are not used. The caller's
        ``SITEMAP`` dict is never modified.
        """
        self.output_path = output_path
        self.context = context
        self.now = datetime.now()
        self.siteurl = settings.get('SITEURL')

        self.default_timezone = settings.get('TIMEZONE', 'UTC')
        # Prefer a `timezone` attribute that already exists on the object
        # (e.g. one defined on a subclass); otherwise use the site default.
        self.timezone = getattr(self, 'timezone', self.default_timezone)
        self.timezone = timezone(self.timezone)

        self.format = 'xml'

        self.changefreqs = {
            'articles': 'monthly',
            'indexes': 'daily',
            'pages': 'monthly'
        }

        self.priorities = {
            'articles': 0.5,
            'indexes': 0.5,
            'pages': 0.5
        }

        self.sitemapExclude = []

        config = settings.get('SITEMAP', {})

        if not isinstance(config, dict):
            warning("sitemap plugin: the SITEMAP setting must be a dict")
            return

        fmt = config.get('format')
        pris = config.get('priorities')
        chfreqs = config.get('changefreqs')
        self.sitemapExclude = config.get('exclude', [])

        if fmt not in ('xml', 'txt'):
            warning("sitemap plugin: SITEMAP['format'] must be `txt' or `xml'")
            warning("sitemap plugin: Setting SITEMAP['format'] on `xml'")
        elif fmt == 'txt':
            # Priorities and change frequencies only appear in XML output,
            # so there is nothing left to validate for plain text.
            self.format = fmt
            return

        valid_keys = ('articles', 'indexes', 'pages')
        valid_chfreqs = ('always', 'hourly', 'daily', 'weekly', 'monthly',
                         'yearly', 'never')

        if isinstance(pris, dict):
            # Validate a copy so the user's settings dict stays untouched.
            checked_pris = dict(pris)
            # We use items for Py3k compat. .iteritems() otherwise
            for k, v in pris.items():
                if k in valid_keys and not isinstance(v, (int, float)):
                    default = self.priorities[k]
                    warning("sitemap plugin: priorities must be numbers")
                    warning("sitemap plugin: setting SITEMAP['priorities']"
                            "['{0}'] on {1}".format(k, default))
                    checked_pris[k] = default
            self.priorities.update(checked_pris)
        elif pris is not None:
            warning("sitemap plugin: SITEMAP['priorities'] must be a dict")
            warning("sitemap plugin: using the default values")

        if isinstance(chfreqs, dict):
            # Validate a copy so the user's settings dict stays untouched.
            checked_chfreqs = dict(chfreqs)
            # .items() for py3k compat.
            for k, v in chfreqs.items():
                if k in valid_keys and v not in valid_chfreqs:
                    default = self.changefreqs[k]
                    warning("sitemap plugin: invalid changefreq `{0}'".format(v))
                    warning("sitemap plugin: setting SITEMAP['changefreqs']"
                            "['{0}'] on '{1}'".format(k, default))
                    checked_chfreqs[k] = default
            self.changefreqs.update(checked_chfreqs)
        elif chfreqs is not None:
            warning("sitemap plugin: SITEMAP['changefreqs'] must be a dict")
            warning("sitemap plugin: using the default values")

    def _is_excluded(self, pageurl):
        """Return True when *pageurl* matches any ``exclude`` pattern.

        Patterns are applied with ``re.match``, i.e. anchored at the start
        of the URL.
        """
        return any(re.match(regstr, pageurl) for regstr in self.sitemapExclude)

    def write_url(self, page, fd):
        """Write the sitemap entry for *page* to the open file *fd*.

        Nothing is written when the page is not published, is marked
        ``private``, has no ``save_as``, matches an ``exclude`` pattern, or
        its output file does not exist under ``output_path``.
        """
        if getattr(page, 'status', 'published') != 'published':
            return

        if getattr(page, 'private', 'False') == 'True':
            return

        # We can disable categories/authors/etc by using False instead of ''
        if not page.save_as:
            return

        pageurl = '' if page.url == 'index.html' else page.url

        # Exclusions apply to both output formats, and are checked before
        # touching the file system.
        if self._is_excluded(pageurl):
            return

        page_path = os.path.join(self.output_path, page.save_as)
        if not os.path.exists(page_path):
            return

        if self.format != 'xml':
            fd.write(self.siteurl + '/' + pageurl + '\n')
            return

        lastdate = getattr(page, 'date', self.now)
        try:
            lastdate = self.get_date_modified(page, lastdate)
        except ValueError:
            warning("sitemap plugin: " + page.save_as + " has invalid modification date,")
            warning("sitemap plugin: using date value as lastmod.")
        lastmod = format_date(lastdate)

        if isinstance(page, contents.Article):
            pri = self.priorities['articles']
            chfreq = self.changefreqs['articles']
        elif isinstance(page, contents.Page):
            pri = self.priorities['pages']
            chfreq = self.changefreqs['pages']
        else:
            pri = self.priorities['indexes']
            chfreq = self.changefreqs['indexes']

        fd.write(XML_URL.format(self.siteurl, pageurl, lastmod, chfreq, pri))

    def get_date_modified(self, page, default):
        """Return the page's ``modified`` value as a datetime, else *default*.

        A non-datetime ``modified`` value is parsed with ``get_date``; the
        callers in this class handle ``ValueError`` from that parse.
        """
        if not hasattr(page, 'modified'):
            return default
        if isinstance(page.modified, datetime):
            return page.modified
        return get_date(page.modified)

    def set_url_wrappers_modification_date(self, wrappers):
        """Set ``modified`` on each wrapper to the newest date of its articles.

        *wrappers* is an iterable of ``(wrapper, articles)`` pairs. The value
        is stored as a string; ``get_date_modified`` parses it back later.
        """
        # NOTE(review): pytz's documentation advises ``tz.localize(dt)`` over
        # ``dt.replace(tzinfo=tz)``; the latter may pick a historical (LMT)
        # offset for some zones. Left unchanged here - confirm before altering.
        for (wrapper, articles) in wrappers:
            lastmod = datetime.min.replace(tzinfo=self.timezone)
            for article in articles:
                lastmod = max(lastmod, article.date.replace(tzinfo=self.timezone))
                try:
                    modified = self.get_date_modified(article, datetime.min).replace(tzinfo=self.timezone)
                    lastmod = max(lastmod, modified)
                except ValueError:
                    # Suppressed: write_url warns about the same invalid date.
                    pass
            setattr(wrapper, 'modified', str(lastmod))

    def generate_output(self, writer):
        """Write ``sitemap.<format>`` into ``output_path``.

        *writer* is accepted for the generator interface and is not used.
        """
        path = os.path.join(self.output_path, 'sitemap.{0}'.format(self.format))

        # Concatenation builds a new list, so the context lists themselves
        # are not modified by the ``+=`` below.
        pages = self.context['pages'] + self.context['articles'] \
            + [c for (c, a) in self.context['categories']] \
            + [t for (t, a) in self.context['tags']] \
            + [a for (a, b) in self.context['authors']]

        self.set_url_wrappers_modification_date(self.context['categories'])
        self.set_url_wrappers_modification_date(self.context['tags'])
        self.set_url_wrappers_modification_date(self.context['authors'])

        for article in self.context['articles']:
            pages += article.translations

        info('writing {0}'.format(path))

        with open(path, 'w', encoding='utf-8') as fd:

            if self.format == 'xml':
                fd.write(XML_HEADER)
            else:
                fd.write(TXT_HEADER.format(self.siteurl))

            # Minimal stand-in carrying just the attributes write_url reads.
            FakePage = collections.namedtuple('FakePage',
                                              ['status',
                                               'date',
                                               'url',
                                               'save_as'])

            for standard_page_url in ['index.html',
                                      'archives.html',
                                      'tags.html',
                                      'categories.html']:
                fake = FakePage(status='published',
                                date=self.now,
                                url=standard_page_url,
                                save_as=standard_page_url)
                self.write_url(fake, fd)

            # add template pages
            # We use items for Py3k compat. .iteritems() otherwise
            for _template, template_page_url in self.context['TEMPLATE_PAGES'].items():

                # don't add duplicate entry for index page
                if template_page_url == 'index.html':
                    continue

                fake = FakePage(status='published',
                                date=self.now,
                                url=template_page_url,
                                save_as=template_page_url)
                self.write_url(fake, fd)

            for page in pages:
                self.write_url(page, fd)

            if self.format == 'xml':
                fd.write(XML_FOOTER)
def get_generators(generators):
    """Return the SitemapGenerator class; the ``generators`` argument is unused."""
    return SitemapGenerator
def register():
    """Connect ``get_generators`` to Pelican's ``get_generators`` signal."""
    signals.get_generators.connect(get_generators)

View File

@ -145,7 +145,7 @@ CARDS
margin: 0 0 2px 0;
}
.footer-card:nth-child(1),
.footer-card:nth-child(2) {
.footer-card:nth-child(3) {
margin: 0 0 20px 0;
}
/* Mobile Styles */
@ -184,12 +184,13 @@ CARDS
justify-content: space-between;
}
.footer-card:nth-child(1),
.footer-card:nth-child(2) {
.footer-card:nth-child(2),
.footer-card:nth-child(3) {
flex-grow: 1;
}
.footer-card:nth-child(3) {
.footer-card:nth-child(4) {
text-align: right;
flex-grow: 10;
flex-grow: 8;
}
}

View File

@ -130,8 +130,16 @@
<p>
<a href="{{PATREON_URL}}" rel="noopener" target="_blank">Patreon</a>
</p>
</div>
<div class="footer-card">
<p>
<a href="/legal-notice/" rel="noopener">Legal Notice</a>
<a href="{{LINKEDIN_URL}}" rel="noopener" target="_blank">LinkedIn</a>
</p>
<p>
<a href="{{TWITTER_URL}}" rel="noopener" target="_blank">Twitter</a>
</p>
<p>
<a href="/legal-notice/">Legal Notice</a>
</p>
</div>
<div class="footer-card">