cloudron-box/setup/start.sh

#!/bin/bash

set -eu -o pipefail

# This script is run after the box code is switched. This means that this script
# should pretty much always succeed. No network logic/download code here.

function log() {
  echo -e "$(date +'%Y-%m-%dT%H:%M:%S')" "==> start: $1"
}

log "Cloudron Start"

readonly USER="yellowtent"
readonly HOME_DIR="/home/${USER}"
readonly BOX_SRC_DIR="${HOME_DIR}/box"
readonly PLATFORM_DATA_DIR="${HOME_DIR}/platformdata"
readonly APPS_DATA_DIR="${HOME_DIR}/appsdata"
readonly BOX_DATA_DIR="${HOME_DIR}/boxdata/box"
readonly MAIL_DATA_DIR="${HOME_DIR}/boxdata/mail"

readonly script_dir="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
readonly json="$(realpath ${script_dir}/../node_modules/.bin/json)"
readonly ubuntu_version=$(lsb_release -rs)

cp -f "${script_dir}/../scripts/cloudron-support" /usr/bin/cloudron-support
cp -f "${script_dir}/../scripts/cloudron-translation-update" /usr/bin/cloudron-translation-update

# this needs to match the cloudron/base:2.0.0 gid
if ! getent group media; then
    addgroup --gid 500 --system media
fi

log "Configuring docker"
cp "${script_dir}/start/docker-cloudron-app.apparmor" /etc/apparmor.d/docker-cloudron-app
systemctl enable apparmor
systemctl restart apparmor

usermod ${USER} -a -G docker
# unbound (which starts after box code) relies on this interface to exist. dockerproxy also relies on this.
docker network create --subnet=172.18.0.0/16 --ip-range=172.18.0.0/20 cloudron || true

mkdir -p "${BOX_DATA_DIR}"
mkdir -p "${APPS_DATA_DIR}"
mkdir -p "${MAIL_DATA_DIR}"

# keep these in sync with paths.js
log "Ensuring directories"

mkdir -p "${PLATFORM_DATA_DIR}/graphite"
mkdir -p "${PLATFORM_DATA_DIR}/mysql"
mkdir -p "${PLATFORM_DATA_DIR}/postgresql"
mkdir -p "${PLATFORM_DATA_DIR}/mongodb"
mkdir -p "${PLATFORM_DATA_DIR}/redis"
mkdir -p "${PLATFORM_DATA_DIR}/addons/mail/banner" \
         "${PLATFORM_DATA_DIR}/addons/mail/dkim"
mkdir -p "${PLATFORM_DATA_DIR}/collectd/collectd.conf.d"
mkdir -p "${PLATFORM_DATA_DIR}/logrotate.d"
mkdir -p "${PLATFORM_DATA_DIR}/acme"
mkdir -p "${PLATFORM_DATA_DIR}/backup"
mkdir -p "${PLATFORM_DATA_DIR}/logs/backup" \
         "${PLATFORM_DATA_DIR}/logs/updater" \
         "${PLATFORM_DATA_DIR}/logs/tasks" \
         "${PLATFORM_DATA_DIR}/logs/crash" \
         "${PLATFORM_DATA_DIR}/logs/collectd"
mkdir -p "${PLATFORM_DATA_DIR}/update"
mkdir -p "${PLATFORM_DATA_DIR}/sftp/ssh" # sftp keys
mkdir -p "${PLATFORM_DATA_DIR}/firewall"

# ensure backups folder exists and is writeable
mkdir -p /var/backups
chmod 777 /var/backups

log "Configuring journald"
sed -e "s/^#SystemMaxUse=.*$/SystemMaxUse=100M/" \
    -e "s/^#ForwardToSyslog=.*$/ForwardToSyslog=no/" \
    -i /etc/systemd/journald.conf

# When rotating logs, systemd kills journald too soon sometimes
# See https://github.com/systemd/systemd/issues/1353 (this is upstream default)
sed -e "s/^WatchdogSec=.*$/WatchdogSec=3min/" \
    -i /lib/systemd/system/systemd-journald.service

usermod -a -G systemd-journal ${USER} # Give user access to system logs
if [[ ! -d /var/log/journal ]]; then # in some images, this directory is not created making system log to /run/systemd instead
    mkdir -p /var/log/journal
    chown root:systemd-journal /var/log/journal
    chmod g+s /var/log/journal  # sticky bit for group propagation
fi
systemctl daemon-reload
systemctl restart systemd-journald

# Give user access to nginx logs (uses adm group)
usermod -a -G adm ${USER}

log "Setting up unbound"
# DO uses Google nameservers by default. This causes RBL queries to fail (host 2.0.0.127.zen.spamhaus.org)
# We do not use dnsmasq because it is not a recursive resolver and defaults to the value in the interfaces file (which is Google DNS!)
# We listen on 0.0.0.0 because there is no way control ordering of docker (which creates the 172.18.0.0/16) and unbound
# If IP6 is not enabled, dns queries seem to fail on some hosts. -s returns false if file missing or 0 size
ip6=$([[ -s /proc/net/if_inet6 ]] && echo "yes" || echo "no")
cp -f "${script_dir}/start/unbound.conf" /etc/unbound/unbound.conf.d/cloudron-network.conf
# update the root anchor after a out-of-disk-space situation (see #269)
unbound-anchor -a /var/lib/unbound/root.key

log "Adding systemd services"
cp -r "${script_dir}/start/systemd/." /etc/systemd/system/
[[ "${ubuntu_version}" == "16.04" ]] && sed -e 's/MemoryMax/MemoryLimit/g' -i /etc/systemd/system/box.service
[[ "${ubuntu_version}" == "16.04" ]] && sed -e 's/Type=notify/Type=simple/g' -i /etc/systemd/system/unbound.service
systemctl daemon-reload
systemctl enable --now cloudron-syslog
systemctl enable unbound
systemctl enable box
systemctl enable cloudron-firewall
systemctl enable --now cloudron-disable-thp

# update firewall rules
systemctl restart cloudron-firewall

# For logrotate
systemctl enable --now cron

# ensure unbound runs
systemctl restart unbound

# ensure cloudron-syslog runs
systemctl restart cloudron-syslog

log "Configuring sudoers"
rm -f /etc/sudoers.d/${USER}
cp "${script_dir}/start/sudoers" /etc/sudoers.d/${USER}

log "Configuring collectd"
rm -rf /etc/collectd /var/log/collectd.log
ln -sfF "${PLATFORM_DATA_DIR}/collectd" /etc/collectd
cp "${script_dir}/start/collectd/collectd.conf" "${PLATFORM_DATA_DIR}/collectd/collectd.conf"
if [[ "${ubuntu_version}" == "20.04" ]]; then
    # https://bugs.launchpad.net/ubuntu/+source/collectd/+bug/1872281
    if ! grep -q LD_PRELOAD /etc/default/collectd; then
        echo -e "\nLD_PRELOAD=/usr/lib/python3.8/config-3.8-x86_64-linux-gnu/libpython3.8.so" >> /etc/default/collectd
    fi
fi
systemctl restart collectd

log "Configuring logrotate"
if ! grep -q "^include ${PLATFORM_DATA_DIR}/logrotate.d" /etc/logrotate.conf; then
    echo -e "\ninclude ${PLATFORM_DATA_DIR}/logrotate.d\n" >> /etc/logrotate.conf
fi
rm -f "${PLATFORM_DATA_DIR}/logrotate.d/"*
cp "${script_dir}/start/logrotate/"* "${PLATFORM_DATA_DIR}/logrotate.d/"

# logrotate files have to be owned by root, this is here to fixup existing installations where we were resetting the owner to yellowtent
chown root:root "${PLATFORM_DATA_DIR}/logrotate.d/"

log "Adding motd message for admins"
cp "${script_dir}/start/cloudron-motd" /etc/update-motd.d/92-cloudron

log "Configuring nginx"
# link nginx config to system config
unlink /etc/nginx 2>/dev/null || rm -rf /etc/nginx
ln -s "${PLATFORM_DATA_DIR}/nginx" /etc/nginx
mkdir -p "${PLATFORM_DATA_DIR}/nginx/applications"
mkdir -p "${PLATFORM_DATA_DIR}/nginx/cert"
cp "${script_dir}/start/nginx/nginx.conf" "${PLATFORM_DATA_DIR}/nginx/nginx.conf"
cp "${script_dir}/start/nginx/mime.types" "${PLATFORM_DATA_DIR}/nginx/mime.types"
if ! grep -q "^Restart=" /etc/systemd/system/multi-user.target.wants/nginx.service; then
    # default nginx service file does not restart on crash
    echo -e "\n[Service]\nRestart=always\n" >> /etc/systemd/system/multi-user.target.wants/nginx.service
fi

# worker_rlimit_nofile in nginx config can be max this number
mkdir -p /etc/systemd/system/nginx.service.d
if ! grep -q "^LimitNOFILE=" /etc/systemd/system/nginx.service.d/cloudron.conf; then
    echo -e "[Service]\nLimitNOFILE=16384\n" > /etc/systemd/system/nginx.service.d/cloudron.conf
fi

systemctl daemon-reload
systemctl start nginx

# restart mysql to make sure it has latest config
if [[ ! -f /etc/mysql/mysql.cnf ]] || ! diff -q "${script_dir}/start/mysql.cnf" /etc/mysql/mysql.cnf >/dev/null; then
    # wait for all running mysql jobs
    cp "${script_dir}/start/mysql.cnf" /etc/mysql/mysql.cnf
    while true; do
        if ! systemctl list-jobs | grep mysql; then break; fi
        log "Waiting for mysql jobs..."
        sleep 1
    done
    log "Stopping mysql"
    systemctl stop mysql
    while mysqladmin ping 2>/dev/null; do
        log "Waiting for mysql to stop..."
        sleep 1
    done
fi

# the start/stop of mysql is separate to make sure it got reloaded with latest config and it's up and running before we start the new box code
# when using 'system restart mysql', it seems to restart much later and the box code loses connection during platform startup (dangerous!)
log "Starting mysql"
systemctl start mysql
while ! mysqladmin ping 2>/dev/null; do
    log "Waiting for mysql to start..."
    sleep 1
done

readonly mysql_root_password="password"
mysqladmin -u root -ppassword password password # reset default root password
if [[ "${ubuntu_version}" == "20.04" ]]; then
    # mysql 8 added a new caching_sha2_password scheme which mysqljs does not support
    mysql -u root -p${mysql_root_password} -e "ALTER USER 'root'@'localhost' IDENTIFIED WITH mysql_native_password BY '${mysql_root_password}';"
fi
mysql -u root -p${mysql_root_password} -e 'CREATE DATABASE IF NOT EXISTS box'

# set HOME explicity, because it's not set when the installer calls it. this is done because
# paths.js uses this env var and some of the migrate code requires box code
log "Migrating data"
cd "${BOX_SRC_DIR}"
if ! HOME=${HOME_DIR} BOX_ENV=cloudron DATABASE_URL=mysql://root:${mysql_root_password}@127.0.0.1/box "${BOX_SRC_DIR}/node_modules/.bin/db-migrate" up; then
    log "DB migration failed"
    exit 1
fi

rm -f /etc/cloudron/cloudron.conf

log "Changing ownership"
# note, change ownership after db migrate. this allow db migrate to move files around as root and then we can fix it up here
# be careful of what is chown'ed here. subdirs like mysql,redis etc are owned by the containers and will stop working if perms change
chown -R "${USER}" /etc/cloudron
chown "${USER}:${USER}" -R "${PLATFORM_DATA_DIR}/nginx" "${PLATFORM_DATA_DIR}/collectd" "${PLATFORM_DATA_DIR}/addons" "${PLATFORM_DATA_DIR}/acme" "${PLATFORM_DATA_DIR}/backup" "${PLATFORM_DATA_DIR}/logs" "${PLATFORM_DATA_DIR}/update" "${PLATFORM_DATA_DIR}/sftp" "${PLATFORM_DATA_DIR}/firewall"
chown "${USER}:${USER}" "${PLATFORM_DATA_DIR}/INFRA_VERSION" 2>/dev/null || true
chown "${USER}:${USER}" "${PLATFORM_DATA_DIR}"
chown "${USER}:${USER}" "${APPS_DATA_DIR}"

chown "${USER}:${USER}" -R "${BOX_DATA_DIR}"
# do not chown the boxdata/mail directory entirely; dovecot gets upset
chown "${USER}:${USER}" "${MAIL_DATA_DIR}"

log "Starting Cloudron"
systemctl start box

sleep 2 # give systemd sometime to start the processes

log "Almost done"