Files
cloudron-box/setup/start.sh
Girish Ramakrishnan aa71a734b9 Fix issue where mysql was restarting after new box code has started up
not 100% sure because of missing log timestamps, but mysql restarts after the box
has started up. As seen from logs below, we try to mark the apps for restart on
platform update. But this failed because mysql was restarting at that time.
This ended up with e2e test failing.

box:apps restartAppsUsingAddons: marking nc4801.autoupdatetest.domain.io for restart
box:apps restartAppsUsingAddons: error marking nc4801.autoupdatetest.domain.io for restart: {"name":"BoxError","reason":"Database Error","details":{"fatal":true,"code":"PROTOCOL_CONNECTION_LOST"},"message":"Connection lost: The server closed the connection.","nestedError":{"fatal":true,"code":"PROTOCOL_CONNECTION_LOST"}}
box:apps restartAppsUsingAddons: marking wekan1398.autoupdatetest.domain.io for restart
box:database Connection 51 error: Connection lost: The server closed the connection. PROTOCOL_CONNECTION_LOST
box:database Connection 52 error: Connection lost: The server closed the connection. PROTOCOL_CONNECTION_LOST
Box GET /api/v1/cloudron/status 500 Internal Server Error connect ECONNREFUSED 127.0.0.1:3306 41.251 ms - 217
2021-03-02 23:27:31 -08:00

265 lines
11 KiB
Bash
Executable File

#!/bin/bash
set -eu -o pipefail
# This script is run after the box code is switched. This means that this script
# should pretty much always succeed. No network logic/download code here.
function log() {
echo -e "$(date +'%Y-%m-%dT%H:%M:%S')" "==> start: $1"
}
log "Cloudron Start"
readonly USER="yellowtent"
readonly HOME_DIR="/home/${USER}"
readonly BOX_SRC_DIR="${HOME_DIR}/box"
readonly PLATFORM_DATA_DIR="${HOME_DIR}/platformdata" # platform data
readonly APPS_DATA_DIR="${HOME_DIR}/appsdata" # app data
readonly BOX_DATA_DIR="${HOME_DIR}/boxdata" # box data
readonly script_dir="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
readonly json="$(realpath ${script_dir}/../node_modules/.bin/json)"
readonly ubuntu_version=$(lsb_release -rs)
cp -f "${script_dir}/../scripts/cloudron-support" /usr/bin/cloudron-support
cp -f "${script_dir}/../scripts/cloudron-translation-update" /usr/bin/cloudron-translation-update
# this needs to match the cloudron/base:2.0.0 gid
if ! getent group media; then
addgroup --gid 500 --system media
fi
log "Configuring docker"
cp "${script_dir}/start/docker-cloudron-app.apparmor" /etc/apparmor.d/docker-cloudron-app
systemctl enable apparmor
systemctl restart apparmor
usermod ${USER} -a -G docker
# unbound (which starts after box code) relies on this interface to exist. dockerproxy also relies on this.
docker network create --subnet=172.18.0.0/16 --ip-range=172.18.0.0/20 cloudron || true
mkdir -p "${BOX_DATA_DIR}"
mkdir -p "${APPS_DATA_DIR}"
# keep these in sync with paths.js
log "Ensuring directories"
mkdir -p "${PLATFORM_DATA_DIR}/graphite"
mkdir -p "${PLATFORM_DATA_DIR}/mysql"
mkdir -p "${PLATFORM_DATA_DIR}/postgresql"
mkdir -p "${PLATFORM_DATA_DIR}/mongodb"
mkdir -p "${PLATFORM_DATA_DIR}/redis"
mkdir -p "${PLATFORM_DATA_DIR}/addons/mail/banner"
mkdir -p "${PLATFORM_DATA_DIR}/collectd/collectd.conf.d"
mkdir -p "${PLATFORM_DATA_DIR}/logrotate.d"
mkdir -p "${PLATFORM_DATA_DIR}/acme"
mkdir -p "${PLATFORM_DATA_DIR}/backup"
mkdir -p "${PLATFORM_DATA_DIR}/logs/backup" \
"${PLATFORM_DATA_DIR}/logs/updater" \
"${PLATFORM_DATA_DIR}/logs/tasks" \
"${PLATFORM_DATA_DIR}/logs/crash" \
"${PLATFORM_DATA_DIR}/logs/collectd"
mkdir -p "${PLATFORM_DATA_DIR}/update"
mkdir -p "${BOX_DATA_DIR}/appicons"
mkdir -p "${BOX_DATA_DIR}/firewall"
mkdir -p "${BOX_DATA_DIR}/profileicons"
mkdir -p "${BOX_DATA_DIR}/certs"
mkdir -p "${BOX_DATA_DIR}/acme" # acme keys
mkdir -p "${BOX_DATA_DIR}/mail/dkim"
mkdir -p "${BOX_DATA_DIR}/well-known" # .well-known documents
mkdir -p "${BOX_DATA_DIR}/sftp/ssh" # sftp keys
# ensure backups folder exists and is writeable
mkdir -p /var/backups
chmod 777 /var/backups
log "Configuring journald"
sed -e "s/^#SystemMaxUse=.*$/SystemMaxUse=100M/" \
-e "s/^#ForwardToSyslog=.*$/ForwardToSyslog=no/" \
-i /etc/systemd/journald.conf
# When rotating logs, systemd kills journald too soon sometimes
# See https://github.com/systemd/systemd/issues/1353 (this is upstream default)
sed -e "s/^WatchdogSec=.*$/WatchdogSec=3min/" \
-i /lib/systemd/system/systemd-journald.service
# Give user access to system logs
usermod -a -G systemd-journal ${USER}
mkdir -p /var/log/journal # in some images, this directory is not created making system log to /run/systemd instead
chown root:systemd-journal /var/log/journal
systemctl daemon-reload
systemctl restart systemd-journald
setfacl -n -m u:${USER}:r /var/log/journal/*/system.journal
# Give user access to nginx logs (uses adm group)
usermod -a -G adm ${USER}
log "Setting up unbound"
# DO uses Google nameservers by default. This causes RBL queries to fail (host 2.0.0.127.zen.spamhaus.org)
# We do not use dnsmasq because it is not a recursive resolver and defaults to the value in the interfaces file (which is Google DNS!)
# We listen on 0.0.0.0 because there is no way control ordering of docker (which creates the 172.18.0.0/16) and unbound
# If IP6 is not enabled, dns queries seem to fail on some hosts. -s returns false if file missing or 0 size
ip6=$([[ -s /proc/net/if_inet6 ]] && echo "yes" || echo "no")
cp -f "${script_dir}/start/unbound.conf" /etc/unbound/unbound.conf.d/cloudron-network.conf
# update the root anchor after a out-of-disk-space situation (see #269)
unbound-anchor -a /var/lib/unbound/root.key
log "Adding systemd services"
cp -r "${script_dir}/start/systemd/." /etc/systemd/system/
[[ "${ubuntu_version}" == "16.04" ]] && sed -e 's/MemoryMax/MemoryLimit/g' -i /etc/systemd/system/box.service
systemctl daemon-reload
systemctl enable --now cloudron-syslog
systemctl enable unbound
systemctl enable box
systemctl enable cloudron-firewall
systemctl enable --now cloudron-disable-thp
# update firewall rules
systemctl restart cloudron-firewall
# For logrotate
systemctl enable --now cron
# ensure unbound runs
systemctl restart unbound
# ensure cloudron-syslog runs
systemctl restart cloudron-syslog
log "Configuring sudoers"
rm -f /etc/sudoers.d/${USER}
cp "${script_dir}/start/sudoers" /etc/sudoers.d/${USER}
log "Configuring collectd"
rm -rf /etc/collectd /var/log/collectd.log
ln -sfF "${PLATFORM_DATA_DIR}/collectd" /etc/collectd
cp "${script_dir}/start/collectd/collectd.conf" "${PLATFORM_DATA_DIR}/collectd/collectd.conf"
if [[ "${ubuntu_version}" == "20.04" ]]; then
# https://bugs.launchpad.net/ubuntu/+source/collectd/+bug/1872281
if ! grep -q LD_PRELOAD /etc/default/collectd; then
echo -e "\nLD_PRELOAD=/usr/lib/python3.8/config-3.8-x86_64-linux-gnu/libpython3.8.so" >> /etc/default/collectd
fi
fi
systemctl restart collectd
log "Configuring logrotate"
if ! grep -q "^include ${PLATFORM_DATA_DIR}/logrotate.d" /etc/logrotate.conf; then
echo -e "\ninclude ${PLATFORM_DATA_DIR}/logrotate.d\n" >> /etc/logrotate.conf
fi
rm -f "${PLATFORM_DATA_DIR}/logrotate.d/"*
cp "${script_dir}/start/logrotate/"* "${PLATFORM_DATA_DIR}/logrotate.d/"
# logrotate files have to be owned by root, this is here to fixup existing installations where we were resetting the owner to yellowtent
chown root:root "${PLATFORM_DATA_DIR}/logrotate.d/"
log "Adding motd message for admins"
cp "${script_dir}/start/cloudron-motd" /etc/update-motd.d/92-cloudron
log "Configuring nginx"
# link nginx config to system config
unlink /etc/nginx 2>/dev/null || rm -rf /etc/nginx
ln -s "${PLATFORM_DATA_DIR}/nginx" /etc/nginx
mkdir -p "${PLATFORM_DATA_DIR}/nginx/applications"
mkdir -p "${PLATFORM_DATA_DIR}/nginx/cert"
cp "${script_dir}/start/nginx/nginx.conf" "${PLATFORM_DATA_DIR}/nginx/nginx.conf"
cp "${script_dir}/start/nginx/mime.types" "${PLATFORM_DATA_DIR}/nginx/mime.types"
if ! grep -q "^Restart=" /etc/systemd/system/multi-user.target.wants/nginx.service; then
# default nginx service file does not restart on crash
echo -e "\n[Service]\nRestart=always\n" >> /etc/systemd/system/multi-user.target.wants/nginx.service
fi
# worker_rlimit_nofile in nginx config can be max this number
mkdir -p /etc/systemd/system/nginx.service.d
if ! grep -q "^LimitNOFILE=" /etc/systemd/system/nginx.service.d/cloudron.conf; then
echo -e "[Service]\nLimitNOFILE=16384\n" > /etc/systemd/system/nginx.service.d/cloudron.conf
fi
systemctl daemon-reload
systemctl start nginx
# restart mysql to make sure it has latest config
if [[ ! -f /etc/mysql/mysql.cnf ]] || ! diff -q "${script_dir}/start/mysql.cnf" /etc/mysql/mysql.cnf >/dev/null; then
# wait for all running mysql jobs
cp "${script_dir}/start/mysql.cnf" /etc/mysql/mysql.cnf
while true; do
if ! systemctl list-jobs | grep mysql; then break; fi
log "Waiting for mysql jobs..."
sleep 1
done
log "Stopping mysql"
systemctl stop mysql
while mysqladmin ping 2>/dev/null; do
log "Waiting for mysql to stop..."
sleep 1
done
fi
# the start/stop of mysql is separate to make sure it got reloaded with latest config and it's up and running before we start the new box code
# when using 'system restart mysql', it seems to restart much later and the box code loses connection during platform startup (dangerous!)
log "Starting mysql"
systemctl start mysql
while ! mysqladmin ping 2>/dev/null; do
log "Waiting for mysql to start..."
sleep 1
done
readonly mysql_root_password="password"
mysqladmin -u root -ppassword password password # reset default root password
if [[ "${ubuntu_version}" == "20.04" ]]; then
# mysql 8 added a new caching_sha2_password scheme which mysqljs does not support
mysql -u root -p${mysql_root_password} -e "ALTER USER 'root'@'localhost' IDENTIFIED WITH mysql_native_password BY '${mysql_root_password}';"
fi
mysql -u root -p${mysql_root_password} -e 'CREATE DATABASE IF NOT EXISTS box'
# set HOME explicity, because it's not set when the installer calls it. this is done because
# paths.js uses this env var and some of the migrate code requires box code
log "Migrating data"
cd "${BOX_SRC_DIR}"
if ! HOME=${HOME_DIR} BOX_ENV=cloudron DATABASE_URL=mysql://root:${mysql_root_password}@127.0.0.1/box "${BOX_SRC_DIR}/node_modules/.bin/db-migrate" up; then
log "DB migration failed"
exit 1
fi
rm -f /etc/cloudron/cloudron.conf
if [[ ! -f "${BOX_DATA_DIR}/dhparams.pem" ]]; then
log "Generating dhparams (takes forever)"
openssl dhparam -out "${BOX_DATA_DIR}/dhparams.pem" 2048
cp "${BOX_DATA_DIR}/dhparams.pem" "${PLATFORM_DATA_DIR}/addons/mail/dhparams.pem"
else
cp "${BOX_DATA_DIR}/dhparams.pem" "${PLATFORM_DATA_DIR}/addons/mail/dhparams.pem"
fi
if [[ ! -f "${BOX_DATA_DIR}/sftp/ssh/ssh_host_rsa_key" ]]; then
# the key format in Ubuntu 20 changed, so we create keys in legacy format. for older ubuntu, just re-use the host keys
# see https://github.com/proftpd/proftpd/issues/793
if [[ "${ubuntu_version}" == "20.04" ]]; then
ssh-keygen -m PEM -t rsa -f "${BOX_DATA_DIR}/sftp/ssh/ssh_host_rsa_key" -q -N ""
else
cp /etc/ssh/ssh_host_rsa_key* ${BOX_DATA_DIR}/sftp/ssh
fi
fi
log "Changing ownership"
# be careful of what is chown'ed here. subdirs like mysql,redis etc are owned by the containers and will stop working if perms change
chown -R "${USER}" /etc/cloudron
chown "${USER}:${USER}" -R "${PLATFORM_DATA_DIR}/nginx" "${PLATFORM_DATA_DIR}/collectd" "${PLATFORM_DATA_DIR}/addons" "${PLATFORM_DATA_DIR}/acme" "${PLATFORM_DATA_DIR}/backup" "${PLATFORM_DATA_DIR}/logs" "${PLATFORM_DATA_DIR}/update"
chown "${USER}:${USER}" "${PLATFORM_DATA_DIR}/INFRA_VERSION" 2>/dev/null || true
chown "${USER}:${USER}" "${PLATFORM_DATA_DIR}"
chown "${USER}:${USER}" "${APPS_DATA_DIR}"
# do not chown the boxdata/mail directory; dovecot gets upset
chown "${USER}:${USER}" "${BOX_DATA_DIR}"
find "${BOX_DATA_DIR}" -mindepth 1 -maxdepth 1 -not -path "${BOX_DATA_DIR}/mail" -exec chown -R "${USER}:${USER}" {} \;
chown "${USER}:${USER}" "${BOX_DATA_DIR}/mail"
chown "${USER}:${USER}" -R "${BOX_DATA_DIR}/mail/dkim" # this is owned by box currently since it generates the keys
log "Starting Cloudron"
systemctl start box
sleep 2 # give systemd sometime to start the processes
log "Almost done"