#!/bin/bash set -eu -o pipefail # scripts requires root if [[ ${EUID} -ne 0 ]]; then echo "This script should be run as root. Run with sudo" exit 1 fi readonly RED='\033[31m' readonly GREEN='\033[32m' readonly YELLOW='\033[33m' readonly DONE='\033[m' readonly PASTEBIN="https://paste.cloudron.io" readonly LINE="\n========================================================\n" readonly HELP_MESSAGE=" Cloudron Support and Diagnostics Tool Options: --disable-dnssec Disable DNSSEC --enable-remote-access Enable SSH Remote Access for the Cloudron support team --send-diagnostics Collects server diagnostics and uploads it to ${PASTEBIN} --troubleshoot Dashboard down? Run tests to identify the potential problem --owner-login Login as owner --use-external-dns Forwards all DNS requests to Google (8.8.8.8) and Cloudflare (1.1.1.1) DNS servers --help Show this message " function success() { echo -e "[${GREEN}OK${DONE}]\t${1}" } function info() { echo -e "\t${1}" } function warn() { echo -e "[${YELLOW}WARN${DONE}]\t${1}" } function fail() { echo -e "[${RED}FAIL${DONE}]\t${1}" >&2 } function enable_remote_access() { local -r cloudron_support_public_key="ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIGWS+930b8QdzbchGljt3KSljH9wRhYvht8srrtQHdzg support@cloudron.io" local -r ssh_user="cloudron-support" local -r keys_file="/home/cloudron-support/.ssh/authorized_keys" echo -n "Enabling Remote Access for the Cloudron support team..." mkdir -p $(dirname "${keys_file}") # .ssh does not exist sometimes touch "${keys_file}" # required for concat to work if ! grep -q "${cloudron_support_public_key}" "${keys_file}"; then echo -e "\n${cloudron_support_public_key}" >> "${keys_file}" chmod 600 "${keys_file}" chown "${ssh_user}" "${keys_file}" fi echo "Done" } function wait_systemd_service() { local -r service="$1" for i in {1..3}; do ts=$(systemctl show "${service}" -p ActiveEnterTimestamp | sed 's/ActiveEnterTimestamp=//g') start=$(date '+%s' --date="${ts}") now=$(date '+%s') up_time=$(( $now - $start )) (( up_time > 10 )) && return 0 info "Service '${service}' just started $up_time secs ago, checking health again in 10s" sleep 11 done return 1 } function check_host_mysql() { if ! systemctl is-active -q mysql; then info "MySQL is down. Trying to restart MySQL ..." systemctl restart mysql if ! systemctl is-active -q mysql; then fail "MySQL is still down, please investigate the error by inspecting /var/log/mysql/error.log" exit 1 fi fi if ! wait_systemd_service mysql; then fail "MySQL keeps restarting, please investigate the error by inspecting /var/log/mysql/error.log" exit 1 fi success "MySQL is running" } function check_box() { [[ -f /home/yellowtent/box/VERSION ]] && version=$(cat /home/yellowtent/box/VERSION) || version='' if ! systemctl is-active -q box; then info "box v${version} is down. re-running migration script and restarting it ..." /home/yellowtent/box/setup/start.sh systemctl stop box # a restart sometimes doesn't restart, no idea systemctl start box if ! systemctl is-active -q box; then fail "box service is still down, please investigate the error by inspecting /home/yellowtent/platformdata/logs/box.log" exit 1 fi fi if ! wait_systemd_service box; then fail "box service keeps restarting, please investigate the error by inspecting /home/yellowtent/platformdata/logs/box.log" exit 1 fi success "box v${version} is running" } function owner_login() { check_host_mysql >/dev/null local -r owner_username=$(mysql -NB -uroot -ppassword -e "SELECT username FROM box.users WHERE role='owner' AND username IS NOT NULL AND active=1 ORDER BY creationTime LIMIT 1" 2>/dev/null) local -r owner_password=$(pwgen -1s 12) local -r dashboard_domain=$(mysql -NB -uroot -ppassword -e "SELECT value FROM box.settings WHERE name='dashboard_domain'" 2>/dev/null) mysql -NB -uroot -ppassword -e "INSERT INTO box.settings (name, value) VALUES ('ghosts_config', '{\"${owner_username}\":\"${owner_password}\"}') ON DUPLICATE KEY UPDATE name='ghosts_config', value='{\"${owner_username}\":\"${owner_password}\"}'" 2>/dev/null echo "Login at https://my.${dashboard_domain} as ${owner_username} / ${owner_password} . This password may only be used once." } function send_diagnostics() { local -r log="/tmp/cloudron-support.log" echo -n "Generating Cloudron Support stats..." rm -rf $log echo -e $LINE"Linux"$LINE >> $log uname -nar &>> $log echo -e $LINE"Ubuntu"$LINE >> $log lsb_release -a &>> $log echo -e $LINE"Cloudron"$LINE >> $log cloudron_version=$(cat /home/yellowtent/box/VERSION || true) echo -e "Cloudron version: ${cloudron_version}" >> $log dashboard_domain=$(mysql -NB -uroot -ppassword -e "SELECT value FROM box.settings WHERE name='dashboard_domain'" 2>/dev/null || true) echo -e "Dashboard domain: ${dashboard_domain}" >> $log echo -e $LINE"Docker"$LINE >> $log if ! timeout --kill-after 10s 15s docker system info &>> $log 2>&1; then echo -e "Docker (system info) is not responding" >> $log fi if ! timeout --kill-after 10s 15s docker ps -a &>> $log 2>&1; then echo -e "Docker (ps) is not responding" >> $log fi echo -e $LINE"Filesystem stats"$LINE >> $log if ! timeout --kill-after 10s 15s df -h &>> $log 2>&1; then echo -e "df is not responding" >> $log fi echo -e $LINE"Appsdata stats"$LINE >> $log du -hcsL /home/yellowtent/appsdata/* &>> $log || true echo -e $LINE"Boxdata stats"$LINE >> $log du -hcsL /home/yellowtent/boxdata/* &>> $log echo -e $LINE"Backup stats (possibly misleading)"$LINE >> $log du -hcsL /var/backups/* &>> $log || true echo -e $LINE"System daemon status"$LINE >> $log systemctl status --lines=100 box mysql unbound cloudron-syslog nginx collectd docker &>> $log echo -e $LINE"Box logs"$LINE >> $log tail -n 100 /home/yellowtent/platformdata/logs/box.log &>> $log echo -e $LINE"Interface Info"$LINE >> $log ip addr &>> $log echo -e $LINE"Firewall chains"$LINE >> $log iptables -L &>> $log has_ipv6=$(cat /proc/net/if_inet6 >/dev/null 2>&1 && echo "yes" || echo "no") echo -e "IPv6: ${has_ipv6}" >> $log [[ "${has_ipv6}" == "yes" ]] && ip6tables -L &>> $log echo "Done" echo -n "Uploading information..." paste_key=$(curl -X POST ${PASTEBIN}/documents --silent --data-binary "@$log" | python3 -c "import sys, json; print(json.load(sys.stdin)['key'])") echo "Done" echo -e "\nPlease email the following link to support@cloudron.io : ${PASTEBIN}/${paste_key}" } function check_unbound() { if ! systemctl is-active -q unbound; then info "unbound is down. updating root anchor to see if it fixes it" unbound-anchor -a /var/lib/unbound/root.key systemctl restart unbound if ! systemctl is-active -q unbound; then fail "unbound is still down, please investigate the error using 'journalctl -u unbound'" exit 1 fi fi if ! wait_systemd_service unbound; then fail "unbound service keeps restarting, please investigate the error using 'journalctl -u unbound'" exit 1 fi test_resolve=$(dig cloudron.io @127.0.0.1 +short) if [[ -z "test_resolve" ]]; then fail "DNS is not resolving, maybe try forwarding all DNS requests using the --use-external-dns option" exit 1 fi success "unbound is running" } function check_dashboard_cert() { local -r dashboard_domain=$(mysql -NB -uroot -ppassword -e "SELECT value FROM box.settings WHERE name='dashboard_domain'" 2>/dev/null) local -r nginx_conf_file="/home/yellowtent/platformdata/nginx/applications/dashboard/my.${dashboard_domain}.conf" local -r cert_file=$(sed -n -e 's/.*ssl_certificate [[:space:]]\+\(.*\);/\1/p' "${nginx_conf_file}") local -r cert_expiry_date=$(openssl x509 -enddate -noout -in "${cert_file}" | sed -e 's/notAfter=//') if ! openssl x509 -checkend 100 -noout -in "${cert_file}" >/dev/null 2>&1; then fail "Certificate has expired. Certificate expired at ${cert_expiry_date}" local -r task_id=$(mysql -NB -uroot -ppassword -e "SELECT id FROM box.tasks WHERE type='checkCerts' ORDER BY id DESC LIMIT 1" 2>/dev/null) echo -e "\tPlease check /home/yellowtent/platformdata/logs/tasks/${task_id}.log for last cert renewal logs" echo -e "\tCommon issues include expiry of domain's API key OR incoming http port 80 not being open" exit 1 fi } function check_nginx() { local -r dashboard_domain=$(mysql -NB -uroot -ppassword -e "SELECT value FROM box.settings WHERE name='dashboard_domain'" 2>/dev/null) if ! systemctl is-active -q nginx; then fail "nginx is down. Removing extraneous dashboard domain configs ..." # we had a bug where old dashboard domain config file was kept around cd /home/yellowtent/platformdata/nginx/applications/dashboard/ && find . ! -name "my.${dashboard_domain}.conf" -type f -exec rm -f {} + # check if certificates are there. nginx will still start if certs are expired # IFS= makes sure it doesn't trim leading and trailing whitespace # -r prevents interpretation of \ escapes. find /home/yellowtent/platformdata/nginx -type f -name '*.conf' -print0 | while IFS= read -r -d '' conf; do cert_file=$(sed -ne 's/[[:blank:]]\+ssl_certificate[[:blank:]]\+\(.*\);/\1/p' "${conf}") key_file=$(sed -ne 's/[[:blank:]]\+ssl_certificate_key[[:blank:]]\+\(.*\);/\1/p' "${conf}") if [[ -n "${cert_file}" && ! -f "${cert_file}" ]]; then info "${cert_file} does not exist. removing ${conf}" rm -f "${conf}" fi if [[ -n "${key_file}" && ! -f "${key_file}" ]]; then info "${key_file} does not exist. removing ${conf}" rm -f "${conf}" fi done systemctl restart nginx if ! systemctl is-active -q nginx; then fail "nginx is still down, please investigate the error by inspecting 'journalctl -u nginx' and /var/log/nginx/error.log" exit 1 fi fi if ! wait_systemd_service nginx; then fail "nginx service keeps restarting, please investigate the error using 'journalctl -u nginx' and /var/log/nginx/error.log" exit 1 fi success "nginx is running" } # this confirms that https works properly without any proxy (cloudflare) involved function check_dashboard_site_loopback() { local -r dashboard_domain=$(mysql -NB -uroot -ppassword -e "SELECT value FROM box.settings WHERE name='dashboard_domain'" 2>/dev/null) if ! curl --fail -s --resolve "my.${dashboard_domain}:443:127.0.0.1" "https://my.${dashboard_domain}" >/dev/null; then fail "Could not load dashboard website with loopback check" exit 1 fi } function check_node() { expected_node_version="$(sed -ne 's/readonly node_version=\(.*\)/\1/p' /home/yellowtent/box/scripts/installer.sh)" current_node_version="$(node --version | tr -d '\n' | cut -c2-)" # strip trailing newline and 'v' prefix if [[ "${current_node_version}" != "${expected_node_version}" ]]; then fail "node version is incorrect. Expecting ${expected_node_version}. Got ${current_node_version}." echo "You can try the following to fix the problem:" echo " ln -sf /usr/local/node-${expected_node_version}/bin/node /usr/bin/node" echo " ln -sf /usr/local/node-${expected_node_version}/bin/npm /usr/bin/npm" echo " systemctl restart box" exit 1 fi success "node version is correct" } function check_docker() { if ! systemctl is-active -q docker; then info "Docker is down. Trying to restart docker ..." systemctl restart docker if ! systemctl is-active -q docker; then fail "Docker is still down, please investigate the error using 'journalctl -u docker'" exit 1 fi fi if ! wait_systemd_service docker; then fail "Docker keeps restarting, please investigate the error using 'journalctl -u docker'" exit 1 fi success "docker is running" } function check_node() { expected_node_version="$(sed -ne 's/readonly node_version=\(.*\)/\1/p' /home/yellowtent/box/scripts/installer.sh)" if command -v node &> /dev/null; then current_node_version="$(node --version | tr -d '\n' | cut -c2-)" # strip trailing newline and 'v' prefix else current_node_version="" fi if [[ "${current_node_version}" != "${expected_node_version}" ]]; then fail "node version is incorrect. Expecting ${expected_node_version}. Got ${current_node_version}." echo "You can try the following to fix the problem:" echo " ln -sf /usr/local/node-${expected_node_version}/bin/node /usr/bin/node" echo " ln -sf /usr/local/node-${expected_node_version}/bin/npm /usr/bin/npm" echo " systemctl restart box" exit 1 fi success "node version is correct" } function check_dashboard_site_domain() { local -r dashboard_domain=$(mysql -NB -uroot -ppassword -e "SELECT value FROM box.settings WHERE name='dashboard_domain'" 2>/dev/null) local -r domain_provider=$(mysql -NB -uroot -ppassword -e "SELECT provider FROM box.domains WHERE domain='${dashboard_domain}'" 2>/dev/null) # TODO: check ipv4 and ipv6 if ! output=$(curl --fail -s https://my.${dashboard_domain}); then fail "Could not load dashboard domain." if [[ "${domain_provider}" == "cloudflare" ]]; then echo "Maybe cloudflare proxying is not working. Delete the domain in Cloudflare dashboard and re-add it. This sometimes re-establishes the proxying" else echo "Hairpin NAT is not working. Please check if your router supports it" fi exit 1 fi if ! echo $output | grep -q "Cloudron Dashboard"; then fail "https://my.${dashboard_domain} is not the dashboard domain. Check if DNS is set properly to this server" host my.${dashboard_domain} 127.0.0.1 # could also result in cloudflare exit 1 fi success "Dashboard is reachable via domain name" } function check_expired_domain() { local -r dashboard_domain=$(mysql -NB -uroot -ppassword -e "SELECT value FROM box.settings WHERE name='dashboard_domain'" 2>/dev/null) if ! command -v whois &> /dev/null; then info "Domain ${dashboard_domain} expiry check skipped because whois is not installed. Run 'apt install whois' to check" exit 0 fi local -r expdate=$(whois ${dashboard_domain} | egrep -i 'Expiration Date:|Expires on|Expiry Date:' | head -1 | awk '{print $NF}') if [[ -z "${expdate}" ]]; then warn "Domain ${dashboard_domain} expiry check skipped because whois does not have this information" exit 0 fi local -r expdate_secs=$(date -d"$expdate" +%s) local -r curdate_secs="$(date +%s)" if (( curdate_secs > expdate_secs )); then fail "Domain ${dashboard_domain} appears to be expired" exit 1 fi success "Domain ${dashboard_domain} is valid and has not expired" } function use_external_dns() { local -r conf_file="/etc/unbound/unbound.conf.d/forward-everything.conf" info "To remove the forwarding, please delete $conf_file and 'systemctl restart unbound'" cat > $conf_file < $conf_file <