#!/bin/bash

# Strict mode: abort on failing commands (-e), on use of unset variables (-u)
# and when any stage of a pipeline fails (pipefail).
set -eu -o pipefail

# script requires root
if [[ ${EUID} -ne 0 ]]; then
    echo "This script should be run as root. Run with sudo"
    exit 1
fi

# ANSI escape sequences for colored/bold terminal output. They are stored with a
# literal backslash and only rendered by the 'echo -e' calls that print them.
readonly RED='\033[31m'
readonly GREEN='\033[32m'
readonly YELLOW='\033[33m'
readonly BOLD='\033[1m'
readonly DONE='\033[m'   # resets all attributes

# Paste service the diagnostics log is uploaded to
readonly PASTEBIN="https://paste.cloudron.io"
# Section separator used when writing the diagnostics log (rendered via 'echo -e')
readonly LINE="\n========================================================\n"
# Usage text; ${PASTEBIN} is expanded once, at definition time
readonly HELP_MESSAGE="
 Cloudron Support and Diagnostics Tool

 See https://docs.cloudron.io/troubleshooting for more information on troubleshooting.

 Options:
   --apply-db-migrations      Applies all pending DB migrations
   --check-db-migrations      Checks if the DB migrations are up to date
   --check-services           Checks if services/addons are running and healthy.
   --disable-dnssec           Disable DNSSEC
   --disable-ipv6             Disable IPv6. Use --reenable-ipv6 to re-enable.
   --enable-remote-support    Enable SSH Remote Access for the Cloudron support team
   --disable-remote-support   Disable SSH Remote Access for the Cloudron support team
   --fix-docker-version       Ensures the correct docker version is installed
   --owner-login              Login as owner
   --recreate-containers      Deletes all existing containers and recreates them without loss of data
   --recreate-docker          Deletes docker storage (containers and images) and recreates it without loss of data
   --send-diagnostics         Collects server diagnostics and uploads it to ${PASTEBIN}
   --troubleshoot             Dashboard down? Run tests to identify the potential problem
   --unbound-forward-dns      Unbound is the internal DNS server used for recursive DNS queries. This is only needed
                              if your network does not allow outbound DNS requests.
                              Options are 'google', 'cloudflare' or comma separated custom server (e.g 8.8.8.8,1.1.1.1).
   --help                     Show this message
"

# Prints "[OK]" (in green) followed by a tab and the message given as $1 on stdout.
# Backslash escapes inside the message are interpreted (echo -e).
function success() {
    local -r message="${1}"

    echo -e "[${GREEN}OK${DONE}]\t${message}"
}

# Prints the message given as $1 on stdout, indented by one tab so that it lines
# up with the text of the tagged [OK]/[WARN]/[FAIL] lines.
function info() {
    local -r message="${1}"

    echo -e "\t${message}"
}

# Prints "[WARN]" (in yellow) followed by a tab and the message given as $1 on stdout.
function warn() {
    local -r message="${1}"

    echo -e "[${YELLOW}WARN${DONE}]\t${message}"
}

# Prints "[FAIL]" (in red) followed by a tab and the message given as $1.
# Unlike the other output helpers this one writes to stderr.
function fail() {
    local -r message="${1}"

    echo -e "[${RED}FAIL${DONE}]\t${message}" 1>&2
}

# Shows the SSH access disclaimer and, once the user accepts it, appends the
# Cloudron support public key to the authorized_keys file of the support user.
# Exits with status 1 when the terms are not accepted.
function enable_remote_support() {
    local -r cloudron_support_public_key="ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIGWS+930b8QdzbchGljt3KSljH9wRhYvht8srrtQHdzg support@cloudron.io"
    local -r ssh_user="cloudron-support"
    local -r keys_file="/home/cloudron-support/.ssh/authorized_keys"
    local choice

    echo -e "
================= ${BOLD}SSH ACCESS DISCLAIMER${DONE} =================

By granting us SSH or remote access to your systems, you acknowledge and
agree to the following:

1. ${BOLD}Access to Customer Data${DONE}
   Our team may have visibility into customer data during the course of
   investigating or resolving issues. While we take all reasonable steps to
   respect your privacy and handle data securely, you acknowledge that such
   access may occur as part of the support process.

2. ${BOLD}No Liability for Data Loss or System Changes${DONE}
   Although we strive to exercise caution and due diligence, you acknowledge
   and accept that:
     ${BOLD}-${DONE} There is an inherent risk of data loss, corruption, or system
       disruption during troubleshooting or configuration changes.
     ${BOLD}-${DONE} We shall not be held liable for any loss of data, service
       downtime, or unintended consequences arising from our access or any
       actions taken during the support process.

3. ${BOLD}Backups and Safeguards${DONE}
   You are solely responsible for ensuring that up-to-date and complete
   backups of your systems and data exist prior to granting us access.

4. ${BOLD}Local Changes and Auto-Updates${DONE}
   Your system may receive automatic updates as part of regular maintenance or
   feature releases. Any local modifications or patches applied during support
   may be overwritten by future updates. ${BOLD}-${DONE} It is the customer's
   responsibility to track such changes and reapply them if necessary, or to
   coordinate with us for permanent integration where applicable.

5. ${BOLD}Consent to Proceed${DONE}
   By providing access, you confirm that you have read, understood, and agreed
   to the terms above and expressly authorize us to proceed with accessing
   your systems for support purposes.

=======================================================================
"
    read -r -p "Do you accept these terms? [y/N] " choice
    choice=${choice:-n}

    # anything other than a single y/Y counts as "no"
    [[ ! $choice =~ ^[Yy]$ ]] && exit 1

    echo -n "Terms accepted. Enabling Remote Access for the Cloudron support team..."
    mkdir -p "$(dirname "${keys_file}")"     # .ssh does not exist sometimes
    touch "${keys_file}"                     # required for concat to work
    # -F: the key is a literal string, not a regular expression
    if ! grep -qF -- "${cloudron_support_public_key}" "${keys_file}"; then
        # leading newline guards against an existing last line without a trailing newline
        echo -e "\n${cloudron_support_public_key}" >> "${keys_file}"
        chmod 600 "${keys_file}"
        chown "${ssh_user}" "${keys_file}"
    fi

    echo "Done"
}

# Removes the Cloudron support public key from the authorized_keys file of the
# support user. All other keys in the file are kept.
function disable_remote_support() {
    local -r cloudron_support_public_key="ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIGWS+930b8QdzbchGljt3KSljH9wRhYvht8srrtQHdzg support@cloudron.io"
    local -r keys_file="/home/cloudron-support/.ssh/authorized_keys"
    local remaining_keys

    echo -n "Disabling Remote Access for the Cloudron support team..."
    mkdir -p "$(dirname "${keys_file}")"     # .ssh does not exist sometimes
    touch "${keys_file}"                     # required for del below to work
    # The key is matched as a fixed string (-F) instead of being interpolated
    # into a regex, so characters like '+', '.' or '/' can never change the match.
    if grep -qF -- "${cloudron_support_public_key}" "${keys_file}"; then
        # grep -v exits with 1 when no other key is left; that is not an error
        remaining_keys=$(grep -vF -- "${cloudron_support_public_key}" "${keys_file}") || true
        # rewrite in place so that owner and mode of the file are preserved
        if [[ -n "${remaining_keys}" ]]; then
            printf '%s\n' "${remaining_keys}" > "${keys_file}"
        else
            : > "${keys_file}"
        fi
    fi

    echo "Done"
}

# Checks that a systemd service has been active for more than 10 seconds, i.e.
# that it is not stuck in a restart loop.
# Arguments: $1 - name of the systemd unit
# Returns:   0 as soon as the unit has been up for more than 10 seconds,
#            1 if that was not the case in any of the 3 attempts
function wait_systemd_service() {
    local -r service="$1"
    local attempt ts start now up_time  # keep scratch variables out of the global scope

    for attempt in {1..3}; do
        ts=$(systemctl show "${service}" -p ActiveEnterTimestamp | sed 's/ActiveEnterTimestamp=//g')
        start=$(date '+%s' --date="${ts}")
        now=$(date '+%s')

        up_time=$(( now - start ))
        (( up_time > 10 )) && return 0

        info "Service '${service}' just started $up_time secs ago, checking health again in 10s"
        sleep 11
    done

    return 1
}

# Verifies that the host MySQL service is active and not restart-looping.
# A stopped service is restarted once. Exits with status 1 when MySQL stays
# down or keeps restarting.
function check_host_mysql() {
    local -r log_hint="please investigate the error by inspecting /var/log/mysql/error.log"

    systemctl is-active -q mysql || {
        info "MySQL is down. Trying to restart MySQL ..."

        systemctl restart mysql

        systemctl is-active -q mysql || {
            fail "MySQL is still down, ${log_hint}"
            exit 1
        }
    }

    wait_systemd_service mysql || {
        fail "MySQL keeps restarting, ${log_hint}"
        exit 1
    }

    success "MySQL is running"
}

# Verifies that the box service is active and not restart-looping. When it is
# down, the setup/migration script is re-run and the service is started again.
# Exits with status 1 when box stays down or keeps restarting.
function check_box() {
    local -r version_file="/home/yellowtent/box/VERSION"
    local -r box_log="/home/yellowtent/platformdata/logs/box.log"
    local version='<unknown>'

    # explicit if/else instead of 'A && B || C'; an unreadable file still falls back to '<unknown>'
    if [[ -f "${version_file}" ]]; then
        version=$(cat "${version_file}") || version='<unknown>'
    fi

    if ! systemctl is-active -q box; then
        info "box v${version} is down. re-running migration script and restarting it ..."

        /home/yellowtent/box/setup/start.sh
        systemctl stop box # a restart sometimes doesn't restart, no idea
        systemctl start box

        if ! systemctl is-active -q box; then
            fail "box service is still down, please investigate the error by inspecting ${box_log}"
            exit 1
        fi
    fi

    if ! wait_systemd_service box; then
        fail "box service keeps restarting, please investigate the error by inspecting ${box_log}"
        exit 1
    fi

    success "box v${version} is running"
}

# Verifies that 'netplan get all' works. An empty configuration only produces a
# warning; a failing netplan command exits the script with status 1.
function check_netplan() {
    local output  # keep the captured configuration out of the global scope

    if ! output=$(netplan get all 2>/dev/null); then
        fail "netplan is not working"
        exit 1
    fi

    if [[ -z "${output}" ]]; then
        warn "netplan configuration is empty. this might be OK depending on your networking setup"
    else
        success "netplan is good"
    fi
}

# Generates a one-time ("ghost") password for the first active owner account,
# stores it in the ghosts_config setting and prints the login URL and credentials.
# Exits with status 1 when no active owner can be found.
function owner_login() {
    check_host_mysql >/dev/null

    local owner_username owner_password dashboard_domain

    # a failing query is treated like "no owner found" and reported below
    owner_username=$(mysql -NB -uroot -ppassword -e "SELECT username FROM box.users WHERE role='owner' AND username IS NOT NULL AND active=1 ORDER BY creationTime LIMIT 1" 2>/dev/null) || true
    if [[ -z "${owner_username}" ]]; then
        # without this guard an empty user name would be written into ghosts_config
        fail "Could not find an active owner account in the database"
        exit 1
    fi

    # assigned separately from 'local' so that a failing pwgen is not masked
    owner_password=$(pwgen -1s 12)
    dashboard_domain=$(mysql -NB -uroot -ppassword -e "SELECT value FROM box.settings WHERE name='dashboard_domain'" 2>/dev/null) || true
    mysql -NB -uroot -ppassword -e "INSERT INTO box.settings (name, value) VALUES ('ghosts_config', '{\"${owner_username}\":\"${owner_password}\"}') ON DUPLICATE KEY UPDATE name='ghosts_config', value='{\"${owner_username}\":\"${owner_password}\"}'" 2>/dev/null
    echo "Login at https://my.${dashboard_domain} as ${owner_username} / ${owner_password} . This password may only be used once."
}

# Collects system, docker, disk, service, network and firewall information into
# a log file, uploads it to ${PASTEBIN} and prints the resulting link.
# Every collector is best-effort: a failing or missing tool must not abort the
# report, because this command is run precisely when parts of the system are broken.
function send_diagnostics() {
    local -r log="/tmp/cloudron-support.log"
    local cloudron_version dashboard_domain has_ipv6 paste_key

    echo -n "Generating Cloudron Support stats..."

    rm -rf "${log}"

    echo -e "${LINE}Linux${LINE}" >> "${log}"
    uname -nar &>> "${log}" || true

    echo -e "${LINE}Ubuntu${LINE}" >> "${log}"
    lsb_release -a &>> "${log}" || true

    echo -e "${LINE}Cloudron${LINE}" >> "${log}"
    cloudron_version=$(cat /home/yellowtent/box/VERSION || true)
    echo -e "Cloudron version: ${cloudron_version}" >> "${log}"
    dashboard_domain=$(mysql -NB -uroot -ppassword -e "SELECT value FROM box.settings WHERE name='dashboard_domain'" 2>/dev/null || true)
    echo -e "Dashboard domain: ${dashboard_domain}" >> "${log}"

    echo -e "${LINE}Docker${LINE}" >> "${log}"
    if ! timeout --kill-after 10s 15s docker system info &>> "${log}"; then
        echo -e "Docker (system info) is not responding" >> "${log}"
    fi

    if ! timeout --kill-after 10s 15s docker ps -a &>> "${log}"; then
        echo -e "Docker (ps) is not responding" >> "${log}"
    fi

    echo -e "${LINE}Filesystem stats${LINE}" >> "${log}"
    if ! timeout --kill-after 10s 15s df -h &>> "${log}"; then
        echo -e "df is not responding" >> "${log}"
    fi

    echo -e "${LINE}Appsdata stats${LINE}" >> "${log}"
    du -hcsL /home/yellowtent/appsdata/* &>> "${log}" || true

    echo -e "${LINE}Boxdata stats${LINE}" >> "${log}"
    du -hcsL /home/yellowtent/boxdata/* &>> "${log}" || true

    echo -e "${LINE}Backup stats (possibly misleading)${LINE}" >> "${log}"
    du -hcsL /var/backups/* &>> "${log}" || true

    echo -e "${LINE}System daemon status${LINE}" >> "${log}"
    # systemctl status exits non-zero when any of the listed units is not running
    systemctl status --lines=100 --no-pager --full box mysql unbound cloudron-syslog nginx docker &>> "${log}" || true

    echo -e "${LINE}Box logs${LINE}" >> "${log}"
    tail -n 100 /home/yellowtent/platformdata/logs/box.log &>> "${log}" || true

    echo -e "${LINE}Interface Info${LINE}" >> "${log}"
    ip addr &>> "${log}" || true

    echo -e "${LINE}Firewall chains${LINE}" >> "${log}"
    iptables -L &>> "${log}" || true
    if cat /proc/net/if_inet6 >/dev/null 2>&1; then
        has_ipv6="yes"
    else
        has_ipv6="no"
    fi
    echo -e "IPv6: ${has_ipv6}" >> "${log}"
    if [[ "${has_ipv6}" == "yes" ]]; then
        ip6tables -L &>> "${log}" || true
    fi

    echo "Done"

    echo -n "Uploading information..."
    # the paste service answers with a JSON document whose 'key' identifies the paste
    if ! paste_key=$(curl -X POST "${PASTEBIN}/documents" --silent --data-binary "@${log}" | python3 -c "import sys, json; print(json.load(sys.stdin)['key'])") || [[ -z "${paste_key}" ]]; then
        echo "Failed"
        fail "Could not upload the diagnostics to ${PASTEBIN}. Please send the file ${log} to support@cloudron.io instead"
        exit 1
    fi
    echo "Done"

    echo -e "\nPlease email the following link to support@cloudron.io : ${PASTEBIN}/${paste_key}"
}

# Verifies that host names can be resolved with the system resolver. When
# resolution fails, common systemd-resolved misconfigurations are reported as
# warnings before the script exits with status 1.
function check_dns() {
    local target  # symlink target of /etc/resolv.conf; kept out of the global scope

    if host cloudron.io &>/dev/null; then
        success "DNS is resolving via systemd-resolved"
        return
    fi

    if ! systemctl is-active -q systemd-resolved; then
        warn "systemd-resolved is not in use. see 'systemctl status systemd-resolved'"
    fi

    if [[ -L /etc/resolv.conf ]]; then
        target=$(readlink /etc/resolv.conf)
        if [[ "$target" != *"/run/systemd/resolve/stub-resolv.conf" ]]; then
            warn "/etc/resolv.conf is symlinked to $target instead of '../run/systemd/resolve/stub-resolv.conf'"
        fi
    else
        warn "/etc/resolv.conf is not symlinked to '../run/systemd/resolve/stub-resolv.conf'"
    fi

    if ! grep -q "^nameserver 127.0.0.53" /etc/resolv.conf; then
        warn "/etc/resolv.conf is not using systemd-resolved. it is missing the line 'nameserver 127.0.0.53'"
    fi

    fail "DNS is not resolving"
    host cloudron.io || true   # show the resolver's own error message to the user
    exit 1
}

# Verifies that the unbound service is active, not restart-looping and able to
# resolve names on 127.0.0.150. A stopped service is restarted once.
# Exits with status 1 on any failure, with a hint on where to look next.
function check_unbound() {
    if ! systemctl is-active -q unbound; then
        info "unbound is down. restarting to see if it fixes it" # unbound-anchor is part of ExecStartPre
        systemctl restart unbound

        if ! systemctl is-active -q unbound; then
            fail "unbound is still down, please investigate the error using 'journalctl -u unbound'"
            exit 1
        fi
    fi

    if ! wait_systemd_service unbound; then
        fail "unbound service keeps restarting, please investigate the error using 'journalctl -u unbound'"
        exit 1
    fi

    if ! host cloudron.io 127.0.0.150 &>/dev/null; then
        # querying a root server directly tells apart "outbound DNS is blocked" from "unbound is broken"
        if ! host -t NS . 198.41.0.4 &>/dev/null; then # the IP is DNS A root server IP
            fail "Unbound is not resolving. Outbound DNS requests are blocked. Use 'cloudron-support --unbound-forward-dns <dns>' to forward DNS requests."
        else
            fail "Unbound is not resolving. However, Outbound DNS requests are not blocked. Investigate output of 'journalctl -u unbound'"
        fi

        # run the query again, this time showing its output to the user
        host cloudron.io 127.0.0.150
        exit 1
    fi

    success "unbound is running"
}

# Verifies that the TLS certificate referenced by the dashboard's nginx config
# is still valid for at least the next 100 seconds. On failure the expiry date
# and the log file of the most recent checkCerts task are printed and the
# script exits with status 1.
function check_dashboard_cert() {
    local -r domain=$(mysql -NB -uroot -ppassword -e "SELECT value FROM box.settings WHERE name='dashboard_domain'" 2>/dev/null)
    local -r dashboard_conf="/home/yellowtent/platformdata/nginx/applications/dashboard/my.${domain}.conf"
    local -r cert_path=$(sed -n -e 's/.*ssl_certificate [[:space:]]\+\(.*\);/\1/p' "${dashboard_conf}")

    local -r not_after=$(openssl x509 -enddate -noout -in "${cert_path}" | sed -e 's/notAfter=//')

    if openssl x509 -checkend 100 -noout -in "${cert_path}" >/dev/null 2>&1; then
        success "dashboard cert is valid"
        return
    fi

    fail "Certificate has expired. Certificate expired at ${not_after}"

    # point the user at the log of the latest certificate renewal task
    local -r last_task=$(mysql -NB -uroot -ppassword -e "SELECT id FROM box.tasks WHERE type='checkCerts' ORDER BY id DESC LIMIT 1" 2>/dev/null)
    echo -e "\tPlease check /home/yellowtent/platformdata/logs/tasks/${last_task}.log for last cert renewal logs"
    echo -e "\tCommon issues include expiry of domain's API key OR incoming http port 80 not being open"
    exit 1
}

# Verifies that nginx can reload its configuration. When it cannot, stale
# dashboard configs and configs that reference missing certificate/key files
# are removed and nginx is restarted. Exits with status 1 when nginx stays down
# or keeps restarting.
function check_nginx() {
    local -r dashboard_domain=$(mysql -NB -uroot -ppassword -e "SELECT value FROM box.settings WHERE name='dashboard_domain'" 2>/dev/null)
    local -r nginx_dir="/home/yellowtent/platformdata/nginx"
    local -r dashboard_conf_dir="${nginx_dir}/applications/dashboard"
    local conf cert_file key_file

    # it is possible nginx is running but can't be restarted
    if ! systemctl reload -q nginx; then
        fail "nginx is down. Removing extraneous dashboard domain configs ..."

        # we had a bug where old dashboard domain config file was kept around.
        # The directory is passed to find instead of using 'cd', so the working directory
        # of the script stays untouched. With an unknown dashboard domain nothing is
        # deleted, since every config would count as "extraneous".
        if [[ -n "${dashboard_domain}" && -d "${dashboard_conf_dir}" ]]; then
            find "${dashboard_conf_dir}" ! -name "my.${dashboard_domain}.conf" -type f -exec rm -f {} +
        fi

        # check if certificates are there. nginx will still start if certs are expired
        # IFS= makes sure it doesn't trim leading and trailing whitespace
        # -r prevents interpretation of \ escapes.
        find "${nginx_dir}" -type f -name '*.conf' -print0 | while IFS= read -r -d '' conf; do
            cert_file=$(sed -ne 's/[[:blank:]]\+ssl_certificate[[:blank:]]\+\(.*\);/\1/p' "${conf}")
            key_file=$(sed -ne 's/[[:blank:]]\+ssl_certificate_key[[:blank:]]\+\(.*\);/\1/p' "${conf}")

            if [[ -n "${cert_file}" && ! -f "${cert_file}" ]]; then
                info "${cert_file} does not exist. removing ${conf}"
                rm -f "${conf}"
            fi

            if [[ -n "${key_file}" && ! -f "${key_file}" ]]; then
                info "${key_file} does not exist. removing ${conf}"
                rm -f "${conf}"
            fi
        done

        systemctl restart nginx

        if ! systemctl is-active -q nginx; then
            fail "nginx is still down, please investigate the error by inspecting 'journalctl -u nginx' and /var/log/nginx/error.log"
            exit 1
        fi
    fi

    if ! wait_systemd_service nginx; then
        fail "nginx service keeps restarting, please investigate the error using 'journalctl -u nginx' and /var/log/nginx/error.log"
        exit 1
    fi

    success "nginx is running"
}

# this confirms that https works properly without any proxy (cloudflare) involved
# Loads the dashboard over https with its host name pinned to 127.0.0.1, so the
# request never leaves the server. Exits with status 1 when that fails.
function check_dashboard_site_loopback() {
    local -r domain=$(mysql -NB -uroot -ppassword -e "SELECT value FROM box.settings WHERE name='dashboard_domain'" 2>/dev/null)
    local -r dashboard_host="my.${domain}"

    if curl --fail -s --resolve "${dashboard_host}:443:127.0.0.1" "https://${dashboard_host}" >/dev/null; then
        success "dashboard is reachable via loopback"
        return
    fi

    fail "Could not load dashboard website with loopback check"
    exit 1
}

# Verifies that the installed node version is the one pinned in the installer
# script. Exits with status 1 and prints repair instructions on a mismatch.
# NOTE(review): this file defines check_node a second time further down; bash
# keeps the last definition, so that later one is what actually runs.
function check_node() {
    local expected_node_version current_node_version

    expected_node_version="$(sed -ne 's/readonly node_version=\(.*\)/\1/p' /home/yellowtent/box/scripts/installer.sh)"
    # a missing node binary must be reported as a mismatch instead of aborting the script
    if command -v node &> /dev/null; then
        current_node_version="$(node --version | tr -d '\n' | cut -c2-)" # strip trailing newline and 'v' prefix
    else
        current_node_version="<not found>"
    fi

    if [[ "${current_node_version}" != "${expected_node_version}" ]]; then
        fail "node version is incorrect. Expecting ${expected_node_version}. Got ${current_node_version}."
        echo "You can try the following to fix the problem:"
        echo "    ln -sf /usr/local/node-${expected_node_version}/bin/node /usr/bin/node"
        echo "    ln -sf /usr/local/node-${expected_node_version}/bin/npm /usr/bin/npm"
        echo "    apt remove -y nodejs"
        echo "    systemctl restart box"
        exit 1
    fi

    success "node version is correct"
}

# Prints the sysctl commands, and the matching /etc/sysctl.conf lines, that
# disable IPv6 on every network interface that is not a loopback, virtual,
# bridge, tunnel or wireless device (judged by the interface name prefix).
function print_ipv6_disable_howto() {
    local -r skipped_prefixes='^(lo|veth|docker|virbr|br|vmnet|tun|tap|wl|we)'
    local -a ifaces=()
    local iface_path iface

    # enumerate the interfaces once, with a glob instead of parsing the output of ls
    for iface_path in /sys/class/net/*; do
        [[ -e "${iface_path}" ]] || continue   # glob did not match anything
        iface="${iface_path##*/}"
        [[ "${iface}" =~ ${skipped_prefixes} ]] && continue
        ifaces+=("${iface}")
    done

    echo "Instead of disabling IPv6 globally, you can disable it at an interface level."
    # the ${arr[@]+...} form keeps 'set -u' happy with an empty array on older bash versions
    for iface in ${ifaces[@]+"${ifaces[@]}"}; do
        echo -e "\tsysctl -w net.ipv6.conf.${iface}.disable_ipv6=1"
    done

    echo "For the above configuration to persist across reboots, you have to add below to /etc/sysctl.conf"
    for iface in ${ifaces[@]+"${ifaces[@]}"}; do
        echo -e "\tnet.ipv6.conf.${iface}.disable_ipv6=1"
    done
}

# Verifies the IPv6 setup: IPv6 must be enabled in the kernel and, if any
# physical interface has a non link-local IPv6 address, api.cloudron.io must be
# reachable over IPv6. Exits with status 1 (after printing how to disable IPv6
# per interface) when one of these checks fails.
function check_ipv6() {
    local -r skipped_prefixes='^(lo|veth|docker|virbr|br|vmnet|tun|tap|wl|we)'
    local ipv6_disable iface_path iface ipv6
    local has_ipv6_address=0

    ipv6_disable=$(cat /sys/module/ipv6/parameters/disable)
    if [[ "${ipv6_disable}" == "1" ]]; then
        fail "IPv6 is disabled in kernel. Cloudron requires IPv6 in kernel"
        print_ipv6_disable_howto
        exit 1
    fi

    # check if server has IPv6 address
    # (interfaces are enumerated with a glob instead of parsing the output of ls)
    for iface_path in /sys/class/net/*; do
        [[ -e "${iface_path}" ]] || continue   # glob did not match anything
        iface="${iface_path##*/}"
        [[ "${iface}" =~ ${skipped_prefixes} ]] && continue

        # fe80::/10 addresses are link-local and do not count as a usable address
        if ipv6=$(ip -6 addr show dev "${iface}" | grep -o 'inet6 [^ ]*' | awk '{print $2}' | grep -v '^fe80'); then
            [[ -n "${ipv6}" ]] && has_ipv6_address=1
        fi
    done

    if [[ "${has_ipv6_address}" == "0" ]]; then
        success "IPv6 is enabled in kernel. No public IPv6 address"
        return
    fi

    if ! ping6 -q -c 1 api.cloudron.io >/dev/null 2>&1; then
        fail "Server has an IPv6 address but api.cloudron.io is unreachable via IPv6 (ping6 -q -c 1 api.cloudron.io)"
        print_ipv6_disable_howto
        exit 1
    fi

    success "IPv6 is enabled and public IPv6 address is working"
}

# Verifies that the docker service is active and not restart-looping.
# A stopped service is restarted once. Exits with status 1 when docker stays
# down or keeps restarting.
function check_docker() {
    local -r journal_hint="please investigate the error using 'journalctl -u docker'"

    systemctl is-active -q docker || {
        info "Docker is down. Trying to restart docker ..."
        systemctl restart docker

        systemctl is-active -q docker || {
            fail "Docker is still down, ${journal_hint}"
            exit 1
        }
    }

    wait_systemd_service docker || {
        fail "Docker keeps restarting, ${journal_hint}"
        exit 1
    }

    success "docker is running"
}

# Verifies that the installed docker client version is the one pinned in the
# installer script. Exits with status 1 on a mismatch.
function check_docker_version() {
    local expected_docker_version current_docker_version

    expected_docker_version="$(sed -ne 's/readonly docker_version="\(.*\)"/\1/p' /home/yellowtent/box/scripts/installer.sh)"
    if command -v docker &> /dev/null; then
        # '|| true': a non-zero exit of 'docker version' (presumably when the daemon cannot be
        # reached - TODO confirm) must not abort the script before the mismatch is reported.
        # The template is quoted so the shell never interprets the braces.
        current_docker_version="$(docker version --format '{{.Client.Version}}' || true)"
    else
        current_docker_version="<not found>"
    fi

    if [[ "${current_docker_version}" != "${expected_docker_version}" ]]; then
        fail "docker version is incorrect. Expecting ${expected_docker_version}. Got ${current_docker_version}."
        echo "Run cloudron-support --fix-docker-version"
        exit 1
    fi

    success "docker version is correct"
}

# Compares the installed node version with the one pinned in the installer
# script. A missing node binary counts as a mismatch. On a mismatch the repair
# steps are printed and the script exits with status 1.
function check_node() {
    expected_node_version="$(sed -ne 's/readonly node_version=\(.*\)/\1/p' /home/yellowtent/box/scripts/installer.sh)"

    current_node_version="<not found>"
    if command -v node &> /dev/null; then
        # drop the trailing newline and the leading 'v' of e.g. "v20.1.0"
        current_node_version="$(node --version | tr -d '\n' | cut -c2-)"
    fi

    if [[ "${current_node_version}" == "${expected_node_version}" ]]; then
        success "node version is correct"
        return
    fi

    fail "node version is incorrect. Expecting ${expected_node_version}. Got ${current_node_version}."
    printf '%s\n' \
        "You can try the following to fix the problem:" \
        "    ln -sf /usr/local/node-${expected_node_version}/bin/node /usr/bin/node" \
        "    ln -sf /usr/local/node-${expected_node_version}/bin/npm /usr/bin/npm" \
        "    systemctl restart box"
    exit 1
}

# Loads the dashboard through its public domain name and verifies that the
# response really is the Cloudron dashboard. Exits with status 1 when the page
# cannot be loaded or when some other site answers.
function check_dashboard_site_domain() {
    local -r dashboard_domain=$(mysql -NB -uroot -ppassword -e "SELECT value FROM box.settings WHERE name='dashboard_domain'" 2>/dev/null)
    local -r domain_provider=$(mysql -NB -uroot -ppassword -e "SELECT provider FROM box.domains WHERE domain='${dashboard_domain}'" 2>/dev/null)
    local output

    # TODO: check ipv4 and ipv6 separately
    if ! output=$(curl --fail --connect-timeout 10 --max-time 20 -s "https://my.${dashboard_domain}"); then
        fail "Could not load dashboard domain."
        if [[ "${domain_provider}" == "cloudflare" ]]; then
            echo "Maybe cloudflare proxying is not working. Delete the domain in Cloudflare dashboard and re-add it. This sometimes re-establishes the proxying"
        else
            echo "Hairpin NAT is not working. Please check if your router supports it"
        fi
        exit 1
    fi

    # Match inside the shell instead of 'echo $output | grep -q': the unquoted echo was subject
    # to word splitting and globbing, and with pipefail a large page could make the pipeline
    # fail (echo killed by SIGPIPE once grep -q exits early) although the text was found.
    if [[ "${output}" != *"Cloudron Dashboard"* ]]; then
        fail "https://my.${dashboard_domain} is not the dashboard domain. Check if DNS is set properly to this server"
        host "my.${dashboard_domain}" 127.0.0.53 # could also result in cloudflare
        exit 1
    fi

    success "Dashboard is reachable via domain name"
}

# Uses whois to check whether the registration of the dashboard domain has
# expired. The check is skipped (without failing) when whois is not installed,
# has no expiry information or reports a date that cannot be parsed.
# Exits with status 1 when the domain has expired.
function check_expired_domain() {
    local -r dashboard_domain=$(mysql -NB -uroot -ppassword -e "SELECT value FROM box.settings WHERE name='dashboard_domain'" 2>/dev/null)
    local expdate expdate_secs curdate_secs

    if ! command -v whois &> /dev/null; then
        info "Domain ${dashboard_domain} expiry check skipped because whois is not installed. Run 'apt install whois' to check"
        return
    fi

    # a failing whois lookup is handled like "no expiry information" below
    expdate=$(whois "${dashboard_domain}" | grep -E -i 'Expiration Date:|Expires on|Expiry Date:' | head -1 | awk '{print $NF}') || true
    if [[ -z "${expdate}" ]]; then
        warn "Domain ${dashboard_domain} expiry check skipped because whois does not have this information"
        return
    fi

    # Registries use many date formats. Without this guard an unparsable date left the
    # comparison below with an empty operand, which reported the domain as not expired.
    if ! expdate_secs=$(date -d "${expdate}" +%s 2>/dev/null); then
        warn "Domain ${dashboard_domain} expiry check skipped because the expiry date '${expdate}' could not be parsed"
        return
    fi
    curdate_secs="$(date +%s)"

    if (( curdate_secs > expdate_secs )); then
        fail "Domain ${dashboard_domain} appears to be expired"
        exit 1
    fi

    success "Domain ${dashboard_domain} is valid and has not expired"
}

# Configures unbound to forward all DNS queries to upstream resolvers and
# restarts it.
# Arguments: $1 - 'google', 'cloudflare' or a comma separated list of DNS server IPs
function unbound_forward_dns() {
    local -r conf_file="/etc/unbound/unbound.conf.d/forward-everything.conf"
    local ns_list forward_addrs

    # an empty list would write a config with an empty forward-addr entry
    if [[ -z "${1:-}" ]]; then
        fail "No DNS servers given. Use 'google', 'cloudflare' or a comma separated list of DNS server IPs"
        exit 1
    fi

    info "To remove the forwarding, please delete $conf_file and 'systemctl restart unbound'"

    case "$1" in
      google) ns_list="8.8.8.8,8.8.4.4";;     # https://developers.google.com/speed/public-dns
      cloudflare) ns_list="1.1.1.1,1.0.0.1";; # https://developers.cloudflare.com/1.1.1.1/ip-addresses/
      *) ns_list="$1";;
    esac

    # one 'forward-addr:' line per server
    forward_addrs=$(printf '%s\n' "${ns_list}" | tr ',' '\n' | sed 's/^/    forward-addr: /')

    cat > "${conf_file}" <<EOF
forward-zone:
    name: "."
${forward_addrs}
EOF

    systemctl restart unbound

    success "Forwarded all DNS requests to ${ns_list}"
}

# Writes an unbound config snippet that turns on val-permissive-mode and
# restarts unbound so that it takes effect.
function disable_dnssec() {
    local -r conf_file="/etc/unbound/unbound.conf.d/disable-dnssec.conf"

    warn "To reenable DNSSEC, please delete $conf_file and 'systemctl restart unbound'"

    # the two lines of the snippet, written in one go
    printf '%s\n' \
        "server:" \
        "  val-permissive-mode: yes" > "${conf_file}"

    systemctl restart unbound

    success "DNSSEC Disabled"
}

# Prints a short summary of the machine: hardware vendor/product, kernel and
# Ubuntu version, execution environment, CPU, RAM and free space on /.
# Values that are not available on this machine are shown as '<unknown>' or
# left empty instead of aborting the script.
function print_system() {
    local vendor product ubuntu_codename ubuntu_version linux_version
    local proc_count proc_name env_type ram_kb disk_size

    # the DMI files do not exist on every platform (e.g. some containers and ARM boards)
    vendor=$(cat /sys/devices/virtual/dmi/id/sys_vendor 2>/dev/null || echo "<unknown>")
    product=$(cat /sys/devices/virtual/dmi/id/product_name 2>/dev/null || echo "<unknown>")
    echo "Vendor: ${vendor} Product: ${product}"
    ubuntu_codename=$(lsb_release -cs)
    ubuntu_version=$(lsb_release -rs)
    linux_version=$(uname -r)
    echo "Linux: ${linux_version}"
    echo "Ubuntu: ${ubuntu_codename} ${ubuntu_version}"
    # grep -c still prints 0 but exits non-zero when nothing matches
    proc_count=$(grep -c ^processor /proc/cpuinfo || true)
    # anchored so that a "BIOS Model name:" line is not picked up; first match only
    proc_name=$(lscpu | sed -n -r 's/^\s*Model name:\s+//p' | head -n1 || true)
    env_type=$(systemd-detect-virt || echo none) # systemd-detect-virt --list gives all the possible options
    echo "Execution environment: ${env_type}"
    echo "Processor: ${proc_name} x ${proc_count}"
    ram_kb=$(grep MemTotal /proc/meminfo | awk '{print $2}')
    echo "RAM: ${ram_kb}KB"
    disk_size=$(LC_ALL=C df -h --output=source,avail / | tail -n1)
    echo "Disk: ${disk_size}"
}

# Runs all health checks in order. The basic system checks always run; the
# dashboard related checks only run once a dashboard domain has been set up.
# Each check exits the script on its own when it finds a fatal problem.
function troubleshoot() {
    local step

    # note: disk space test has already been run globally
    # check_unbound (the last basic check) is less fatal after 8.0
    for step in print_system check_node check_ipv6 check_docker check_docker_version \
                check_host_mysql check_netplan check_dns check_unbound; do
        "${step}"
    done

    local -r dashboard_domain=$(mysql -NB -uroot -ppassword -e "SELECT value FROM box.settings WHERE name='dashboard_domain'" 2>/dev/null)

    if [[ -n "${dashboard_domain}" ]]; then
        check_nginx # requires mysql to be checked
        check_dashboard_cert
        check_dashboard_site_loopback # checks website via loopback
        check_db_migrations
        check_services
        check_box

        check_dashboard_site_domain # check website via domain name
        check_expired_domain
        return
    fi

    # no dashboard domain yet: the server has not been set up
    [[ -f /home/yellowtent/box/VERSION ]] && version=$(cat /home/yellowtent/box/VERSION) || version='<unknown>'
    warn "Cloudron v${version} has not been set up yet. Visit https://<IP> to set up the dashboard."
}

# Interactively offers three ways to reclaim disk space: truncating log files,
# removing unused docker images and deleting files older than a day from /tmp
# and /run inside the running containers. Each step only runs after the user
# answered with y/Y.
function cleanup_disk_space() {
    local choice container

    read -r -p "Truncate log files to reclaim space? [y/N] " choice
    choice=${choice:-n}
    if [[ $choice =~ ^[Yy]$ ]]; then
        truncate -s0 /home/yellowtent/platformdata/logs/*/*.log
        rm -f /home/yellowtent/platformdata/logs/*.log.* # delete the log.1, log.2 etc
    fi

    read -r -p "Prune docker system resources to reclaim space? [y/N] " choice
    choice=${choice:-n}
    if [[ $choice =~ ^[Yy]$ ]]; then
        # 'docker image prune' is the prune command; the previous 'docker images prune' only
        # lists images and failed on the flags, which '|| true' then hid
        docker image prune -fa || true
    fi

    read -r -p "Prune docker volumes to reclaim space? [y/N] " choice
    choice=${choice:-n}
    if [[ $choice =~ ^[Yy]$ ]]; then
        # container IDs contain no whitespace, so word splitting the list is safe here
        for container in $(docker ps --format "{{.ID}}"); do
            docker exec "$container" find /tmp -type f -mtime +1 -delete || true
            docker exec "$container" find /run -type f -mtime +1 -delete || true
        done
    fi
}

# Makes sure there is enough free disk space to work with. When / has less than
# 10mb available, an interactive cleanup is offered first. Exits with status 1
# when / is still full afterwards or when /tmp has less than 5mb available.
function check_disk_space() {
    local -r min_root_kb="10240"   # 10mb on /
    local -r min_tmp_kb="5120"     # 5mb on /tmp, needed for the log file

    # df prints a header line followed by the value, hence 'sed -n 2p'
    if [[ "$(df --output="avail" / | sed -n 2p)" -lt "${min_root_kb}" ]]; then
        echo "No more space left on / (see df -h output)"
        cleanup_disk_space
    fi

    # measure again, the cleanup above may have freed enough space
    if [[ "$(df --output="avail" / | sed -n 2p)" -lt "${min_root_kb}" ]]; then
        echo "Still no space despite cleaning up. If you have backups (/var/backups) on this disk, delete old backups to free some space"
        exit 1
    fi

    if [[ "$(df --output="avail" /tmp | sed -n 2p)" -lt "${min_tmp_kb}" ]]; then
        echo "Not enough space left on /tmp"
        echo "Free up some space first by deleting files from /tmp"
        exit 1
    fi
}

# Forces the platform to re-create all addon and app containers by rewriting
# the version in INFRA_VERSION and restarting box, then waits until the box log
# reports "platform is ready".
# Globals: logfile (read) - path of the box log; set by the caller
function do_recreate_containers() {
    local line_count

    echo -e "Re-creating addon and app containers\n"
    info "Follow re-create logs in a second terminal with:"
    info "$ tail -f ${logfile}"
    echo ""

    echo -n "This takes a while ."
    # remember how long the log is now, so that only lines written after the restart are searched
    line_count=$(wc -l < "${logfile}")
    sed -e 's/"version": ".*",/"version":"48.0.0",/' -i /home/yellowtent/platformdata/INFRA_VERSION
    systemctl restart -q box # will re-create docker network

    # 'tail -n +K' starts AT line K, so the first new line is line_count + 1. Starting at
    # line_count re-read the last old line and could match a "platform is ready" from the
    # previous start. grep reads from a process substitution so that tail being cut short
    # by grep -q cannot fail the check under pipefail.
    until grep -q "platform is ready" < <(tail -n "+$(( line_count + 1 ))" "${logfile}"); do
        echo -n "."
        sleep 2
    done

    echo -e "\n\nDone! Addon containers successfully re-created. The apps in the dashboard will"
    echo -e "say 'Configuring (Queued)'. They will come up in a short while.\n"
}

# Asks for confirmation and then re-creates all containers.
# Exits with status 1 unless the user answers with y/Y.
# Globals: logfile (written, readonly) - box log path, read by do_recreate_containers
function recreate_containers() {
    readonly logfile="/home/yellowtent/platformdata/logs/box.log"

    echo "This will re-create all the containers. Apps will go down for a while. No data will be lost."
    read -p "Do you want to proceed? [y/N] " choice
    choice=${choice:-n}

    case "${choice}" in
        [Yy]) ;;        # confirmed, carry on
        *) exit 1 ;;
    esac

    do_recreate_containers
}

# Pulls every addon image listed in infra_version.js. Each image is first
# pulled through the IPv4-only registry host and, if that fails, through the
# IPv6-only one; both are retried until one of them succeeds. The pulled image
# is tagged with its canonical name (without the digest) and the registry
# specific reference is removed again.
function download_docker_images() {
    # keep loop and scratch variables out of the global scope
    local images image_ref ipv4_image_ref ipv6_image_ref

    info "Downloading addon images"

    images=$(node --input-type=module -e "import i from '/home/yellowtent/box/src/infra_version.js'; console.log(Object.keys(i.images).map(x => i.images[x]).join(' '));")

    # image references contain no whitespace; splitting the space separated list is intended
    for image_ref in ${images}; do
        info "Pulling ${image_ref}"

        ipv4_image_ref="${image_ref/registry.docker.com/registry.ipv4.docker.com}"
        ipv6_image_ref="${image_ref/registry.docker.com/registry.ipv6.docker.com}"

        while true; do
            # try IPv4 first
            if timeout --kill-after=10s 1200s docker pull "${ipv4_image_ref}"; then
                docker tag "${ipv4_image_ref}" "${image_ref%@sha256:*}"
                docker rmi "${ipv4_image_ref}" >/dev/null 2>&1 || true
                break
            fi

            info "Could not pull ${ipv4_image_ref}, trying IPv6"

            # fallback to IPv6
            if timeout --kill-after=10s 1200s docker pull "${ipv6_image_ref}"; then
                docker tag "${ipv6_image_ref}" "${image_ref%@sha256:*}"
                docker rmi "${ipv6_image_ref}" >/dev/null 2>&1 || true
                break
            fi

            info "Could not pull ${ipv6_image_ref} either, retrying in 10s"
            sleep 10
        done
    done
}

# Asks whether the server should be rebooted. Reboots and exits on y/Y,
# exits with status 1 on any other answer. This function never returns.
function ask_reboot() {
    read -p "Do you want to reboot ? [y/N] " choice
    choice=${choice:-n}

    case "${choice}" in
        [Yy])
            reboot
            exit
            ;;
        *)
            exit 1
            ;;
    esac
}

# Entry point for --recreate-docker: wipes the docker and containerd storage,
# re-downloads the addon images and re-creates all containers.
#
# The work is split into stages that are recorded in ${stagefile}, so the
# command can be re-run after a reboot and continue where it stopped:
#   (no stage file)    -> ask for confirmation, stop and disable the services
#   clearing_storage   -> delete the contents of the docker/containerd roots
#   cleared_storage    -> start docker, pull the images, then ask for a reboot
#   downloaded_images  -> re-enable box and re-create the containers
# The stage file is removed once the last stage has completed.
function recreate_docker() {
    # 'readonly' without 'local' makes these globals. logfile is not read in
    # this function; presumably do_recreate_containers consumes it - confirm.
    readonly logfile="/home/yellowtent/platformdata/logs/box.log"
    readonly stagefile="/home/yellowtent/platformdata/recreate-docker-stage"
    readonly containerd_root="/var/lib/containerd"

    # Ask the docker daemon for its data directory; fall back to the default
    # location when the query fails or prints nothing.
    # NOTE(review): the first stage below stops and disables docker, so on a
    # re-run this query presumably fails and the default path is used even if
    # a different root is configured - confirm.
    if ! docker_root=$(docker info -f '{{ .DockerRootDir }}' 2>/dev/null); then
        warn "Unable to detect docker root. Assuming /var/lib/docker"
    fi
    [[ -z "${docker_root}" ]] && docker_root="/var/lib/docker"

    # First invocation only: no stage has been recorded yet.
    if [[ ! -e "${stagefile}" ]]; then
        echo "Use this command when docker storage (at $docker_root) is corrupt. It will delete"
        echo "the docker storage, re-download docker images and re-create containers. Dashboard and apps"
        echo -e "will be unreachable for a while. No data will be lost.\n"
        echo -e "The server may have to be rebooted twice for this. If so, re-run this command after every reboot.\n"
        read -p "Do you want to proceed? [y/N] " choice
        choice=${choice:-n}

        [[ ! $choice =~ ^[Yy]$ ]] && exit 1

        info "Stopping box and docker"
        # failures to stop/disable are tolerated so the procedure can continue
        systemctl stop -q box docker containerd docker.socket || true
        systemctl disable -q box docker containerd docker.socket || true

        echo -e "clearing_storage" > "${stagefile}" # init
    fi

    if grep -q "clearing_storage" "${stagefile}"; then
        info "Clearing docker storage at ${docker_root}"
        # If the removal fails the stage stays at clearing_storage; ask_reboot
        # always ends the script, so the stage is only advanced on success.
        if ! rm -rf "${docker_root}/"* "${containerd_root}/"*; then
            echo -e "\nThe server has to be rebooted to clear the docker storage. After reboot,"
            echo -e "run 'cloudron-support --recreate-docker' again.\n"
            ask_reboot
        fi
        echo -e "cleared_storage" > "${stagefile}"
    fi

    if grep -q "cleared_storage" "${stagefile}"; then
        info "Starting docker afresh at ${docker_root}"
        systemctl enable --now -q docker.socket docker containerd
        sleep 5 # give docker some time to initialize the storage directory
        download_docker_images
        # Record the stage before asking for the reboot: ask_reboot does not
        # return, the next invocation resumes at the downloaded_images stage.
        echo -e "downloaded_images" > "${stagefile}"
        echo -e "\nThe server has to be rebooted for docker to initialize properly. After reboot,"
        echo -e "run 'cloudron-support --recreate-docker' again.\n" # else docker network is not completely functional
        ask_reboot
    fi

    if grep -q "downloaded_images" "${stagefile}"; then
        systemctl enable -q box
        do_recreate_containers
    fi

    # all stages done: forget the progress marker
    rm "${stagefile}"
}

# Installs the docker / containerd versions that installer.sh pins.
#
# The versions are read from the 'readonly docker_version="..."' and
# 'readonly containerd_version="..."' lines of installer.sh, the matching .deb
# packages are downloaded from download.docker.com and installed with apt
# (downgrades allowed). Downloaded packages are removed again on every path.
#
# Returns: 0 on success; 1 if a version cannot be determined or a download
#          fails; apt's exit status if the installation fails.
function fix_docker_version() {
    local -r installer="/home/yellowtent/box/scripts/installer.sh"
    local ubuntu_codename ubuntu_version docker_version containerd_version
    local pool release_suffix idx rc=0

    ubuntu_codename=$(lsb_release -cs)
    ubuntu_version=$(lsb_release -rs)

    docker_version="$(sed -ne 's/readonly docker_version="\(.*\)"/\1/p' "${installer}")"
    containerd_version="$(sed -ne 's/readonly containerd_version="\(.*\)"/\1/p' "${installer}")"

    # Without this check an empty version only surfaces as an unexplained 404.
    if [[ -z "${docker_version}" || -z "${containerd_version}" ]]; then
        fail "Could not determine the docker/containerd version from ${installer}"
        return 1
    fi

    echo "downloading docker ${docker_version}"
    # copied from installer.sh
    pool="https://download.docker.com/linux/ubuntu/dists/${ubuntu_codename}/pool/stable/amd64"
    release_suffix="${docker_version}-1~ubuntu.${ubuntu_version}~${ubuntu_codename}_amd64.deb"

    # urls[i] is downloaded to debs[i]
    local -r urls=(
        "${pool}/containerd.io_${containerd_version}-1_amd64.deb"
        "${pool}/docker-ce-cli_${release_suffix}"
        "${pool}/docker-ce_${release_suffix}"
    )
    local -r debs=(/tmp/containerd.deb /tmp/docker-ce-cli.deb /tmp/docker.deb)

    for idx in "${!urls[@]}"; do
        if ! curl --fail -sL "${urls[idx]}" -o "${debs[idx]}"; then
            fail "Failed to download ${urls[idx]}"
            rm -f "${debs[@]}" # do not leave partial downloads behind
            return 1
        fi
    done

    echo "installing docker"
    # capture the status instead of aborting so the packages are always cleaned up
    apt install -y --allow-downgrades "${debs[@]}" || rc=$?
    rm -f "${debs[@]}"

    return "${rc}"
}

# Reports whether the newest migration file has been applied to the database.
#
# Compares the most recently run entry of box.migrations (with ".js" appended)
# against the last migration file name (with "/" prepended) and reports the
# result through success/fail/info.
# NOTE(review): the comparison only matches if DB names are stored as
# "/<file name without .js>" - confirm against the migration tool.
#
# Returns: 0 for every outcome; the result is communicated via the output.
function check_db_migrations() {
    local db_name last_migration_from_db last_migration_file

    # A failing query must not be reported as "migrations are pending".
    if ! db_name="$(mysql -NB -uroot -ppassword -e "SELECT name FROM box.migrations ORDER BY run_on DESC, name DESC LIMIT 1" 2>/dev/null)"; then
        fail "Unable to query the last applied migration from the database (is MySQL running?)"
        return 0
    fi
    last_migration_from_db="${db_name}.js"

    # '|| true': a listing error leaves just "/" here, which is reported as a
    # mismatch below instead of aborting the script under 'set -e'.
    last_migration_file="/$(ls --ignore schema.sql --ignore package.json --ignore initial-schema.sql /home/yellowtent/box/migrations/ | sort | tail -1)" || true

    if [[ "${last_migration_from_db}" != "${last_migration_file}" ]]; then
        fail "Database migrations are pending. Last migration in DB: ${last_migration_from_db}. Last migration file: ${last_migration_file}."
        info "Please run 'cloudron-support --apply-db-migrations' to apply the migrations."
    else
        success "No pending database migrations"
    fi
}

# Applies pending database migrations by running the box setup script
# (setup/start.sh) and reports the outcome via success/fail.
function apply_db_migrations() {
    echo "Applying pending database migrations"
    # A real if/else: with 'cmd && success || fail' the failure message would
    # also be printed whenever 'success' itself returned non-zero.
    if bash /home/yellowtent/box/setup/start.sh; then
        success "Database migrations applied successfully"
    else
        fail "Database migrations failed"
    fi
}

# Checks every addon service container in three steps and reports one line per
# service through success/warn/fail:
#   1. the container state reported by 'docker inspect' must be "running"
#   2. the service's TCP port must accept a connection within 5 seconds
#   3. its HTTP health endpoint must return the expected marker text
# The three arrays below are parallel: index i describes one service.
function check_services() {
    local services=("mysql" "postgresql" "mongodb" "mail" "graphite" "sftp")
    local service_ip=("172.18.30.1" "172.18.30.2" "172.18.30.3" "172.18.30.4" "172.18.30.5" "172.18.30.6")
    local service_port=("3000" "3000" "3000" "3000" "2003" "3000")
    # the loop index is local too, so the caller's variables are not clobbered
    local idx service_name service_state host port

    for idx in "${!services[@]}"; do
        service_name="${services[idx]}"
        host="${service_ip[idx]}"
        port="${service_port[idx]}"

        # a failing inspect is reported as the pseudo state "missing"
        if ! service_state="$(docker inspect "${service_name}" --format='{{.State.Status}}' 2>/dev/null)"; then
            service_state="missing"
        fi
        if [[ "${service_state}" != "running" ]]; then
            # an exited database container only earns a warning
            if [[ "${service_state}" == "exited" ]] && [[ "${service_name}" == "mysql" || "${service_name}" == "postgresql" || "${service_name}" == "mongodb" ]]; then
                warn "Service '${service_name}' is not running (may be lazy-stopped)"
            else
                fail "Service '${service_name}' container is not running (state: ${service_state})!"
            fi
            continue
        fi

        # avoid nc since it is not part of the base install
        if ! timeout 5 bash -c "</dev/tcp/${host}/${port}" 2>/dev/null; then
            fail "Service '${service_name}' is not reachable"
            continue
        fi

        # The curl calls stay inside here-strings on purpose: a failing curl
        # then yields empty input for grep instead of tripping 'set -e'.
        if [[ "${service_name}" != "graphite" ]]; then
            if ! grep -q "true" <<< "$(curl --fail -s "http://${host}:${port}/healthcheck")"; then
                fail "Service '${service_name}' healthcheck failed"
                continue
            fi
        else
            # graphite is probed through its web dashboard on port 8000
            if ! grep -q "Graphite Dashboard" <<< "$(curl --fail -s "http://${host}:8000/graphite-web/dashboard")"; then
                fail "Service '${service_name}' healthcheck failed"
                continue
            fi
        fi
        success "Service '${service_name}' is running and healthy"
    done
}

# Disables IPv6 persistently: writes a sysctl drop-in that sets disable_ipv6=1
# for the 'all', 'default' and 'lo' scopes, then reloads all sysctl
# configuration so the settings are applied immediately.
disable_ipv6_persistent() {
    local -r dropin="/etc/sysctl.d/99-disable-ipv6.conf"

    printf '%s\n' \
        "net.ipv6.conf.all.disable_ipv6 = 1" \
        "net.ipv6.conf.default.disable_ipv6 = 1" \
        "net.ipv6.conf.lo.disable_ipv6 = 1" \
        > "${dropin}"

    sysctl --system
}

# Reverts disable_ipv6_persistent: removes the sysctl drop-in and turns IPv6
# back on in the running kernel.
reenable_ipv6_persistent() {
    rm -f /etc/sysctl.d/99-disable-ipv6.conf

    # 'sysctl --system' only applies keys that are present in the configuration
    # files. Once the drop-in is gone nothing resets the keys, so they would
    # stay at 1 until the next reboot. Reset them explicitly, and do it before
    # the reload so that any remaining configuration file still has the last
    # word.
    if ! sysctl -w \
        net.ipv6.conf.all.disable_ipv6=0 \
        net.ipv6.conf.default.disable_ipv6=0 \
        net.ipv6.conf.lo.disable_ipv6=0; then
        warn "Could not re-enable IPv6 in the running kernel. A reboot may be required."
    fi

    sysctl --system
}

check_disk_space

# Parse the command line. Every option listed here has an arm in the case
# statement below. 'enable-ssh' and 'admin-login' are aliases that are not
# listed in the help text; 'enable-ssh' has to be registered here as well,
# otherwise getopt rejects it and its case arm can never be reached.
args=$(getopt -o "" -l "admin-login,disable-dnssec,enable-ssh,enable-remote-support,disable-remote-support,help,owner-login,recreate-containers,recreate-docker,fix-docker-version,send-diagnostics,unbound-forward-dns:,troubleshoot,check-db-migrations,apply-db-migrations,check-services,disable-ipv6,reenable-ipv6" -n "$0" -- "$@")
eval set -- "${args}"

# Only the first option is acted upon: every arm either exits the script or
# breaks out of the loop to print the help text.
while true; do
    case "$1" in
    --enable-ssh|--enable-remote-support) enable_remote_support; exit 0;;
    --disable-remote-support) disable_remote_support; exit 0;;
    --admin-login|--owner-login) owner_login; exit 0;;
    --send-diagnostics) send_diagnostics; exit 0;;
    --troubleshoot) troubleshoot; exit 0;;
    --disable-dnssec) disable_dnssec; exit 0;;
    --unbound-forward-dns) unbound_forward_dns "$2"; exit 0;;
    --recreate-containers) recreate_containers; exit 0;;
    --recreate-docker) recreate_docker; exit 0;;
    --fix-docker-version) fix_docker_version; exit 0;;
    --check-db-migrations) check_db_migrations; exit 0;;
    --apply-db-migrations) apply_db_migrations; exit 0;;
    --check-services) check_services; exit 0;;
    --disable-ipv6) disable_ipv6_persistent; exit 0;;
    --reenable-ipv6) reenable_ipv6_persistent; exit 0;;
    --help) break;;
    --) break;; # no option given
    *) echo "Unknown option $1"; exit 1;;
    esac
done

echo -e "${HELP_MESSAGE}"
