vcs-backup.sh
author František Kučera <franta-hg@frantovo.cz>
Thu, 29 Oct 2020 19:21:19 +0100
branchv_0
changeset 24 1d46fb79b1af
parent 23 1222714ae08f
permissions -rwxr-xr-x
allow the ~ character in the URL
     1 #!/bin/bash
     2 
     3 # VCS Backup
     4 # Copyright © 2019 František Kučera (Frantovo.cz, GlobalCode.info)
     5 #
     6 # This program is free software: you can redistribute it and/or modify
     7 # it under the terms of the GNU General Public License as published by
     8 # the Free Software Foundation, version 3 of the License.
     9 #
    10 # This program is distributed in the hope that it will be useful,
    11 # but WITHOUT ANY WARRANTY; without even the implied warranty of
    12 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
    13 # GNU General Public License for more details.
    14 #
    15 # You should have received a copy of the GNU General Public License
    16 # along with this program. If not, see <http://www.gnu.org/licenses/>.
    17 
    18 
    19 # VCS Backup is a configuration for setting up VCS (version control system) mirrors.
    20 # Currently Mercurial (Hg) and Git are supported.
    21 # Features:
    22 #  - mirrors remote repositories
    23 #  - creates Btrfs subvolume for each repository
    24 #  - does periodic pull to keep mirrors up to date
    25 #  - does periodic Btrfs snapshot to keep history (git push --force done on the remote repository will lead to modifications or deletions in our current mirror, but previous versions will be kept in the snapshots)
     26 #  - provides web interface for remote cloning of our mirrors (see systemd and etc folders)
    27 #  - can be controlled over SSH by a sane person / owner of the system
    28 #  - provides reports in the recfile format (to be processed using GNU Recutils or Relational pipes):
    29 #     - list of repositories/mirrors
    30 #     - results of pull operations
    31 
    32 
    33 # This is an asynchronous message-driven shell script that runs distributed across two machines and four user accounts. You have been warned :-)
    34 
    35 
    36 # Server-side configuration: / loadServerConfigFile()
    37 VCS_BACKUP_DATA_DIR="/mnt/data";
    38 VCS_BACKUP_CURRENT_DIR="$VCS_BACKUP_DATA_DIR/current";
    39 VCS_BACKUP_PUBLIC_DIR="$VCS_BACKUP_DATA_DIR/public";
    40 VCS_BACKUP_CONFIG_DIR="$VCS_BACKUP_DATA_DIR/config";
    41 VCS_BACKUP_SNAPSHOT_DIR="$VCS_BACKUP_DATA_DIR/snapshot";
    42 VCS_BACKUP_SUBVOLUME_SOCKET="/run/vcs-backup-subvolume";
    43 VCS_BACKUP_CLONE_SOCKET="/run/vcs-backup-clone/socket"; # the directory will be writable by ${VCS_BACKUP_USER}
    44 VCS_BACKUP_CLONE_CALLBACK_SOCKET="clone-callback";
    45 VCS_BACKUP_USER="vcs-backup";
    46 VCS_BACKUP_MANAGER="vcs-backup-manager";
    47 
    48 # Client-side configuration: / see loadClientConfigFile()
    49 VCS_BACKUP_SERVER="$VCS_BACKUP_MANAGER@localhost";
    50 VCS_BACKUP_SSH_COMMAND=(ssh "$VCS_BACKUP_SERVER");
    51 
    52 # Installation – check and do it by hand:
    53 # There should be already mounted Btrfs at $VCS_BACKUP_DATA_DIR
    54 installInstructions() {
    55 cp vcs-backup.sh /usr/local/bin/
    56 adduser --disabled-password "$VCS_BACKUP"
    57 adduser --disabled-password "$VCS_BACKUP_MANAGER"
    58 
    59 mkdir "$VCS_BACKUP_CURRENT_DIR";
    60 mkdir "$VCS_BACKUP_CONFIG_DIR";
    61 mkdir "$VCS_BACKUP_SNAPSHOT_DIR";
    62 mkdir "$(dirname VCS_BACKUP_CLONE_SOCKET)"
    63 
    64 chown "${VCS_BACKUP_USER}:${VCS_BACKUP_USER}" "$(dirname VCS_BACKUP_CLONE_SOCKET)"
    65 chown "${VCS_BACKUP_MANAGER}:${VCS_BACKUP_MANAGER}" "$VCS_BACKUP_CONFIG_DIR"
    66 }
    67 
    68 
    69 # --- Private functions: ---------------------------------------------------------------------------
    70 
    71 # Environment: all
    72 # $1 = VCS type: hg, git
    73 # $2 = URL
    74 isValidTypeAndURL() { ([[ "$1" == "hg" || "$1" == "git" ]]) && [[ $(echo "$2" | wc -l) == 1 ]] && [[ $(echo "$2" | grep -E '^(http|https|ssh)://([a-zA-Z0-9_-][a-zA-Z0-9_-.~]*/?)+$' | wc -l) == 1 ]]; }
    75 
    76 # Environment: all
    77 # $1 = path to the config file
    78 loadConfigFile() { if [ -f "$1" ]; then . "$1"; fi }
    79 loadClientConfigFile() { loadConfigFile ~/.config/vcs-backup/client.cfg; }
    80 loadServerConfigFile() { loadConfigFile "/etc/vcs-backup/server.cfg"; }
    81 
    82 # Environment: server
    83 # $1 = URL
    84 urlToRelativeDirectoryPath() {
    85 	echo "$1" | sed -E 's@^[^:]+://@@g';
    86 }
    87 
    88 # Environment: server
    89 # Lists relative paths (starting wiht hg or git) of all configured repositories
    90 allRepositories() { find "$VCS_BACKUP_CONFIG_DIR" -name url.txt -printf '%P\n' | sort | xargs --no-run-if-empty dirname; }
    91 
    92 # Environment: all
    93 # $1 = optional value (if missing, reads STDIN)
    94 escapeRecfileValue() { if [[ $# = 0 ]]; then awk '{ if (NR > 1) { printf "+ " } print $_ }'; else echo "${@}" | ${FUNCNAME[0]}; fi }
    95 
    96 # Environment: all
    97 # $1 = key
    98 # $2 = value
    99 printRecfileKeyValue() { echo -n "$1: "; escapeRecfileValue "$2"; }
   100 
   101 # --- Public interface functions: ------------------------------------------------------------------
   102 
   103 # Environment: client
   104 # $1 = VCS type: hg, git
   105 # $2 = URL
   106 # $3 = "public" or "private" (default), whether the repository should be available through the public web interface
   107 # $4 = "clone" (optional), if present, will also clone the backup locally
   108 vcs_backup_public_clientSubmitBackupRequest() {
   109 	if isValidTypeAndURL "$1" "$2"; then
   110 		${VCS_BACKUP_SSH_COMMAND[@]} vcs-backup.sh serverSubmitBackupRequest "$1" "$2" "$3" "$4"
   111 		if [[ "$4" == "clone" ]]; then
   112 			if   [[ "$1" == "hg"  ]]; then  hg clone "ssh://${VCS_BACKUP_SERVER}/$VCS_BACKUP_CURRENT_DIR/$1/$(urlToRelativeDirectoryPath $2)";
   113 			elif [[ "$1" == "git" ]]; then git clone "ssh://${VCS_BACKUP_SERVER}/$VCS_BACKUP_CURRENT_DIR/$1/$(urlToRelativeDirectoryPath $2)";
   114 			fi
   115 		fi
   116 	else
   117 		echo "Unsupported VCS type: '$1' or URL: '$2'" >&2;
   118 	fi
   119 }
   120 
   121 # Environment: server
   122 # User: $VCS_BACKUP_MANAGER
   123 # has same parameters as clientSubmitBackupRequest (see above)
   124 vcs_backup_public_serverSubmitBackupRequest() {
   125 	if isValidTypeAndURL "$1" "$2"; then
   126 		relativePath=$1/$(urlToRelativeDirectoryPath "$2");
   127 		absolutePath="$VCS_BACKUP_CONFIG_DIR/$relativePath";
   128 		# TODO: stop if directory already exists / only add public link?
   129 		mkdir -p "$absolutePath";
   130 		echo "$2" > "$absolutePath/url.txt"
   131 		echo "submited" > "$absolutePath/state.txt"
   132 		setfacl -m u:${VCS_BACKUP_USER}:r  "$absolutePath/url.txt"
   133 		setfacl -m u:${VCS_BACKUP_USER}:rw "$absolutePath/state.txt"
   134 
   135 		if [[ "$3" == "public" ]]; then
   136 			cd "$VCS_BACKUP_PUBLIC_DIR";
   137 			mkdir -p "$(dirname $relativePath)";
   138 			ln -rsf "../current/$relativePath" "$(dirname $relativePath)";
   139 		elif [[ "$3" == "private" && -e "$VCS_BACKUP_PUBLIC_DIR/$relativePath" ]]; then
   140 			rm "$VCS_BACKUP_PUBLIC_DIR/$relativePath";
   141 		fi
   142 
   143 		if [[ "$4" == "clone" ]]; then
   144 			callBackSocket=$absolutePath/${VCS_BACKUP_CLONE_CALLBACK_SOCKET}
   145 			socat -u "unix-recvfrom:$callBackSocket,mode=777" - | while read m; do # TODO: ,group=${VCS_BACKUP_USER} and no 777 ?
   146 				echo "Message from the service: $m";
   147 			done &
   148 			callBackPID=$!;
   149 		fi
   150 
   151 		echo "$relativePath" | socat -u - unix-send:${VCS_BACKUP_SUBVOLUME_SOCKET};
   152 
   153 		if [[ "$4" == "clone" ]]; then
   154 			echo "Waiting for a message from the service on $callBackSocket (PID $callBackPID)";
   155 			wait -n $callBackPID;
   156 		fi
   157 	else
   158 		echo "Unsupported VCS type: '$1' or URL: '$2'" >&2;
   159 	fi
   160 }
   161 
   162 # Environment: server
   163 # User: root
   164 # Should be started as a systemd/init service.
   165 # - reads messages from from the subvolume socket – message contains the relative directory path
   166 # - creates a subvolume for given repository + necesary parent directories
   167 # - sends a message to the clone service → start cloning into the created subvolume
   168 vcs_backup_public_serverStartSubvolumeService() {
   169 	socat -u "unix-recv:${VCS_BACKUP_SUBVOLUME_SOCKET},group=${VCS_BACKUP_MANAGER},mode=770" - | while read d; do
   170 		mkdir -p $(dirname "$VCS_BACKUP_CURRENT_DIR/$d");
   171 		if [[ -e "$VCS_BACKUP_CURRENT_DIR/$d" ]]; then
   172 			echo "cloned" > "$VCS_BACKUP_CONFIG_DIR/$d/state.txt";
   173 		else
   174 			btrfs subvolume create "$VCS_BACKUP_CURRENT_DIR/$d" && \
   175 			echo "subvolumeCreated" > "$VCS_BACKUP_CONFIG_DIR/$d/state.txt" && \
   176 			chown "${VCS_BACKUP_USER}:${VCS_BACKUP_USER}" "$VCS_BACKUP_CURRENT_DIR/$d"
   177 		fi
   178 		echo "$d" | socat -u - unix-send:${VCS_BACKUP_CLONE_SOCKET};
   179 	done
   180 }
   181 
   182 # Environment: server
   183 # User: $VCS_BACKUP_USER
   184 # should be started as a systemd/init service
   185 vcs_backup_public_serverStartCloneService() {
   186 	socat -u "unix-recv:${VCS_BACKUP_CLONE_SOCKET},mode=700" - | while read d; do
   187 		vcsType=$(echo "$d" | sed 's@/.*@@g');
   188 		url=$(cat "$VCS_BACKUP_CONFIG_DIR/$d/url.txt");
   189 		state=$(cat "$VCS_BACKUP_CONFIG_DIR/$d/state.txt");
   190 
   191 		if isValidTypeAndURL "$vcsType" "$url"; then
   192 			if [[ "$state" == "cloned"  ]]; then
   193 				# Already cloned repository → just pull
   194 				if   [[ "$vcsType" == "hg"  ]]; then hg -R "$VCS_BACKUP_CURRENT_DIR/$d" pull;
   195 				elif [[ "$vcsType" == "git" ]]; then git -C "$VCS_BACKUP_CURRENT_DIR/$d" fetch;
   196 				fi
   197 			else
   198 				# New repository → clone
   199 				if   [[ "$vcsType" == "hg"  ]]; then  hg clone -U       "$url" "$VCS_BACKUP_CURRENT_DIR/$d";
   200 				elif [[ "$vcsType" == "git" ]]; then git clone --mirror "$url" "$VCS_BACKUP_CURRENT_DIR/$d";
   201 				fi && echo "cloned" > "$VCS_BACKUP_CONFIG_DIR/$d/state.txt";
   202 			fi
   203 		else
   204 			echo "Unsupported VCS type: '$vcsType' or URL: '$url'" >&2;
   205 		fi
   206 
   207 		callBackSocket="$VCS_BACKUP_CONFIG_DIR/$d/$VCS_BACKUP_CLONE_CALLBACK_SOCKET";
   208 		if [[ -e "$callBackSocket" ]]; then
   209 			echo "done" | socat -u - unix-send:"$callBackSocket";
   210 		fi
   211 	done
   212 }
   213 
   214 # Environment: client
   215 # prints list of repositories in recfile format
   216 # usage example: vcs-backup.sh clientListRepositories | relpipe-in-recfile | relpipe-out-tabular
   217 vcs_backup_public_clientListRepositories() {
   218 	${VCS_BACKUP_SSH_COMMAND[@]} vcs-backup.sh serverListRepositories;
   219 }
   220 
   221 # Environment: server
   222 # User: $VCS_BACKUP_MANAGER
   223 vcs_backup_public_serverListRepositories() {
   224 	printRecfileKeyValue "%rec"  "repository";
   225 	printRecfileKeyValue "%type" "bytes int";
   226 	printRecfileKeyValue "%type" "public bool";
   227 	echo;
   228 
   229 	allRepositories | while read d; do
   230 		url=$(cat "$VCS_BACKUP_CONFIG_DIR/$d/url.txt");
   231 		state=$(cat "$VCS_BACKUP_CONFIG_DIR/$d/state.txt");
   232 		vcsType=$(echo "$d" | sed 's@/.*@@g');
   233 		sizeBytes=$(du -sb "$VCS_BACKUP_CURRENT_DIR/$d" | cut -f1);
   234 		[[ -e "$VCS_BACKUP_PUBLIC_DIR/$d" ]] && public="true" || public="false";
   235 		
   236 		if [[ "$vcsType" == "hg"  ]]; then lastCommit=$(hg log --limit 1 --template '{date|isodatesec}' -R "$VCS_BACKUP_CURRENT_DIR/$d" 2>/dev/null);
   237 		elif [[ "$vcsType" == "git" ]]; then lastCommit=$(git -C "$VCS_BACKUP_CURRENT_DIR/$d" log --max-count=1 --pretty="%ai"); 
   238 		else lastCommit=""; fi
   239 		
   240 		printRecfileKeyValue "type"            "$vcsType";
   241 		printRecfileKeyValue "url"             "$url";
   242 		printRecfileKeyValue "state"           "$state";
   243 		printRecfileKeyValue "public"          "$public";
   244 		printRecfileKeyValue "serverPath"      "$VCS_BACKUP_CURRENT_DIR/$d";
   245 		printRecfileKeyValue "size"            "$sizeBytes";
   246 		printRecfileKeyValue "lastCommit"      "$lastCommit";
   247 		echo;
   248 	done
   249 }
   250 
   251 # Environment: server
   252 # User: $VCS_BACKUP_USER
   253 # should be called from cron (usually every day)
   254 vcs_backup_public_serverPullCronTask() {
   255 	printRecfileKeyValue "%rec"  "pull";
   256 	printRecfileKeyValue "%type" "started date";
   257 	printRecfileKeyValue "%type" "finished date";
   258 	printRecfileKeyValue "%type" "duration int";
   259 	printRecfileKeyValue "%type" "resultCode int";
   260 
   261 	allRepositories | while read d; do
   262 		state=$(cat "$VCS_BACKUP_CONFIG_DIR/$d/state.txt");
   263 		vcsType=$(echo "$d" | sed 's@/.*@@g');
   264 		absolutePath="$VCS_BACKUP_CURRENT_DIR/$d";
   265 
   266 		pullStarted=$(date --iso-8601=s);
   267 		pullStartedMiliseconds=$(($(date +%s%N)/1000000));
   268 		pullFinished="";
   269 		pullDuration="";
   270 		pullResult="";
   271 		pullResultCode="";
   272 		if [[ "$state" == "cloned" ]]; then
   273 			if   [[ "$vcsType" == "hg" ]];  then pullResult=$(hg pull --force --repository "$absolutePath" 2>&1); pullResultCode=$?;
   274 			elif [[ "$vcsType" == "git" ]]; then pullResult=$(git -C "$absolutePath" fetch 2>&1); pullResultCode=$?;
   275 			fi
   276 			pullFinished=$(date --iso-8601=s);
   277 			pullFinishedMiliseconds=$(($(date +%s%N)/1000000));
   278 			pullDuration=$(( $pullFinishedMiliseconds - $pullStartedMiliseconds ));
   279 		fi
   280 
   281 		printRecfileKeyValue "type"            "$vcsType";
   282 		printRecfileKeyValue "serverPath"      "$absolutePath";
   283 		printRecfileKeyValue "state"           "$state";
   284 		printRecfileKeyValue "started"         "$pullStarted";
   285 		printRecfileKeyValue "finished"        "$pullFinished";
   286 		printRecfileKeyValue "duration"        "$pullDuration";
   287 		printRecfileKeyValue "resultCode"      "$pullResultCode";
   288 		printRecfileKeyValue "message"         "$pullResult";
   289 		echo;
   290 	done
   291 }
   292 
   293 # Environment: server
   294 # User: root
   295 # should be called from cron (usually every day) after Pull (see above)
   296 vcs_backup_public_serverSnapshotCronTask() {
   297 	printRecfileKeyValue "%rec"  "snapshot";
   298 	printRecfileKeyValue "%type" "started date";
   299 	printRecfileKeyValue "%type" "finished date";
   300 	printRecfileKeyValue "%type" "duration int";
   301 	printRecfileKeyValue "%type" "resultCode int";
   302 	
   303 	allRepositories | while read d; do
   304 		state=$(cat "$VCS_BACKUP_CONFIG_DIR/$d/state.txt");
   305 		vcsType=$(echo "$d" | sed 's@/.*@@g');
   306 		absolutePath="$VCS_BACKUP_CURRENT_DIR/$d";
   307 
   308 		started=$(date --iso-8601=s);
   309 		startedMiliseconds=$(($(date +%s%N)/1000000));
   310 		finished="";
   311 		duration="";
   312 		result="";
   313 		resultCode="";
   314 		snapshotPath="";
   315 		if [[ "$state" == "cloned" ]]; then
   316 			snapshotPath="$VCS_BACKUP_SNAPSHOT_DIR/$d/$(date --iso-8601=date)";
   317 			mkdir -p $(dirname "$snapshotPath");
   318 			result=$(btrfs subvolume snapshot -r "$absolutePath" "$snapshotPath" 2>&1);
   319 			resultCode=$?;
   320 			finished=$(date --iso-8601=s);
   321 			finishedMiliseconds=$(($(date +%s%N)/1000000));
   322 			duration=$(( $finishedMiliseconds - $startedMiliseconds ));
   323 		fi
   324 
   325 		printRecfileKeyValue "currentPath"     "$absolutePath";
   326 		printRecfileKeyValue "snapshotPath"    "$snapshotPath";
   327 		printRecfileKeyValue "type"            "$vcsType";
   328 		printRecfileKeyValue "state"           "$state";
   329 		printRecfileKeyValue "started"         "$started";
   330 		printRecfileKeyValue "finished"        "$finished";
   331 		printRecfileKeyValue "duration"        "$duration";
   332 		printRecfileKeyValue "resultCode"      "$resultCode";
   333 		printRecfileKeyValue "message"         "$result";
   334 		echo;
   335 	done
   336 }
   337 
   338 # --- Single entry-point: --------------------------------------------------------------------------
   339 
   340 loadClientConfigFile;
   341 loadServerConfigFile;
   342 PATH="/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin";
   343 PUBLIC_FUNCTION_PREFIX="vcs_backup_public_";
   344 if type -t "$PUBLIC_FUNCTION_PREFIX$1" > /dev/null; then
   345 	"$PUBLIC_FUNCTION_PREFIX${@:1}";
   346 elif [[ $(basename $0) == "vcs-backup-clone-private-hg"  ]]; then "${PUBLIC_FUNCTION_PREFIX}clientSubmitBackupRequest" hg  "$1" private clone;
   347 elif [[ $(basename $0) == "vcs-backup-clone-private-git" ]]; then "${PUBLIC_FUNCTION_PREFIX}clientSubmitBackupRequest" git "$1" private clone;
   348 elif [[ $(basename $0) == "vcs-backup-clone-public-hg"   ]]; then "${PUBLIC_FUNCTION_PREFIX}clientSubmitBackupRequest" hg  "$1" public  clone;
   349 elif [[ $(basename $0) == "vcs-backup-clone-public-git"  ]]; then "${PUBLIC_FUNCTION_PREFIX}clientSubmitBackupRequest" git "$1" public  clone;
   350 else
   351 	echo "Unsupported sub-command: $1" >&2
   352 	echo "Available sub-commands:" >&2
   353 	declare -F | grep "$PUBLIC_FUNCTION_PREFIX" | sed "s/.*$PUBLIC_FUNCTION_PREFIX/  /g" >&2
   354 	exit 1;
   355 fi