import axios from "axios";
import { parseTar, parseTarGzip } from "nanotar";

// Pulls the verse number(s) out of a timing tag (used by verseToTimingTag):
//   group 1 - an optional single non-digit character followed by digits
//   group 2 - the digits after an optional "-", present for range tags such as "12-13"
// The pattern is not anchored, so it matches the first such run anywhere in the tag.
const tagRegex = /([^0-9]?[0-9]+)-?([0-9]+)?/;
/**
 * Downloads the archive that holds the phrase and timing files for an audio
 * recording and uses them to annotate `html` with per-phrase spans.
 *
 * The archive is parsed with `parseTar` when its location ends in ".tar" and
 * with `parseTarGzip` otherwise. Entries are processed in name order; an entry
 * whose name contains "aeneas" or "phrases" (at any position other than the
 * very start) is treated as a phrase list, every other entry as a timing list.
 *
 * @param {string} html - markup to annotate
 * @param {string} audioInfo - archive location; anything not starting with
 *   "http" is looked up under "testfiles/"
 * @returns {Promise<[string, string[][]] | undefined>} the result of
 *   addAudioTags, or undefined when the archive could not be fetched or parsed
 *   or holds fewer than two files
 */
async function annotateAudio(html, audioInfo) {
	console.log("annotateAudio " + audioInfo);
	const phrases = [];
	const timings = [];
	try {
		// get tar file holding phrases and timings
		const url = audioInfo.startsWith("http")
			? audioInfo
			: "testfiles/" + audioInfo;
		console.log("audioInfo " + url);
		const response = await axios.get(url, {
			responseType: "arraybuffer"
		});
		console.log("response.status " + response.status);
		const data = response.data; // already resolved, no await needed
		console.log(data);
		console.log("data.length " + data.byteLength);
		const files = url.endsWith(".tar")
			? parseTar(data)
			: await parseTarGzip(data);
		console.log("files.length " + files.length);
		if (files.length < 2) {
			console.error("failed to get " + url);
			return;
		}
		// name order keeps the phrase files and timing files in the same sequence
		files.sort((a, b) => {
			if (a.name < b.name) return -1;
			return a.name > b.name ? 1 : 0;
		});
		for (const file of files) {
			// .type, .name, .size, .data=>uint8array, .text=>String, .attrs
			const name = file.name;
			const contents = file.text.split("\n"); // might leave blank line at end
			// NOTE(review): "> 0" ignores a match at the very start of the name - confirm that is intended
			if (name.indexOf("aeneas") > 0 || name.indexOf("phrases") > 0) {
				phrases.push(contents);
			} else {
				// keep the chapter slot even when the file had no timing lines
				timings.push(removeTimingHeaders(contents) || []);
			}
		}
	} catch (error) {
		// include the error itself so the cause is not lost
		console.error("Error fetching audio phrases", error);
		return;
	}
	return addAudioTags(html, phrases, timings);
}
/**
 * Removes every "[...]" verse marker (e.g. "[12a,12d]") from a generated phrase.
 * A "[" at position 0 is left alone, as is an opening bracket without a
 * matching "]" after it.
 */
function stripVerseMarkers(phrase) {
	let iopen = phrase.indexOf("[");
	while (iopen > 0) {
		// search for the closing bracket AFTER the opening one; searching from the
		// start of the string would loop forever on "a]b[c" or on a missing "]"
		const iclose = phrase.indexOf("]", iopen + 1);
		if (iclose < 0) break;
		phrase = phrase.substring(0, iopen) + phrase.substring(iclose + 1);
		iopen = phrase.indexOf("[", iopen); // might be more than one
	}
	return phrase;
}

/**
 * Removes a "\f ... \ft ... \f" footnote from a phrase. When the footnote
 * markers cannot be located the phrase is returned unchanged (with a warning).
 */
function stripFootnote(phrase) {
	const iopen = phrase.indexOf("\\f ");
	if (iopen < 0) return phrase;
	console.log("footnote at index " + iopen + ": " + phrase);
	const ft = phrase.indexOf("\\ft", iopen + 2);
	const iclose = phrase.indexOf("\\f", ft + 3); // unfortunately aeneas discards '*' so closing tag is ambiguous
	if (ft > iopen && iclose > ft) {
		console.log("close \\f " + iclose);
		const before = phrase.substring(0, iopen);
		// NOTE(review): as written, both branches yield `before` (the second one
		// only runs when nothing follows the closing "\f"), so text after the
		// footnote is always dropped. The condition looks inverted, but it is
		// kept as-is because the closing marker is ambiguous - confirm intent.
		return iclose < phrase.length - 2
			? before
			: before + phrase.substring(iclose + 2);
	}
	console.warn("addAudioTags having trouble with phrase " + phrase);
	return phrase;
}

/**
 * Wraps each audio phrase located in `html` in `<span id="CHAPTER-TAG">`.
 *
 * Phrases are consumed chapter by chapter, in order; each one is located with
 * findWords starting where the previous phrase ended. A tag that repeats
 * inside a chapter gets a one-letter suffix ("w", then "x", "y", "z") and the
 * same change is written into the matching line of `timings`.
 *
 * @param {string} html - markup to annotate
 * @param {string[][]} allphrases - per chapter, lines of "tag<sep>words"; the
 *   separator is "|" when the first line of the first chapter contains one
 *   (not at position 0), otherwise a tab
 * @param {string[][]} timings - per chapter, tab-separated lines whose third
 *   field is the tag; MODIFIED IN PLACE when a duplicate tag is renamed
 * @returns {[string, string[][]]} the annotated html and the timings array
 */
function addAudioTags(html, allphrases, timings) {
	// nothing to align: hand the html back untouched instead of throwing
	if (!allphrases || allphrases.length === 0) return [html, timings];
	let index = 0;
	let result = "";
	const separator = allphrases[0][0].indexOf("|") > 0 ? "|" : "\t"; // 1st chapter, 1st phrase
	const tagFixes = ["z", "y", "x", "w"];
	// audio is per chapter, and phrases is per chapter
	for (let chap = 1; chap <= allphrases.length; chap += 1) {
		const tags = []; // keep track of used tags to correct for duplicates from aeneas
		// copy, so every chapter starts with the full set of suffixes; popping
		// from the shared array ran out after four duplicates in the whole book
		const fixes = tagFixes.slice();
		const chapStart = index;
		const prevlen = result.length;
		const phrases = allphrases[chap - 1];
		const chapTimings = timings ? timings[chap - 1] : undefined;
		for (let iphrase = 0; iphrase < phrases.length; iphrase += 1) {
			const phrase = stripFootnote(stripVerseMarkers(phrases[iphrase]));
			if (!phrase || phrase.length < 2) continue; // empty last phrase
			const parts = phrase.split(separator);
			let tag = parts[0];
			if (parts.length < 2 || parts[1].length <= 1) continue; // nothing to find
			if (tags.includes(tag)) {
				console.log("duplicate tag " + tag + " in chapter " + chap);
				const timingLine = chapTimings ? chapTimings[iphrase] : undefined;
				const tparts =
					typeof timingLine === "string" ? timingLine.split("\t") : [];
				if (tparts[2] === tag && fixes.length > 0) {
					tag += fixes.pop(); // fix duplicate tag
					tparts[2] = tag;
					chapTimings[iphrase] = tparts.join("\t"); // fix in timings
				} else console.warn("unable to fix duplicate tag");
			}
			tags.push(tag);
			const words = parts[1].split(" ");
			const [start, next] = findWords(html, index, words);
			result += html.substring(index, start); // markup between the phrases
			result +=
				'<span id="' +
				chap +
				"-" +
				tag +
				'">' +
				html.substring(start, next) +
				"</span>";
			index = next;
		}
		console.log(
			"chap " +
				chap +
				": " +
				(index - chapStart) +
				" => " +
				(result.length - prevlen)
		); // diagnostic to detect where something went wrong
	}
	if (index < html.length) result += html.substring(index); // the rest of the file, if any
	return [result, timings];
}
/**
 * Drops the leading header lines (those starting with a backslash) and one
 * trailing empty line from the lines of a timing file.
 *
 * @param {string[]} timings - the file's lines
 * @returns {string[]} a new array holding the lines from the first non-header
 *   line onward; empty when the input is empty or holds only header lines
 */
function removeTimingHeaders(timings) {
	if (!timings || timings.length === 0) return [];
	let len = timings.length;
	if (timings[len - 1].length === 0) len -= 1; // blank line left by the final "\n"
	const first = timings.findIndex((line) => !line.startsWith("\\"));
	// always return an array so callers never store `undefined` for a chapter
	return first < 0 ? [] : timings.slice(first, len);
}

/**
 * Locates a run of words in the text of `html`, stepping over markup.
 *
 * Each word is searched for from the end of the previous one. A word is
 * accepted when it occurs before the next "<" (or when there is no further
 * "<"); otherwise the tag at that "<" is skipped and the search repeats, for
 * at most 10 rounds per word. A leading "(" and a trailing ")" are removed
 * from each word before searching.
 *
 * NOTE(review): the previous version contained a branch meant to skip the
 * content of spans with certain classes ("sfm-too", "sfm-ver", 'sfm-c">',
 * "tooltip"). It tested membership with `in` (which checks array indices, not
 * values) and assigned its result to a shadowed variable, so it never had any
 * effect. It is left out here to keep the effective behaviour; reinstate it
 * deliberately if that skipping is wanted.
 *
 * @param {string} html - markup to search
 * @param {number} index - position to start from
 * @param {string[]} words - words of one phrase, in order
 * @returns {[number, number]} start of the first word and the position just
 *   after the last word that was accepted. When the first word is not found at
 *   all, start is the position where its search stopped.
 */
function findWords(html, index, words) {
	const MAX_ROUNDS = 10; // search rounds allowed per word
	let next = index;
	let istart;
	let iword;
	for (let i = 0; i < words.length; i += 1) {
		let word = words[i]; // protect against span between word and parens left by aeneas
		if (word.startsWith("(")) word = word.substring(1);
		if (word.endsWith(")")) word = word.substring(0, word.length - 1);
		let rounds = 0;
		while (next < html.length && rounds < MAX_ROUNDS) {
			rounds += 1;
			const iangle = html.indexOf("<", next);
			iword = html.indexOf(word, next);
			// a word is text when it comes before the next tag, or when no tag follows
			if (iword >= 0 && (iangle < 0 || iword < iangle)) {
				next = iword + word.length;
				break;
			}
			if (iangle < 0) break; // no markup left to skip and the word is absent
			// skip over html that isn't text of interest
			const iclose = html.indexOf(">", iangle + 1);
			if (iclose < 0) {
				next = html.length; // unterminated tag swallows the rest
				break;
			}
			next = iclose + 1;
		}
		// fall back to where the search stopped so callers never get -1/undefined
		if (i === 0) istart = iword >= 0 ? iword : next;
	}
	if (next <= 0) next = html.length;
	return [istart, next];
}

/**
 * Finds the timing tag that covers a verse reference.
 *
 * "1:1" always maps to the first timing entry of the first chapter. For any
 * other reference the chapter's timing lines are scanned in order: a range tag
 * such as "12-13" matches when the verse lies inside it, any other tag
 * matches when the text captured by tagRegex group 1 equals the verse text.
 *
 * @param {string} v - reference in "chapter:verse" form
 * @param {string[][]} timings - per chapter, tab-separated "start, end, tag" lines
 * @returns {string|null} "chapter-tag", or null when nothing matches
 */
function verseToTimingTag(v, timings) {
	if (v === "1:1") {
		const entry = timings[0][0]; // special case, go to first timing entry
		return "1-" + entry.split("\t")[2];
	}
	const [chap, vers] = v.split(":");
	const iv = parseInt(vers, 10);
	console.log(v);

	const chapTimings = timings[parseInt(chap, 10) - 1]; // one file [lines] per chapter
	if (!chapTimings) {
		// unknown chapter: report it the same way as any other miss
		console.log("verseToTimingTag failed to find " + v);
		return null;
	}
	for (let i = 0; i < chapTimings.length; i += 1) {
		const tag = chapTimings[i].split("\t")[2]; // start, end, tag
		const match = tag ? tag.match(tagRegex) : null;
		if (!match) continue; // line without a tag, or tag without any digits
		if (match[2]) {
			const first = parseInt(match[1], 10);
			const last = parseInt(match[2], 10);
			if (first > iv || iv > last) continue;
		} else if (match[1] !== vers) {
			continue;
		}
		return chap + "-" + tag;
	}
	console.log("verseToTimingTag failed to find " + v);
	return null;
}

/**
 * Looks up the start time of a "chapter-tag" identifier.
 *
 * The text before the first "-" selects the chapter; everything after it is
 * compared with the third field of that chapter's timing lines.
 *
 * @param {string} tag - identifier such as "3-12a"
 * @param {string[][]} timings - per chapter, tab-separated "start, end, tag" lines
 * @returns {number|null} the first field of the matching line as a number, or
 *   null when the chapter or the tag is unknown
 */
function timingTagToTime(tag, timings) {
	const indx = tag.indexOf("-");
	const chap = tag.substring(0, indx);
	const vers = tag.substring(indx + 1);
	// a missing or non-numeric chapter yields no list; fall through to the failure path
	const chapTimings = timings[parseInt(chap, 10) - 1] || [];
	for (let i = 0; i < chapTimings.length; i += 1) {
		const parts = chapTimings[i].split("\t");
		if (parts[2] === vers) return parseFloat(parts[0]);
	}
	console.log("timingTagToTime failed for " + tag);
	return null;
}

/**
 * Maps a playback time to the "chapter-tag" identifier of the phrase playing
 * at that moment.
 *
 * Chapters whose last line ends before `time` are passed over; within the
 * first remaining chapter the first line whose end time lies after `time`
 * wins. When no line of that chapter qualifies the scan moves on to the next
 * chapter.
 *
 * @param {number} time - time to look up
 * @param {string[][]} timings - per chapter, tab-separated "start, end, tag" lines
 * @returns {string|null} "chapter-tag", or null when no line ends after `time`
 */
function timeToTimingTag(time, timings) {
	let chapterNumber = 0;
	for (const chapterLines of timings) {
		chapterNumber += 1;
		const lastLine = chapterLines[chapterLines.length - 1];
		const chapterEnd = parseFloat(lastLine.split("\t")[1]);
		if (time > chapterEnd) {
			continue; // whole chapter is already over
		}
		for (const line of chapterLines) {
			const fields = line.split("\t");
			const phraseEnd = parseFloat(fields[1]);
			if (time < phraseEnd) {
				return chapterNumber + "-" + fields[2];
			}
		}
	}
	return null;
}

/**
 * Determines which tag becomes current next, and when.
 *
 * Inside the chapter named by `tag`, the first line whose start time is not
 * before `time` is returned as `[newTag, start]`. When `time` is past every
 * start in the chapter, the result has three entries:
 * `[firstTagOfNextChapter, endOfCurrentChapter, startOfNextChapter]`, or
 * `[null, endOfCurrentChapter, 0]` when the current chapter is the last one.
 *
 * @param {string} tag - current "chapter-tag" identifier; only the chapter part is used
 * @param {number} time - current time within the chapter's clip
 * @param {string[][]} timings - per chapter, tab-separated "start, end, tag" lines
 * @returns {Array} see above
 * @throws {TypeError} when the chapter named by `tag` has no timing lines
 */
function getNextTagChange(tag, time, timings) {
	console.log("getNextTagChange after " + tag + " " + time);
	const iclip = parseInt(tag.split("-")[0], 10); // 1-based chapter number
	const ctimings = timings[iclip - 1];
	let parts;
	for (let i = 0; i < ctimings.length; i++) {
		parts = ctimings[i].split("\t");
		const start = parseFloat(parts[0]);
		if (time > start) continue;
		return [iclip + "-" + parts[2], start];
	}
	const end = parseFloat(parts[1]); // end time of current tag's clip
	// iclip is 1-based, so timings[iclip] is the following chapter; it exists
	// whenever iclip < timings.length (the old "length - 1" bound skipped the
	// transition into the final chapter)
	if (iclip < timings.length) {
		const nextParts = timings[iclip][0].split("\t"); // next clip
		const nextstart = parseFloat(nextParts[0]);
		const newtag = iclip + 1 + "-" + nextParts[2];
		return [newtag, end, nextstart]; // extra field when changing clips / chapters: duration of current phrase
	}
	return [null, end, 0]; // end of last (current) clip in book
}

export {
	annotateAudio,
	addAudioTags,
	verseToTimingTag,
	timingTagToTime,
	timeToTimingTag,
	getNextTagChange
};
