Python: inserting subtitles into YouTube videos on your WordPress site

What are we talking about?

This post may be a little hard to follow, because it describes a personal solution to a problem I have on my blogs. I will try to make it as clear as I can, and I hope you can take some useful tips from this code in case you need something like this on your blog.

Can I put subtitles on Youtube video… on my site?

I needed to add Italian subtitles to interesting English-language videos on my site, so I started searching and found this JavaScript code. However, there was a lot to write by hand: the start and the end of every subtitle, adding up the seconds every time. For videos longer than 3 minutes it was a real waste of time, so I decided to write a Python script to make the process easier. Now I just have to create a txt file, write only the seconds… and it’s done. Let’s take a look at the code.

Version 1 of the code

This code relies on the script at https://sportelloautismo.github.io/js/sub16em.js, which is the following:

// javascript code to make the subtitles
// on youtube videos, just for your site

(function(root, factory) {
	// Set up YoutubeExternalSubtitle appropriately for the environment. Start with AMD.
	if (typeof define === 'function' && define.amd) {
		define(['exports'], function(exports) {
			// Export global even in AMD case in case this script is loaded with
			// others that may still expect a global YoutubeExternalSubtitle.
			root.YoutubeExternalSubtitle = factory(root, exports);
		});
	}
	// Next for CommonJS.
	else if (typeof exports !== 'undefined') {
		factory(root, exports);
	}
	// Finally, as a browser global.
	else {
		root.YoutubeExternalSubtitle = factory(root, {});
	}
}(this, function(root, YoutubeExternalSubtitle) {
	'use strict';

	/**
	 * Reports whether the YouTube IFrame API is available on `root`.
	 *
	 * @returns {boolean} true only when both `root.YT` and `root.YT.Player` are truthy.
	 */
	var iframeApiLoaded = function() {
		var api = root.YT;

		if (!api) {
			return false;
		}

		return Boolean(api.Player);
	};

	/**
	 * Creates a wrapper that always invokes `func` with `context` as `this`.
	 *
	 * @param {Function} func - function to call.
	 * @param {*} context - value used as `this` on every call.
	 * @returns {Function} wrapper forwarding all arguments and the return value.
	 */
	var proxy = function(func, context) {
		return function() {
			var callArgs = Array.prototype.slice.call(arguments);

			return func.apply(context, callArgs);
		};
	};

	/**
	 * Appends `cls` to the element's class list unless it is already present.
	 *
	 * @param {{className: string}} element - element to modify.
	 * @param {string} cls - class name to add.
	 */
	var addClass = function(element, cls) {
		if (hasClass(element, cls)) {
			return;
		}

		// Only separate with a space when there is already something to separate from.
		var separator = element.className ? ' ' : '';

		element.className = element.className + separator + cls;
	};

	/**
	 * Looks for `cls` as a whole, whitespace-delimited token in `element.className`.
	 *
	 * @param {{className: string}} element - element to inspect.
	 * @param {string} cls - class name; inserted into a RegExp as-is.
	 * @returns {Array|null} the match result (truthy) when present, otherwise null.
	 */
	var hasClass = function(element, cls) {
		var tokenPattern = new RegExp('(\\s|^)' + cls + '(\\s|$)');

		return element.className.match(tokenPattern);
	};

	/**
	 * Removes `cls` from the element's class list, if present.
	 *
	 * The matched token (with its surrounding whitespace) is replaced by a single
	 * space so neighbouring classes stay separated; the result is then trimmed so
	 * that removing the first, last or only class does not leave stray whitespace
	 * behind (previously a sole class turned `className` into " ").
	 *
	 * @param {{className: string}} element - element to modify.
	 * @param {string} cls - class name to remove; inserted into a RegExp as-is.
	 */
	var removeClass = function(element, cls) {
		var reg = new RegExp('(\\s|^)' + cls + '(\\s|$)');

		// Leave className untouched when the class is absent.
		if (!reg.test(element.className)) {
			return;
		}

		element.className = element.className.replace(reg, ' ').replace(/^\s+|\s+$/g, '');
	};

	/**
	 * Extracts the 11-character video ID from a YouTube URL.
	 *
	 * Recognised forms: `youtu.be/ID`, `/v/ID`, `/u/x/ID`, `/embed/ID` and
	 * `watch?v=ID`. The optional `v=` prefix is matched as a unit, so an ID that
	 * itself starts with "v" (e.g. `/embed/vXaYAetyxO0`) is no longer truncated,
	 * and the dot in `youtu.be` is matched literally.
	 *
	 * @param {string} url - URL to inspect.
	 * @returns {string|false} the video ID, or false when no 11-character ID is found.
	 */
	var getYouTubeIDFromUrl = function(url) {
		var match = url.match(/^.*(?:youtu\.be\/|v\/|\/u\/\w\/|embed\/|watch\?)\??(?:v=)?([^#&?]*).*/);

		if (match && match[1].length === 11) {
			return match[1];
		}

		return false;
	};

	/**
	 * Appends query-string parameters to a URL, preserving any existing query
	 * string and fragment.
	 *
	 * Names and values are URI-encoded, only the object's own properties are
	 * used, and a URL that already ends in "?" or "&" does not get a doubled
	 * separator.
	 *
	 * @param {string} url - URL to extend.
	 * @param {Object} qsParameters - map of parameter name to value.
	 * @returns {string} the URL with the parameters inserted before the fragment.
	 */
	var addQueryStringParameterToUrl = function(url, qsParameters) {
		var hashIndex = url.indexOf('#');
		var hash      = '';

		if (hashIndex !== -1) {
			hash = url.slice(hashIndex);
			url  = url.slice(0, hashIndex);
		}

		var qsIndex = url.indexOf('?');
		var qs      = '';

		if (qsIndex !== -1) {
			qs  = url.slice(qsIndex);
			url = url.slice(0, qsIndex);
		}

		for (var name in qsParameters) {
			// Skip enumerable properties inherited through the prototype chain.
			if (!Object.prototype.hasOwnProperty.call(qsParameters, name)) {
				continue;
			}

			var lastChar  = qs.charAt(qs.length - 1);
			var separator = '&';

			if (qs === '') {
				separator = '?';
			}
			else if (lastChar === '?' || lastChar === '&') {
				// The existing query string already ends with a separator.
				separator = '';
			}

			qs += separator + encodeURIComponent(name) + '=' + encodeURIComponent(qsParameters[name]);
		}

		return url + qs + hash;
	};

	/**
	 * Maps a playback time to the key of its 10-second cache bucket.
	 *
	 * @param {number} seconds - playback time in seconds.
	 * @returns {number} `seconds / 10`, rounded down.
	 */
	var getCacheName = function(seconds) {
		var tenths = seconds / 10;

		return Math.floor(tenths);
	};

	/**
	 * Indexes subtitles by 10-second bucket so lookups during playback only scan
	 * the subtitles that can be active at the current time.
	 *
	 * Every bucket between a subtitle's start and end is populated. Previously
	 * only the first and last bucket were, so a subtitle spanning three or more
	 * buckets (for example 38.1s to 62s) disappeared while playback was inside
	 * the middle buckets.
	 *
	 * @param {Array<{start: number, end: number, text: string}>} subtitles - subtitle entries.
	 * @returns {Object} map of bucket key to the array of subtitles overlapping that bucket.
	 */
	var buildCache = function(subtitles) {
		var cache = {};

		var addToBucket = function(bucket, subtitle) {
			if (!cache[bucket]) {
				cache[bucket] = [];
			}

			cache[bucket].push(subtitle);
		};

		for (var i in subtitles) {
			// Ignore enumerable properties inherited through the prototype chain.
			if (!Object.prototype.hasOwnProperty.call(subtitles, i)) {
				continue;
			}

			var subtitle = subtitles[i];

			var startCache = getCacheName(subtitle.start);
			var endCache = getCacheName(subtitle.end);

			addToBucket(startCache, subtitle);

			if (isFinite(startCache) && isFinite(endCache)) {
				// An entry whose end precedes its start can never be displayed,
				// so it is only kept in its start bucket.
				for (var bucket = startCache + 1; bucket <= endCache; bucket++) {
					addToBucket(bucket, subtitle);
				}
			}
			else if (startCache !== endCache) {
				// Non-finite bounds cannot be enumerated; keep the end bucket only.
				addToBucket(endCache, subtitle);
			}
		}

		return cache;
	};

	/**
	 * Checks whether the document already has a <script> element whose `src`
	 * contains "youtube.com/iframe_api".
	 *
	 * @returns {boolean} true when such a script element exists.
	 */
	var iframeApiScriptAdded = function() {
		var scriptElements = root.document.getElementsByTagName('script');
		var index = 0;

		while (index < scriptElements.length) {
			var source = scriptElements[index].src;

			if (source && source.indexOf('youtube.com/iframe_api') !== -1) {
				return true;
			}

			index += 1;
		}

		return false;
	};

	/**
	 * Invokes `cb` once the YouTube IFrame API is available.
	 *
	 * When the API is already loaded, `cb` runs synchronously. Otherwise the
	 * function polls every 100 ms until the API appears and, unless a matching
	 * <script> tag already exists, inserts one before the document's first script.
	 *
	 * @param {Function} cb - callback to run when the API is ready.
	 */
	var loadIframeApi = function(cb) {
		if (iframeApiLoaded()) {
			cb();

			return;
		}

		var pollTimer = setInterval(function() {
			if (!iframeApiLoaded()) {
				return;
			}

			clearInterval(pollTimer);

			cb();
		}, 100);

		// Another instance (or the page itself) may already have requested the API.
		if (iframeApiScriptAdded()) {
			return;
		}

		var apiScript = root.document.createElement('script');
		apiScript.src = 'https://www.youtube.com/iframe_api';

		var anchor = root.document.getElementsByTagName('script')[0];
		anchor.parentNode.insertBefore(apiScript, anchor);
	};

	/**
	 * Fullscreen-change listener that keeps subtitle overlays in sync.
	 *
	 * Entering fullscreen on an element that owns a subtitle: that subtitle's
	 * overlay gets the "fullscreen" class and is re-rendered on the next tick;
	 * every other overlay gets "fullscreen-ignore". Leaving fullscreen: overlays
	 * marked "fullscreen" lose the class and are re-rendered immediately; the
	 * others lose "fullscreen-ignore".
	 *
	 * @param {Event} e - the fullscreen change event (not used).
	 */
	var fullscreenChangeHandler = function(e) {
		var doc = root.document;

		var fullscreenElement = doc.fullscreenElement ||
			doc.webkitFullscreenElement ||
			doc.webkitCurrentFullScreenElement ||
			doc.mozFullScreenElement ||
			doc.msFullscreenElement;

		var subtitles = doc.getElementsByClassName('youtube-external-subtitle');
		var index;

		if (!fullscreenElement) {
			for (index = 0; index < subtitles.length; index++) {
				var overlay = subtitles[index];

				if (hasClass(overlay, 'fullscreen')) {
					removeClass(overlay, 'fullscreen');

					overlay.parentFrame.youtubeExternalSubtitle.render();
				}
				else {
					removeClass(overlay, 'fullscreen-ignore');
				}
			}

			return;
		}

		// Fullscreen on an element that has no subtitle attached: nothing to do.
		if (!fullscreenElement.youtubeExternalSubtitle) {
			return;
		}

		var renderLater = function() {
			fullscreenElement.youtubeExternalSubtitle.render();
		};

		for (index = 0; index < subtitles.length; index++) {
			if (subtitles[index] === fullscreenElement.youtubeExternalSubtitle.element) {
				addClass(subtitles[index], 'fullscreen');

				setTimeout(renderLater, 0);
			}
			else {
				addClass(subtitles[index], 'fullscreen-ignore');
			}
		}
	};

	/**
	 * One-time page setup: injects the subtitle stylesheet as the first child of
	 * <head> (or of the document element when there is no <head>) and registers
	 * the fullscreen-change handler under the standard and vendor-prefixed
	 * event names.
	 */
	var firstInit = function() {
		var doc   = root.document;
		var sheet = doc.createElement('style');

		sheet.id        = 'youtube-external-subtitle-style';
		sheet.type      = 'text/css';
		sheet.innerHTML = ".youtube-external-subtitle { position: absolute; display: none; z-index: 0; pointer-events: none; color: #fff; font-family: Arial, 'Helvetica Neue', Helvetica, sans-serif; font-size: 1.6em; text-align: center; } .youtube-external-subtitle span { background: #000; opacity:0.6; padding: 1px 4px; display: inline-block; margin-bottom: 2px; margin:auto; max-width: 500px; } .youtube-external-subtitle.fullscreen-ignore { display: none !important; } .youtube-external-subtitle.fullscreen { z-index: 3000000000; }";

		var container = doc.getElementsByTagName('head')[0] || doc.documentElement;
		container.insertBefore(sheet, container.firstChild);

		var fullscreenEvents = [
			'fullscreenchange',
			'webkitfullscreenchange',
			'mozfullscreenchange',
			'MSFullscreenChange'
		];

		for (var k = 0; k < fullscreenEvents.length; k++) {
			doc.addEventListener(fullscreenEvents[k], fullscreenChangeHandler);
		}
	};

	/**
	 * Subtitle controller attached to one YouTube <iframe>.
	 *
	 * Construction marks the iframe as owned, installs the shared stylesheet and
	 * fullscreen listeners on first use, makes sure the iframe URL carries
	 * `enablejsapi=1` and `html5=1`, indexes the subtitles, and then, once the
	 * IFrame API is available, creates the player and the overlay element.
	 *
	 * @constructor
	 * @param {HTMLIFrameElement} iframe - the embedded YouTube player frame.
	 * @param {Array<{start: number, end: number, text: string}>} [subtitles] -
	 *        subtitle entries; may be omitted and supplied later through `load()`.
	 * @throws {Error} when the iframe already has a subtitle controller attached.
	 */
	var Subtitle = YoutubeExternalSubtitle.Subtitle = function(iframe, subtitles) {
		this.subtitle           = null; // subtitle entry currently displayed, or null
		this.cache              = null; // bucket index produced by buildCache()
		this.timeChangeInterval = 0;    // timer handle of the polling loop
		this.player             = null; // YT.Player, set once the API has loaded
		this.videoId            = null; // id of the video the subtitles belong to
		this.element            = null; // overlay <div>, set once the API has loaded

		if (iframe.youtubeExternalSubtitle) {
			throw new Error('YoutubeExternalSubtitle: subtitle is already added for this element');
		}

		// Back-reference used by the fullscreen handler and by the guard above.
		iframe.youtubeExternalSubtitle = this;

		// The stylesheet's presence doubles as the "already initialised" flag.
		if (!root.document.getElementById('youtube-external-subtitle-style')) {
			firstInit();
		}

		var newSrc = iframe.src;

		if (newSrc.indexOf('enablejsapi=1') === -1) {
			newSrc = addQueryStringParameterToUrl(newSrc, { enablejsapi: '1' });
		}

		if (newSrc.indexOf('html5=1') === -1) {
			newSrc = addQueryStringParameterToUrl(newSrc, { html5: '1' });
		}

		// Only touch `src` when it actually changed.
		if (iframe.src !== newSrc) {
			iframe.src = newSrc;
		}

		if (subtitles) {
			this.cache = buildCache(subtitles);
		}

		// Runs immediately when the API is already loaded, otherwise once it appears.
		loadIframeApi(proxy(function() {
			this.player  = new root.YT.Player(iframe);
			this.videoId = this.getCurrentVideoId();

			this.element = root.document.createElement('div');
			addClass(this.element, 'youtube-external-subtitle');

			// Lets the fullscreen handler get from the overlay back to its iframe.
			this.element.parentFrame = iframe;

			// Insert the overlay right after the iframe.
			iframe.parentNode.insertBefore(this.element, iframe.nextSibling);

			this.player.addEventListener('onStateChange', proxy(this.onStateChange, this));
		}, this));
	};

	/**
	 * Replaces the subtitle set with a new one by rebuilding the bucket index.
	 *
	 * @param {Array<{start: number, end: number, text: string}>} subtitles - new subtitle entries.
	 */
	Subtitle.prototype.load = function(subtitles) {
		var rebuilt = buildCache(subtitles);

		this.cache = rebuilt;
	};

	/**
	 * (Re)starts the polling loop that checks the playback time every 500 ms.
	 * Any loop that is already running is stopped first.
	 */
	Subtitle.prototype.start = function() {
		this.stop();

		var tick = proxy(this.onTimeChange, this);

		this.timeChangeInterval = setInterval(tick, 500);
	};

	/**
	 * Stops the playback-time polling loop, if one is running.
	 */
	Subtitle.prototype.stop = function() {
		var handle = this.timeChangeInterval;

		clearInterval(handle);
	};

	/**
	 * Tears the controller down: stops polling, removes the overlay element from
	 * the DOM and clears the back-reference on the iframe so a new controller
	 * can be attached to it.
	 */
	Subtitle.prototype.destroy = function() {
		this.stop();

		var overlay = this.element;
		overlay.parentNode.removeChild(overlay);

		var iframe = this.player.getIframe();
		iframe.youtubeExternalSubtitle = null;
	};

	/**
	 * Returns the ID of the video currently loaded in the player, taken from
	 * the `src` attribute of the player's embed code.
	 *
	 * @returns {string|false} the video ID, or false when the embed code is not
	 *          a string, has no `src` attribute, or the URL holds no valid ID
	 *          (previously a missing `src` raised a TypeError).
	 */
	Subtitle.prototype.getCurrentVideoId = function() {
		var embedCode = this.player.getVideoEmbedCode();
		var srcMatch  = typeof embedCode === 'string' ? embedCode.match(/src="(.*?)"/) : null;

		if (!srcMatch) {
			return false;
		}

		return getYouTubeIDFromUrl(srcMatch[1]);
	};

	/**
	 * Player state listener: starts polling on PLAYING, stops it on PAUSED, and
	 * on ENDED stops it and clears the displayed subtitle. Events are ignored
	 * while the player shows a video other than the one this controller was
	 * created for.
	 *
	 * @param {{data: number}} e - state change event; `data` is compared against `YT.PlayerState`.
	 */
	Subtitle.prototype.onStateChange = function(e) {
		if (this.videoId !== this.getCurrentVideoId()) {
			return;
		}

		var states = root.YT.PlayerState;

		switch (e.data) {
			case states.PLAYING:
				this.start();
				break;

			case states.PAUSED:
				this.stop();
				break;

			case states.ENDED:
				this.stop();

				this.subtitle = null;

				this.render();
				break;
		}
	};

	/**
	 * Polling callback: looks up the subtitle for the current playback time and
	 * re-renders only when it differs from the one already displayed.
	 */
	Subtitle.prototype.onTimeChange = function() {
		var now     = this.player.getCurrentTime();
		var current = this.getSubtitleFromCache(now);

		if (current !== this.subtitle) {
			this.subtitle = current;

			this.render();
		}
	};

	/**
	 * Finds the subtitle that should be visible at the given playback time.
	 *
	 * @param {number} seconds - playback time in seconds.
	 * @returns {Object|null} the first entry in the time's bucket whose
	 *          [start, end] range contains `seconds`, or null when there is no
	 *          cache, no bucket or no matching entry.
	 */
	Subtitle.prototype.getSubtitleFromCache = function(seconds) {
		var bucket = this.cache ? this.cache[getCacheName(seconds)] : null;

		if (!bucket) {
			return null;
		}

		for (var k = 0; k < bucket.length; k++) {
			var candidate = bucket[k];

			if (candidate.start <= seconds && candidate.end >= seconds) {
				return candidate;
			}
		}

		return null;
	};

	/**
	 * Draws the current subtitle over the player, or hides the overlay when no
	 * subtitle is active.
	 *
	 * Each line of the subtitle text becomes its own <span>, separated by <br />.
	 * The overlay is centred horizontally on the iframe and placed so that its
	 * bottom edge sits 60px above the bottom of the iframe.
	 */
	Subtitle.prototype.render = function() {
		if (this.subtitle === null) {
			// Clearing the inline style falls back to the stylesheet's display value.
			this.element.style.display = '';

			return;
		}

		var iframe = this.player.getIframe();

		var left   = iframe.offsetLeft - iframe.scrollLeft + iframe.clientLeft;
		var top    = iframe.offsetTop - iframe.scrollTop + iframe.clientTop;
		var width  = iframe.offsetWidth;
		var height = iframe.offsetHeight;

		var overlay = this.element;
		var lines   = this.subtitle.text.split(/\r\n|\r|\n/);

		overlay.innerHTML = '<span>' + lines.join('</span><br /><span>') + '</span>';
		overlay.style.display = 'block';

		// The overlay's own size is read only after its content and display are set.
		overlay.style.top = (top + height - 60 - overlay.offsetHeight) + 'px';
		overlay.style.left = (left + (width - overlay.offsetWidth) / 2) + 'px';
	};

	return YoutubeExternalSubtitle;

}));

 

This is the Python code that makes creating the subtitles easy. When the program starts, you must enter the name of the txt file that contains the subtitles. The file looks like this:

cVaYAetyxO0
- lo stratagemma per tagliare i capelli
04 A volte bisogna mettersi a terra per fare il proprio lavoro
07 Franz Jacob non lo ha imparato alla scuola per barbieri
09 lo ha fatto per istinto
14 il suo giovane cliente Wyatt è un bambino con autismo
16 a volte  è un'impresa farlo stare fermo sulla sedia
21 ho osservato il suo comportamento
24 e l'ho solo assecondato
28 non è la prima volta che facciamo così
35 ogni volta che viene c'è sempre una 'fase' sul pavimento
38 ecco Wyatt con un bel taglio e un sorriso
02 è tutto per oggi

As you can see in the text file above, the first line is the address of the YouTube video (only its final part, the video ID). The second line is reserved: it is either a free-form comment or, if the speech does not start in the first minute, a starting time such as x1x30, meaning that the subtitles start at 1’30”; this line is never shown as a subtitle. After that, each line contains the two-digit second at which the subtitle ends, followed by its text. The same x…x… notation can be used on a line of its own later in the file to jump to another point of the video. When the video is long, this code is handy because you do not have to add up the seconds.

The following code will read the text file and transform it into the right javascript code to be copied in the wordpress page (or any other type of web page).

# REGOLE sub6_2_jumpGUI.py --------- ESEMPIO ----------
'''
OUbqytVF-F0
nella riga 1 se scrivo x12x00 comincia al minuto 12, se non inizia per x ignora la riga e comincia da 0:0
03 traduzione di Giovanni Gatto
11 ci avete chiesto spesso cosa usa Abigail per comunicare
16 a causa della mancanza di alcune abilità motorie e per renderglielo più semplice
x3x00 salta al minuto 3
19 il linguaggio dei segni è leggermente modificato
20 per andare incontro a questo
22 ok, sei pronta?
24 sì, sei pronta?
# --------------- questo è un commento---------------------------
'''

import tkinter as tk


def createSub(aotv, tots):
    """Print the HTML/JavaScript snippet that embeds a video with subtitles.

    Reads ``<tots>.txt`` (UTF-8) and prints the ``<iframe>``, the ``<script>``
    tag loading the subtitle library, and the subtitle array in the format the
    JavaScript library accepts.

    Input file format:
      * line 0: the video id (skipped here; the id is passed in as ``aotv``);
      * line 1: reserved, either a start time ``xMxS`` or a free-form comment;
      * ``xMxS``: jump to minute M, second S; text after the numbers is ignored;
      * ``# ...``: comment, ignored; blank lines are ignored as well;
      * ``SS text``: a subtitle that starts where the previous one ended and
        ends at second SS. When SS is lower than the previous value, the
        minute counter is advanced by one.

    Args:
        aotv: address (id) of the video, used in the embed URL.
        tots: name of the subtitle file without ``.txt``; also used to build
            unique element/variable names when a page holds several videos.

    Raises:
        OSError: if the subtitle file cannot be opened.
        ValueError: if a jump line is malformed or a subtitle line does not
            start with a two-digit number.
    """
    import re  # local import: the file-level imports only provide tkinter

    indirizzo_video = aotv           # Address Of The Video
    subtitle = 'subtitle' + tots     # unique JS variable name for this video's subtitles
    video = "video" + tots           # unique id of the iframe
    sub_txt = tots + ".txt"          # file that holds the subtitles

    # "x<minutes>x<seconds>", optionally followed by a free comment.
    jump_pattern = re.compile(r"x\s*(\d+)\s*x\s*(\d+)")

    def parse_jump(riga, numero):
        """Return (minutes, seconds) of a jump line, or raise ValueError."""
        trovato = jump_pattern.match(riga)
        if trovato is None:
            raise ValueError("line %d: expected a jump like x1x30, got %r" % (numero + 1, riga))
        return int(trovato.group(1)), int(trovato.group(2))

    print("<iframe id=\"" + video + "\" width=\"100%\" height=\"500\" src=\"https://www.youtube.com/embed/" + indirizzo_video + "\" frameborder=\"0\" allowfullscreen></iframe>")
    print("""
    <!-- subtit_em.js subtut_1m.js subtit_2m.js (non funziona?) sub16em.js -->
        """)
    print("<script src=\"https://sportelloautismo.github.io/js/sub16em.js\"></script>")
    print("<script>\nvar " + subtitle + " = [")

    with open(sub_txt, encoding="utf-8") as file:
        testo = file.readlines()

    # Current position in the video; "minuto" avoids shadowing the builtin min().
    minuto = 0
    sec = 0
    for numero, riga in enumerate(testo):
        if numero == 0:
            # The video id is not part of the subtitles.
            continue
        if riga[:1] == "x":
            # Start time (on line 1) or jump (anywhere later).
            minuto, sec = parse_jump(riga, numero)
            continue
        if numero == 1:
            # Line 1 without a leading x is a comment: start from 0:00.
            continue
        if riga[:1] == "#" or not riga.strip():
            # Comments and blank lines produce no subtitle.
            continue
        s1 = minuto * 60 + sec
        sec2 = int(riga[:2])
        if sec2 < sec:
            # The seconds wrapped around, so a new minute has started.
            minuto += 1
        e1 = minuto * 60 + sec2
        # Double quotes would terminate the JS string literal.
        text = riga.rstrip("\n").replace("\"", "'")
        print("{\"start\": " + str(s1) + ".1,")
        print("\"end\" :" + str(e1) + '.0,')
        print("\"text\" :\"" + text[3:] + "\"},")
        sec = sec2

    print("];\nvar youtubeExternalSubtitle = new YoutubeExternalSubtitle.Subtitle(document.getElementById('" + video + "'), " + subtitle + ");\n</script>")

# GRAPHICAL INTERFACE


# Main window: fixed 250x150 size, green background.
root = tk.Tk()
root.title("Subtitle Maker")
root.geometry("250x150")
root.configure(bg='green')
# Instruction label; its text is driven by the StringVar `var`.
var = tk.StringVar()
label = tk.Message(root, textvariable=var, relief=tk.RAISED, bg='yellow')
var.set("insert file and press enter")
label.pack()

# LEGGE IL NOME DEL VIDEO NEL FILE


def start():
    """Read the video id from the chosen file and print the subtitle code.

    The file name (without ``.txt``) is taken from the ``vartesto`` entry; its
    first line is stored in ``varvideo`` and both values are passed on to
    ``createSub``.
    """
    # Read with an explicit UTF-8 codec, like createSub does, instead of the
    # platform default; "utf-8-sig" also drops a leading BOM, which would
    # otherwise end up inside the video id.
    with open(vartesto.get() + ".txt", encoding="utf-8-sig") as file:
        a = file.readline().strip()
    varvideo.set(a)
    createSub(varvideo.get(), vartesto.get())


# Entry showing the video id; start() fills it in from the first line of the file.
varvideo = tk.StringVar()
entryvid = tk.Entry(root, textvariable=varvideo, bg='pink')
entryvid.pack()

# Entry where the user types the subtitle file name (without .txt); it gets the focus.
vartesto = tk.StringVar()
entry = tk.Entry(root, textvariable=vartesto, bg='lightblue', bd=4)
entry.pack()
entry.focus_set()

# Pressing Return anywhere in the window runs start().
root.bind("<Return>", lambda x: start())
root.mainloop()

In this example, you will have this:

Write the name of the txt file in the second text box without the .txt
<iframe id="videobarbiere" width="100%" height="500" src="https://www.youtube.com/embed/cVaYAetyxO0" frameborder="0" allowfullscreen></iframe>

    <!-- subtit_em.js subtut_1m.js subtit_2m.js (non funziona?) sub16em.js -->
        
<script src="https://sportelloautismo.github.io/js/sub16em.js"></script>
<script>
var subtitlebarbiere = [
{"start": 0.1,
"end" :4.0,
"text" :"A volte bisogna mettersi a terra per fare il proprio lavoro"},
{"start": 4.1,
"end" :7.0,
"text" :"Franz Jacob non lo ha imparato alla scuola per barbieri"},
{"start": 7.1,
"end" :9.0,
"text" :"lo ha fatto per istinto"},
{"start": 9.1,
"end" :14.0,
"text" :"il suo giovane cliente Wyatt è un bambino con autismo"},
{"start": 14.1,
"end" :16.0,
"text" :"a volte  è un'impresa farlo stare fermo sulla sedia"},
{"start": 16.1,
"end" :21.0,
"text" :"ho osservato il suo comportamento"},
{"start": 21.1,
"end" :24.0,
"text" :"e l'ho solo assecondato"},
{"start": 24.1,
"end" :28.0,
"text" :"non è la prima volta che facciamo così"},
{"start": 28.1,
"end" :35.0,
"text" :"ogni volta che viene c'è sempre una 'fase' sul pavimento"},
{"start": 35.1,
"end" :38.0,
"text" :"ecco Wyatt con un bel taglio e un sorriso"},
];
var youtubeExternalSubtitle = new YoutubeExternalSubtitle.Subtitle(document.getElementById('videobarbiere'), subtitlebarbiere);
</script>

As you can see, it is far more complicated than the txt file. If the video lasts more than one minute, you do not have to write the minutes: when the program sees that the seconds are lower than the previous value, it adds a minute automatically, so you only have to write the seconds within each minute. As I said before, if there is a pause longer than a minute, you add a line such as x2x35 to say that there is a jump, and on the next line you continue by writing the second at which the first subtitle after the jump ends (for example 39).

Take a look at the video to better understand how this works. I have used it a lot, and it works quite well. If you want to change the way the subtitles are displayed, you have to modify the js file.

https://youtu.be/sRc2dZrzmVY

The latest version of the code

To make the program easier to use, I added a listbox to the GUI so that the user can choose the text file with the subtitles from a list. Now you do not have to remember the exact name of the text file you created: a single click is enough. I used the bind method with the <<ListboxSelect>> event to do this. To find out which item is selected, I used the listbox's curselection() method (its first element, [0], stored in a variable), and then, to get the name of the chosen text file, I passed that variable to the listbox's get() method (a little tricky to remember, but we talked about this in the listbox post).

# REGOLE sub6_2_jumpGUI.py --------- ESEMPIO ----------
'''
OUbqytVF-F0
nella riga 1 se scrivo x12x00 comincia al minuto 12, se non inizia per x ignora la riga e comincia da 0:0
03 traduzione di Giovanni Gatto
11 ci avete chiesto spesso cosa usa Abigail per comunicare
16 a causa della mancanza di alcune abilità motorie e per renderglielo più semplice
x3x00 salta al minuto 3
19 il linguaggio dei segni è leggermente modificato
20 per andare incontro a questo
22 ok, sei pronta?
24 sì, sei pronta?
# --------------- questo è un commento---------------------------
'''
# subtitles4.py: try to make a list of the txt file so that you just have to select one instead of writing the title
import tkinter as tk
import glob

def createSub(aotv, tots):
    """Build and print the HTML/JavaScript snippet that embeds a video with subtitles.

    Reads ``<tots>.txt`` (UTF-8), collects the ``<iframe>``, the ``<script>``
    tag loading the subtitle library and the subtitle array (in the format the
    JavaScript library accepts) into a list, and prints the list once the
    whole file has been processed.

    Input file format:
      * line 0: the video id (skipped here; the id is passed in as ``aotv``);
      * line 1: reserved, either a start time ``xMxS`` or a free-form comment;
      * ``xMxS``: jump to minute M, second S; text after the numbers is ignored;
      * ``# ...``: comment, ignored; blank lines are ignored as well;
      * ``SS text``: a subtitle that starts where the previous one ended and
        ends at second SS. When SS is lower than the previous value, the
        minute counter is advanced by one.

    Args:
        aotv: address (id) of the video, used in the embed URL.
        tots: name of the subtitle file without ``.txt``; also used to build
            unique element/variable names when a page holds several videos.

    Raises:
        OSError: if the subtitle file cannot be opened.
        ValueError: if a jump line is malformed or a subtitle line does not
            start with a two-digit number.
    """
    import re  # local import: the file-level imports only provide tkinter and glob

    indirizzo_video = aotv           # Address Of The Video
    subtitle = 'subtitle' + tots     # unique JS variable name for this video's subtitles
    video = "video" + tots           # unique id of the iframe
    sub_txt = tots + ".txt"          # file that holds the subtitles

    # "x<minutes>x<seconds>", optionally followed by a free comment.
    jump_pattern = re.compile(r"x\s*(\d+)\s*x\s*(\d+)")

    def parse_jump(riga, numero):
        """Return (minutes, seconds) of a jump line, or raise ValueError."""
        trovato = jump_pattern.match(riga)
        if trovato is None:
            raise ValueError("line %d: expected a jump like x1x30, got %r" % (numero + 1, riga))
        return int(trovato.group(1)), int(trovato.group(2))

    # codice_wp: the pieces of HTML code to display, in output order.
    codice_wp = []
    codice_wp.append("<iframe id=\"" + video + "\" width=\"100%\" height=\"500\" src=\"https://www.youtube.com/embed/" + indirizzo_video + "\" frameborder=\"0\" allowfullscreen></iframe>")
    codice_wp.append("""
    <!-- subtit_em.js subtut_1m.js subtit_2m.js (non funziona?) sub16em.js -->
        """)
    codice_wp.append("<script src=\"https://sportelloautismo.github.io/js/sub16em.js\"></script>")
    codice_wp.append("<script>\nvar " + subtitle + " = [")

    with open(sub_txt, encoding="utf-8") as file:
        testo = file.readlines()

    # Current position in the video; "minuto" avoids shadowing the builtin min().
    minuto = 0
    sec = 0
    for numero, riga in enumerate(testo):
        if numero == 0:
            # The video id is not part of the subtitles.
            continue
        if riga[:1] == "x":
            # Start time (on line 1) or jump (anywhere later).
            minuto, sec = parse_jump(riga, numero)
            continue
        if numero == 1:
            # Line 1 without a leading x is a comment: start from 0:00.
            continue
        if riga[:1] == "#" or not riga.strip():
            # Comments and blank lines produce no subtitle.
            continue
        s1 = minuto * 60 + sec
        sec2 = int(riga[:2])
        if sec2 < sec:
            # The seconds wrapped around, so a new minute has started.
            minuto += 1
        e1 = minuto * 60 + sec2
        # Double quotes would terminate the JS string literal.
        text = riga.rstrip("\n").replace("\"", "'")
        codice_wp.append("{\"start\": " + str(s1) + ".1,")
        codice_wp.append("\"end\" :" + str(e1) + '.0,')
        codice_wp.append("\"text\" :\"" + text[3:] + "\"},")
        sec = sec2

    codice_wp.append("];\nvar youtubeExternalSubtitle = new YoutubeExternalSubtitle.Subtitle(document.getElementById('" + video + "'), " + subtitle + ");\n</script>")

    for code in codice_wp:
        print(code)
# GRAPHICAL INTERFACE


# Main window: fixed 250x150 size, green background.
root = tk.Tk()
root.title("Subtitle Maker")
root.geometry("250x150")
root.configure(bg='green')
# Instruction label; its text is driven by the StringVar `var`.
var = tk.StringVar()
label = tk.Message(root, textvariable=var, relief=tk.RAISED, bg='yellow')
var.set("insert file and press enter")
label.pack()

# LEGGE IL NOME DEL VIDEO NEL FILE


def start():
    """Read the video id from the chosen file and print the subtitle code.

    The file name (without ``.txt``) is taken from the ``vartesto`` entry; its
    first line is stored in ``varvideo`` and both values are passed on to
    ``createSub``.
    """
    # Read with an explicit UTF-8 codec, like createSub does, instead of the
    # platform default; "utf-8-sig" also drops a leading BOM, which would
    # otherwise end up inside the video id.
    with open(vartesto.get() + ".txt", encoding="utf-8-sig") as file:
        a = file.readline().strip()
    varvideo.set(a)
    createSub(varvideo.get(), vartesto.get())


# Entry showing the video id; start() fills it in from the first line of the file.
varvideo = tk.StringVar()
entryvid = tk.Entry(root, textvariable=varvideo, bg='pink')
entryvid.pack()

# Entry holding the subtitle file name (without .txt); typed by the user or
# set by start2() from the listbox selection. It gets the focus.
vartesto = tk.StringVar()
entry = tk.Entry(root, textvariable=vartesto, bg='lightblue', bd=4)
entry.pack()
entry.focus_set()


def start2():
    """Handle a listbox selection: copy the chosen file name and run start().

    The ``.txt`` extension is stripped before the name is stored in
    ``vartesto``, because start() and createSub() add it back themselves.
    """
    selezione = listbox.curselection()
    if not selezione:
        # <<ListboxSelect>> also fires when the selection is cleared; there is
        # nothing to process then, and indexing [0] would raise IndexError.
        return
    vartesto.set(listbox.get(selezione[0])[:-4])
    start()

# Listbox with every .txt file in the current directory; each name is inserted
# at index 0, so the list shows them in the reverse of glob's order.
listbox = tk.Listbox(root)
listbox.pack()
for txt in glob.glob("*.txt"):
    listbox.insert(0, txt)

# Selecting an item generates the code for that file.
listbox.bind("<<ListboxSelect>>", lambda x: start2())

# NOTE(review): this Text widget is created and packed, but nothing in the
# visible code writes to it; createSub() prints to standard output.
text = tk.Text(root)
text.pack()

# Pressing Return anywhere in the window runs start() with the typed name.
root.bind("<Return>", lambda x: start())
root.mainloop()

 

Utilities

Published by pythonprogramming

Started with BASIC on the Spectrum, loved JavaScript in the ’90s and Python in the 2000s; now I am back with Python, still writing some JavaScript when needed.