Skip to content

Commit c3de7af

Browse files
authored
Add Khan Academy Downloader bash script (HarshCasper#839)
* Add Khan Academy Downloader bash script * Add instructions regarding dependencies Listed the dependencies that the user needs to run the script. * Add dependency installation instructions Added distro specific instructions for users not familiar with installation of packages. * Break multiple shell commands into separate lines
1 parent 1a3f912 commit c3de7af

File tree

2 files changed

+161
-0
lines changed

2 files changed

+161
-0
lines changed

Bash/KhanAcademy_Downloader/README.md

Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,54 @@
1+
# Khan Academy Downloader
2+
3+
[![forthebadge](https://forthebadge.com/images/badges/built-with-love.svg)](https://forthebadge.com)
4+
[![forthebadge](https://forthebadge.com/images/badges/open-source.svg)](https://forthebadge.com)
5+
[![Bash Shell](https://badges.frapsoft.com/bash/v1/bash.png?v=103)](https://github.com/ellerbrock/open-source-badges/)
6+
7+
Khan Academy Downloader is a Bash script that fetches the course names from the [Khan Academy website](https://www.khanacademy.org/) and displays them as a menu to choose from.
8+
After choosing a topic, a subtopic and a chapter from successive menus, the user can select a video from the final menu and download it for offline watching.
9+
10+
## Setup and Usage Instructions
11+
12+
- Make sure you have curl, perl, perl-HTML-parser (provides the `HTML::Entities` module), youtube-dl, coreutils, grep and sed installed on your machine. Note that grep and sed are packaged separately from coreutils.
13+
- For Debian/Ubuntu:
14+
> sudo apt install libhtml-parser-perl youtube-dl coreutils
15+
16+
- For RHEL/Fedora:
17+
> sudo dnf install perl coreutils
18+
19+
> pip3 install youtube-dl
20+
21+
> wget http://rpmfind.net/linux/RPM/mageia/cauldron/aarch64/media/core/release/perl-HTML-Parser-3.760.0-1.mga9.aarch64.html
22+
23+
> sudo dnf install http://rpmfind.net/linux/mageia/distrib/cauldron/aarch64/media/core/release/perl-HTML-Parser-3.760.0-1.mga9.aarch64.rpm
24+
25+
> sudo dnf install perl-HTML-Parser
26+
27+
- For Arch Linux:
28+
> sudo pacman -S perl-html-parser youtube-dl coreutils
29+
30+
- To run the script as a local user, navigate to the directory where khanacademy-dl.sh resides and execute the following command:
31+
32+
> bash khanacademy-dl.sh
33+
34+
- To run the script from anywhere, regardless of the current directory, copy the file **khanacademy-dl.sh** to **_/usr/local/bin/_**.
35+
36+
1. To do so execute the following command:
37+
38+
> sudo cp khanacademy-dl.sh /usr/local/bin
39+
40+
2. Then set the executable bit on the script:
41+
42+
> sudo chmod +x /usr/local/bin/khanacademy-dl.sh
43+
44+
3. You can now run the script using the following command:
45+
46+
> khanacademy-dl.sh
47+
48+
## Output
49+
50+
![sample-output](https://imgur.com/zV7HCU8.png)
51+
52+
## Author(s)
53+
54+
[Prateek Ganguli](https://github.com/pganguli)
Lines changed: 107 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,107 @@
1+
#!/bin/bash
2+
3+
# External programs this script invokes; check_commands looks each one up
# before any work starts.
DEPS=(
  "grep"
  "sed"
  "curl"
  "perl"
  "youtube-dl"
)

# Base URL of the site whose course listings are scraped.
DOMAIN="https://www.khanacademy.org"
12+
13+
#######################################
# Verify that every named command is available before the script does any work.
# Arguments:
#   $@ - command names to look up with `command -v`
# Outputs:
#   one "command not found: NAME" line on stderr per missing command
# Returns:
#   0 when every command resolves; otherwise terminates the script with
#   exit status 1 after all missing commands have been reported
#######################################
check_commands() {
  local cmd
  local missing=0

  for cmd in "$@"; do
    if ! command -v "$cmd" > /dev/null 2>&1; then
      # Diagnostics go to stderr; keep scanning so the user sees every gap
      # in a single run instead of fixing them one at a time.
      echo "command not found: $cmd" >&2
      missing=1
    fi
  done

  if (( missing )); then
    exit 1
  fi
}
22+
23+
#######################################
# Show a general-purpose menu with numbered entries and read the user's choice.
# Globals:
#   SELECTION (written) - zero-based index of the chosen entry
# Arguments:
#   $@ - menu entries; HTML entities in them are decoded for display only
# Outputs:
#   the numbered entries on stdout; validation messages on stderr
# Returns:
#   0 once a number between 1 and the entry count has been entered.
#   Terminates the script with exit status 1 when there are no entries to
#   choose from or when standard input ends before a valid choice is made.
#######################################
menu_entry() {
  local count=$#
  local i answer

  if (( count == 0 )); then
    echo "Nothing to select from." >&2
    exit 1
  fi

  for (( i = 1; i <= count; i++ )); do
    # decode HTML entities such as '&quot;' and '&amp;' into '"' and '&'
    echo " [$i]" "$(printf '%s\n' "${!i}" | perl -MHTML::Entities -pe 'decode_entities($_);')"
  done

  while true; do
    if ! read -r -p "Enter number to select: " answer; then
      # EOF on stdin: no further input can arrive, so stop instead of looping.
      echo "No selection made." >&2
      exit 1
    fi

    # Accept plain decimal digits only. The raw answer must never reach $(( ))
    # unchecked, because arbitrary text there is evaluated as an arithmetic
    # expression. The 10# prefix keeps answers like "08" from being read as octal.
    if [[ "$answer" =~ ^[0-9]{1,9}$ ]] && (( 10#$answer >= 1 && 10#$answer <= count )); then
      break
    fi
    echo "Please enter a number between 1 and $count." >&2
  done

  SELECTION=$(( 10#$answer - 1 ))
}
33+
34+
#######################################
# Apply regex patterns to the page at $DOMAIN to extract the list of topics
# found in its "Courses" list.
# Globals:
#   DOMAIN       (read)    - base URL to fetch
#   REGEX_MATCH  (written) - matched 'href="/slug"...>Title<' fragments, one per line
#   TOPIC_TITLES (written) - array holding the text between '>' and '<' of each fragment
#   TOPICS       (written) - array of slugs (href value without the leading '/'),
#                            parallel to TOPIC_TITLES
# Returns:
#   0 when at least one topic was found; 1 when nothing matched, in which
#   case both arrays are left empty
#######################################
extract_topics() {
  TOPIC_TITLES=()
  TOPICS=()

  REGEX_MATCH=$(curl -s "$DOMAIN" \
    | grep -Po '>Courses</h3><ul>.*?</ul>' \
    | grep -Po 'href="/[^/]*?"[^>]*?>[^<]+?<')

  # Fetch failed or the page layout no longer matches the patterns.
  [[ -n "$REGEX_MATCH" ]] || return 1

  # mapfile stores each output line verbatim. An unquoted ARR=( $(...) ) would
  # additionally subject scraped text to pathname expansion (e.g. a title "*")
  # and drop empty fields, letting the two arrays drift out of step.
  mapfile -t TOPIC_TITLES < <(grep -Po '>[^<]+?<' <<< "$REGEX_MATCH" | sed 's/[><]//g')
  mapfile -t TOPICS < <(grep -Po 'href=".*?"' <<< "$REGEX_MATCH" | sed 's/href="\///;s/"//')
}
42+
43+
#######################################
# Apply regex patterns to the page at $DOMAIN/<topic> to extract the list of
# subtopics, i.e. links of the form href="/<topic>/<name>".
# Globals:
#   DOMAIN          (read)    - base URL to fetch from
#   TOPIC           (written) - copy of $1
#   REGEX_MATCH     (written) - matched 'href="/topic/name">Title<' fragments, one per line
#   SUBTOPIC_TITLES (written) - array holding the text between '>' and '<' of each fragment
#   SUBTOPICS       (written) - array of "<topic>/<name>" paths, parallel to SUBTOPIC_TITLES
# Arguments:
#   $1 - topic slug, as stored in TOPICS by extract_topics
# Returns:
#   0 when at least one subtopic was found; 1 when nothing matched, in which
#   case both arrays are left empty
#######################################
extract_subtopics() {
  TOPIC="$1"
  SUBTOPIC_TITLES=()
  SUBTOPICS=()

  # \Q...\E makes the PCRE engine take the slug literally, so characters such
  # as '.' or '+' in it cannot act as regex operators.
  REGEX_MATCH=$(curl -s "$DOMAIN/$TOPIC" \
    | grep -Po 'href="/\Q'"$TOPIC"'\E/[^/"]*?">[^<]+?<')

  # Fetch failed or the page layout no longer matches the pattern.
  [[ -n "$REGEX_MATCH" ]] || return 1

  # mapfile stores each output line verbatim. An unquoted ARR=( $(...) ) would
  # additionally subject scraped text to pathname expansion (e.g. a title
  # ending in '?') and drop empty fields, misaligning the two arrays.
  mapfile -t SUBTOPIC_TITLES < <(grep -Po '>[^<]+?<' <<< "$REGEX_MATCH" | sed 's/[><]//g')
  mapfile -t SUBTOPICS < <(grep -Po 'href=".*?"' <<< "$REGEX_MATCH" | sed 's/href="\///;s/"//')
}
52+
53+
#######################################
# Apply regex patterns to the page at $DOMAIN/<subtopic> to extract the list
# of chapters, i.e. links of the form href="/<subtopic>/<name>" that are
# directly followed by an <h3> heading.
# Globals:
#   DOMAIN         (read)    - base URL to fetch from
#   SUBTOPIC       (written) - copy of $1
#   REGEX_MATCH    (written) - matched 'href="..."...><h3...</h3>' fragments, one per line
#   CHAPTER_TITLES (written) - array holding each non-empty text run found
#                              between '>' and '<' in the fragments
#   CHAPTERS       (written) - array of "<subtopic>/<name>" paths
# Arguments:
#   $1 - subtopic path, as stored in SUBTOPICS by extract_subtopics
# Returns:
#   0 when at least one chapter was found; 1 when nothing matched, in which
#   case both arrays are left empty
#######################################
extract_chapters() {
  SUBTOPIC="$1"
  CHAPTER_TITLES=()
  CHAPTERS=()

  # \Q...\E makes the PCRE engine take the path literally, so characters such
  # as '.' or '+' in it cannot act as regex operators. Links whose last path
  # segment contains '#' (in-page anchors) are excluded by [^/#].
  REGEX_MATCH=$(curl -s "$DOMAIN/$SUBTOPIC" \
    | grep -Po 'href="/\Q'"$SUBTOPIC"'\E/[^/#]*?"[^>]*?><h3.*?</h3>')

  # Fetch failed or the page layout no longer matches the pattern.
  [[ -n "$REGEX_MATCH" ]] || return 1

  # mapfile stores each output line verbatim. An unquoted ARR=( $(...) ) would
  # additionally subject scraped text to pathname expansion (e.g. a title
  # ending in '?') and drop empty fields, misaligning the two arrays.
  mapfile -t CHAPTER_TITLES < <(grep -Po '>[^<]+?<' <<< "$REGEX_MATCH" | sed 's/[><]//g')
  mapfile -t CHAPTERS < <(grep -Po 'href=".*?"' <<< "$REGEX_MATCH" | sed 's/href="\///;s/"//')
}
62+
63+
#######################################
# Invoke youtube-dl to list the videos under the playlist at $DOMAIN/<chapter>.
# Globals:
#   DOMAIN       (read)    - base URL the chapter path is appended to
#   CHAPTER      (written) - copy of $1
#   VIDEO_TITLES (written) - array with one element per line printed by
#                            `youtube-dl --flat-playlist --get-title`
#   VIDEOS       (written) - array with one element per line printed by
#                            `youtube-dl --get-id`
# Arguments:
#   $1 - chapter path, as stored in CHAPTERS by extract_chapters
#######################################
extract_videos() {
  CHAPTER="$1"

  # mapfile stores each output line verbatim. An unquoted ARR=( $(...) ) would
  # additionally subject titles to pathname expansion, and titles containing
  # '?' or '*' are common.
  mapfile -t VIDEO_TITLES < <(youtube-dl --flat-playlist --get-title "$DOMAIN/$CHAPTER")
  mapfile -t VIDEOS < <(youtube-dl --get-id "$DOMAIN/$CHAPTER")
}
71+
72+
# Main flow: drill down topic -> subtopic -> chapter -> video, then offer to
# download the selected video. Every level is scraped on demand; when a level
# yields no entries the script stops instead of showing an empty menu and
# requesting a meaningless URL for the next level.

check_commands "${DEPS[@]}"

echo -e "\nFetching course topics..."
extract_topics
if [[ "${#TOPIC_TITLES[@]}" = "0" ]]; then
  echo "No topics found!" >&2
  exit 1
fi
echo -e "Select topic:"
menu_entry "${TOPIC_TITLES[@]}"

# printf '%s' prints scraped titles verbatim; echo -e would interpret any
# backslash sequences they happen to contain.
printf '\nFetching subtopics under "%s"...\n' "${TOPIC_TITLES[$SELECTION]}"
extract_subtopics "${TOPICS[$SELECTION]}"
if [[ "${#SUBTOPIC_TITLES[@]}" = "0" ]]; then
  echo "No subtopics found!" >&2
  exit 1
fi
echo -e "Select subtopic:"
menu_entry "${SUBTOPIC_TITLES[@]}"

printf '\nFetching chapters under "%s"...\n' "${SUBTOPIC_TITLES[$SELECTION]}"
extract_chapters "${SUBTOPICS[$SELECTION]}"
if [[ "${#CHAPTER_TITLES[@]}" = "0" ]]; then
  echo "No chapters found!" >&2
  exit 1
fi
echo -e "Select chapter:"
menu_entry "${CHAPTER_TITLES[@]}"

printf '\nFetching videos under "%s"...\n' "${CHAPTER_TITLES[$SELECTION]}"
extract_videos "${CHAPTERS[$SELECTION]}"

# some chapters have only text materials and no videos
if [[ "${#VIDEO_TITLES[@]}" = "0" ]]; then
  echo -e "No videos found!"
else
  # Only announce the menu when there is something to choose from.
  echo -e "Select video:"
  menu_entry "${VIDEO_TITLES[@]}"

  VIDEO_URL="https://youtube.com/watch?v=${VIDEOS[$SELECTION]}"
  echo -e "\nURL of selected video: $VIDEO_URL"
  read -r -p "Download video (y/n)? " SAVE
  if [[ "$SAVE" = 'y' || "$SAVE" = 'Y' ]]; then
    youtube-dl "$VIDEO_URL"
  else
    echo -e "Aborting..."
  fi
fi

0 commit comments

Comments
 (0)