#!/bin/bash

# it's:learning screen scraping script
#     by Vidar 'koala_man' Holen
# 
# currently it only supports the ntnu style login

# ---- User settings ----
# Login name and password for the NTNU login; set them by editing this file.
USER=''
PASS=''

# Space-separated subjects to leave out of the report. A course is skipped
# when its name contains one of these words. Leave empty to show everything.
IGNORES='IT3704 TDT4125 TDT4500 TDT4505 TMA4150 TBA4852'

# ---- System settings ----
# Locale name handed to the text tools (as LC_CTYPE) when they process the
# downloaded pages.
LATIN1='en_US'

# Command line used to render a saved HTML page as plain text.
# lynx cannot simply be swapped for links here: the two render differently.
DUMPCOMMAND='lynx -force-html -width=1000 -nolist -dump '

# Preflight checks. Every message goes to stderr: stdout carries the
# generated HTML report, so diagnostics must not end up in it.

# Kept in plain [ ] syntax, presumably so that a non-bash shell can still
# evaluate this test and print the message.
if [ -z "$BASH_VERSION" ]
then
	echo "You do not appear to be running bash. This script uses a bunch of " >&2
	echo "fairly bash specific features. Won't continue. " >&2
	exit 1
fi

# Both the user name and the password are sent to the login URL, so refuse
# to run when either one is still blank.
if [[ -z $USER || -z $PASS ]]
then
	echo "Due to Vidar's lazy coding, you have to specify the username/password" >&2
	echo "by editing this script file. " >&2
	exit 1
fi

# lynx turns the downloaded course pages into text.
if ! command -v lynx > /dev/null 2>&1
then
	echo "You do not appear to have lynx installed (used for rendering html)," >&2
	echo "can't continue." >&2
	exit 1
fi

# curl performs the login and every page download.
if ! command -v curl > /dev/null 2>&1
then
	echo "You do not appear to have curl installed (used for fetching pages)," >&2
	echo "can't continue." >&2
	exit 1
fi

#Not config:
# Print the arguments as one line on stderr and abort the script with
# exit status 1. stderr is used because stdout carries the HTML report.
die() {
	printf '%s\n' "$*" >&2
	exit 1
}

# Private scratch directory for cookies, headers and downloaded pages.
TMP=$(mktemp -d) || die "mktemp failed"
[[ -d $TMP ]] || die "mktemp failed"

# The directory holds the session cookie jar, so remove it on every exit
# path, including early failures.
trap 'rm -rf -- "$TMP"' EXIT

# User-Agent string sent with every request.
UA="Mozilla/5.0 (X11; Linux i686; en-US) Gecko/20070907 Firefox/2.0.3"
# Login entry point (NTNU single sign-on targeting it's:learning).
LOGINURL="https://innsida.ntnu.no/sso/?target=itslearning"
#MAINFRAME="https://www.itslearning.com/mainframe.aspx?Course=&amp;CPHFrame=1&amp;item="
# Page that the course list is scraped from.
MAINFRAME="https://www.itslearning.com/main/mainmenu.aspx"
# Per-course entry URL; the course id is appended to it.
COURSE="https://www.itslearning.com/Main.aspx?CourseID="
# Course page fetched right after COURSE for each course.
COURSE2="https://www.itslearning.com/course/course.aspx"

# Log in and download the main menu in a single curl run: the first
# -o/URL pair saves the login response, the second saves the main menu.
# Cookies received along the way are written to $TMP/cookies (-c) for the
# later per-course requests.
# NOTE(review): -k turns off TLS certificate verification while the
# credentials from -u are being sent; confirm whether it is still needed.
curl -s -A "$UA" -k -u "$USER:$PASS" -c "$TMP/cookies" \
	-L -D "$TMP/login-header" \
	-o "$TMP/login" "$LOGINURL" \
	-o "$TMP/main" "$MAINFRAME" \
	|| die "Login or main menu download failed (curl exit status $?)"

#curl -s -A "$UA" -k -u "$USER:$PASS" -c "$TMP/cookies" -o "$TMP/login" -L -D "$TMP/login-header" "$LOGINURL" -o "$TMP/main2" -D "$TMP/main2-head" "https://www.itslearning.com/main/mainmenu.aspx"


# Scrape the course list out of a saved main menu page.
# Arguments: $1 - path to the downloaded HTML file
# Globals:   sets N (number of courses found) and fills the parallel
#            arrays CODE (numeric course ids) and NAME (course names);
#            reads LATIN1 (locale for sed).
# Outputs:   one "<id> with <name>" progress line per course on stderr,
#            converted from latin1 by iconv.
load_courses() {
	local page=$1 code name
	N=0
	CODE=()
	NAME=()
	# sed prints "<id> <name>" for every line that has CourseID=<digits>
	# followed later by <span>text. read -r keeps backslashes in the
	# name intact; the first word is the id, the rest is the name.
	while read -r code name
	do
		CODE[N]=$code
		NAME[N]=$name
		printf '%s with %s\n' "$code" "$name" | iconv -f latin1 1>&2
		N=$((N + 1))
	done < <(LC_CTYPE=$LATIN1 sed -n 's/.*CourseID=\([0-9]*\).*<span>\([^<]*\).*/\1 \2/p' "$page")
}

load_courses "$TMP/main"


# Report generation: writes an HTML page on stdout containing a table with
# one column per course that is not ignored.

# Succeeds (status 0) when course name $1 contains one of the words listed
# in IGNORES; fails (status 1) otherwise, and always when IGNORES is empty.
is_ignored() {
	local name=$1 ign
	# IGNORES is expanded unquoted on purpose: it is a whitespace-separated
	# list and each word is matched as a pattern inside the name.
	for ign in $IGNORES
	do
		[[ $name == *$ign* ]] && return 0
	done
	return 1
}

# Prints one heading cell per non-ignored course. The width percentage is
# derived from the number of columns actually shown, not from N, so the
# columns fill the table even when some courses are ignored.
print_course_headings() {
	local i shown=0
	for ((i = 0; i < N; i++))
	do
		is_ignored "${NAME[i]}" || shown=$((shown + 1))
	done
	for ((i = 0; i < N; i++))
	do
		is_ignored "${NAME[i]}" && continue
		# shown is at least 1 whenever this line is reached.
		echo "<td width=\"$((100 / shown))%\"><h3>${NAME[i]}</h3></td>"
	done
}

# Filter: reads a plain-text page dump on stdin and writes simple HTML.
# Everything before the third non-indented line is dropped. From there on,
# non-indented lines become bold headings preceded by a rule, other
# non-empty lines get a line break, and empty lines are discarded.
# The dump is plain text, so &, < and > are escaped before being emitted.
markup_dump() {
	LC_CTYPE=$LATIN1 awk '
		# A non-empty line that does not start with a space opens a section.
		substr($0, 1, 1) != " " && $0 != "" { sections++; heading = 1 }
		sections > 2 && $0 != "" {
			line = $0
			gsub(/&/, "\\&amp;", line)
			gsub(/</, "\\&lt;", line)
			gsub(/>/, "\\&gt;", line)
			if (heading == 1)
				print "<hr><b>", line, "</b><p>"
			else
				print line, "<br>"
			heading = 0
		}'
}

echo "<html>"
echo '<META HTTP-EQUIV="Content-Type" CONTENT="text/html; charset=iso-8859-1">'
echo "<title>it's:lollin</title>"

echo "<table border=1>"
echo "<tr>"
print_course_headings
echo "</tr><tr>"

for ((I = 0; I < N; I++))
do
	is_ignored "${NAME[I]}" && continue

	# Request the course entry URL first, then the generic course page;
	# the cookie jar is both read (-b) and updated (-c) on each request.
	curl -s -A "$UA" -b "$TMP/cookies" -c "$TMP/cookies" -L -D "$TMP/course-header$I" -o "$TMP/course-$I" "$COURSE${CODE[I]}"
	curl -s -A "$UA" -b "$TMP/cookies" -c "$TMP/cookies" -L -D "$TMP/course2-header$I" -o "$TMP/course2-$I" "$COURSE2"

	# Dump the page as text with lynx, then mark the text up again.
	# DUMPCOMMAND is unquoted on purpose so it splits into command + options.
	echo "<td valign=top>"
	LC_CTYPE=$LATIN1 $DUMPCOMMAND "$TMP/course2-$I" | markup_dump
	echo "</td>"
done
echo "</tr>"
echo "</table>"

# Remove the scratch directory together with everything in it. A single
# recursive delete also works when the directory is empty or holds
# dotfiles. ${TMP:?} aborts instead of expanding to "/" should TMP ever be
# empty or unset.
rm -rf -- "${TMP:?}"



