#Refresh Cache v0.6a
#******************
#To Do:
#1.) DONE for TV - Over 20 episodes / Over 10 episodes.
#2.) DONE - Radio Cache.
#3.) DONE - Episodes / Series numbers.
#4.) DONE - Remove first line of Cache file.
#5.) http://www.bbc.co.uk/iplayer/ion/atoz/format/json/service_type/tv/masterbrand/bbc_one/letter/a/
#    http://www.bbc.co.uk/iplayer/ion/atoz/format/json/service_type/tv/letter/b/
#6. Merge with existing cache file.
#
# Purpose: asks whether to rebuild the TV or the RADIO cache, scrapes the BBC
# A-Z listing pages with LWP, and writes one pipe-delimited row per episode to
# tv.cache / radio.cache under %USERPROFILE%/.get_iplayer.  For TV, rows whose
# PID is already present in the existing tv.cache are carried over instead of
# being regenerated.
#
# FIXME / NOTE(review): the copy of this file that was reformatted here is
# damaged.
#   * These subs are called below but have no definition in this file:
#       remove_spaces, get_series_string, get_num, remove_comma_and_after,
#       strip_nl, tv_or_web, extract_string
#     search_ePIDs_for_RADIO_show and (apparently) calc_available_until are
#     present only as a truncated fragment; see "DAMAGED REGION" near the end.
#     Any code restored from a pristine copy must be clean under
#     "use strict", which this version enables.
#   * Several extract_string() marker arguments are empty (or whitespace-only)
#     strings.  They look like HTML delimiters that were stripped on export;
#     they are reproduced exactly as found and are flagged where they occur.

use strict;
use warnings;

use Date::Parse;
use POSIX;
use LWP 5.64;    # Loads all important LWP classes, and makes
                 # sure your version is reasonably recent.

my $browser = LWP::UserAgent->new;

# Header line written at the top of the TV cache file.
#Cache Format: index|type|name|pid|available|expires|episode|seriesnum|episodenum|versions|duration|desc|channel|categories|thumbnail|timeadded|guidance|web
my $CACHE_HEADER = "#index|type|name|pid|available|expires|episode|seriesnum|episodenum|versions|duration|desc|channel|categories|thumbnail|timeadded|guidance|web\n";

print "\n\n\nIPlayer Refresh Cache v0.6a\n";
print "**************************\n\n";

# Parameters
# Environment values default to '' so a missing variable produces the same
# path text as before without an "uninitialized" warning.
my $ProgramFiles = defined $ENV{'ProgramFiles(x86)'} ? $ENV{'ProgramFiles(x86)'} : '';
my $Get_iPlayerPath = $ProgramFiles.'/Get-iPlayer';
my $PerlPath = $Get_iPlayerPath.'/Perl.exe';
print "Directory for Perl.exe: ".$PerlPath."\n";

my $UserProfile = defined $ENV{'USERPROFILE'} ? $ENV{'USERPROFILE'} : '';
my $Get_iPlayer_Cache_Path = $UserProfile.'/.get_iplayer';

# The refreshed TV cache overwrites the file the old entries are read from.
my $Refreshed_Cache   = $Get_iPlayer_Cache_Path."/tv.cache";
my $UnRefreshed_Cache = $Get_iPlayer_Cache_Path."/tv.cache";
print "Directory for TV Cache File: ".$Refreshed_Cache."\n";

my $Refreshed_Radio_Cache = $Get_iPlayer_Cache_Path."/radio.cache";
print "Directory for Radio Cache File: ".$Refreshed_Radio_Cache."\n";

# Running index written as the first field of every cache row.  The original
# script post-incremented this counter twice while seeding two header arrays
# (one of them never read), so TV rows have always been numbered from 2; that
# start value is kept.  get_RADIO_cache resets it to 10001.
my $index_number = 2;

# Rows of the existing TV cache, one array ref of '|'-separated fields per
# line (row 0 is the header line), filled by read_tv_cache.
my @Read_Cache;

# Number of programme rows (header excluded) read by read_tv_cache.
my $existing_progs = 0;

# PID (field 3) -> row number in @Read_Cache, filled by read_tv_cache.
my %cache_index_of_pid;

#Main
print "\n\n\n";
print '(1) Please type "t" or "T" for TV CACHE'."\n\n";
print 'or'."\n\n";
print '(2) Please type "r" or "R" for RADIO CACHE'."\n\n\n";
print 'Your choice? ';

my $menu_choice = <>;
$menu_choice = '' if !defined $menu_choice;    # EOF on input
$menu_choice =~ s/\r?\n\z//;                   # tolerate LF and CRLF
$menu_choice = lc $menu_choice;

if ($menu_choice eq 't') {
    print "\n\n".'You selected TV CACHE...'."\n\n";
    get_TV_cache();
}
elsif ($menu_choice eq 'r') {
    print "\n\n".'You selected RADIO CACHE...'."\n\n";
    get_RADIO_cache();
}
else {
    print "\n\n".'I do not understand???...'."\n\n";
}

print "\n\n\n".'Exiting...Have a very nice day!'."\n\n\n";


# Fetch $url with the shared user agent and return the response body.
# LWP::UserAgent::get always returns a response object, so success has to be
# tested on the response itself.  A failed request is reported on STDERR and
# yields '' so the caller sees "nothing found" instead of scraping an error
# page, and a single bad page does not abort a long refresh.
sub _fetch_html {
    my ($url) = @_;

    my $response = $browser->get($url);
    return $response->content if $response->is_success;

    warn "Can't GET $url: " . $response->status_line . "\n";
    return '';
}

# Return $text cut off at the first occurrence of $marker (the marker itself
# is dropped too); $text is returned unchanged when the marker is absent.
sub _truncate_at {
    my ($text, $marker) = @_;

    my $pos = index $text, $marker;
    return $pos == -1 ? $text : substr($text, 0, $pos);
}

# For an episode title containing 'Series ', drop everything up to and
# including the first ':'.  Other titles are returned unchanged.
sub _strip_series_prefix {
    my ($episode) = @_;

    return $episode if index($episode, 'Series ') == -1;

    my $colon = index $episode, ':';
    return $colon == -1 ? $episode : substr($episode, $colon + 1);
}

# Join the given fields into one cache line: fields separated by '|', with a
# trailing '|' and newline.  Undefined fields are written as empty.
sub _cache_row {
    my (@fields) = @_;

    return join('|', map { defined $_ ? $_ : '' } @fields) . "|\n";
}

# Rebuild tv.cache: read the existing cache, list every show from the A-Z
# pages, fetch each show's episodes and write one row per episode.  Episodes
# whose PID is already in the old cache keep their old row (with a new index).
sub get_TV_cache {
    print "\n\n".'This requires about 20 minutes to complete...'."\n\n";

    read_tv_cache();
    my @all_shows = search_all_tv_shows();

    open my $out, '>', $Refreshed_Cache
        or die "Couldn't open $Refreshed_Cache: $!";
    print {$out} $CACHE_HEADER;

    for my $show_pid (@all_shows) {
        my @all_episodes = search_ePIDs_for_TV_show($show_pid);

        for my $ep (@all_episodes) {
            my $row_index = $index_number++;
            my $existing  = match_PID_in_cachefile($ep->[0]);

            if ($existing) {
                # Carry the old row over.  Fields 1..17 are type..web; the
                # previous loop stopped at 16 and so dropped the web column
                # and the trailing '|' that freshly written rows have.
                print {$out} _cache_row($row_index, @{ $Read_Cache[$existing] }[1 .. 17]);
                next;
            }

            # The helper subs are called one per statement, each assigned to
            # a scalar, so they keep the scalar context they were written for.
            my $name = _truncate_at($ep->[2], '.');
            $name = remove_spaces($name);

            my $series     = get_series_string($name);
            my $has_series = $series =~ /Series/;
            my $series_num = '';
            $series_num = get_num($series) if $has_series;

            $name = remove_comma_and_after($name) if $name =~ /,/;
            $name = "$name: $series" if $has_series;

            my $episode = _strip_series_prefix($ep->[5]);
            $episode = remove_spaces($episode);
            my $episode_num = get_num($episode);

            my $description = remove_spaces($ep->[3]);
            $description = strip_nl($description);

            my $thumbnail = remove_spaces($ep->[4]);

            my $channel = remove_spaces($ep->[6]);
            $channel = tv_or_web($channel);

            my $available = calc_available_until($ep->[7], $ep->[8], $ep->[9], $ep->[10]);
            my $expires   = str2time($available);

            my $duration   = $ep->[10] * 60;    # scraped in minutes, stored as minutes * 60
            my $time_added = time();

            print {$out} _cache_row(
                $row_index,                                            #index
                "tv",                                                  #type
                $name,                                                 #name
                $ep->[0],                                              #ePID
                $available,                                            #available
                $expires,                                              #expiry
                $episode,                                              #episode
                $series_num,                                           #seriesnum
                $episode_num,                                          #episodenum
                "default",                                             #versions
                $duration,                                             #duration
                $description,                                          #desc
                $channel,                                              #channel
                '',                                                    #categories
                $thumbnail,                                            #thumbnail
                $time_added,                                           #timeadded
                '',                                                    #guidance
                "http://www.bbc.co.uk/iplayer/episodes/".$ep->[1],     #web
            );
        }
    }

    close $out or die "Couldn't close $Refreshed_Cache: $!";
    return;
}

# Rebuild radio.cache from scratch: one row per episode found by
# search_all_radio_shows, numbered from 10001.  No header line is written.
sub get_RADIO_cache {
    print "\n\n".'This requires a few hours to complete...'."\n\n";

    my @all_episodes = search_all_radio_shows();
    $index_number = 10001;

    open my $out, '>', $Refreshed_Radio_Cache
        or die "Couldn't open $Refreshed_Radio_Cache: $!";

    for my $ep (@all_episodes) {
        my $name = _truncate_at($ep->[2], '.');
        $name = _truncate_at($name, ':');
        $name = remove_spaces($name);

        my $episode = _strip_series_prefix($ep->[5]);
        $episode = remove_spaces($episode);

        my $description = remove_spaces($ep->[3]);
        my $thumbnail   = remove_spaces($ep->[4]);
        my $channel     = remove_spaces($ep->[6]);
        my $time_added  = time();

        # As before, seriesnum and episodenum carry the already-incremented
        # counter, i.e. this row's index + 1.
        my $row_index = $index_number++;

        print {$out} _cache_row(
            $row_index,                                       #index
            "radio",                                          #type
            $name,                                            #name
            $ep->[0],                                         #ePID
            "Unknown",                                        #available
            "Unknown",                                        #expiry
            $episode,                                         #episode
            $index_number,                                    #seriesnum
            $index_number,                                    #episodenum
            "default,",                                       #versions
            "Unknown",                                        #duration
            $description,                                     #desc
            $channel,                                         #channel
            '',                                               #categories
            $thumbnail,                                       #thumbnail
            $time_added,                                      #timeadded
            '',                                               #guidance
            "http://www.bbc.co.uk/programmes/".$ep->[0],      #web
        );
    }

    close $out or die "Couldn't close $Refreshed_Radio_Cache: $!";
    return;
}

# Walk the TV A-Z index pages ('a'..'z' and '0-9') and return the list of
# 8-character strings that follow each '/iplayer/brand/' on those pages.
# Repeated links are not de-duplicated.
sub search_all_tv_shows {
    my @all_shows;
    my @AtoZIndex = ('a' .. 'z', '0-9');

    print "\n\n\nSearching for list of all TV shows\n\n";
    print "Search Results:\n";

    for my $letter (@AtoZIndex) {
        my $url = 'http://www.bbc.co.uk/iplayer/a-z/'.$letter;
        print '"'.$letter.'" shows have been searched.'."\n";

        my $html = _fetch_html($url);

        # The look-ahead captures up to 8 characters after the marker without
        # consuming them, so a marker starting inside that window is still
        # found, as it was when the marker was cut out of the page text.
        while ($html =~ m{/iplayer/brand/(?=(.{0,8}))}gs) {
            push @all_shows, $1;
        }
    }

    print "\nFound ".scalar(@all_shows)." shows\n\n\n";
    return @all_shows;
}

# Walk the radio A-Z pages ('a'..'z', '0'..'9'), following "page=N" links, and
# return a list of episode records.  Each record is an array ref:
#   [0] episode PID  [1] show PID or ''  [2] name  [3] description
#   [4] thumbnail    [5] episode title or ''       [6] channel
# A listing entry containing 'Show available episodes' is expanded through
# search_ePIDs_for_RADIO_show; one containing 'programme__availability' is
# stored directly; any other entry is skipped.
sub search_all_radio_shows {
    my @all_episodes;
    my @AtoZIndex = ('a' .. 'z', '0' .. '9');

    print "\n\n\nSearching for list of all RADIO shows\n\n";
    print "Search Results:\n";

    for my $letter (@AtoZIndex) {
        my $page_number = 1;

        while ($page_number != -1) {
            my $url = 'http://www.bbc.co.uk/radio/programmes/a-z/by/'.$letter.'/current?page='.$page_number;
            my $html = _fetch_html($url);
            my $found_more_shows = 0;

            # $html is consumed from the front: each pass drops everything up
            # to and including the next 'data-pid='.
            while ($html =~ /data-pid=/) {
                substr($html, 0, index($html, 'data-pid=') + 9, '');
                my $pid = substr($html, index($html, '"') + 1, 8);

                # The entry's markup runs up to the next 'data-pid=' (or to
                # the end of the page for the last entry).
                my $next_entry   = index($html, 'data-pid=');
                my $html_section = $next_entry == -1 ? $html : substr($html, 0, $next_entry);

                if (index($html_section, 'Show available episodes') != -1) {
                    my @more_episodes = search_ePIDs_for_RADIO_show($pid);

                    # NOTE(review): both markers are empty strings in this
                    # copy of the file; the original delimiters appear to
                    # have been lost.  Reproduced as found.
                    my $channel = extract_string($html_section, '', '');

                    for my $more (@more_episodes) {
                        # NOTE(review): end marker is empty in this copy.
                        my $name = extract_string($html_section, 'property="name">', '');

                        print "\n".$more->[0].' - ';
                        print $channel.' - ';
                        print $name;

                        push @all_episodes, [
                            $more->[0],    #store ePID
                            $more->[1],    #store sPID
                            $name,
                            $more->[3],
                            $more->[4],
                            $more->[5],
                            $channel,
                        ];
                        $found_more_shows = 1;
                    }
                    print "\n";
                }
                elsif (index($html_section, 'programme__availability') != -1) {
                    # NOTE(review): empty markers here and the empty end
                    # markers below are reproduced as found; see above.
                    my $channel     = extract_string($html_section, '', '');
                    my $name        = extract_string($html_section, 'property="name">', '');
                    my $description = extract_string($html_section, 'property="description">', '');
                    my $thumbnail   = extract_string($html_section, 'data-img-src-68="', '"');

                    print "\n".$pid.' - ';
                    print $channel.' - ';
                    print $name;

                    push @all_episodes, [ $pid, '', $name, $description, $thumbnail, '', $channel ];
                    $found_more_shows = 1;
                }
                else {
                    # Entry is neither kind: nothing is stored, but the page
                    # still counts as having had entries.
                    $found_more_shows = 1;
                }
            }

            print "\n".'"'.$letter.'" shows have been searched. (Page '.$page_number.")\n\n";

            # Continue only if this page had entries and what is left of the
            # page (the text after the last 'data-pid=') links to the next
            # page number.
            my $next_page_link = 'radio/programmes/a-z/by/'.$letter.'/current?page='.($page_number + 1);
            if ($found_more_shows && index($html, $next_page_link) != -1) {
                $page_number++;
            }
            else {
                $page_number = -1;    #STOP!
            }
        }
    }

    return @all_episodes;
}

# Fetch every listing page of the TV show whose PID is given and return one
# record (array ref) per '/iplayer/episode/' link found:
#   [0] episode PID   [1] show PID      [2] title       [3] synopsis
#   [4] thumbnail     [5] subtitle      [6] channel     [7] first shown
#   [8] available-until text            [9] availability duration text
#   [10] duration in minutes (via get_num)
sub search_ePIDs_for_TV_show {
    my ($show_pid) = @_;

    my @all_episodes;
    my $pages     = 1;
    my $max_pages = 1;

    do {
        my $url  = "http://www.bbc.co.uk/iplayer/episodes/".$show_pid."?page=".$pages;
        my $html = _fetch_html($url);

        # The first page carries the total page count.
        if ($pages == 1 && $html =~ /data-page-total="(\d+)/) {
            $max_pages = $1;
        }

        # NOTE(review): end marker is empty in this copy of the file.
        my $channel = extract_string($html, 'class="master-brand">', '');

        # One cursor replaces the eight identical page copies that were each
        # advanced in lock-step.
        # NOTE(review): this assumes extract_string does not modify its first
        # argument (its definition is not in this file) - confirm.
        my $cursor = $html;

        while ($html =~ m{/iplayer/episode/(?=(.{0,8}))}gs) {
            my $episode_pid = $1;
            print $episode_pid." - ";

            # Advance the cursor to just after the next 'data-ip-id="'.
            substr($cursor, 0, index($cursor, 'data-ip-id="') + 12, '');

            my @episode;
            $episode[0] = $episode_pid;    #store ePID
            $episode[1] = $show_pid;       #store sPID

            $episode[2] = extract_string($cursor, 'title="', '"');
            print $episode[2]."\n";

            # NOTE(review): the end marker below is two bare newlines and the
            # end markers for [5], [7] and [9] are empty in this copy; the
            # original delimiters appear to have been lost.
            $episode[3] = extract_string($cursor, 'class="synopsis">', "\n\n");
            $episode[4] = extract_string($cursor, 'data-ip-src="', '"');
            $episode[5] = extract_string($cursor, 'class="subtitle">', '');
            $episode[7] = extract_string($cursor, 'First shown: ', '');
            $episode[8] = extract_string($cursor, 'Available until ', '"');
            $episode[9] = extract_string($cursor, 'class="availability-duration">', '');

            my $duration_text = extract_string($cursor, 'Duration', 'mins');
            $episode[10] = get_num($duration_text);

            $episode[6] = $channel;
            push @all_episodes, \@episode;
        }

        $pages++;

        # Numeric comparison: with the former string "gt", "2" gt "10" is
        # true, so shows with ten or more pages stopped after the first page.
    } until ($pages > $max_pages);

    return @all_episodes;
}

# ---------------------------------------------------------------------------
# DAMAGED REGION - FIXME: restore from a pristine copy of this script.
#
# In the copy this file was reformatted from, the text breaks off inside
# search_ePIDs_for_RADIO_show (in the middle of a string literal) and resumes
# in the "else" branch of a different sub.  The resumed fragment appears to be
# the tail of calc_available_until, judging by the variables it uses.
# Everything in between is missing, so neither sub can be reconstructed
# without guessing, and both remain undefined here.  The surviving statements
# are kept below for reference (commented-out debug prints omitted).
#
#   sub search_ePIDs_for_RADIO_show {
#       my (@ops) = @_;
#       my $i = 0;
#       my @all_episodes;
#       my $ePID = '';
#       my $html_section = '';
#       my $url = "http://www.bbc.co.uk/programmes/$ops[0]/episodes/player";
#       #http://www.bbc.co.uk/programmes/b00srz5b/episodes/player
#       print "\n".$url;
#       my $result = $browser->get($url);
#       die "Can't GET $url" if (! defined $result);
#       my $html = $result->content;
#       while ($html =~ /data-pid=/) {
#           substr ($html, 0, index($html, 'data-pid=') + 9) = "";
#           $ePID = substr ($html, index($html, '"') + 1, 8);
#           if (index ($html,'data-pid=') != -1) {
#               $html_section = substr ($html, 0, index($html, 'data-pid='));
#           } else {
#               $html_section = $html;
#           }
#           $all_episodes[$i][0] = $ePID;      #store ePID
#           $all_episodes[$i][1] = $ops[0];    #store sPID
#           $all_episodes[$i][2] = '';
#           $all_episodes[$i][3] = &extract_string($html_section, 'property="description">', '');
#           $all_episodes[$i][4] = &extract_string($html_section, 'data-img-src-68="', '"');
#           $all_episodes[$i][5] = &extract_string($html_section, '', '');
#           $all_episodes[$i][5] = $all_episodes[$i][5].': '.&extract_string($html_section, '
#
#   ... text missing ...
#
#       ; }else{
#           $available_date = $available_until;
#           $available_date =~ /\d\d:\d\d (.*)/;
#           $available_date = "$1";
#           $available_day = $available_date;
#           $available_day =~ /(\d\d?)/;
#           $available_day = "$1";
#           $available_month = $available_date;
#           $available_month =~ /([A-Z][a-z]+)/;
#           $available_month = "$1";
#           $available_month = &month_to_number($available_month);
#           $available_year = $available_date;
#           $available_year =~ /(\d{4})/;
#           $available_year = "$1";
#       }
#       if ($available_until eq "") {
#           $available_time = "23:59:59";
#       }else{
#           $available_time = $available_until;
#           $available_time =~ /(\d\d:\d\d)/;
#           $available_time = "$1".":00";
#       }
#       $result = $available_year.'-'.$available_month.'-'.$available_day.'T'.$available_time.'Z';
#       return $result ;
#   }
# ---------------------------------------------------------------------------

# Map a month name to its number (1-12) using its first three letters,
# case-insensitively.  Returns undef for an unrecognised name.
sub month_to_number {
    my ($month) = @_;

    my %number_of = (
        jan => 1, feb => 2,  mar => 3,  apr => 4,
        may => 5, jun => 6,  jul => 7,  aug => 8,
        sep => 9, oct => 10, nov => 11, dec => 12,
    );

    return $number_of{ lc substr($month, 0, 3) };
}

# Load the existing TV cache into @Read_Cache (one array ref of '|'-separated
# fields per line, header line at row 0), index its rows by PID for
# match_PID_in_cachefile, and set $existing_progs to the number of programme
# rows.  A missing cache file is treated as an empty cache; a file that exists
# but cannot be opened is fatal.
sub read_tv_cache {
    print"\nReading Existing TV cache file\n";

    @Read_Cache         = ();
    %cache_index_of_pid = ();
    $existing_progs     = 0;

    if (!-e $UnRefreshed_Cache) {
        print "No existing TV cache file found; starting with an empty cache\n";
        return;
    }

    open my $in, '<', $UnRefreshed_Cache
        or die "Couldn't open $UnRefreshed_Cache: $!";
    while (my $line = <$in>) {
        chomp $line;
        push @Read_Cache, [ split /\|/, $line ];
    }
    close $in or die "Couldn't close $UnRefreshed_Cache: $!";

    # Row 0 is the header.  Later rows overwrite earlier ones with the same
    # PID, matching the former linear scan in which the last match won.
    for my $row (1 .. $#Read_Cache) {
        my $pid = $Read_Cache[$row][3];
        next if !defined $pid || $pid eq '';
        $cache_index_of_pid{$pid} = $row;
    }

    $existing_progs = @Read_Cache ? $#Read_Cache : 0;
    return;
}

# Return the @Read_Cache row number whose PID field equals the given PID, or
# 0 when the PID is not in the old cache.
sub match_PID_in_cachefile {
    my ($find_id) = @_;

    return 0 if !defined $find_id;

    my $match = $cache_index_of_pid{$find_id};
    return defined $match ? $match : 0;
}