#Refresh Cache v0.5
#******************
#To Do:
#1.) Over 20 episodes / Over 10 episodes.
#2.) DONE - Radio Cache.
#3.) Episodes / Series numbers.
#4.) DONE - Remove first line of Cache file.
#5.) http://www.bbc.co.uk/iplayer/ion/atoz/format/json/service_type/tv/masterbrand/bbc_one/letter/a/
#    http://www.bbc.co.uk/iplayer/ion/atoz/format/json/service_type/tv/letter/b/

use LWP 5.64;    # Loads all important LWP classes, and makes
                 # sure your version is reasonably recent.

# NOTE(review): "use strict" is not enabled here because it would also apply
# to the remainder of the file, which is not visible from this section and may
# rely on undeclared globals. File-level variables are declared with "our" so
# they stay package globals exactly as before.

# Shared HTTP client used by every scraping routine in this file.
our $browser = LWP::UserAgent->new;

print "\n\n\nIPlayer Refresh Cache v0.5\n";
print "**************************\n\n";

# Parameters
our $ProgramFiles    = $ENV{'ProgramFiles(x86)'};
our $Get_iPlayerPath = $ProgramFiles.'/Get-iPlayer';
our $PerlPath        = $Get_iPlayerPath.'/Perl.exe';
print "Directory for Perl.exe: ".$PerlPath."\n";

our $UserProfile            = $ENV{'USERPROFILE'};
our $Get_iPlayer_Cache_Path = $UserProfile.'/.get_iplayer';
our $Refreshed_Cache        = $Get_iPlayer_Cache_Path."/tv.cache";
print "Directory for TV Cache File: ".$Refreshed_Cache."\n";
our $Refreshed_Radio_Cache  = $Get_iPlayer_Cache_Path."/radio.cache";
print "Directory for Radio Cache File: ".$Refreshed_Radio_Cache."\n";

# Running record index shared by the cache writers.
our $index_number = 0;

#Cache Format: index|type|name|pid|available|episode|seriesnum|episodenum|versions|duration|desc|channel|categories|thumbnail|timeadded|guidance|web
# The header line is only stored in memory; none of the code visible in this
# section writes it to a cache file. Storing it bumps $index_number to 1, so
# TV records are numbered from 1.
our @Cache;
$Cache[$index_number++] = "#index|type|name|pid|available|episode|seriesnum|episodenum|versions|duration|desc|channel|categories|thumbnail|timeadded|guidance|web\n";

#Main
print "\n\n\n";
print '(1) Please type "t" or "T" for TV CACHE'."\n\n";
print 'or'."\n\n";
print '(2) Please type "r" or "R" for RADIO CACHE'."\n\n\n";
print "Your choice? \n";

our $menu_choice = <>;

# Normalise the answer: tolerate end-of-input, surrounding whitespace
# (including a Windows "\r\n" line ending) and either letter case.
my $choice = defined $menu_choice ? lc $menu_choice : '';
$choice =~ s/\A\s+|\s+\z//g;

if ($choice eq 't') {
    print "\n\n".'You selected TV CACHE...'."\n\n";
    get_TV_cache();
}
elsif ($choice eq 'r') {
    print "\n\n".'You selected RADIO CACHE...'."\n\n";
    get_RADIO_cache();
}
else {
    print "\n\n".'I do not understand???...'."\n\n";
}

print "\n\n\n".'Exiting...Have a very nice day!'."\n\n\n";


# Fetch $url with the shared user agent and return the response body.
#
# LWP::UserAgent->get always returns a response object, even when the request
# fails, so success has to be tested on the response itself. Dies with the
# HTTP status line when the request did not succeed.
sub _fetch_page {
    my ($url) = @_;

    my $response = $browser->get($url);
    die "Can't GET $url -- " . $response->status_line . "\n"
        unless $response->is_success;

    return $response->content;
}


# Write one cache line per record to the open filehandle $fh.
#
#   $fh            - filehandle opened for writing
#   $type          - value of the "type" column ("tv" or "radio")
#   $web_base      - URL prefix of the "web" column
#   $web_pid_field - index of the record field appended to $web_base
#   @records       - array refs laid out as
#                    [ePID, sPID, name, desc, thumbnail, episode, channel]
#
# Advances the shared $index_number once per record. Dies if a write fails.
sub _write_cache_records {
    my ($fh, $type, $web_base, $web_pid_field, @records) = @_;

    for my $record (@records) {
        # Keep only the part of the name before the first '.' or ':'.
        my $name = $record->[2];
        $name =~ s/[.:].*//s;

        # For "Series ..." episode titles, drop everything up to and
        # including the first ':'.
        my $episode = $record->[5];
        if (index($episode, 'Series ') != -1) {
            $episode =~ s/\A[^:]*://;
        }

        $name    = remove_spaces($name);
        $episode = remove_spaces($episode);
        my $description = remove_spaces($record->[3]);
        my $thumbnail   = remove_spaces($record->[4]);
        my $channel     = remove_spaces($record->[6]);
        my $time_added  = time();

        # seriesnum and episodenum are written as the already-incremented
        # running index (index + 1), exactly as before; real series/episode
        # numbers are still open as To-do item 3.
        my $index = $index_number++;

        print {$fh} join('|',
            $index,                                   #index
            $type,                                    #type
            $name,                                    #name
            $record->[0],                             #ePID
            'Unknown',                                #available
            $episode,                                 #episode
            $index_number,                            #seriesnum
            $index_number,                            #episodenum
            'default,',                               #versions
            'Unknown',                                #duration
            $description,                             #desc
            $channel,                                 #channel
            '',                                       #categories
            $thumbnail,                               #thumbnail
            $time_added,                              #timeadded
            '',                                       #guidance
            $web_base . $record->[$web_pid_field],    #web
        ), "|\n"
            or die "Couldn't write cache record: $!";
    }

    return;
}


# Rebuild the TV cache file ($Refreshed_Cache): list every show from the A-Z
# pages, fetch each show's episodes and write them out show by show.
sub get_TV_cache {
    print "\n\n".'This requires about 20 minutes to complete...'."\n\n";

    my @all_shows = search_all_tv_shows();

    open my $cache_fh, '>', $Refreshed_Cache
        or die "Couldn't open ".$Refreshed_Cache.": $!";

    for my $show_pid (@all_shows) {
        my @episodes = search_ePIDs_for_TV_show($show_pid);
        # The "web" column links to the show page, built from the sPID (field 1).
        _write_cache_records($cache_fh, 'tv',
            'http://www.bbc.co.uk/iplayer/episodes/', 1, @episodes);
    }

    close $cache_fh or die "Couldn't close ".$Refreshed_Cache.": $!";
    return;
}


# Rebuild the radio cache file ($Refreshed_Radio_Cache). All scraping is done
# first; the file is only opened once the full episode list is available.
# Radio records are numbered from 10001.
sub get_RADIO_cache {
    print "\n\n".'This requires a few hours to complete...'."\n\n";

    my @all_episodes = search_all_radio_shows();
    $index_number = 10001;

    open my $cache_fh, '>', $Refreshed_Radio_Cache
        or die "Couldn't open ".$Refreshed_Radio_Cache.": $!";

    # The "web" column links to the programme page, built from the ePID (field 0).
    _write_cache_records($cache_fh, 'radio',
        'http://www.bbc.co.uk/programmes/', 0, @all_episodes);

    close $cache_fh or die "Couldn't close ".$Refreshed_Radio_Cache.": $!";
    return;
}


# Scrape the iPlayer A-Z pages and return the 8-character ids that follow
# every "/iplayer/brand/" link, in page order. Repeated links are returned
# repeatedly. Dies if a page cannot be fetched.
sub search_all_tv_shows {
    my @all_shows;
    my @AtoZIndex    = ('a' .. 'z', '0-9');
    my $brand_marker = '/iplayer/brand/';

    print "\n\n\nSearching for list of all TV shows\n\n";
    print "Search Results:\n";

    for my $letter (@AtoZIndex) {
        my $url = 'http://www.bbc.co.uk/iplayer/a-z/'.$letter;
        print '"'.$letter.'" shows have been searched.'."\n";

        my $html = _fetch_page($url);

        # Walk every occurrence of the marker without modifying the page.
        my $from = 0;
        while ((my $at = index($html, $brand_marker, $from)) != -1) {
            push @all_shows, substr($html, $at + length($brand_marker), 8);
            $from = $at + 1;
        }
    }

    print "\nFound ".scalar(@all_shows)." shows\n\n\n";
    return @all_shows;
}


# Scrape the radio A-Z listing (letters a-z and digits 0-9, page by page) and
# return one record per available episode:
#   [ePID, sPID, name, desc, thumbnail, episode, channel]
#
# A listing entry containing 'Show available episodes' is expanded through
# search_ePIDs_for_RADIO_show; an entry containing 'programme__availability'
# becomes a single record; any other entry is skipped. Paging for a letter
# stops when no link to the next page is found after the last entry, or when
# the page produced nothing. Dies if a page cannot be fetched.
#
# NOTE(review): several extract_string delimiters below are empty strings or
# start in the middle of a tag ('property="name">'). They are reproduced
# exactly as found, but it looks as if HTML markup was lost from this copy of
# the source - verify against a pristine copy.
sub search_all_radio_shows {
    my @all_episodes;
    my @AtoZIndex  = ('a' .. 'z', '0' .. '9');
    my $pid_marker = 'data-pid=';

    print "\n\n\nSearching for list of all RADIO shows\n\n";
    print "Search Results:\n";

    for my $letter (@AtoZIndex) {
        my $page_number = 1;

        while (1) {
            my $url = 'http://www.bbc.co.uk/radio/programmes/a-z/by/'
                    . $letter.'/current?page='.$page_number;

            # $remaining is consumed entry by entry; after the loop it holds
            # the text following the last entry, where the paging links are
            # looked for.
            my $remaining        = _fetch_page($url);
            my $found_more_shows = 0;

            while ((my $marker_at = index($remaining, $pid_marker)) != -1) {
                $remaining = substr($remaining, $marker_at + length($pid_marker));
                my $pid = substr($remaining, index($remaining, '"') + 1, 8);

                # The entry runs up to the next marker, or to the end of the page.
                my $next_marker  = index($remaining, $pid_marker);
                my $html_section = $next_marker == -1
                                 ? $remaining
                                 : substr($remaining, 0, $next_marker);

                if (index($html_section, 'Show available episodes') != -1) {
                    my @more_episodes = search_ePIDs_for_RADIO_show($pid);
                    my $channel = extract_string($html_section, '', '');

                    for my $episode (@more_episodes) {
                        print "\n".$episode->[0].' - ';
                        print $channel.' - ';
                        my $name = extract_string($html_section, 'property="name">', '');
                        print $name;

                        push @all_episodes, [
                            $episode->[0],    #ePID
                            $episode->[1],    #sPID
                            $name,
                            $episode->[3],
                            $episode->[4],
                            $episode->[5],
                            $channel,
                        ];
                        $found_more_shows = 1;
                    }
                    print "\n";
                }
                elsif (index($html_section, 'programme__availability') != -1) {
                    my $channel = extract_string($html_section, '', '');
                    print "\n".$pid.' - ';
                    print $channel.' - ';
                    my $name = extract_string($html_section, 'property="name">', '');
                    print $name;
                    my $description = extract_string($html_section, 'property="description">', '');
                    my $thumbnail   = extract_string($html_section, 'data-img-src-68="', '"');

                    push @all_episodes,
                        [ $pid, '', $name, $description, $thumbnail, '', $channel ];
                    $found_more_shows = 1;
                }
                else {
                    # Entry without available episodes: nothing to record, but
                    # the page still counts as non-empty.
                    $found_more_shows = 1;
                }
            }

            print "\n".'"'.$letter.'" shows have been searched. (Page '.$page_number++.")\n\n";

            my $next_page_link = 'radio/programmes/a-z/by/'.$letter.'/current?page='.$page_number;
            last if index($remaining, $next_page_link) == -1;    #STOP!
            last if !$found_more_shows;                          #STOP!
        }
    }

    print "\nFound ".scalar(@all_episodes)." shows\n\n\n";
    return @all_episodes;
}


# Fetch the iPlayer episodes page of one show and return one record per
# "/iplayer/episode/" link found on it:
#   [ePID, sPID, name, desc, thumbnail, episode, channel]
#
#   $show_pid - the show id (sPID) as returned by search_all_tv_shows
#
# Dies if the page cannot be fetched.
#
# NOTE(review): extract_string is defined outside this section. A separate
# working copy of the page is kept per extracted field, as before, in case it
# consumes the string it is given - TODO confirm; if it does not, one shared
# cursor would be enough.
# NOTE(review): some delimiters are empty or start mid-tag and the synopsis
# end delimiter is two newlines; they are reproduced as found, but HTML markup
# may have been lost from this copy of the source - verify.
sub search_ePIDs_for_TV_show {
    my ($show_pid) = @_;
    my @all_episodes;
    my $episode_marker = '/iplayer/episode/';
    my $item_marker    = 'data-ip-id="';

    my $url  = "http://www.bbc.co.uk/iplayer/episodes/$show_pid";
    my $html = _fetch_page($url);

    my $channel_html = $html;
    my $channel = extract_string($channel_html, 'class="master-brand">', '');

    my ($title_html, $synopsis_html, $thumbnail_html, $subtitle_html) = ($html) x 4;

    # Drop everything up to and including the next item marker from a buffer.
    my $skip_to_next_item = sub {
        my ($buffer_ref) = @_;
        substr(${$buffer_ref}, 0,
            index(${$buffer_ref}, $item_marker) + length($item_marker)) = "";
        return;
    };

    my $from = 0;
    while ((my $at = index($html, $episode_marker, $from)) != -1) {
        my $episode_pid = substr($html, $at + length($episode_marker), 8);
        print $episode_pid." - ";

        $skip_to_next_item->(\$title_html);
        my $title = extract_string($title_html, 'title="', '"');
        print $title."\n";

        $skip_to_next_item->(\$synopsis_html);
        my $synopsis = extract_string($synopsis_html, 'class="synopsis">', "\n\n");

        $skip_to_next_item->(\$thumbnail_html);
        my $thumbnail = extract_string($thumbnail_html, 'data-ip-src="', '"');

        $skip_to_next_item->(\$subtitle_html);
        my $subtitle = extract_string($subtitle_html, 'class="subtitle">', '');

        push @all_episodes, [
            $episode_pid,    #ePID
            $show_pid,       #sPID
            $title,
            $synopsis,
            $thumbnail,
            $subtitle,
            $channel,
        ];

        $from = $at + 1;
    }

    return @all_episodes;
}


# NOTE(review): this sub continues beyond the section under review, so its
# code is left unchanged. Its "! defined $result" test can never fire, because
# LWP::UserAgent->get always returns a response object - consider checking
# $result->is_success here as well.
sub search_ePIDs_for_RADIO_show {
    my (@ops) = @_;
    my $i = 0;
    my @all_episodes;
    my $ePID = '';
    my $html_section = '';
    #print "\n\n\nSearching for episodes (sPID: $ops[0])\n\n";
    #print "Search Results:\n";
    my $url = "http://www.bbc.co.uk/programmes/$ops[0]/episodes/player";
    #http://www.bbc.co.uk/programmes/b00srz5b/episodes/player
    print "\n".$url;
    my $result = $browser->get($url);
    die "Can't GET $url" if (! defined $result);
    my $html = $result->content;
    while ($html =~ /data-pid=/) {
        substr ($html, 0, index($html, 'data-pid=') + 9) = "";
        $ePID = substr ($html, index($html, '"') + 1, 8);
        if (index ($html,'data-pid=') != -1) {
            $html_section = substr ($html, 0, index($html, 'data-pid='));
        } else {
            $html_section = $html;
        }
        $all_episodes[$i][0] = $ePID; #store ePID
        #print "\n".$all_episodes[$i][0]." - ";
        $all_episodes[$i][1] = $ops[0]; #store sPID
        #print $all_episodes[$i][1]." - ";
        $all_episodes[$i][2] = '';
        #print $all_episodes[$i][2];
        $all_episodes[$i][3] = &extract_string($html_section, 'property="description">', '');
        #print $all_episodes[$i][3];
        $all_episodes[$i][4] = &extract_string($html_section, 'data-img-src-68="', '"');
        #print $all_episodes[$i][4];
        $all_episodes[$i][5] = &extract_string($html_section, '', '');
        $all_episodes[$i][5] = $all_episodes[$i][5].': '.&extract_string($html_section, '