I made a link extractor for Chrome - here it is

roydan

Senior Member
Founding Member
Sapphire Member
Gold Star Gold Star Gold Star Gold Star Gold Star
Joined
Mar 30, 2025
Messages
2,110
Reaction Score
5,617
Feedback
1 / 0 / 0
So I made this tool that allows you to extract links from a page you're visiting. By 'me', I obviously mean that I collaborated with Chat where he did the coding and I did the specs and yelling.

It works like a bookmark, and when you click it, it opens a new tab with a table that contains:

Title
Anchor
Full URL
Domain
Path
URL parameters

There's a filter, and you can export CSV, copy URLs, copy URLs+anchors, copy all table, and there's a button that allows you to open a new Google Sheets file. Pretty basic but useful for my uses:

1. Extracting SERPs (well, it used to be useful before they were limited to 10; as a PPC guy it's still very useful to me)
2. List building from directories
3. Checking competitors' products

Here it is:

JavaScript:
javascript:(function(){
  /* Normalise text for display: every run of whitespace becomes one space and the ends are trimmed. Falsy input (null, undefined, "") yields "". */
  function S(s){
    var text = s ? s : "";
    var collapsed = text.split(/\s+/).join(" ");
    return collapsed.trim();
  }
  /* Best-effort decode of a URL-encoded query value. "+" is treated as an encoded space; if the value is malformed the input is handed back unchanged (or "" when it was falsy). */
  function dec(s){
    var input = s || "";
    var decoded;
    try{
      decoded = decodeURIComponent(input.replace(/\+/g,"%20"));
    }catch(e){
      /* Malformed escape sequence: fall back to the undecoded value. */
      decoded = input;
    }
    return decoded;
  }
  /* Pick a short label for an image that has no alt text: "image" when there is no source, "inline-image" for data: URLs, otherwise the last path segment with any query string and fragment removed (or "image" if that segment is empty). */
  function imgFile(src){
    if(!src) return "image";
    if(src.indexOf("data:")===0) return "inline-image";
    var withoutFragment = src.split("#")[0];
    var withoutQuery = withoutFragment.split("?")[0];
    var fileName = withoutQuery.slice(withoutQuery.lastIndexOf("/")+1);
    return fileName || "image";
  }

  var w=window.open("about:blank","_blank");
  if(!w){alert("Popup blocked. Allow popups for this site and try again.");return;}
  w.document.open();
  w.document.write("<!doctype html><meta charset=utf-8><title>Link Extractor</title><body style=\"font-family:Tahoma,Segoe UI,Arial,sans-serif;font-size:12px;margin:12px\">Loading...</body>");
  w.document.close();

  /* Main pass: build one row per link found on the current page, then write the results page (toolbar, filter, table) into the already-opened tab `w`. Any exception is reported inside that tab. Only block comments are used here, in case line breaks are stripped when the code is pasted into a bookmark URL. */
  try{
    var links=document.links;
    /* Scratch anchor element used purely as a URL parser (protocol / hostname / pathname / search). */
    var parser=document.createElement("a");
    var rows=[];

    for(var i=0;i<links.length;i++){
      var a=links[i];
      var raw=a.getAttribute("href")||"";
      /* Skip links without a real destination: empty href, same-page fragments, script pseudo-links. */
      if(!raw) continue;
      if(raw.charAt(0)==="#") continue;
      /* The parsed protocol also catches "JavaScript:" and whitespace-padded variants that a plain prefix test on the raw attribute misses. */
      if(raw.indexOf("javascript:")===0 || a.protocol==="javascript:") continue;

      var url=a.href||raw;

      /* Google redirect links carry the real destination in the q parameter of /url?...; unwrap it. */
      if(url && url.indexOf("/url?")>-1 && url.indexOf("q=")>-1 && /(^|\/\/)(www\.)?google\./i.test(url)){
        var m=url.match(/[?&]q=([^&]+)/);
        /* Remove tabs and line breaks from the decoded value (URL parsing drops them as well) so a single URL can never spill into extra TSV cells or rows when copied. */
        if(m && m[1]) url=dec(m[1]).replace(/[\t\r\n]+/g,"");
      }

      /* Display text is capped at 300 characters; the full URL is still kept in the row for copying. */
      var urlText = url;
      if(urlText.length>300) urlText = urlText.slice(0,300) + "...";

      /* URLs longer than 2000 characters are neither parsed nor rendered as clickable links. */
      var urlLink="";
      var domain="", path="", params="";
      if(url.length<=2000){
        parser.href=url;
        domain=parser.hostname||"";
        path=parser.pathname||"";
        params=parser.search?parser.search.slice(1):"";
        /* An unwrapped redirect target may itself be a javascript: URL; show it as plain text, never as a link that could run script in the results tab. */
        if(parser.protocol!=="javascript:") urlLink=url;
      }

      /* Title preference: a heading nested in the link (h3, then h2, then h1), then aria-label, title attribute, and finally the link text. */
      var title="";
      var h3=a.getElementsByTagName("h3"); if(h3 && h3.length) title=S(h3[0].textContent);
      if(!title){var h2=a.getElementsByTagName("h2"); if(h2 && h2.length) title=S(h2[0].textContent);}
      if(!title){var h1=a.getElementsByTagName("h1"); if(h1 && h1.length) title=S(h1[0].textContent);}
      if(!title) title=S(a.getAttribute("aria-label")) || S(a.getAttribute("title")) || S(a.textContent);

      /* Anchor is the visible link text; for image-only links fall back to the first image's alt text, then to its file name. */
      var anchor=S(a.textContent);
      if(!anchor){
        var imgs=a.getElementsByTagName("img");
        if(imgs && imgs.length){
          var alt=S(imgs[0].getAttribute("alt"));
          if(alt) anchor=alt;
          else{
            var src=(imgs[0].currentSrc||imgs[0].getAttribute("src")||imgs[0].src||"");
            anchor=imgFile(src);
          }
        }
      }

      /* Lower-cased haystack used by the filter box; the URL part is capped at 500 characters and the whole string at 2000. */
      var uForFilter = url;
      if(uForFilter.length>500) uForFilter = uForFilter.slice(0,500);

      var s=(title+" "+anchor+" "+uForFilter+" "+domain+" "+path+" "+params).toLowerCase();
      if(s.length>2000) s=s.slice(0,2000);

      /* ut = URL text shown in the table, ul = URL used for the href ("" means render as plain text), s = filter haystack. */
      rows.push({
        id:rows.length+1,
        title:title,
        anchor:anchor,
        url:url,
        ut:urlText,
        ul:urlLink,
        domain:domain,
        path:path,
        params:params,
        s:s
      });
    }

    /* Hand the data to the results tab as window properties; the script written below reads them back. */
    w.__rows=rows;
    w.__src=location.href;

    var css=
      "body{font-family:Tahoma,Verdana,Segoe UI,Arial,sans-serif;font-size:12px;color:#111;margin:0;background:#fafafa}"+
      ".top{position:sticky;top:0;background:#fff;border-bottom:1px solid #ddd;padding:10px;display:flex;gap:8px;align-items:center;flex-wrap:wrap}"+
      "button,a.btn{border:1px solid #ccc;background:#fff;border-radius:8px;padding:7px 10px;font-size:12px;color:#111;text-decoration:none;cursor:pointer}"+
      "button:hover,a.btn:hover{background:#f2f2f2}"+
      "input{border:1px solid #ccc;border-radius:8px;padding:7px 10px;font-size:12px;min-width:260px}"+
      ".meta{margin-left:auto;color:#555}"+
      ".wrap{max-width:1500px;margin:0 auto;padding:12px}"+
      ".small{color:#666;font-size:12px;margin:0 0 10px 0;word-break:break-all}"+
      "table{width:100%;border-collapse:collapse;background:#fff}"+
      "th,td{border-bottom:1px solid #eee;padding:8px;vertical-align:top;text-align:left}"+
      "th{position:sticky;top:56px;background:#fff;border-bottom:1px solid #ddd}"+
      "td.url{word-break:break-all}.muted{color:#666}";

    /* Static page skeleton: toolbar (copy buttons, Google Sheets shortcut, filter box, counter) followed by an empty table. */
    var html="<!doctype html><meta charset='utf-8'><title>Link Extractor</title><style>"+css+"</style>";
    html+="<div class='top'>"+
          "<button id='b1'>Copy all table</button>"+
          "<button id='b2'>Copy all URLs + anchors</button>"+
          "<button id='b3'>Copy all URLs</button>"+
          "<a class='btn' href='https://spreadsheets.new' target='_blank' rel='noopener'>Open a new Google Sheet</a>"+
          "<input id='f' placeholder='Filter' />"+
          "<span class='meta' id='c'></span>"+
          "</div>";
    html+="<div class='wrap'>"+
          "<div class='small' id='src'></div>"+
          "<div class='small' id='st'>Rendering...</div>"+
          "<table id='t'><thead><tr>"+
          "<th>#</th><th>Title</th><th>Anchor</th><th>URL</th><th>Domain</th><th>Path</th><th>URL params</th>"+
          "</tr></thead><tbody></tbody></table>"+
          "</div>";

    /* Source text of the script that runs inside the results tab. It reads window.__rows / window.__src, renders the table in chunks of 700 rows per timer tick, applies the filter 120 ms after the last keystroke, and copies TSV or plain URL lists through navigator.clipboard with an execCommand fallback. All cell values pass through esc() before being inserted as HTML. */
    var scr="(function(){"+
      "var rows=window.__rows||[];"+
      "var src=window.__src||'';"+
      "var tbody=document.querySelector('#t tbody');"+
      "var f=document.getElementById('f');"+
      "var c=document.getElementById('c');"+
      "var st=document.getElementById('st');"+
      "var b1=document.getElementById('b1');"+
      "var b2=document.getElementById('b2');"+
      "var b3=document.getElementById('b3');"+
      "document.getElementById('src').textContent='Source: '+src;"+
      "var q=''; var tmr=0;"+
      "function esc(s){s=String(s||'');return s.replace(/&/g,'&amp;').replace(/</g,'&lt;').replace(/>/g,'&gt;').replace(/\"/g,'&quot;').replace(/\\'/g,'&#39;');}"+
      "function copyText(s){function fb(){var ta=document.createElement('textarea');ta.value=s;ta.style.position='fixed';ta.style.left='-9999px';document.body.appendChild(ta);ta.focus();ta.select();try{document.execCommand('copy')}catch(e){}document.body.removeChild(ta)}"+
        "if(navigator.clipboard&&navigator.clipboard.writeText){navigator.clipboard.writeText(s).catch(fb)}else{fb()}}"+
      "function isFiltered(){return !!(q&&q.length);}"+
      "function setButtons(){var m=isFiltered()?'filtered':'all';b1.textContent='Copy '+m+' table';b2.textContent='Copy '+m+' URLs + anchors';b3.textContent='Copy '+m+' URLs';}"+
      "function subset(){if(!isFiltered()) return rows; var out=[]; for(var i=0;i<rows.length;i++){if(rows[i].s.indexOf(q)!==-1) out.push(rows[i]);} return out;}"+
      "function setCount(rendered){var sub=isFiltered()?subset().length:rows.length; c.textContent=rows.length+' links found, '+sub+' '+(isFiltered()?'filtered':'shown')+', '+rendered+' rendered';}"+
      "function tsvTable(list){var out='id\\ttitle\\tanchor\\turl\\tdomain\\tpath\\tparams\\n'; for(var i=0;i<list.length;i++){var r=list[i]; out+=[r.id,r.title,r.anchor,r.url,r.domain,r.path,r.params].join('\\t')+'\\n';} return out;}"+
      "function tsvUA(list){var out='url\\tanchor\\n'; for(var i=0;i<list.length;i++){var r=list[i]; out+=r.url+'\\t'+(r.anchor||'')+'\\n';} return out;}"+
      "function urlsOnly(list){var out=''; for(var i=0;i<list.length;i++){out+=list[i].url+'\\n';} return out;}"+
      "b1.onclick=function(){copyText(tsvTable(subset()));};"+
      "b2.onclick=function(){copyText(tsvUA(subset()));};"+
      "b3.onclick=function(){copyText(urlsOnly(subset()));};"+
      "function applyFilterToRendered(){var trs=tbody.querySelectorAll('tr'); var shown=0; for(var i=0;i<trs.length;i++){var s=trs[i].getAttribute('data-s')||''; var ok=!isFiltered()||s.indexOf(q)!==-1; trs[i].style.display=ok?'':'none'; if(ok) shown++;} st.textContent=isFiltered()?('Filter matches: '+shown):'Done.';}"+
      "f.oninput=function(e){q=(e.target.value||'').toLowerCase().trim(); setButtons(); if(tmr) clearTimeout(tmr); tmr=setTimeout(function(){applyFilterToRendered(); setCount(rows.length);},120);};"+
      "setButtons();"+
      "function renderAllChunked(){tbody.innerHTML=''; st.textContent='Rendering...'; var i=0; function step(){var end=Math.min(i+700,rows.length); var h=''; for(; i<end; i++){var r=rows[i]; var hide=isFiltered()&&r.s.indexOf(q)===-1;"+
        "h+='<tr data-s=\"'+esc(r.s)+'\"'+(hide?' style=\"display:none\"':'')+'>'"+
          "+'<td class=muted>'+r.id+'</td>'"+
          "+'<td>'+esc(r.title)+'</td>'"+
          "+'<td class=muted>'+esc(r.anchor)+'</td>'"+
          "+'<td class=url>'+(r.ul?('<a target=_blank rel=noopener href=\"'+esc(r.ul)+'\">'+esc(r.ut)+'</a>'):esc(r.ut))+'</td>'"+
          "+'<td>'+esc(r.domain)+'</td>'"+
          "+'<td class=url>'+esc(r.path)+'</td>'"+
          "+'<td class=url>'+esc(r.params)+'</td>'"+
        "+'</tr>'; }"+
        "tbody.insertAdjacentHTML('beforeend',h); setCount(Math.min(i,rows.length)); if(i<rows.length){setTimeout(step,0)} else {st.textContent='Done.'; applyFilterToRendered();}} step();}"+
      "renderAllChunked();"+
    "})();";

    /* Replace the placeholder page with the real one. The script tags are assembled from pieces, presumably so the literal tag text never appears in this source. */
    w.document.open();
    w.document.write(html+"<scr"+"ipt>"+scr+"<\/scr"+"ipt>");
    w.document.close();

  }catch(err){
    /* The message is written into the page as HTML, so escape markup characters first. */
    var errText=String(err).replace(/&/g,"&amp;").replace(/</g,"&lt;").replace(/>/g,"&gt;");
    w.document.open();
    w.document.write("<!doctype html><meta charset=utf-8><title>Link Extractor Error</title><body style=\"font-family:Tahoma,Segoe UI,Arial,sans-serif;font-size:12px;margin:12px\"><b>Error:</b><pre>"+errText+"</pre></body>");
    w.document.close();
  }
})();

What you do is add a new bookmark in Chrome and paste it where the link goes, then click on it while on the page you want to extract the links from.
Here's an example for the extraction of links from the "start a new post" inside the Tools at the Forge section:

Cheers, and if you find a bug or anything, or if you have an idea to make it more useful, I'd be happy to know about it.
 
Last edited:
Back
Top