#!/bin/bash
# SCRIPT NAME: mail_id_extractor_from_url.sh
# DESCRIPTION: Takes a list of seed URLs, collects every sub URL and referenced URL found on each seed page, then visits each collected URL and extracts the mail ids it contains.
# USAGE: bash mail_id_extractor_from_url.sh website_list.txt
# website_list.txt file contains all seed URLs to extract mail ids.
#AUTHOR: Reniguntla S
# ---------------------------------------------------------------------------
# Phase 1: dump every seed page with lynx and collect the http(s) links it
#          references into sub_urllist.txt (one unique URL per line).
# Phase 2: download every collected URL with curl, pull out anything that
#          looks like a mail id, and append the unique ids found during this
#          run to extracted_mail_list.txt.
#
# Arguments: $1 - readable text file with one seed URL per line.
# Outputs:   sub_urllist.txt (overwritten), extracted_mail_list.txt (appended).
# Exit code: 0 on completion, 1 if no scratch file can be created,
#            2 on a missing or unreadable seed list.
# ---------------------------------------------------------------------------
filename="${1:-}"
if [[ -z "$filename" || ! -r "$filename" ]]; then
  printf 'Usage: bash %s website_list.txt\n' "${0##*/}" >&2
  exit 2
fi

# Scratch file for the raw (not yet de-duplicated) mail ids. A private mktemp
# file removed on every exit path means leftovers from an interrupted run can
# never leak into the next one.
mail_scratch=$(mktemp) || {
  echo "Cannot create a temporary file" >&2
  exit 1
}
trap 'rm -f -- "$mail_scratch"' EXIT

# Phase 1.
# - read (without IFS=) trims surrounding blanks from each seed URL; the
#   "|| [[ -n ... ]]" clause keeps a final line that lacks a trailing newline.
# - lynx gets /dev/null as stdin so it can never consume the seed list.
# - lynx prints references as "  N. URL", so field 2 is the link; only fields
#   that really start with http:// or https:// are kept, so words from the
#   page body are not mistaken for URLs.
# - sort -u keeps ONE copy of every URL (uniq -u would discard every URL that
#   is referenced more than once).
while read -r seed_url || [[ -n "$seed_url" ]]; do
  [[ -n "$seed_url" ]] || continue
  lynx -dump "$seed_url" </dev/null
done < "$filename" \
  | awk '$2 ~ /^https?:\/\// {print $2}' \
  | sort -u > sub_urllist.txt
echo "Extraction of sub URLs of seed URLs done"

# Phase 2 runs only when mode is 1; any other value stops after the URL list
# has been built.
mode=1
if [[ "$mode" -eq 1 ]]; then
  echo "Be patient, It will take some time :-)"
  while read -r url; do
    [[ -n "$url" ]] || continue
    printf '%s\n' "$url"
    # Best effort: a failed download simply contributes no mail ids.
    curl "$url" </dev/null \
      | grep -E -o '\b[a-zA-Z0-9.-]+@[a-zA-Z0-9.-]+\.[a-zA-Z0-9.-]+\b' \
      >> "$mail_scratch"
  done < sub_urllist.txt
fi

# Append this run's unique ids; results of earlier runs are preserved.
sort -u "$mail_scratch" >> extracted_mail_list.txt
echo "Email ids extraction done. Check extracted_mail_list.txt file"
exit 0
# End of script
Monday, March 14, 2016
Shell Script to Extract Email ids from Website URL
Posted by
umencs
Extracting e-mail ids from websites for publicity purposes is always tedious work, so I created a shell script that automates the extraction of mail ids from URLs.
Subscribe to:
Post Comments (Atom)
Nice work sir!
ReplyDeleteThank You Abhishek :-)
Delete