How to get the Alexa rank with Python (XML parsing) and save it to CSV using pandas


How to get alexa rank

xml demo

# Imported here so the snippet runs on its own; without it the call to
# ElementTree.fromstring below fails with NameError.
from xml.etree import ElementTree

# Sample Alexa XML document used to illustrate the element layout.
content = """
<ALEXA VER="0.9" URL="kezunlin.me/" HOME="0" AID="=" IDN="kezunlin.me/">
    <RLS PREFIX="http://" more="0"> </RLS>
    <SD TITLE="A" FLAGS="" HOST="kezunlin.me"> </SD>
    <SD>
        <POPULARITY URL="kezunlin.me/" TEXT="2489312" SOURCE="panel"/>
        <REACH RANK="2141483"/>
        <RANK DELTA="-1406486"/>
    </SD>
</ALEXA>
"""

# Parse the document; each element exposes its name as `.tag` and its
# XML attributes as the `.attrib` dict.
root = ElementTree.fromstring(content)

# Attributes of the root <ALEXA> element, printed as name="value".
for name, value in root.attrib.items():
    print('{0}="{1}"'.format(name, value))

# Direct children of the root: their tag names and attribute dicts.
for child in root:
    print(child.tag, child.attrib)

pandas demo

def demo_create_dataframe_1():
    """Build a DataFrame from a list of rows and print it.

    Each inner list is one row; the column labels are passed separately.
    """
    rows = [['tom', 10], ['nick', 15], ['juli', 14]]
    column_names = ['Name', 'Age']
    print(pd.DataFrame(rows, columns=column_names))

def demo_create_dataframe_2():
    """Build a DataFrame from a dict of columns and print it.

    Each dict key becomes a column label and its list holds that
    column's values.
    """
    columns = {
        'Name': ['Tom', 'nick', 'krish', 'jack'],
        'Age': [20, 21, 19, 18],
    }
    frame = pd.DataFrame(columns)
    print(frame)

def demo_append_dataframe():
    """Print a DataFrame, then print it with a second one appended.

    The rows of the second frame are added below those of the first and
    the combined frame is renumbered 0..n-1.
    """
    # First DataFrame, built from a dict of columns.
    df1 = pd.DataFrame({"a": [1, 2, 3, 4],
                        "b": [5, 6, 7, 8]})

    # Second DataFrame with the same columns.
    df2 = pd.DataFrame({"a": [1, 2, 3],
                        "b": [5, 6, 7]})

    print(df1, "\n")

    # DataFrame.append was removed in pandas 2.0; pd.concat is the
    # replacement. ignore_index=True rebuilds the index instead of
    # keeping the duplicate 0..2 labels from df2.
    new_df = pd.concat([df1, df2], ignore_index=True)
    print(new_df)

def demo_pandas_index(csv_path='data.csv'):
    """Show how the index column behaves on a CSV round trip.

    Args:
        csv_path: File the demo writes to and reads back from.

    Case 1 writes without the index, so the file reads back unchanged.
    Case 2 writes with the index, which comes back as an extra column
    that has to be dropped.
    """
    data = {'Name': ['Tom', 'nick', 'krish', 'jack'], 'Age': [20, 21, 19, 18]}
    df = pd.DataFrame(data)

    # case 1: no index column in the file; read back the same file that
    # was just written.
    df.to_csv(csv_path, index=False)
    df = pd.read_csv(csv_path)

    # case 2: index=True (the default) writes the index as a first column
    # with an empty header, which read_csv labels 'Unnamed: 0'.
    df.to_csv(csv_path, index=True)
    df_new = pd.read_csv(csv_path).drop(['Unnamed: 0'], axis=1)

get rank and save to csv

#!/usr/bin/python
# -*- coding: UTF-8 -*-

import os 
import datetime
import pandas as pd
import requests
from xml.etree import ElementTree

def get_alexa_rank(site="kezunlin.me"):
    """Fetch the Alexa popularity rank of a site.

    Args:
        site: Domain to look up; inserted into the query URL as given.

    Returns:
        The integer held in the TEXT attribute of the first <POPULARITY>
        element of the response, or 0 when the HTTP status is not 200 or
        the response has no such element or attribute.
    """
    # NOTE(review): the Alexa service was reportedly retired in 2022 —
    # confirm this endpoint still responds before relying on it.
    url = "http://data.alexa.com/data?cli=10&dat=snbamz&url={}".format(site)

    # A timeout keeps the script from blocking forever on a dead endpoint.
    r = requests.get(url, timeout=10)
    if r.status_code != 200:
        return 0

    root = ElementTree.fromstring(r.content)

    # Find the element by tag instead of by position (root[2][0]): a
    # response without the second <SD> block would raise IndexError.
    popularity = root.find(".//POPULARITY")
    if popularity is None:
        return 0

    text = popularity.attrib.get("TEXT")
    return int(text) if text else 0

"""
<ALEXA VER="0.9" URL="kezunlin.me/" HOME="0" AID="=" IDN="kezunlin.me/">
    <RLS PREFIX="http://" more="0"> </RLS>
    <SD TITLE="A" FLAGS="" HOST="kezunlin.me"> </SD>
    <SD>
        <POPULARITY URL="kezunlin.me/" TEXT="2489312" SOURCE="panel"/>
        <REACH RANK="2141483"/>
        <RANK DELTA="-1406486"/>
    </SD>
</ALEXA>
"""

def save_to_csv(date_str, rank, csv_filepath='rank.csv'):
    """Append one (date, rank) row to a CSV file, creating it if needed.

    Args:
        date_str: Value stored in the 'Date' column.
        rank: Value stored in the 'Rank' column.
        csv_filepath: CSV file to update; rewritten in full on each call.
    """
    new_row = pd.DataFrame({'Date': [date_str], 'Rank': [rank]})

    # On the first run the file does not exist yet (or may be empty);
    # start a fresh table instead of crashing in read_csv.
    try:
        existing = pd.read_csv(csv_filepath)
    except (FileNotFoundError, pd.errors.EmptyDataError):
        existing = None

    if existing is None or existing.empty:
        df = new_row
    else:
        # DataFrame.append was removed in pandas 2.0; pd.concat is the
        # replacement. ignore_index=True renumbers the combined rows.
        df = pd.concat([existing, new_row], ignore_index=True)

    # Write without the index so the file keeps just the data columns.
    df.to_csv(csv_filepath, index=False, sep=',', encoding='utf-8')
    print("Appending to {}".format(csv_filepath))

def main():
    """Look up the current rank of kezunlin.me and log it with today's date."""
    # Fetch the rank first, then stamp it with the local date.
    rank = get_alexa_rank("kezunlin.me")
    date_str = datetime.datetime.now().strftime('%Y-%m-%d')

    summary = "date = {} , rank = {}".format(date_str, rank)
    print(summary)

    # Append the new (date, rank) row to the default CSV file.
    save_to_csv(date_str, rank)


if __name__ == "__main__":
    main()

output

date = 2019-12-16 , rank = 2486318
Appending to rank.csv

view results

$ cat rank.csv 
Date,Rank
2019-12-13,2489312
2019-12-16,2486318

Reference

History

  • 2019/12/16: created.

Author: kezunlin
Reprint policy: Unless otherwise stated, all articles in this blog are licensed under the CC BY 4.0 reprint policy. If you reproduce an article, please credit the source: kezunlin.
评论
  TOC