How-To: Extract PHP-Nuke Articles from an SQL INSERT Statement in Python

I found an old SQL dump of this r3dux.org site yesterday which I thought was lost to time. The site ran php-nuke back then, and rather than importing the SQL into a database and fiddling with it to export all the old articles I thought I’d take the python hammer to it. It’s a bit of a bodge/hack – but it seems to work well enough.

The idea is you copy JUST the insert statement which deals with “stories” (nuke’s word for articles / posts) from the SQL dump and put it in a file (mine’s called “nuke_stories.sql”), then run the parse_nuke_stories python script and it’ll dump out all the articles into separate files with filenames like “Article_ID_YEAR_MONTH_DAY.html” and such.

Here’s the top few lines of my SQL insert statement:

INSERT INTO `nuke_stories` (`sid`, `catid`, `aid`, `title`, `time`, `hometext`, `bodytext`, `comments`, `counter`, `topic`, `informant`, `notes`, `ihome`, `alanguage`, `acomm`, `haspoll`, `pollID`, `score`, `ratings`, `associated`) VALUES
(1, 0, 'r3duxAdmin', 'W00t! The site it be a-changin''...', '2004-09-10 12:35:06', 'Well, PHP-Nuke seems to be up and running, so now it''s time to mess w/ it in a huge way and configure all the millions of options needed to make it just the way I want.\r\n

\r\nDaunted? Me? Er, yeah. You could say that...', '', 0, 83, 1, 'r3duxAdmin', '', 1, '', 0, 0, 0, 0, 0, ''),

Here’s what it’ll dump out into a file called Article_001_2004_09_10.html if this was all that was there:

Article ID : 1
Author : r3duxAdmin
Title : W00t! The site it be a-changin’…
Date : 2004-09-10 12:35:06

Well, PHP-Nuke seems to be up and running, so now it’s time to mess w/ it in a huge way and configure all the millions of options needed to make it just the way I want.

Daunted? Me? Er, yeah. You could say that…

And here’s the actual python (version 3) code to parse the SQL file:

# Function to find a single apostrophe that closes the field and return its location
def find_end_apostrophe(location, story):
    while True:
        # Move along
        location += 1
 
        # Grab two chars
        char1 = story[location:location+1]
        char2 = story[location+1:location+2]
 
        # If they're both apostophes then skip ahead 2 chars
        if char1 == "'" and char2 == "'":
            location += 2
        # An apostrophe and something else? It's the end apostrophe so bail!
        elif char1 == "'" and char2 != "'":
            break
 
    return location
 
# Function to parse a story into separate fields and place them into the parsed_story list
def parse_story(story):
 
    parsed_story = []
 
    # Fields:
    # 0 = `sid`, 1 = `catid`, 2 = `aid`, 3 = `title`, 4 = `time`, 5 = `hometext`, `6 = bodytext`, 7 = `comments`,
    #`8 = `counter`, 9 = `topic`, 10 = `informant`, 11 = `notes`, 12 = `ihome`, 13 = `alanguage`, 14 = `acomm`,
    # 15 = `haspoll`, 16 = `pollID`, 17 = `score`, 18 = `ratings`, 19 = `associated`
 
    # 0 - Add story ID
    temp_end = story.find(",")
    field = story[1:temp_end]
    parsed_story.append(field)
    #print("ID:", field)
 
    # 1 - Add category ID
    temp_start = temp_end + 1
    temp_end = story.find(",", temp_start)
    field = story[temp_start:temp_end]
    parsed_story.append(field)
    #print("Category:", field)
 
    # 2 - Add author
    temp_start = temp_end + 3
    temp_end = story.find("'", temp_start)
    field = story[temp_start:temp_end]
    parsed_story.append(field)
    #print("Author:", field)
 
    # 3 - Add title
    temp_start = temp_end + 4
    temp_end   = find_end_apostrophe(temp_start, story)    
    #print("Title locs:", temp_start, temp_end)
    field = story[temp_start:temp_end].replace("''", "'")
    parsed_story.append(field)
    #print("Title:", field)
 
    # 4- Add time
    temp_start = temp_end + 4
    temp_end = story.find("'", temp_start)
    #print("Time locs:", temp_start, temp_end)
    field = story[temp_start:temp_end]
    parsed_story.append(field) # initial_story_split[4][2:-1])
    #print("Date:", field)
 
    # 5 - Add home text (i.e. the first paragraph)
    temp_start = temp_end + 4
    temp_end   = find_end_apostrophe(temp_start, story)
    #print("Home locs:", temp_start, temp_end)
    field = story[temp_start:temp_end].replace("''", "'").replace("<br><br>\\r\\n", "<br/><br/>").replace("\\r\\n", " ").replace("  ", "")
    parsed_story.append(field)
    #print("Home:", field)        
 
    # 6 = Add body text
    temp_start = temp_end + 3
    temp_end   = find_end_apostrophe(temp_start, story)
    #print("Body locs:", temp_start, temp_end)
    field = story[temp_start:temp_end].replace("''", "'").replace("<br><br>\\r\\n", "<br/><br/>").replace("\\r\\n", " ").replace("  ", "") # Last one is 2 'A with a hat' chars used for bullet points
 
    # If there is no body blank the field, otherwise strip the first char
    if field == "'":
        field = ""
    else:
        field = field[1:]        
 
    parsed_story.append(field)
    #print("Body:", field)
 
    # Not bothered about the rest of the fields
 
    return parsed_story
 
# ---------------------- Main --------------------------
 
#import codecs
#sql_file = codecs.open("nuke_stories.sql", encoding="utf-8")
 
# Read the SQL dump of stories into a list of lines where each line is a story
sql_file = open("nuke_stories.sql", "r")
story_list = sql_file.readlines();
sql_file.close()
 
num_stories = len(story_list)
print("Number of stories: ", num_stories, "\n")
 
# Newline char
nl = "<br/>"
 
outfile = 0
for story_number in range(1, num_stories - 1): # Start from 1 because line 0 is the insert statement itself
 
    # Parse the story (i.e. article)
    parsed_story = parse_story( story_list[story_number] )
 
    # Grab the first section of the date only. Format: YYYY-MM-DD
    date_str = parsed_story[4][:parsed_story[4].find(" ")].replace("-", "_")
 
    # Construct the filename then create the file for writing
    filename = "Article_" + parsed_story[0].zfill(3) + "_" + date_str + ".html"
    outfile = open(filename, "w")
 
    # Dump the parsed story details to file
    outfile.write("Article ID : " + parsed_story[0] + nl)
    # Skipping Category ID - don't care.
    outfile.write("Author     : " + parsed_story[2] + nl)
    outfile.write("Title      : " + parsed_story[3] + nl)
    outfile.write("Date       : " + parsed_story[4] + nl + nl)
    outfile.write(parsed_story[5] + nl + nl) # Home text (i.e. first paragraph)
    outfile.write(parsed_story[6])           # Body text (i.e. rest of article)
 
    # Close the file to save it
    outfile.close()
 
print("All done! =D")

As mentioned, it’s a complete hack – but if you need to do something similar it may work as a starter to get you going…

How To: Convert Day Number to Month and Day

I needed to convert the day number, that is, what day it is between 1 and 365, into a month and day without using Calendar / DateTime classes and such today, so I knocked up a few simple functions that work assuming it’s not a leap year (i.e. that February has 28 days).

Here they are if they’re of any use to you:

int dateToDayNumber(int month, int day)
{
	// Catch invalid input and return early
	if (month < 1 || month > 12 || day < 1 || day > 31) return 0;
 
	if (month == 1 ) return       day;
	if (month == 2 ) return 31  + day;
	if (month == 3 ) return 59  + day;
	if (month == 4 ) return 90  + day;
	if (month == 5 ) return 120 + day;
	if (month == 6 ) return 151 + day;
	if (month == 7 ) return 181 + day;
	if (month == 8 ) return 212 + day;
	if (month == 9 ) return 243 + day;
	if (month == 10) return 273 + day;
	if (month == 11) return 304 + day;
	return 334 + day;
}
 
int dayNumberToMonth(int dayNumber)
{
	// Catch invalid input and return early
	if (dayNumber < 1 || dayNumber > 365) return 0;
 
	if (dayNumber <= 31 ) return 1;  // Jan
	if (dayNumber <= 59 ) return 2;  // Feb
	if (dayNumber <= 90 ) return 3;  // Mar
	if (dayNumber <= 120) return 4;  // Apr
	if (dayNumber <= 151) return 5;  // May
	if (dayNumber <= 181) return 6;  // Jun
	if (dayNumber <= 212) return 7;  // Jul
	if (dayNumber <= 243) return 8;  // Aug
	if (dayNumber <= 273) return 9;  // Sep
	if (dayNumber <= 304) return 10; // Oct
	if (dayNumber <= 334) return 11; // Nov
	return 12;                       // Dec
}
 
int dayNumberToDayOfMonth(int dayNumber)
{
	// Catch invalid input and return early
	if (dayNumber < 1 || dayNumber > 365) return 0;
 
	if (dayNumber <= 31 ) return dayNumber;       // Jan
	if (dayNumber <= 59 ) return dayNumber - 31;  // Feb
	if (dayNumber <= 90 ) return dayNumber - 59;  // Mar
	if (dayNumber <= 120) return dayNumber - 90;  // Apr
	if (dayNumber <= 151) return dayNumber - 120; // May
	if (dayNumber <= 181) return dayNumber - 151; // Jun
	if (dayNumber <= 212) return dayNumber - 181; // Jul
	if (dayNumber <= 243) return dayNumber - 212; // Aug
	if (dayNumber <= 273) return dayNumber - 243; // Sep
	if (dayNumber <= 304) return dayNumber - 273; // Oct
	if (dayNumber <= 334) return dayNumber - 304; // Nov
	return dayNumber - 334;                       // Dec
}

A touch cheap, but it gets the job done.

How To: Identify which pointer moved in Android

Heads up: When I say pointer in this article, I mean a finger or a stylus – anything in contact with the touchscreen.

Multi-touch in Android is a bit of a strange beast, although you could argue that it’s by necessity. With ACTION_DOWN (primary pointer in contact) or ACTION_POINTER_DOWN (non-primary pointer in contact), or even the corresponding ACTION_UP or ACTION_POINTER_UP events it’s fine. But when it comes to ACTION_MOVE events – you can’t just ask which finger moved, all you’re told is that A finger moved, and you have to figure out which one for yourself as far as I can tell.

This is what I’ve come up with to figure out which pointer has actually moved – it falls behind by one ACTION_MOVE event because there doesn’t seem to be a way to ask for the current X location of a pointer by its index location (that is, there’s no getX(pointerIndex), only a getHistoricalX(pointerIndex, historyPosition)) – so this code misses the very first move event when a pointer moves – BUT – moving your finger generates a large number of move events, so I’m thinking this might not be that much of an issue.

Update: I’d forgotten you can getX(pointerIndex) and as well as getHistoricalX(pointerIndex, historyLocation) – so I’ve modified the code below to do that, which means I don’t think we miss any move events anymore because we’re now comparing current location to historical(0) rather than historical(0) to historical(1).

Anyway, here’s my code which addresses this finicky problem:

@Override
public boolean onTouchEvent(MotionEvent event)
{
    // Get pointer index from the event object...
    int pointerIndex = event.getActionIndex();
 
    // ...which we can use to find the pointer ID!
    int pointerId = event.getPointerId(pointerIndex);
 
    // Get masked action (i.e. action which is not specific to a pointer)
    int maskedAction = event.getActionMasked();
 
    // Depending on what the action was, act appropriately...
    switch (maskedAction)
    {
        case MotionEvent.ACTION_DOWN:
            System.out.println("Pointer with Id: " + pointerId + " at index " + pointerIndex + " down i.e. Primary pointer down!");
            break;
 
        case MotionEvent.ACTION_POINTER_DOWN:
            System.out.println("Pointer with Id: " + pointerId + " at index " + pointerIndex + " down i.e. NON-Primary pointer down!");
            break;
 
        // The MOVE event must be done separately. This is because the ACTION_MOVE event always gets
        // zero as the pointerId for some strange reason. As such, you always have to loop over the
        // pointer indexes and compare positions to the pointer's previous (i.e. historical) position.
        case MotionEvent.ACTION_MOVE:
            int pointerCount = event.getPointerCount();
            for(int i = 0; i < pointerCount; ++i)
            {
                // To find out WHICH pointer moved we must compare pointer historical locations
                if (event.getHistorySize() > 0)
                {
                    // X or Y location for that pointer index moved?
                    // Corner-case: Pointer index changed (pointer up or down promoted or demoted pointer index while moving?) 
                    // Fix: Track by pointer Id via sparse array as outlined in second potential solution below.
                    if ( (int)event.getX(i) != (int)event.getHistoricalX(i,0) || (int)event.getY(i) != (int)event.getHistoricalY(i, 0) )
                    {
                        pointerId = event.getPointerId(i);
                        System.out.println("Pointer with Id: " + pointerId + " at index " + i + " moved!");
                    }
                }
            }
            break;
 
        case MotionEvent.ACTION_UP:
            System.out.println("Pointer with Id: " + pointerId + " at index " + pointerIndex + " up i.e. Primary pointer up!");
            break;
 
        case MotionEvent.ACTION_POINTER_UP:
            System.out.println("Pointer with Id: " + pointerId + " at index " + pointerIndex + " up i.e. NON-Primary pointer up!");
            break;
    }
 
    // Consume the event so that it is not processed any further
    return true;
 
} // End of onTouchEvent method

As I’ve been playing around with this it seems to match up with what I’m doing very well without issue, for example:

Pointer with Id: 0 at index 0 down i.e. Primary pointer down!      <--- 1st finger down
Pointer with Id: 0 at index 0 moved!                               <--- 1st finger moved
Pointer with Id: 0 at index 0 moved!
...
Pointer with Id: 1 at index 1 down i.e. NON-Primary pointer down!  <--- 2nd finger down
Pointer with Id: 1 at index 1 moved!                               <--- 2nd finger moved
Pointer with Id: 1 at index 1 moved!
...
Pointer with Id: 0 at index 0 up i.e. NON-Primary pointer up!      <--- 1st finger up, promoting 2nd finger to primary!
Pointer with Id: 1 at index 0 moved!                               <--- 2nd finger (now primary) moved
Pointer with Id: 1 at index 0 moved!
...
Pointer with Id: 0 at index 0 down i.e. NON-Primary pointer down!  <--- 1st finger back down, DEMOTING 2nd finger from primary!
Pointer with Id: 0 at index 0 moved!                               <--- 1st finger moved
...
Pointer with Id: 1 at index 1 up i.e. NON-Primary pointer up!      <--- 2nd finger up
Pointer with Id: 0 at index 0 up i.e. Primary pointer up!          <--- 1st finger up

Alternative Solution

Keep a sparse array of whatever you're keeping track of - for example, let's say we've got the world's simplest Circle class:

public class Circle
{
	public float x;
	public float y;
}

Then in your class handling the onTouchEvent() method you could have something like this (in this particular example we have a view which responds to multi-touch events):

// imports here...
 
public class MultiTouchCircleView extends View
{
    private static final int SIZE = 150;
 
    // Note: A SparseArray is kind-of like a hash-map, it maps an
    // integer to an object in a key/value manner.
    private SparseArray<Circle> circleArray;
 
    private Paint paint;
 
    // Array of 10 colours
    private int[] colours = {Color.BLUE, Color.GREEN, Color.RED,
            Color.YELLOW, Color.CYAN, Color.GRAY, Color.MAGENTA, Color.DKGRAY,
            Color.LTGRAY, Color.YELLOW};
 
    private Paint textPaint;
 
    public MultiTouchCircleView(Context context, AttributeSet attrs)
    {
        super(context, attrs);
        initView();
    }
 
    private void initView()
    {
        circleArray = new SparseArray<Circle>();
        paint = new Paint(Paint.ANTI_ALIAS_FLAG);
 
        // set painter color to a color you like
        paint.setColor(Color.BLUE);
        paint.setStyle(Paint.Style.FILL_AND_STROKE);
        textPaint = new Paint(Paint.ANTI_ALIAS_FLAG);
        textPaint.setTextSize(50);
        textPaint.setColor(Color.BLUE);
    }
 
    @Override
    public boolean onTouchEvent(MotionEvent event) {
        // Get pointer index from the event object
        int pointerIndex = event.getActionIndex();
 
        // Get pointer ID
        int pointerId = event.getPointerId(pointerIndex);
 
        // Get masked (not specific to a pointer) action
        int maskedAction = event.getActionMasked();
 
        switch (maskedAction) {
 
            case MotionEvent.ACTION_DOWN:
            case MotionEvent.ACTION_POINTER_DOWN: {
                // A finger has touched the screen - so let's create a new
                // Circle at the touch location and add it to the list!
                Circle circle = new Circle();
                circle.x = event.getX(pointerIndex);
                circle.y = event.getY(pointerIndex);
                circleArray.put(pointerId, circle);
                break;
            }
 
            case MotionEvent.ACTION_MOVE: {
                // How many pointers are in contact with the screen?
                int pointerCount = event.getPointerCount();
 
                // Loop over them all...
                for (int i = 0; i < pointerCount; ++i) {
                    // i is the pointer index but we'll update our pointerIndex variable for clarity
                    pointerIndex = i;
 
                    // Get the pointerId at that pointerIndex (pointerId never changes while pointer is in contact)
                    pointerId = event.getPointerId(pointerIndex);
 
                    // Get access to the circle with that pointer id...
                    Circle circle = circleArray.get(pointerId);
 
                    // ...and update the circle's location.
                    circle.x = event.getX(pointerIndex);
                    circle.y = event.getY(pointerIndex);
                }
                break;
            }
 
            case MotionEvent.ACTION_UP:
            case MotionEvent.ACTION_POINTER_UP: {
                // Finger has left contact - remove the circle from the array
                circleArray.remove(pointerId);
                break;
            }
 
        } // End of switch block
 
        // Trigger redraw to display any changes
        invalidate();
 
        // Return true to consume the MotionEvent
        return true;
    }
 
    @Override
    protected void onDraw(Canvas canvas)
    {
        super.onDraw(canvas);
 
        // Draw circles for each pointer in contact with the screen
        int numCircles = circleArray.size();
        for (int i = 0; i < numCircles; ++i)
        {
            // Get the circle at index location i
            Circle circle = circleArray.valueAt(i);
 
            // ...to choose a colour between 0 and 9.
            if (circle != null)
            {
                int colourNum = i % 9;
                paint.setColor(colours[colourNum]);
                paint.setColor(colours[i]);
            }
 
            // Draw a circle in our chosen colour
            canvas.drawCircle(circle.x, circle.y, SIZE, paint);
        }
        canvas.drawText("Total pointers: " + numCircles, 10, 100, textPaint);
 
    } // End of onDraw method
 
} // End of MultiTouchCircleView class

This technique doesn't miss any ACTION_MOVE events because it looks them up by pointerId in the SparseArray - which is potentially a nicer solution. Go with whatever works for you.